1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-19 08:17:05 +01:00

Merge branch 'develop' of github.com:fionnoh/Grid into feature/A2A_current_insertion

Peter's GPU branch changes merged with A2A CI code
This commit is contained in:
Fionn O hOgain
2019-09-30 16:53:44 +01:00
785 changed files with 41312 additions and 51680 deletions

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -27,114 +27,112 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_BASE_H
#define GRID_QCD_BASE_H
namespace Grid{
namespace QCD {
*************************************************************************************/
/* END LEGAL */
#pragma once
static const int Xdir = 0;
static const int Ydir = 1;
static const int Zdir = 2;
static const int Tdir = 3;
NAMESPACE_BEGIN(Grid);
static const int Xp = 0;
static const int Yp = 1;
static const int Zp = 2;
static const int Tp = 3;
static const int Xm = 4;
static const int Ym = 5;
static const int Zm = 6;
static const int Tm = 7;
static constexpr int Xdir = 0;
static constexpr int Ydir = 1;
static constexpr int Zdir = 2;
static constexpr int Tdir = 3;
static const int Nc=3;
static const int Ns=4;
static const int Nd=4;
static const int Nhs=2; // half spinor
static const int Nds=8; // double stored gauge field
static const int Ngp=2; // gparity index range
static constexpr int Xp = 0;
static constexpr int Yp = 1;
static constexpr int Zp = 2;
static constexpr int Tp = 3;
static constexpr int Xm = 4;
static constexpr int Ym = 5;
static constexpr int Zm = 6;
static constexpr int Tm = 7;
//////////////////////////////////////////////////////////////////////////////
// QCD iMatrix types
// Index conventions: Lorentz x Spin x Colour
// note: static const int or constexpr will work for type deductions
// with the intel compiler (up to version 17)
//////////////////////////////////////////////////////////////////////////////
#define ColourIndex 2
#define SpinIndex 1
#define LorentzIndex 0
static constexpr int Nc=3;
static constexpr int Ns=4;
static constexpr int Nd=4;
static constexpr int Nhs=2; // half spinor
static constexpr int Nds=8; // double stored gauge field
static constexpr int Ngp=2; // gparity index range
// Also should make these a named enum type
static const int DaggerNo=0;
static const int DaggerYes=1;
static const int InverseNo=0;
static const int InverseYes=1;
//////////////////////////////////////////////////////////////////////////////
// QCD iMatrix types
// Index conventions: Lorentz x Spin x Colour
// note: static constexpr int or constexpr will work for type deductions
// with the intel compiler (up to version 17)
//////////////////////////////////////////////////////////////////////////////
#define ColourIndex (2)
#define SpinIndex (1)
#define LorentzIndex (0)
// Useful traits is this a spin index
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
// Also should make these a named enum type
static constexpr int DaggerNo=0;
static constexpr int DaggerYes=1;
static constexpr int InverseNo=0;
static constexpr int InverseYes=1;
const int SpinorIndex = 2;
template<typename T> struct isSpinor {
static const bool value = (SpinorIndex==T::TensorLevel);
};
template <typename T> using IfSpinor = Invoke<std::enable_if< isSpinor<T>::value,int> > ;
template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
// Useful traits is this a spin index
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
// ChrisK very keen to add extra space for Gparity doubling.
//
// Also add domain wall index, in a way where Wilson operator
// naturally distributes across the 5th dimensions.
//
// That probably makes for GridRedBlack4dCartesian grid.
const int SpinorIndex = 2;
template<typename T> struct isSpinor {
static constexpr bool value = (SpinorIndex==T::TensorLevel);
};
template <typename T> using IfSpinor = Invoke<std::enable_if< isSpinor<T>::value,int> > ;
template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
// s,sp,c,spc,lc
// ChrisK very keen to add extra space for Gparity doubling.
//
// Also add domain wall index, in a way where Wilson operator
// naturally distributes across the 5th dimensions.
//
// That probably makes for GridRedBlack4dCartesian grid.
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
// s,sp,c,spc,lc
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
template<typename vtype> using iSpinColourSpinColourMatrix = iScalar<iMatrix<iMatrix<iMatrix<iMatrix<vtype, Nc>, Ns>, Nc>, Ns> >;
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >;
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >;
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
// Spin matrix
typedef iSpinMatrix<Complex > SpinMatrix;
typedef iSpinMatrix<ComplexF > SpinMatrixF;
typedef iSpinMatrix<ComplexD > SpinMatrixD;
// Spin matrix
typedef iSpinMatrix<Complex > SpinMatrix;
typedef iSpinMatrix<ComplexF > SpinMatrixF;
typedef iSpinMatrix<ComplexD > SpinMatrixD;
typedef iSpinMatrix<vComplex > vSpinMatrix;
typedef iSpinMatrix<vComplexF> vSpinMatrixF;
typedef iSpinMatrix<vComplexD> vSpinMatrixD;
typedef iSpinMatrix<vComplex > vSpinMatrix;
typedef iSpinMatrix<vComplexF> vSpinMatrixF;
typedef iSpinMatrix<vComplexD> vSpinMatrixD;
// Colour Matrix
typedef iColourMatrix<Complex > ColourMatrix;
typedef iColourMatrix<ComplexF > ColourMatrixF;
typedef iColourMatrix<ComplexD > ColourMatrixD;
// Colour Matrix
typedef iColourMatrix<Complex > ColourMatrix;
typedef iColourMatrix<ComplexF > ColourMatrixF;
typedef iColourMatrix<ComplexD > ColourMatrixD;
typedef iColourMatrix<vComplex > vColourMatrix;
typedef iColourMatrix<vComplexF> vColourMatrixF;
typedef iColourMatrix<vComplexD> vColourMatrixD;
typedef iColourMatrix<vComplex > vColourMatrix;
typedef iColourMatrix<vComplexF> vColourMatrixF;
typedef iColourMatrix<vComplexD> vColourMatrixD;
// SpinColour matrix
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
typedef iSpinColourMatrix<ComplexF > SpinColourMatrixF;
typedef iSpinColourMatrix<ComplexD > SpinColourMatrixD;
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
typedef iSpinColourMatrix<vComplexF> vSpinColourMatrixF;
typedef iSpinColourMatrix<vComplexD> vSpinColourMatrixD;
// SpinColour matrix
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
typedef iSpinColourMatrix<ComplexF > SpinColourMatrixF;
typedef iSpinColourMatrix<ComplexD > SpinColourMatrixD;
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
typedef iSpinColourMatrix<vComplexF> vSpinColourMatrixF;
typedef iSpinColourMatrix<vComplexD> vSpinColourMatrixD;
// SpinColourSpinColour matrix
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
@ -153,383 +151,379 @@ namespace QCD {
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
// LorentzColour
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
typedef iLorentzColourMatrix<ComplexF > LorentzColourMatrixF;
typedef iLorentzColourMatrix<ComplexD > LorentzColourMatrixD;
// LorentzColour
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
typedef iLorentzColourMatrix<ComplexF > LorentzColourMatrixF;
typedef iLorentzColourMatrix<ComplexD > LorentzColourMatrixD;
typedef iLorentzColourMatrix<vComplex > vLorentzColourMatrix;
typedef iLorentzColourMatrix<vComplexF> vLorentzColourMatrixF;
typedef iLorentzColourMatrix<vComplexD> vLorentzColourMatrixD;
typedef iLorentzColourMatrix<vComplex > vLorentzColourMatrix;
typedef iLorentzColourMatrix<vComplexF> vLorentzColourMatrixF;
typedef iLorentzColourMatrix<vComplexD> vLorentzColourMatrixD;
// DoubleStored gauge field
typedef iDoubleStoredColourMatrix<Complex > DoubleStoredColourMatrix;
typedef iDoubleStoredColourMatrix<ComplexF > DoubleStoredColourMatrixF;
typedef iDoubleStoredColourMatrix<ComplexD > DoubleStoredColourMatrixD;
// DoubleStored gauge field
typedef iDoubleStoredColourMatrix<Complex > DoubleStoredColourMatrix;
typedef iDoubleStoredColourMatrix<ComplexF > DoubleStoredColourMatrixF;
typedef iDoubleStoredColourMatrix<ComplexD > DoubleStoredColourMatrixD;
typedef iDoubleStoredColourMatrix<vComplex > vDoubleStoredColourMatrix;
typedef iDoubleStoredColourMatrix<vComplexF> vDoubleStoredColourMatrixF;
typedef iDoubleStoredColourMatrix<vComplexD> vDoubleStoredColourMatrixD;
typedef iDoubleStoredColourMatrix<vComplex > vDoubleStoredColourMatrix;
typedef iDoubleStoredColourMatrix<vComplexF> vDoubleStoredColourMatrixF;
typedef iDoubleStoredColourMatrix<vComplexD> vDoubleStoredColourMatrixD;
// Spin vector
typedef iSpinVector<Complex > SpinVector;
typedef iSpinVector<ComplexF> SpinVectorF;
typedef iSpinVector<ComplexD> SpinVectorD;
// Spin vector
typedef iSpinVector<Complex > SpinVector;
typedef iSpinVector<ComplexF> SpinVectorF;
typedef iSpinVector<ComplexD> SpinVectorD;
typedef iSpinVector<vComplex > vSpinVector;
typedef iSpinVector<vComplexF> vSpinVectorF;
typedef iSpinVector<vComplexD> vSpinVectorD;
typedef iSpinVector<vComplex > vSpinVector;
typedef iSpinVector<vComplexF> vSpinVectorF;
typedef iSpinVector<vComplexD> vSpinVectorD;
// Colour vector
typedef iColourVector<Complex > ColourVector;
typedef iColourVector<ComplexF> ColourVectorF;
typedef iColourVector<ComplexD> ColourVectorD;
// Colour vector
typedef iColourVector<Complex > ColourVector;
typedef iColourVector<ComplexF> ColourVectorF;
typedef iColourVector<ComplexD> ColourVectorD;
typedef iColourVector<vComplex > vColourVector;
typedef iColourVector<vComplexF> vColourVectorF;
typedef iColourVector<vComplexD> vColourVectorD;
typedef iColourVector<vComplex > vColourVector;
typedef iColourVector<vComplexF> vColourVectorF;
typedef iColourVector<vComplexD> vColourVectorD;
// SpinColourVector
typedef iSpinColourVector<Complex > SpinColourVector;
typedef iSpinColourVector<ComplexF> SpinColourVectorF;
typedef iSpinColourVector<ComplexD> SpinColourVectorD;
// SpinColourVector
typedef iSpinColourVector<Complex > SpinColourVector;
typedef iSpinColourVector<ComplexF> SpinColourVectorF;
typedef iSpinColourVector<ComplexD> SpinColourVectorD;
typedef iSpinColourVector<vComplex > vSpinColourVector;
typedef iSpinColourVector<vComplexF> vSpinColourVectorF;
typedef iSpinColourVector<vComplexD> vSpinColourVectorD;
typedef iSpinColourVector<vComplex > vSpinColourVector;
typedef iSpinColourVector<vComplexF> vSpinColourVectorF;
typedef iSpinColourVector<vComplexD> vSpinColourVectorD;
// HalfSpin vector
typedef iHalfSpinVector<Complex > HalfSpinVector;
typedef iHalfSpinVector<ComplexF> HalfSpinVectorF;
typedef iHalfSpinVector<ComplexD> HalfSpinVectorD;
// HalfSpin vector
typedef iHalfSpinVector<Complex > HalfSpinVector;
typedef iHalfSpinVector<ComplexF> HalfSpinVectorF;
typedef iHalfSpinVector<ComplexD> HalfSpinVectorD;
typedef iHalfSpinVector<vComplex > vHalfSpinVector;
typedef iHalfSpinVector<vComplexF> vHalfSpinVectorF;
typedef iHalfSpinVector<vComplexD> vHalfSpinVectorD;
typedef iHalfSpinVector<vComplex > vHalfSpinVector;
typedef iHalfSpinVector<vComplexF> vHalfSpinVectorF;
typedef iHalfSpinVector<vComplexD> vHalfSpinVectorD;
// HalfSpinColour vector
typedef iHalfSpinColourVector<Complex > HalfSpinColourVector;
typedef iHalfSpinColourVector<ComplexF> HalfSpinColourVectorF;
typedef iHalfSpinColourVector<ComplexD> HalfSpinColourVectorD;
// HalfSpinColour vector
typedef iHalfSpinColourVector<Complex > HalfSpinColourVector;
typedef iHalfSpinColourVector<ComplexF> HalfSpinColourVectorF;
typedef iHalfSpinColourVector<ComplexD> HalfSpinColourVectorD;
typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector;
typedef iHalfSpinColourVector<vComplexF> vHalfSpinColourVectorF;
typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;
typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector;
typedef iHalfSpinColourVector<vComplexF> vHalfSpinColourVectorF;
typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;
// singlets
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<ComplexF> TComplexF; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<ComplexD> TComplexD; // FIXME This is painful. Tensor singlet complex type.
// singlets
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<ComplexF> TComplexF; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<ComplexD> TComplexD; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<vComplex > vTComplex ; // what if we don't know the tensor structure
typedef iSinglet<vComplexF> vTComplexF; // what if we don't know the tensor structure
typedef iSinglet<vComplexD> vTComplexD; // what if we don't know the tensor structure
typedef iSinglet<vComplex > vTComplex ; // what if we don't know the tensor structure
typedef iSinglet<vComplexF> vTComplexF; // what if we don't know the tensor structure
typedef iSinglet<vComplexD> vTComplexD; // what if we don't know the tensor structure
typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without?
typedef iSinglet<RealF> TRealF; // Shouldn't need these; can I make it work without?
typedef iSinglet<RealD> TRealD; // Shouldn't need these; can I make it work without?
typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without?
typedef iSinglet<RealF> TRealF; // Shouldn't need these; can I make it work without?
typedef iSinglet<RealD> TRealD; // Shouldn't need these; can I make it work without?
typedef iSinglet<vReal > vTReal;
typedef iSinglet<vRealF> vTRealF;
typedef iSinglet<vRealD> vTRealD;
typedef iSinglet<vReal > vTReal;
typedef iSinglet<vRealF> vTRealF;
typedef iSinglet<vRealD> vTRealD;
typedef iSinglet<vInteger> vTInteger;
typedef iSinglet<Integer > TInteger;
typedef iSinglet<vInteger> vTInteger;
typedef iSinglet<Integer > TInteger;
// Lattices of these
typedef Lattice<vColourMatrix> LatticeColourMatrix;
typedef Lattice<vColourMatrixF> LatticeColourMatrixF;
typedef Lattice<vColourMatrixD> LatticeColourMatrixD;
// Lattices of these
typedef Lattice<vColourMatrix> LatticeColourMatrix;
typedef Lattice<vColourMatrixF> LatticeColourMatrixF;
typedef Lattice<vColourMatrixD> LatticeColourMatrixD;
typedef Lattice<vSpinMatrix> LatticeSpinMatrix;
typedef Lattice<vSpinMatrixF> LatticeSpinMatrixF;
typedef Lattice<vSpinMatrixD> LatticeSpinMatrixD;
typedef Lattice<vSpinMatrix> LatticeSpinMatrix;
typedef Lattice<vSpinMatrixF> LatticeSpinMatrixF;
typedef Lattice<vSpinMatrixD> LatticeSpinMatrixD;
typedef Lattice<vSpinColourMatrix> LatticeSpinColourMatrix;
typedef Lattice<vSpinColourMatrixF> LatticeSpinColourMatrixF;
typedef Lattice<vSpinColourMatrixD> LatticeSpinColourMatrixD;
typedef Lattice<vSpinColourMatrix> LatticeSpinColourMatrix;
typedef Lattice<vSpinColourMatrixF> LatticeSpinColourMatrixF;
typedef Lattice<vSpinColourMatrixD> LatticeSpinColourMatrixD;
typedef Lattice<vSpinColourSpinColourMatrix> LatticeSpinColourSpinColourMatrix;
typedef Lattice<vSpinColourSpinColourMatrixF> LatticeSpinColourSpinColourMatrixF;
typedef Lattice<vSpinColourSpinColourMatrixD> LatticeSpinColourSpinColourMatrixD;
typedef Lattice<vSpinColourSpinColourMatrix> LatticeSpinColourSpinColourMatrix;
typedef Lattice<vSpinColourSpinColourMatrixF> LatticeSpinColourSpinColourMatrixF;
typedef Lattice<vSpinColourSpinColourMatrixD> LatticeSpinColourSpinColourMatrixD;
typedef Lattice<vLorentzColourMatrix> LatticeLorentzColourMatrix;
typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;
typedef Lattice<vLorentzColourMatrixD> LatticeLorentzColourMatrixD;
typedef Lattice<vLorentzColourMatrix> LatticeLorentzColourMatrix;
typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;
typedef Lattice<vLorentzColourMatrixD> LatticeLorentzColourMatrixD;
// DoubleStored gauge field
typedef Lattice<vDoubleStoredColourMatrix> LatticeDoubleStoredColourMatrix;
typedef Lattice<vDoubleStoredColourMatrixF> LatticeDoubleStoredColourMatrixF;
typedef Lattice<vDoubleStoredColourMatrixD> LatticeDoubleStoredColourMatrixD;
// DoubleStored gauge field
typedef Lattice<vDoubleStoredColourMatrix> LatticeDoubleStoredColourMatrix;
typedef Lattice<vDoubleStoredColourMatrixF> LatticeDoubleStoredColourMatrixF;
typedef Lattice<vDoubleStoredColourMatrixD> LatticeDoubleStoredColourMatrixD;
typedef Lattice<vSpinVector> LatticeSpinVector;
typedef Lattice<vSpinVectorF> LatticeSpinVectorF;
typedef Lattice<vSpinVectorD> LatticeSpinVectorD;
typedef Lattice<vSpinVector> LatticeSpinVector;
typedef Lattice<vSpinVectorF> LatticeSpinVectorF;
typedef Lattice<vSpinVectorD> LatticeSpinVectorD;
typedef Lattice<vColourVector> LatticeColourVector;
typedef Lattice<vColourVectorF> LatticeColourVectorF;
typedef Lattice<vColourVectorD> LatticeColourVectorD;
typedef Lattice<vColourVector> LatticeColourVector;
typedef Lattice<vColourVectorF> LatticeColourVectorF;
typedef Lattice<vColourVectorD> LatticeColourVectorD;
typedef Lattice<vSpinColourVector> LatticeSpinColourVector;
typedef Lattice<vSpinColourVectorF> LatticeSpinColourVectorF;
typedef Lattice<vSpinColourVectorD> LatticeSpinColourVectorD;
typedef Lattice<vSpinColourVector> LatticeSpinColourVector;
typedef Lattice<vSpinColourVectorF> LatticeSpinColourVectorF;
typedef Lattice<vSpinColourVectorD> LatticeSpinColourVectorD;
typedef Lattice<vHalfSpinVector> LatticeHalfSpinVector;
typedef Lattice<vHalfSpinVectorF> LatticeHalfSpinVectorF;
typedef Lattice<vHalfSpinVectorD> LatticeHalfSpinVectorD;
typedef Lattice<vHalfSpinVector> LatticeHalfSpinVector;
typedef Lattice<vHalfSpinVectorF> LatticeHalfSpinVectorF;
typedef Lattice<vHalfSpinVectorD> LatticeHalfSpinVectorD;
typedef Lattice<vHalfSpinColourVector> LatticeHalfSpinColourVector;
typedef Lattice<vHalfSpinColourVectorF> LatticeHalfSpinColourVectorF;
typedef Lattice<vHalfSpinColourVectorD> LatticeHalfSpinColourVectorD;
typedef Lattice<vHalfSpinColourVector> LatticeHalfSpinColourVector;
typedef Lattice<vHalfSpinColourVectorF> LatticeHalfSpinColourVectorF;
typedef Lattice<vHalfSpinColourVectorD> LatticeHalfSpinColourVectorD;
typedef Lattice<vTReal> LatticeReal;
typedef Lattice<vTRealF> LatticeRealF;
typedef Lattice<vTRealD> LatticeRealD;
typedef Lattice<vTReal> LatticeReal;
typedef Lattice<vTRealF> LatticeRealF;
typedef Lattice<vTRealD> LatticeRealD;
typedef Lattice<vTComplex> LatticeComplex;
typedef Lattice<vTComplexF> LatticeComplexF;
typedef Lattice<vTComplexD> LatticeComplexD;
typedef Lattice<vTComplex> LatticeComplex;
typedef Lattice<vTComplexF> LatticeComplexF;
typedef Lattice<vTComplexD> LatticeComplexD;
typedef Lattice<vTInteger> LatticeInteger; // Predicates for "where"
typedef Lattice<vTInteger> LatticeInteger; // Predicates for "where"
///////////////////////////////////////////
// Physical names for things
///////////////////////////////////////////
typedef LatticeHalfSpinColourVector LatticeHalfFermion;
typedef LatticeHalfSpinColourVectorF LatticeHalfFermionF;
typedef LatticeHalfSpinColourVectorF LatticeHalfFermionD;
///////////////////////////////////////////
// Physical names for things
///////////////////////////////////////////
typedef LatticeHalfSpinColourVector LatticeHalfFermion;
typedef LatticeHalfSpinColourVectorF LatticeHalfFermionF;
typedef LatticeHalfSpinColourVectorF LatticeHalfFermionD;
typedef LatticeSpinColourVector LatticeFermion;
typedef LatticeSpinColourVectorF LatticeFermionF;
typedef LatticeSpinColourVectorD LatticeFermionD;
typedef LatticeSpinColourVector LatticeFermion;
typedef LatticeSpinColourVectorF LatticeFermionF;
typedef LatticeSpinColourVectorD LatticeFermionD;
typedef LatticeSpinColourMatrix LatticePropagator;
typedef LatticeSpinColourMatrixF LatticePropagatorF;
typedef LatticeSpinColourMatrixD LatticePropagatorD;
typedef LatticeSpinColourMatrix LatticePropagator;
typedef LatticeSpinColourMatrixF LatticePropagatorF;
typedef LatticeSpinColourMatrixD LatticePropagatorD;
typedef LatticeLorentzColourMatrix LatticeGaugeField;
typedef LatticeLorentzColourMatrixF LatticeGaugeFieldF;
typedef LatticeLorentzColourMatrixD LatticeGaugeFieldD;
typedef LatticeLorentzColourMatrix LatticeGaugeField;
typedef LatticeLorentzColourMatrixF LatticeGaugeFieldF;
typedef LatticeLorentzColourMatrixD LatticeGaugeFieldD;
typedef LatticeDoubleStoredColourMatrix LatticeDoubledGaugeField;
typedef LatticeDoubleStoredColourMatrixF LatticeDoubledGaugeFieldF;
typedef LatticeDoubleStoredColourMatrixD LatticeDoubledGaugeFieldD;
typedef LatticeDoubleStoredColourMatrix LatticeDoubledGaugeField;
typedef LatticeDoubleStoredColourMatrixF LatticeDoubledGaugeFieldF;
typedef LatticeDoubleStoredColourMatrixD LatticeDoubledGaugeFieldD;
template<class GF> using LorentzScalar = Lattice<iScalar<typename GF::vector_object::element> >;
template<class GF> using LorentzScalar = Lattice<iScalar<typename GF::vector_object::element> >;
// Uhgg... typing this hurt ;)
// (my keyboard got burning hot when I typed this, must be the anti-Fermion)
typedef Lattice<vColourVector> LatticeStaggeredFermion;
typedef Lattice<vColourVectorF> LatticeStaggeredFermionF;
typedef Lattice<vColourVectorD> LatticeStaggeredFermionD;
// Uhgg... typing this hurt ;)
// (my keyboard got burning hot when I typed this, must be the anti-Fermion)
typedef Lattice<vColourVector> LatticeStaggeredFermion;
typedef Lattice<vColourVectorF> LatticeStaggeredFermionF;
typedef Lattice<vColourVectorD> LatticeStaggeredFermionD;
typedef Lattice<vColourMatrix> LatticeStaggeredPropagator;
typedef Lattice<vColourMatrixF> LatticeStaggeredPropagatorF;
typedef Lattice<vColourMatrixD> LatticeStaggeredPropagatorD;
typedef Lattice<vColourMatrix> LatticeStaggeredPropagator;
typedef Lattice<vColourMatrixF> LatticeStaggeredPropagatorF;
typedef Lattice<vColourMatrixD> LatticeStaggeredPropagatorD;
//////////////////////////////////////////////////////////////////////////////
// Peek and Poke named after physics attributes
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
// Peek and Poke named after physics attributes
//////////////////////////////////////////////////////////////////////////////
//spin
template<class vobj> auto peekSpin(const vobj &rhs,int i) -> decltype(PeekIndex<SpinIndex>(rhs,0))
{
return PeekIndex<SpinIndex>(rhs,i);
}
template<class vobj> auto peekSpin(const vobj &rhs,int i,int j) -> decltype(PeekIndex<SpinIndex>(rhs,0,0))
{
return PeekIndex<SpinIndex>(rhs,i,j);
}
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<SpinIndex>(rhs,0))
{
return PeekIndex<SpinIndex>(rhs,i);
}
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i,int j) -> decltype(PeekIndex<SpinIndex>(rhs,0,0))
{
return PeekIndex<SpinIndex>(rhs,i,j);
}
//colour
template<class vobj> auto peekColour(const vobj &rhs,int i) -> decltype(PeekIndex<ColourIndex>(rhs,0))
{
return PeekIndex<ColourIndex>(rhs,i);
}
template<class vobj> auto peekColour(const vobj &rhs,int i,int j) -> decltype(PeekIndex<ColourIndex>(rhs,0,0))
{
return PeekIndex<ColourIndex>(rhs,i,j);
}
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<ColourIndex>(rhs,0))
{
return PeekIndex<ColourIndex>(rhs,i);
}
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i,int j) -> decltype(PeekIndex<ColourIndex>(rhs,0,0))
{
return PeekIndex<ColourIndex>(rhs,i,j);
}
//lorentz
template<class vobj> auto peekLorentz(const vobj &rhs,int i) -> decltype(PeekIndex<LorentzIndex>(rhs,0))
{
return PeekIndex<LorentzIndex>(rhs,i);
}
template<class vobj> auto peekLorentz(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<LorentzIndex>(rhs,0))
{
return PeekIndex<LorentzIndex>(rhs,i);
}
//spin
template<class vobj> auto peekSpin(const vobj &rhs,int i) -> decltype(PeekIndex<SpinIndex>(rhs,0))
{
return PeekIndex<SpinIndex>(rhs,i);
}
template<class vobj> auto peekSpin(const vobj &rhs,int i,int j) -> decltype(PeekIndex<SpinIndex>(rhs,0,0))
{
return PeekIndex<SpinIndex>(rhs,i,j);
}
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<SpinIndex>(rhs,0))
{
return PeekIndex<SpinIndex>(rhs,i);
}
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i,int j) -> decltype(PeekIndex<SpinIndex>(rhs,0,0))
{
return PeekIndex<SpinIndex>(rhs,i,j);
}
//colour
template<class vobj> auto peekColour(const vobj &rhs,int i) -> decltype(PeekIndex<ColourIndex>(rhs,0))
{
return PeekIndex<ColourIndex>(rhs,i);
}
template<class vobj> auto peekColour(const vobj &rhs,int i,int j) -> decltype(PeekIndex<ColourIndex>(rhs,0,0))
{
return PeekIndex<ColourIndex>(rhs,i,j);
}
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<ColourIndex>(rhs,0))
{
return PeekIndex<ColourIndex>(rhs,i);
}
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i,int j) -> decltype(PeekIndex<ColourIndex>(rhs,0,0))
{
return PeekIndex<ColourIndex>(rhs,i,j);
}
//lorentz
template<class vobj> auto peekLorentz(const vobj &rhs,int i) -> decltype(PeekIndex<LorentzIndex>(rhs,0))
{
return PeekIndex<LorentzIndex>(rhs,i);
}
template<class vobj> auto peekLorentz(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<LorentzIndex>(rhs,0))
{
return PeekIndex<LorentzIndex>(rhs,i);
}
//////////////////////////////////////////////
// Poke lattice
//////////////////////////////////////////////
template<class vobj>
void pokeColour(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0))> & rhs,
//////////////////////////////////////////////
// Poke lattice
//////////////////////////////////////////////
template<class vobj>
void pokeColour(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<ColourIndex>(vobj(),0))> & rhs,
int i)
{
PokeIndex<ColourIndex>(lhs,rhs,i);
}
template<class vobj>
void pokeColour(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<ColourIndex>(vobj(),0,0))> & rhs,
int i,int j)
{
PokeIndex<ColourIndex>(lhs,rhs,i,j);
}
template<class vobj>
void pokeSpin(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<SpinIndex>(vobj(),0))> & rhs,
int i)
{
PokeIndex<ColourIndex>(lhs,rhs,i);
}
template<class vobj>
void pokeColour(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0,0))> & rhs,
{
PokeIndex<SpinIndex>(lhs,rhs,i);
}
template<class vobj>
void pokeSpin(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<SpinIndex>(vobj(),0,0))> & rhs,
int i,int j)
{
PokeIndex<ColourIndex>(lhs,rhs,i,j);
}
template<class vobj>
void pokeSpin(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0))> & rhs,
int i)
{
PokeIndex<SpinIndex>(lhs,rhs,i);
}
template<class vobj>
void pokeSpin(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0,0))> & rhs,
int i,int j)
{
PokeIndex<SpinIndex>(lhs,rhs,i,j);
}
template<class vobj>
void pokeLorentz(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<LorentzIndex>(lhs._odata[0],0))> & rhs,
int i)
{
PokeIndex<LorentzIndex>(lhs,rhs,i);
}
{
PokeIndex<SpinIndex>(lhs,rhs,i,j);
}
template<class vobj>
void pokeLorentz(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<LorentzIndex>(vobj(),0))> & rhs,
int i)
{
PokeIndex<LorentzIndex>(lhs,rhs,i);
}
//////////////////////////////////////////////
// Poke scalars
//////////////////////////////////////////////
template<class vobj> void pokeSpin(vobj &lhs,const decltype(peekIndex<SpinIndex>(lhs,0)) & rhs,int i)
{
pokeIndex<SpinIndex>(lhs,rhs,i);
}
template<class vobj> void pokeSpin(vobj &lhs,const decltype(peekIndex<SpinIndex>(lhs,0,0)) & rhs,int i,int j)
{
pokeIndex<SpinIndex>(lhs,rhs,i,j);
}
//////////////////////////////////////////////
// Poke scalars
//////////////////////////////////////////////
template<class vobj> void pokeSpin(vobj &lhs,const decltype(peekIndex<SpinIndex>(lhs,0)) & rhs,int i)
{
pokeIndex<SpinIndex>(lhs,rhs,i);
}
template<class vobj> void pokeSpin(vobj &lhs,const decltype(peekIndex<SpinIndex>(lhs,0,0)) & rhs,int i,int j)
{
pokeIndex<SpinIndex>(lhs,rhs,i,j);
}
template<class vobj> void pokeColour(vobj &lhs,const decltype(peekIndex<ColourIndex>(lhs,0)) & rhs,int i)
{
pokeIndex<ColourIndex>(lhs,rhs,i);
}
template<class vobj> void pokeColour(vobj &lhs,const decltype(peekIndex<ColourIndex>(lhs,0,0)) & rhs,int i,int j)
{
pokeIndex<ColourIndex>(lhs,rhs,i,j);
}
template<class vobj> void pokeColour(vobj &lhs,const decltype(peekIndex<ColourIndex>(lhs,0)) & rhs,int i)
{
pokeIndex<ColourIndex>(lhs,rhs,i);
}
template<class vobj> void pokeColour(vobj &lhs,const decltype(peekIndex<ColourIndex>(lhs,0,0)) & rhs,int i,int j)
{
pokeIndex<ColourIndex>(lhs,rhs,i,j);
}
template<class vobj> void pokeLorentz(vobj &lhs,const decltype(peekIndex<LorentzIndex>(lhs,0)) & rhs,int i)
{
pokeIndex<LorentzIndex>(lhs,rhs,i);
}
template<class vobj> void pokeLorentz(vobj &lhs,const decltype(peekIndex<LorentzIndex>(lhs,0)) & rhs,int i)
{
pokeIndex<LorentzIndex>(lhs,rhs,i);
}
//////////////////////////////////////////////
// Fermion <-> propagator assignements
//////////////////////////////////////////////
//////////////////////////////////////////////
// Fermion <-> propagator assignements
//////////////////////////////////////////////
//template <class Prop, class Ferm>
template <class Fimpl>
void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::FermionField &f, const int s, const int c)
{
for(int j = 0; j < Ns; ++j)
{
for(int j = 0; j < Ns; ++j)
{
auto pjs = peekSpin(p, j, s);
auto fj = peekSpin(f, j);
auto pjs = peekSpin(p, j, s);
auto fj = peekSpin(f, j);
for(int i = 0; i < Fimpl::Dimension; ++i)
{
pokeColour(pjs, peekColour(fj, i), i, c);
}
pokeSpin(p, pjs, j, s);
}
{
pokeColour(pjs, peekColour(fj, i), i, c);
}
pokeSpin(p, pjs, j, s);
}
}
//template <class Prop, class Ferm>
template <class Fimpl>
void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::PropagatorField &p, const int s, const int c)
{
for(int j = 0; j < Ns; ++j)
{
for(int j = 0; j < Ns; ++j)
{
auto pjs = peekSpin(p, j, s);
auto fj = peekSpin(f, j);
auto pjs = peekSpin(p, j, s);
auto fj = peekSpin(f, j);
for(int i = 0; i < Fimpl::Dimension; ++i)
{
pokeColour(fj, peekColour(pjs, i, c), i);
}
pokeSpin(f, fj, j);
}
{
pokeColour(fj, peekColour(pjs, i, c), i);
}
pokeSpin(f, fj, j);
}
}
//////////////////////////////////////////////
// transpose array and scalar
//////////////////////////////////////////////
template<int Index,class vobj> inline Lattice<vobj> transposeSpin(const Lattice<vobj> &lhs){
return transposeIndex<SpinIndex>(lhs);
}
template<int Index,class vobj> inline Lattice<vobj> transposeColour(const Lattice<vobj> &lhs){
return transposeIndex<ColourIndex>(lhs);
}
template<int Index,class vobj> inline vobj transposeSpin(const vobj &lhs){
return transposeIndex<SpinIndex>(lhs);
}
template<int Index,class vobj> inline vobj transposeColour(const vobj &lhs){
return transposeIndex<ColourIndex>(lhs);
}
//////////////////////////////////////////////
// transpose array and scalar
//////////////////////////////////////////////
template<int Index,class vobj> inline Lattice<vobj> transposeSpin(const Lattice<vobj> &lhs){
return transposeIndex<SpinIndex>(lhs);
}
template<int Index,class vobj> inline Lattice<vobj> transposeColour(const Lattice<vobj> &lhs){
return transposeIndex<ColourIndex>(lhs);
}
template<int Index,class vobj> inline vobj transposeSpin(const vobj &lhs){
return transposeIndex<SpinIndex>(lhs);
}
template<int Index,class vobj> inline vobj transposeColour(const vobj &lhs){
return transposeIndex<ColourIndex>(lhs);
}
//////////////////////////////////////////
// Trace lattice and non-lattice
//////////////////////////////////////////
template<int Index,class vobj>
inline auto traceSpin(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<SpinIndex>(lhs._odata[0]))>
{
return traceIndex<SpinIndex>(lhs);
}
template<int Index,class vobj>
inline auto traceColour(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<ColourIndex>(lhs._odata[0]))>
{
return traceIndex<ColourIndex>(lhs);
}
template<int Index,class vobj>
inline auto traceSpin(const vobj &lhs) -> Lattice<decltype(traceIndex<SpinIndex>(lhs))>
{
return traceIndex<SpinIndex>(lhs);
}
template<int Index,class vobj>
inline auto traceColour(const vobj &lhs) -> Lattice<decltype(traceIndex<ColourIndex>(lhs))>
{
return traceIndex<ColourIndex>(lhs);
}
//////////////////////////////////////////
// Trace lattice and non-lattice
//////////////////////////////////////////
template<int Index,class vobj>
inline auto traceSpin(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<SpinIndex>(vobj()))>
{
return traceIndex<SpinIndex>(lhs);
}
template<int Index,class vobj>
inline auto traceColour(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<ColourIndex>(vobj()))>
{
return traceIndex<ColourIndex>(lhs);
}
template<int Index,class vobj>
inline auto traceSpin(const vobj &lhs) -> Lattice<decltype(traceIndex<SpinIndex>(lhs))>
{
return traceIndex<SpinIndex>(lhs);
}
template<int Index,class vobj>
inline auto traceColour(const vobj &lhs) -> Lattice<decltype(traceIndex<ColourIndex>(lhs))>
{
return traceIndex<ColourIndex>(lhs);
}
//////////////////////////////////////////
// Current types
//////////////////////////////////////////
GRID_SERIALIZABLE_ENUM(Current, undef,
Vector, 0,
Axial, 1,
Tadpole, 2);
//////////////////////////////////////////
// Current types
//////////////////////////////////////////
GRID_SERIALIZABLE_ENUM(Current, undef,
Vector, 0,
Axial, 1,
Tadpole, 2);
} //namespace QCD
} // Grid
NAMESPACE_END(Grid);
#endif

View File

@ -37,14 +37,18 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
// Abstract base interface
////////////////////////////////////////////
#include <Grid/qcd/action/ActionCore.h>
NAMESPACE_CHECK(ActionCore);
////////////////////////////////////////////////////////////////////////
// Fermion actions; prevent coupling fermion.cc files to other headers
////////////////////////////////////////////////////////////////////////
#include <Grid/qcd/action/fermion/FermionCore.h>
NAMESPACE_CHECK(FermionCore);
#include <Grid/qcd/action/fermion/Fermion.h>
NAMESPACE_CHECK(Fermion);
////////////////////////////////////////
// Pseudo fermion combinations for HMC
////////////////////////////////////////
#include <Grid/qcd/action/pseudofermion/PseudoFermion.h>
NAMESPACE_CHECK(PseudoFermion);
#endif

View File

@ -27,19 +27,18 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
/* END LEGAL */
#ifndef ACTION_BASE_H
#define ACTION_BASE_H
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template <class GaugeField >
class Action
{
public:
public:
bool is_smeared = false;
// Heatbath?
virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG) = 0; // refresh pseudofermions
@ -50,7 +49,6 @@ class Action
virtual ~Action(){}
};
}
}
NAMESPACE_END(Grid);
#endif // ACTION_BASE_H

View File

@ -31,29 +31,37 @@ directory
#define QCD_ACTION_CORE
#include <Grid/qcd/action/ActionBase.h>
NAMESPACE_CHECK(ActionBase);
#include <Grid/qcd/action/ActionSet.h>
NAMESPACE_CHECK(ActionSet);
#include <Grid/qcd/action/ActionParams.h>
NAMESPACE_CHECK(ActionParams);
////////////////////////////////////////////
// Gauge Actions
////////////////////////////////////////////
#include <Grid/qcd/action/gauge/Gauge.h>
NAMESPACE_CHECK(Gauge);
////////////////////////////////////////////
// Fermion prereqs
////////////////////////////////////////////
#include <Grid/qcd/action/fermion/FermionCore.h>
NAMESPACE_CHECK(ActionFermionCore);
////////////////////////////////////////////
// Scalar Actions
////////////////////////////////////////////
#include <Grid/qcd/action/scalar/Scalar.h>
NAMESPACE_CHECK(Scalar);
////////////////////////////////////////////
// Utility functions
////////////////////////////////////////////
#include <Grid/qcd/utils/Metric.h>
NAMESPACE_CHECK(Metric);
#include <Grid/qcd/utils/CovariantLaplacian.h>
NAMESPACE_CHECK(CovariantLaplacian);

View File

@ -27,37 +27,35 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
/* END LEGAL */
#ifndef GRID_QCD_ACTION_PARAMS_H
#define GRID_QCD_ACTION_PARAMS_H
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
// These can move into a params header and be given MacroMagic serialisation
struct GparityWilsonImplParams {
bool overlapCommsCompute;
std::vector<int> twists;
GparityWilsonImplParams() : twists(Nd, 0), overlapCommsCompute(false){};
};
// These can move into a params header and be given MacroMagic serialisation
struct GparityWilsonImplParams {
Coordinate twists;
GparityWilsonImplParams() : twists(Nd, 0) {};
};
struct WilsonImplParams {
bool overlapCommsCompute;
std::vector<Real> twist_n_2pi_L;
std::vector<Complex> boundary_phases;
WilsonImplParams() : overlapCommsCompute(false) {
boundary_phases.resize(Nd, 1.0);
struct WilsonImplParams {
bool overlapCommsCompute;
AcceleratorVector<Real,Nd> twist_n_2pi_L;
AcceleratorVector<Complex,Nd> boundary_phases;
WilsonImplParams() {
boundary_phases.resize(Nd, 1.0);
twist_n_2pi_L.resize(Nd, 0.0);
};
WilsonImplParams(const std::vector<Complex> phi) : boundary_phases(phi), overlapCommsCompute(false) {
twist_n_2pi_L.resize(Nd, 0.0);
}
};
WilsonImplParams(const AcceleratorVector<Complex,Nd> phi) : boundary_phases(phi), overlapCommsCompute(false) {
twist_n_2pi_L.resize(Nd, 0.0);
}
};
struct StaggeredImplParams {
StaggeredImplParams() {};
};
struct StaggeredImplParams {
StaggeredImplParams() {};
};
struct OneFlavourRationalParams : Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(OneFlavourRationalParams,
@ -69,10 +67,10 @@ namespace QCD {
int, precision,
int, BoundsCheckFreq);
// MaxIter and tolerance, vectors??
// MaxIter and tolerance, vectors??
// constructor
OneFlavourRationalParams( RealD _lo = 0.0,
// constructor
OneFlavourRationalParams( RealD _lo = 0.0,
RealD _hi = 1.0,
int _maxit = 1000,
RealD tol = 1.0e-8,
@ -88,11 +86,6 @@ namespace QCD {
BoundsCheckFreq(_BoundsCheckFreq){};
};
}
}
NAMESPACE_END(Grid);
#endif

View File

@ -26,14 +26,11 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
/* END LEGAL */
#ifndef ACTION_SET_H
#define ACTION_SET_H
namespace Grid {
// Should drop this namespace here
namespace QCD {
NAMESPACE_BEGIN(Grid);
//////////////////////////////////
// Indexing of tuple types
@ -62,7 +59,7 @@ struct Index<T, std::tuple<U, Types...>> {
template <class Field, class Repr = NoHirep >
struct ActionLevel {
public:
public:
unsigned int multiplier;
// Fundamental repr actions separated because of the smearing
@ -77,7 +74,7 @@ struct ActionLevel {
std::vector<ActPtr>& actions;
explicit ActionLevel(unsigned int mul = 1) :
actions(std::get<0>(actions_hirep)), multiplier(mul) {
actions(std::get<0>(actions_hirep)), multiplier(mul) {
// initialize the hirep vectors to zero.
// apply(this->resize, actions_hirep, 0); //need a working resize
assert(mul >= 1);
@ -87,7 +84,7 @@ struct ActionLevel {
void push_back(Action<GenField>* ptr) {
// insert only in the correct vector
std::get< Index < GenField, action_hirep_types>::value >(actions_hirep).push_back(ptr);
};
}
template <class ActPtr>
static void resize(ActPtr ap, unsigned int n) {
@ -110,7 +107,6 @@ struct ActionLevel {
template <class GaugeField, class R>
using ActionSet = std::vector<ActionLevel<GaugeField, R> >;
} // QCD
} // Grid
NAMESPACE_END(Grid);
#endif // ACTION_SET_H

View File

@ -26,75 +26,75 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
/* END LEGAL */
#ifndef GRID_QCD_ABSTRACT_EOFA_FERMION_H
#define GRID_QCD_ABSTRACT_EOFA_FERMION_H
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
// DJM: Abstract base class for EOFA fermion types.
// Defines layout of additional EOFA-specific parameters and operators.
// Use to construct EOFA pseudofermion actions that are agnostic to
// Shamir / Mobius / etc., and ensure that no one can construct EOFA
// pseudofermion action with non-EOFA fermion type.
template<class Impl>
class AbstractEOFAFermion : public CayleyFermion5D<Impl> {
public:
INHERIT_IMPL_TYPES(Impl);
// DJM: Abstract base class for EOFA fermion types.
// Defines layout of additional EOFA-specific parameters and operators.
// Use to construct EOFA pseudofermion actions that are agnostic to
// Shamir / Mobius / etc., and ensure that no one can construct EOFA
// pseudofermion action with non-EOFA fermion type.
template<class Impl>
class AbstractEOFAFermion : public CayleyFermion5D<Impl> {
public:
INHERIT_IMPL_TYPES(Impl);
public:
// Fermion operator: D(mq1) + shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm}
RealD mq1;
RealD mq2;
RealD mq3;
RealD shift;
int pm;
public:
// Fermion operator: D(mq1) + shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm}
RealD mq1;
RealD mq2;
RealD mq3;
RealD shift;
int pm;
RealD alpha; // Mobius scale
RealD k; // EOFA normalization constant
RealD alpha; // Mobius scale
RealD k; // EOFA normalization constant
virtual void Instantiatable(void) = 0;
virtual void Instantiatable(void) = 0;
// EOFA-specific operations
// Force user to implement in derived classes
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag) = 0;
virtual void Dtilde (const FermionField& in, FermionField& out) = 0;
virtual void DtildeInv(const FermionField& in, FermionField& out) = 0;
// EOFA-specific operations
// Force user to implement in derived classes
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag) = 0;
virtual void Dtilde (const FermionField& in, FermionField& out) = 0;
virtual void DtildeInv(const FermionField& in, FermionField& out) = 0;
// Implement derivatives in base class:
// for EOFA both DWF and Mobius just need d(Dw)/dU
virtual void MDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
this->DhopDeriv(mat, U, V, dag);
};
virtual void MoeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
this->DhopDerivOE(mat, U, V, dag);
};
virtual void MeoDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
this->DhopDerivEO(mat, U, V, dag);
};
// Recompute 5D coefficients for different value of shift constant
// (needed for heatbath loop over poles)
virtual void RefreshShiftCoefficients(RealD new_shift) = 0;
// Constructors
AbstractEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int _pm,
RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams())
: CayleyFermion5D<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid,
_mq1, _M5, p), mq1(_mq1), mq2(_mq2), mq3(_mq3), shift(_shift), pm(_pm)
{
int Ls = this->Ls;
this->alpha = _b + _c;
this->k = this->alpha * (_mq3-_mq2) * std::pow(this->alpha+1.0,2*Ls) /
( std::pow(this->alpha+1.0,Ls) + _mq2*std::pow(this->alpha-1.0,Ls) ) /
( std::pow(this->alpha+1.0,Ls) + _mq3*std::pow(this->alpha-1.0,Ls) );
};
// Implement derivatives in base class:
// for EOFA both DWF and Mobius just need d(Dw)/dU
virtual void MDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
this->DhopDeriv(mat, U, V, dag);
};
}}
virtual void MoeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
this->DhopDerivOE(mat, U, V, dag);
};
virtual void MeoDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
this->DhopDerivEO(mat, U, V, dag);
};
// Recompute 5D coefficients for different value of shift constant
// (needed for heatbath loop over poles)
virtual void RefreshShiftCoefficients(RealD new_shift) = 0;
// Constructors
AbstractEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int _pm,
RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams())
: CayleyFermion5D<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid,
_mq1, _M5, p), mq1(_mq1), mq2(_mq2), mq3(_mq3), shift(_shift), pm(_pm)
{
int Ls = this->Ls;
this->alpha = _b + _c;
this->k = this->alpha * (_mq3-_mq2) * std::pow(this->alpha+1.0,2*Ls) /
( std::pow(this->alpha+1.0,Ls) + _mq2*std::pow(this->alpha-1.0,Ls) ) /
( std::pow(this->alpha+1.0,Ls) + _mq3*std::pow(this->alpha-1.0,Ls) );
};
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,203 +24,146 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_CAYLEY_FERMION_H
#define GRID_QCD_CAYLEY_FERMION_H
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/WilsonFermion5D.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class CayleyFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<typename T> struct switcheroo {
static inline int iscomplex() { return 0; }
// override multiply
virtual RealD M (const FermionField &in, FermionField &out);
virtual RealD Mdag (const FermionField &in, FermionField &out);
template<class vec>
static inline vec mult(vec a, vec b) {
return real_mult(a,b);
}
};
template<> struct switcheroo<ComplexD> {
static inline int iscomplex() { return 1; }
// half checkerboard operations
virtual void Meooe (const FermionField &in, FermionField &out);
virtual void MeooeDag (const FermionField &in, FermionField &out);
virtual void Mooee (const FermionField &in, FermionField &out);
virtual void MooeeDag (const FermionField &in, FermionField &out);
virtual void MooeeInv (const FermionField &in, FermionField &out);
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
virtual void Meo5D (const FermionField &psi, FermionField &chi);
template<class vec>
static inline vec mult(vec a, vec b) {
return a*b;
}
};
template<> struct switcheroo<ComplexF> {
static inline int iscomplex() { return 1; }
template<class vec>
static inline vec mult(vec a, vec b) {
return a*b;
}
};
virtual void M5D (const FermionField &psi, FermionField &chi);
virtual void M5Ddag(const FermionField &psi, FermionField &chi);
///////////////////////////////////////////////////////////////
// Physical surface field utilities
///////////////////////////////////////////////////////////////
virtual void Dminus(const FermionField &psi, FermionField &chi);
virtual void DminusDag(const FermionField &psi, FermionField &chi);
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
virtual void ExportPhysicalFermionSource(const FermionField &solution5d, FermionField &exported4d);
virtual void ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d);
virtual void ImportUnphysicalFermion(const FermionField &solution5d, FermionField &exported4d);
template<class Impl>
class CayleyFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
///////////////////////////////////////////////////////////////
// Support for MADWF tricks
///////////////////////////////////////////////////////////////
RealD Mass(void) { return mass; };
void SetMass(RealD _mass) {
mass=_mass;
SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs
} ;
void P(const FermionField &psi, FermionField &chi);
void Pdag(const FermionField &psi, FermionField &chi);
/////////////////////////////////////////////////////
// Instantiate different versions depending on Impl
/////////////////////////////////////////////////////
void M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
Vector<Coeff_t> &lower,
Vector<Coeff_t> &diag,
Vector<Coeff_t> &upper);
// override multiply
virtual RealD M (const FermionField &in, FermionField &out);
virtual RealD Mdag (const FermionField &in, FermionField &out);
void M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
Vector<Coeff_t> &lower,
Vector<Coeff_t> &diag,
Vector<Coeff_t> &upper);
// half checkerboard operations
virtual void Meooe (const FermionField &in, FermionField &out);
virtual void MeooeDag (const FermionField &in, FermionField &out);
virtual void Mooee (const FermionField &in, FermionField &out);
virtual void MooeeDag (const FermionField &in, FermionField &out);
virtual void MooeeInv (const FermionField &in, FermionField &out);
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
virtual void Meo5D (const FermionField &psi, FermionField &chi);
virtual void Instantiatable(void)=0;
virtual void M5D (const FermionField &psi, FermionField &chi);
virtual void M5Ddag(const FermionField &psi, FermionField &chi);
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
///////////////////////////////////////////////////////////////
// Physical surface field utilities
///////////////////////////////////////////////////////////////
virtual void Dminus(const FermionField &psi, FermionField &chi);
virtual void DminusDag(const FermionField &psi, FermionField &chi);
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
virtual void ExportPhysicalFermionSource(const FermionField &solution5d, FermionField &exported4d);
virtual void ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d);
virtual void ImportUnphysicalFermion(const FermionField &solution5d, FermionField &exported4d);
// Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
///////////////////////////////////////////////////////////////
// Support for MADWF tricks
///////////////////////////////////////////////////////////////
RealD Mass(void) { return mass; };
void SetMass(RealD _mass) {
mass=_mass;
SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs
} ;
void P(const FermionField &psi, FermionField &chi);
void Pdag(const FermionField &psi, FermionField &chi);
void Meooe5D (const FermionField &in, FermionField &out);
void MeooeDag5D (const FermionField &in, FermionField &out);
/////////////////////////////////////////////////////
// Instantiate different versions depending on Impl
/////////////////////////////////////////////////////
void M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper);
// protected:
RealD mass;
void M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper);
// Save arguments to SetCoefficientsInternal
Vector<Coeff_t> _gamma;
RealD _zolo_hi;
RealD _b;
RealD _c;
void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv);
void MooeeInternalCompute(int dag, int inv, Vector<iSinglet<Simd> > & Matp, Vector<iSinglet<Simd> > & Matm);
// Cayley form Moebius (tanh and zolotarev)
Vector<Coeff_t> omega;
Vector<Coeff_t> bs; // S dependent coeffs
Vector<Coeff_t> cs;
Vector<Coeff_t> as;
// For preconditioning Cayley form
Vector<Coeff_t> bee;
Vector<Coeff_t> cee;
Vector<Coeff_t> aee;
Vector<Coeff_t> beo;
Vector<Coeff_t> ceo;
Vector<Coeff_t> aeo;
// LDU factorisation of the eeoo matrix
Vector<Coeff_t> lee;
Vector<Coeff_t> leem;
Vector<Coeff_t> uee;
Vector<Coeff_t> ueem;
Vector<Coeff_t> dee;
void MooeeInternalAsm(const FermionField &in, FermionField &out,
int LLs, int site,
Vector<iSinglet<Simd> > &Matp,
Vector<iSinglet<Simd> > &Matm);
void MooeeInternalZAsm(const FermionField &in, FermionField &out,
int LLs, int site,
Vector<iSinglet<Simd> > &Matp,
Vector<iSinglet<Simd> > &Matm);
// Matrices of 5d ee inverse params
Vector<iSinglet<Simd> > MatpInv;
Vector<iSinglet<Simd> > MatmInv;
Vector<iSinglet<Simd> > MatpInvDag;
Vector<iSinglet<Simd> > MatmInvDag;
// Constructors
CayleyFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
virtual void Instantiatable(void)=0;
void CayleyReport(void);
void CayleyZeroCounters(void);
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
double M5Dflops;
double M5Dcalls;
double M5Dtime;
// Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
double MooeeInvFlops;
double MooeeInvCalls;
double MooeeInvTime;
void Meooe5D (const FermionField &in, FermionField &out);
void MeooeDag5D (const FermionField &in, FermionField &out);
protected:
virtual void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
virtual void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
virtual void SetCoefficientsInternal(RealD zolo_hi,Vector<Coeff_t> & gamma,RealD b,RealD c);
};
// protected:
RealD mass;
NAMESPACE_END(Grid);
// Save arguments to SetCoefficientsInternal
std::vector<Coeff_t> _gamma;
RealD _zolo_hi;
RealD _b;
RealD _c;
// Cayley form Moebius (tanh and zolotarev)
std::vector<Coeff_t> omega;
std::vector<Coeff_t> bs; // S dependent coeffs
std::vector<Coeff_t> cs;
std::vector<Coeff_t> as;
// For preconditioning Cayley form
std::vector<Coeff_t> bee;
std::vector<Coeff_t> cee;
std::vector<Coeff_t> aee;
std::vector<Coeff_t> beo;
std::vector<Coeff_t> ceo;
std::vector<Coeff_t> aeo;
// LDU factorisation of the eeoo matrix
std::vector<Coeff_t> lee;
std::vector<Coeff_t> leem;
std::vector<Coeff_t> uee;
std::vector<Coeff_t> ueem;
std::vector<Coeff_t> dee;
// Matrices of 5d ee inverse params
Vector<iSinglet<Simd> > MatpInv;
Vector<iSinglet<Simd> > MatmInv;
Vector<iSinglet<Simd> > MatpInvDag;
Vector<iSinglet<Simd> > MatmInvDag;
// Constructors
CayleyFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
void CayleyReport(void);
void CayleyZeroCounters(void);
double M5Dflops;
double M5Dcalls;
double M5Dtime;
double MooeeInvFlops;
double MooeeInvCalls;
double MooeeInvTime;
protected:
virtual void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
virtual void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
virtual void SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c);
};
}
}
#define INSTANTIATE_DPERP(A)\
template void CayleyFermion5D< A >::M5D(const FermionField &psi,const FermionField &phi,FermionField &chi,\
std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const FermionField &phi,FermionField &chi,\
std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
template void CayleyFermion5D< A >::MooeeInv (const FermionField &psi, FermionField &chi); \
template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi);
#undef CAYLEY_DPERP_DENSE
#define CAYLEY_DPERP_CACHE
#undef CAYLEY_DPERP_LINALG
#define CAYLEY_DPERP_VEC
#endif

View File

@ -1,249 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
namespace Grid {
namespace QCD {
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus fowards
// Pplus backwards..
template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
int Ls =this->Ls;
GridBase *grid=psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard=psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
M5Dcalls++;
M5Dtime-=usecond();
parallel_for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
for(int s=0;s<Ls;s++){
auto tmp = psi._odata[0];
if ( s==0 ) {
spProj5m(tmp,psi._odata[ss+s+1]);
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
spProj5p(tmp,psi._odata[ss+Ls-1]);
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
} else if ( s==(Ls-1)) {
spProj5m(tmp,psi._odata[ss+0]);
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
spProj5p(tmp,psi._odata[ss+s-1]);
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
} else {
spProj5m(tmp,psi._odata[ss+s+1]);
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
spProj5p(tmp,psi._odata[ss+s-1]);
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
}
}
}
M5Dtime+=usecond();
}
template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
int Ls =this->Ls;
GridBase *grid=psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard=psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
M5Dcalls++;
M5Dtime-=usecond();
parallel_for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
auto tmp = psi._odata[0];
for(int s=0;s<Ls;s++){
if ( s==0 ) {
spProj5p(tmp,psi._odata[ss+s+1]);
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
spProj5m(tmp,psi._odata[ss+Ls-1]);
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
} else if ( s==(Ls-1)) {
spProj5p(tmp,psi._odata[ss+0]);
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
spProj5m(tmp,psi._odata[ss+s-1]);
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
} else {
spProj5p(tmp,psi._odata[ss+s+1]);
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
spProj5m(tmp,psi._odata[ss+s-1]);
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
}
}
}
M5Dtime+=usecond();
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
{
GridBase *grid=psi._grid;
int Ls=this->Ls;
chi.checkerboard=psi.checkerboard;
MooeeInvCalls++;
MooeeInvTime-=usecond();
parallel_for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
auto tmp = psi._odata[0];
// flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops
// Apply (L^{\prime})^{-1}
chi[ss]=psi[ss]; // chi[0]=psi[0]
for(int s=1;s<Ls;s++){
spProj5p(tmp,chi[ss+s-1]);
chi[ss+s] = psi[ss+s]-lee[s-1]*tmp;
}
// L_m^{-1}
for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp,chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - leem[s]*tmp;
}
// U_m^{-1} D^{-1}
for (int s=0;s<Ls-1;s++){
// Chi[s] + 1/d chi[s]
spProj5p(tmp,chi[ss+Ls-1]);
chi[ss+s] = (1.0/dee[s])*chi[ss+s]-(ueem[s]/dee[Ls-1])*tmp;
}
chi[ss+Ls-1]= (1.0/dee[Ls-1])*chi[ss+Ls-1];
// Apply U^{-1}
for (int s=Ls-2;s>=0;s--){
spProj5m(tmp,chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - uee[s]*tmp;
}
}
MooeeInvTime+=usecond();
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
{
GridBase *grid=psi._grid;
int Ls=this->Ls;
assert(psi.checkerboard == psi.checkerboard);
chi.checkerboard=psi.checkerboard;
std::vector<Coeff_t> ueec(Ls);
std::vector<Coeff_t> deec(Ls);
std::vector<Coeff_t> leec(Ls);
std::vector<Coeff_t> ueemc(Ls);
std::vector<Coeff_t> leemc(Ls);
for(int s=0;s<ueec.size();s++){
ueec[s] = conjugate(uee[s]);
deec[s] = conjugate(dee[s]);
leec[s] = conjugate(lee[s]);
ueemc[s]= conjugate(ueem[s]);
leemc[s]= conjugate(leem[s]);
}
MooeeInvCalls++;
MooeeInvTime-=usecond();
parallel_for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
auto tmp = psi._odata[0];
// Apply (U^{\prime})^{-dagger}
chi[ss]=psi[ss];
for (int s=1;s<Ls;s++){
spProj5m(tmp,chi[ss+s-1]);
chi[ss+s] = psi[ss+s]-ueec[s-1]*tmp;
}
// U_m^{-\dagger}
for (int s=0;s<Ls-1;s++){
spProj5p(tmp,chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - ueemc[s]*tmp;
}
// L_m^{-\dagger} D^{-dagger}
for (int s=0;s<Ls-1;s++){
spProj5m(tmp,chi[ss+Ls-1]);
chi[ss+s] = (1.0/deec[s])*chi[ss+s]-(leemc[s]/deec[Ls-1])*tmp;
}
chi[ss+Ls-1]= (1.0/deec[Ls-1])*chi[ss+Ls-1];
// Apply L^{-dagger}
for (int s=Ls-2;s>=0;s--){
spProj5p(tmp,chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - leec[s]*tmp;
}
}
MooeeInvTime+=usecond();
}
#ifdef CAYLEY_DPERP_CACHE
INSTANTIATE_DPERP(WilsonImplF);
INSTANTIATE_DPERP(WilsonImplD);
INSTANTIATE_DPERP(GparityWilsonImplF);
INSTANTIATE_DPERP(GparityWilsonImplD);
INSTANTIATE_DPERP(ZWilsonImplF);
INSTANTIATE_DPERP(ZWilsonImplD);
INSTANTIATE_DPERP(WilsonImplFH);
INSTANTIATE_DPERP(WilsonImplDF);
INSTANTIATE_DPERP(GparityWilsonImplFH);
INSTANTIATE_DPERP(GparityWilsonImplDF);
INSTANTIATE_DPERP(ZWilsonImplFH);
INSTANTIATE_DPERP(ZWilsonImplDF);
#endif
}}

View File

@ -1,828 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
namespace Grid {
namespace QCD {
/*
* Dense matrix versions of routines
*/
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
{
this->MooeeInternal(psi,chi,DaggerYes,InverseYes);
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
{
this->MooeeInternal(psi,chi,DaggerNo,InverseYes);
}
template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
GridBase *grid=psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
const int nsimd= Simd::Nsimd();
Vector<iSinglet<Simd> > u(LLs);
Vector<iSinglet<Simd> > l(LLs);
Vector<iSinglet<Simd> > d(LLs);
assert(Ls/LLs==nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard=psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type * u_p = (scalar_type *)&u[0];
scalar_type * l_p = (scalar_type *)&l[0];
scalar_type * d_p = (scalar_type *)&d[0];
for(int o=0;o<LLs;o++){ // outer
for(int i=0;i<nsimd;i++){ //inner
int s = o+i*LLs;
int ss = o*nsimd+i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
M5Dcalls++;
M5Dtime-=usecond();
assert(Nc==3);
parallel_for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0;v<LLs;v++){
int vp=(v+1)%LLs;
int vm=(v+LLs-1)%LLs;
spProj5m(hp,psi[ss+vp]);
spProj5p(hm,psi[ss+vm]);
if ( vp<=v ) rotate(hp,hp,1);
if ( vm>=v ) rotate(hm,hm,nsimd-1);
hp=0.5*hp;
hm=0.5*hm;
spRecon5m(fp,hp);
spRecon5p(fm,hm);
chi[ss+v] = d[v]*phi[ss+v];
chi[ss+v] = chi[ss+v] +u[v]*fp;
chi[ss+v] = chi[ss+v] +l[v]*fm;
}
#else
for(int v=0;v<LLs;v++){
vprefetch(psi[ss+v+LLs]);
int vp= (v==LLs-1) ? 0 : v+1;
int vm= (v==0 ) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(2)(0);
Simd hp_01 = psi[ss+vp]()(2)(1);
Simd hp_02 = psi[ss+vp]()(2)(2);
Simd hp_10 = psi[ss+vp]()(3)(0);
Simd hp_11 = psi[ss+vp]()(3)(1);
Simd hp_12 = psi[ss+vp]()(3)(2);
Simd hm_00 = psi[ss+vm]()(0)(0);
Simd hm_01 = psi[ss+vm]()(0)(1);
Simd hm_02 = psi[ss+vm]()(0)(2);
Simd hm_10 = psi[ss+vm]()(1)(0);
Simd hm_11 = psi[ss+vm]()(1)(1);
Simd hm_12 = psi[ss+vm]()(1)(2);
if ( vp<=v ) {
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if ( vm>=v ) {
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
// Can force these to real arithmetic and save 2x.
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_12);
vstream(chi[ss+v]()(0)(0),p_00);
vstream(chi[ss+v]()(0)(1),p_01);
vstream(chi[ss+v]()(0)(2),p_02);
vstream(chi[ss+v]()(1)(0),p_10);
vstream(chi[ss+v]()(1)(1),p_11);
vstream(chi[ss+v]()(1)(2),p_12);
vstream(chi[ss+v]()(2)(0),p_20);
vstream(chi[ss+v]()(2)(1),p_21);
vstream(chi[ss+v]()(2)(2),p_22);
vstream(chi[ss+v]()(3)(0),p_30);
vstream(chi[ss+v]()(3)(1),p_31);
vstream(chi[ss+v]()(3)(2),p_32);
}
#endif
}
M5Dtime+=usecond();
}
template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
GridBase *grid=psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
int nsimd= Simd::Nsimd();
Vector<iSinglet<Simd> > u(LLs);
Vector<iSinglet<Simd> > l(LLs);
Vector<iSinglet<Simd> > d(LLs);
assert(Ls/LLs==nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard=psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type * u_p = (scalar_type *)&u[0];
scalar_type * l_p = (scalar_type *)&l[0];
scalar_type * d_p = (scalar_type *)&d[0];
for(int o=0;o<LLs;o++){ // outer
for(int i=0;i<nsimd;i++){ //inner
int s = o+i*LLs;
int ss = o*nsimd+i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
M5Dcalls++;
M5Dtime-=usecond();
parallel_for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0;v<LLs;v++){
int vp=(v+1)%LLs;
int vm=(v+LLs-1)%LLs;
spProj5p(hp,psi[ss+vp]);
spProj5m(hm,psi[ss+vm]);
if ( vp<=v ) rotate(hp,hp,1);
if ( vm>=v ) rotate(hm,hm,nsimd-1);
hp=hp*0.5;
hm=hm*0.5;
spRecon5p(fp,hp);
spRecon5m(fm,hm);
chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
chi[ss+v] = chi[ss+v] +l[v]*fm;
}
#else
for(int v=0;v<LLs;v++){
vprefetch(psi[ss+v+LLs]);
int vp= (v==LLs-1) ? 0 : v+1;
int vm= (v==0 ) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(0)(0);
Simd hp_01 = psi[ss+vp]()(0)(1);
Simd hp_02 = psi[ss+vp]()(0)(2);
Simd hp_10 = psi[ss+vp]()(1)(0);
Simd hp_11 = psi[ss+vp]()(1)(1);
Simd hp_12 = psi[ss+vp]()(1)(2);
Simd hm_00 = psi[ss+vm]()(2)(0);
Simd hm_01 = psi[ss+vm]()(2)(1);
Simd hm_02 = psi[ss+vm]()(2)(2);
Simd hm_10 = psi[ss+vm]()(3)(0);
Simd hm_11 = psi[ss+vm]()(3)(1);
Simd hm_12 = psi[ss+vm]()(3)(2);
if ( vp<=v ) {
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if ( vm>=v ) {
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_12);
vstream(chi[ss+v]()(0)(0),p_00);
vstream(chi[ss+v]()(0)(1),p_01);
vstream(chi[ss+v]()(0)(2),p_02);
vstream(chi[ss+v]()(1)(0),p_10);
vstream(chi[ss+v]()(1)(1),p_11);
vstream(chi[ss+v]()(1)(2),p_12);
vstream(chi[ss+v]()(2)(0),p_20);
vstream(chi[ss+v]()(2)(1),p_21);
vstream(chi[ss+v]()(2)(2),p_22);
vstream(chi[ss+v]()(3)(0),p_30);
vstream(chi[ss+v]()(3)(1),p_31);
vstream(chi[ss+v]()(3)(2),p_32);
}
#endif
}
M5Dtime+=usecond();
}
#ifdef AVX512
#include <simd/Intel512common.h>
#include <simd/Intel512avx.h>
#include <simd/Intel512single.h>
#endif
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi, FermionField &chi,
int LLs, int site,
Vector<iSinglet<Simd> > &Matp,
Vector<iSinglet<Simd> > &Matm)
{
#ifndef AVX512
{
SiteHalfSpinor BcastP;
SiteHalfSpinor BcastM;
SiteHalfSpinor SiteChiP;
SiteHalfSpinor SiteChiM;
// Ls*Ls * 2 * 12 * vol flops
for(int s1=0;s1<LLs;s1++){
for(int s2=0;s2<LLs;s2++){
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
int s=s2+l*LLs;
int lex=s2+LLs*site;
if ( s2==0 && l==0) {
SiteChiP=zero;
SiteChiM=zero;
}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vbroadcast(BcastP()(sp )(co),psi[lex]()(sp)(co),l);
}}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vbroadcast(BcastM()(sp )(co),psi[lex]()(sp+2)(co),l);
}}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
SiteChiP()(sp)(co)=real_madd(Matp[LLs*s+s1]()()(),BcastP()(sp)(co),SiteChiP()(sp)(co)); // 1100 us.
SiteChiM()(sp)(co)=real_madd(Matm[LLs*s+s1]()()(),BcastM()(sp)(co),SiteChiM()(sp)(co)); // each found by commenting out
}}
}}
{
int lex = s1+LLs*site;
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
}}
}
}
}
#else
{
// pointers
// MASK_REGS;
#define Chi_00 %%zmm1
#define Chi_01 %%zmm2
#define Chi_02 %%zmm3
#define Chi_10 %%zmm4
#define Chi_11 %%zmm5
#define Chi_12 %%zmm6
#define Chi_20 %%zmm7
#define Chi_21 %%zmm8
#define Chi_22 %%zmm9
#define Chi_30 %%zmm10
#define Chi_31 %%zmm11
#define Chi_32 %%zmm12
#define BCAST0 %%zmm13
#define BCAST1 %%zmm14
#define BCAST2 %%zmm15
#define BCAST3 %%zmm16
#define BCAST4 %%zmm17
#define BCAST5 %%zmm18
#define BCAST6 %%zmm19
#define BCAST7 %%zmm20
#define BCAST8 %%zmm21
#define BCAST9 %%zmm22
#define BCAST10 %%zmm23
#define BCAST11 %%zmm24
int incr=LLs*LLs*sizeof(iSinglet<Simd>);
for(int s1=0;s1<LLs;s1++){
for(int s2=0;s2<LLs;s2++){
int lex=s2+LLs*site;
uint64_t a0 = (uint64_t)&Matp[LLs*s2+s1]; // should be cacheable
uint64_t a1 = (uint64_t)&Matm[LLs*s2+s1];
uint64_t a2 = (uint64_t)&psi[lex];
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
if ( (s2+l)==0 ) {
asm (
VPREFETCH1(0,%2) VPREFETCH1(0,%1)
VPREFETCH1(12,%2) VPREFETCH1(13,%2)
VPREFETCH1(14,%2) VPREFETCH1(15,%2)
VBCASTCDUP(0,%2,BCAST0)
VBCASTCDUP(1,%2,BCAST1)
VBCASTCDUP(2,%2,BCAST2)
VBCASTCDUP(3,%2,BCAST3)
VBCASTCDUP(4,%2,BCAST4) VMULMEM (0,%0,BCAST0,Chi_00)
VBCASTCDUP(5,%2,BCAST5) VMULMEM (0,%0,BCAST1,Chi_01)
VBCASTCDUP(6,%2,BCAST6) VMULMEM (0,%0,BCAST2,Chi_02)
VBCASTCDUP(7,%2,BCAST7) VMULMEM (0,%0,BCAST3,Chi_10)
VBCASTCDUP(8,%2,BCAST8) VMULMEM (0,%0,BCAST4,Chi_11)
VBCASTCDUP(9,%2,BCAST9) VMULMEM (0,%0,BCAST5,Chi_12)
VBCASTCDUP(10,%2,BCAST10) VMULMEM (0,%1,BCAST6,Chi_20)
VBCASTCDUP(11,%2,BCAST11) VMULMEM (0,%1,BCAST7,Chi_21)
VMULMEM (0,%1,BCAST8,Chi_22)
VMULMEM (0,%1,BCAST9,Chi_30)
VMULMEM (0,%1,BCAST10,Chi_31)
VMULMEM (0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
} else {
asm (
VBCASTCDUP(0,%2,BCAST0) VMADDMEM (0,%0,BCAST0,Chi_00)
VBCASTCDUP(1,%2,BCAST1) VMADDMEM (0,%0,BCAST1,Chi_01)
VBCASTCDUP(2,%2,BCAST2) VMADDMEM (0,%0,BCAST2,Chi_02)
VBCASTCDUP(3,%2,BCAST3) VMADDMEM (0,%0,BCAST3,Chi_10)
VBCASTCDUP(4,%2,BCAST4) VMADDMEM (0,%0,BCAST4,Chi_11)
VBCASTCDUP(5,%2,BCAST5) VMADDMEM (0,%0,BCAST5,Chi_12)
VBCASTCDUP(6,%2,BCAST6) VMADDMEM (0,%1,BCAST6,Chi_20)
VBCASTCDUP(7,%2,BCAST7) VMADDMEM (0,%1,BCAST7,Chi_21)
VBCASTCDUP(8,%2,BCAST8) VMADDMEM (0,%1,BCAST8,Chi_22)
VBCASTCDUP(9,%2,BCAST9) VMADDMEM (0,%1,BCAST9,Chi_30)
VBCASTCDUP(10,%2,BCAST10) VMADDMEM (0,%1,BCAST10,Chi_31)
VBCASTCDUP(11,%2,BCAST11) VMADDMEM (0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
}
a0 = a0+incr;
a1 = a1+incr;
a2 = a2+sizeof(typename Simd::scalar_type);
}}
{
int lexa = s1+LLs*site;
asm (
VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01) VSTORE(2 ,%0,Chi_02)
VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11) VSTORE(5 ,%0,Chi_12)
VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21) VSTORE(8 ,%0,Chi_22)
VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31) VSTORE(11,%0,Chi_32)
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
}
}
}
#undef Chi_00
#undef Chi_01
#undef Chi_02
#undef Chi_10
#undef Chi_11
#undef Chi_12
#undef Chi_20
#undef Chi_21
#undef Chi_22
#undef Chi_30
#undef Chi_31
#undef Chi_32
#undef BCAST0
#undef BCAST1
#undef BCAST2
#undef BCAST3
#undef BCAST4
#undef BCAST5
#undef BCAST6
#undef BCAST7
#undef BCAST8
#undef BCAST9
#undef BCAST10
#undef BCAST11
#endif
};
// Z-mobius version
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi, FermionField &chi,
int LLs, int site, Vector<iSinglet<Simd> > &Matp, Vector<iSinglet<Simd> > &Matm)
{
#ifndef AVX512
{
SiteHalfSpinor BcastP;
SiteHalfSpinor BcastM;
SiteHalfSpinor SiteChiP;
SiteHalfSpinor SiteChiM;
// Ls*Ls * 2 * 12 * vol flops
for(int s1=0;s1<LLs;s1++){
for(int s2=0;s2<LLs;s2++){
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
int s=s2+l*LLs;
int lex=s2+LLs*site;
if ( s2==0 && l==0) {
SiteChiP=zero;
SiteChiM=zero;
}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vbroadcast(BcastP()(sp )(co),psi[lex]()(sp)(co),l);
}}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vbroadcast(BcastM()(sp )(co),psi[lex]()(sp+2)(co),l);
}}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
SiteChiP()(sp)(co)=SiteChiP()(sp)(co)+ Matp[LLs*s+s1]()()()*BcastP()(sp)(co);
SiteChiM()(sp)(co)=SiteChiM()(sp)(co)+ Matm[LLs*s+s1]()()()*BcastM()(sp)(co);
}}
}}
{
int lex = s1+LLs*site;
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
}}
}
}
}
#else
{
// pointers
// MASK_REGS;
#define Chi_00 %zmm0
#define Chi_01 %zmm1
#define Chi_02 %zmm2
#define Chi_10 %zmm3
#define Chi_11 %zmm4
#define Chi_12 %zmm5
#define Chi_20 %zmm6
#define Chi_21 %zmm7
#define Chi_22 %zmm8
#define Chi_30 %zmm9
#define Chi_31 %zmm10
#define Chi_32 %zmm11
#define pChi_00 %%zmm0
#define pChi_01 %%zmm1
#define pChi_02 %%zmm2
#define pChi_10 %%zmm3
#define pChi_11 %%zmm4
#define pChi_12 %%zmm5
#define pChi_20 %%zmm6
#define pChi_21 %%zmm7
#define pChi_22 %%zmm8
#define pChi_30 %%zmm9
#define pChi_31 %%zmm10
#define pChi_32 %%zmm11
#define BCAST_00 %zmm12
#define SHUF_00 %zmm13
#define BCAST_01 %zmm14
#define SHUF_01 %zmm15
#define BCAST_02 %zmm16
#define SHUF_02 %zmm17
#define BCAST_10 %zmm18
#define SHUF_10 %zmm19
#define BCAST_11 %zmm20
#define SHUF_11 %zmm21
#define BCAST_12 %zmm22
#define SHUF_12 %zmm23
#define Mp %zmm24
#define Mps %zmm25
#define Mm %zmm26
#define Mms %zmm27
#define N 8
int incr=LLs*LLs*sizeof(iSinglet<Simd>);
for(int s1=0;s1<LLs;s1++){
for(int s2=0;s2<LLs;s2++){
int lex=s2+LLs*site;
uint64_t a0 = (uint64_t)&Matp[LLs*s2+s1]; // should be cacheable
uint64_t a1 = (uint64_t)&Matm[LLs*s2+s1];
uint64_t a2 = (uint64_t)&psi[lex];
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
if ( (s2+l)==0 ) {
LOAD64(%r8,a0);
LOAD64(%r9,a1);
LOAD64(%r10,a2);
asm (
VLOAD(0,%r8,Mp)// i r
VLOAD(0,%r9,Mm)
VSHUF(Mp,Mps) // r i
VSHUF(Mm,Mms)
VPREFETCH1(12,%r10) VPREFETCH1(13,%r10)
VPREFETCH1(14,%r10) VPREFETCH1(15,%r10)
VMULIDUP(0*N,%r10,Mps,Chi_00)
VMULIDUP(1*N,%r10,Mps,Chi_01)
VMULIDUP(2*N,%r10,Mps,Chi_02)
VMULIDUP(3*N,%r10,Mps,Chi_10)
VMULIDUP(4*N,%r10,Mps,Chi_11)
VMULIDUP(5*N,%r10,Mps,Chi_12)
VMULIDUP(6*N ,%r10,Mms,Chi_20)
VMULIDUP(7*N ,%r10,Mms,Chi_21)
VMULIDUP(8*N ,%r10,Mms,Chi_22)
VMULIDUP(9*N ,%r10,Mms,Chi_30)
VMULIDUP(10*N,%r10,Mms,Chi_31)
VMULIDUP(11*N,%r10,Mms,Chi_32)
VMADDSUBRDUP(0*N,%r10,Mp,Chi_00)
VMADDSUBRDUP(1*N,%r10,Mp,Chi_01)
VMADDSUBRDUP(2*N,%r10,Mp,Chi_02)
VMADDSUBRDUP(3*N,%r10,Mp,Chi_10)
VMADDSUBRDUP(4*N,%r10,Mp,Chi_11)
VMADDSUBRDUP(5*N,%r10,Mp,Chi_12)
VMADDSUBRDUP(6*N ,%r10,Mm,Chi_20)
VMADDSUBRDUP(7*N ,%r10,Mm,Chi_21)
VMADDSUBRDUP(8*N ,%r10,Mm,Chi_22)
VMADDSUBRDUP(9*N ,%r10,Mm,Chi_30)
VMADDSUBRDUP(10*N,%r10,Mm,Chi_31)
VMADDSUBRDUP(11*N,%r10,Mm,Chi_32)
);
} else {
LOAD64(%r8,a0);
LOAD64(%r9,a1);
LOAD64(%r10,a2);
asm (
VLOAD(0,%r8,Mp)
VSHUF(Mp,Mps)
VLOAD(0,%r9,Mm)
VSHUF(Mm,Mms)
VMADDSUBIDUP(0*N,%r10,Mps,Chi_00) // Mri * Pii +- Cir
VMADDSUBIDUP(1*N,%r10,Mps,Chi_01)
VMADDSUBIDUP(2*N,%r10,Mps,Chi_02)
VMADDSUBIDUP(3*N,%r10,Mps,Chi_10)
VMADDSUBIDUP(4*N,%r10,Mps,Chi_11)
VMADDSUBIDUP(5*N,%r10,Mps,Chi_12)
VMADDSUBIDUP(6 *N,%r10,Mms,Chi_20)
VMADDSUBIDUP(7 *N,%r10,Mms,Chi_21)
VMADDSUBIDUP(8 *N,%r10,Mms,Chi_22)
VMADDSUBIDUP(9 *N,%r10,Mms,Chi_30)
VMADDSUBIDUP(10*N,%r10,Mms,Chi_31)
VMADDSUBIDUP(11*N,%r10,Mms,Chi_32)
VMADDSUBRDUP(0*N,%r10,Mp,Chi_00) // Cir = Mir * Prr +- ( Mri * Pii +- Cir)
VMADDSUBRDUP(1*N,%r10,Mp,Chi_01) // Ci = MiPr + Ci + MrPi ; Cr = MrPr - ( MiPi - Cr)
VMADDSUBRDUP(2*N,%r10,Mp,Chi_02)
VMADDSUBRDUP(3*N,%r10,Mp,Chi_10)
VMADDSUBRDUP(4*N,%r10,Mp,Chi_11)
VMADDSUBRDUP(5*N,%r10,Mp,Chi_12)
VMADDSUBRDUP(6 *N,%r10,Mm,Chi_20)
VMADDSUBRDUP(7 *N,%r10,Mm,Chi_21)
VMADDSUBRDUP(8 *N,%r10,Mm,Chi_22)
VMADDSUBRDUP(9 *N,%r10,Mm,Chi_30)
VMADDSUBRDUP(10*N,%r10,Mm,Chi_31)
VMADDSUBRDUP(11*N,%r10,Mm,Chi_32)
);
}
a0 = a0+incr;
a1 = a1+incr;
a2 = a2+sizeof(typename Simd::scalar_type);
}}
{
int lexa = s1+LLs*site;
/*
SiteSpinor tmp;
asm (
VSTORE(0,%0,pChi_00) VSTORE(1 ,%0,pChi_01) VSTORE(2 ,%0,pChi_02)
VSTORE(3,%0,pChi_10) VSTORE(4 ,%0,pChi_11) VSTORE(5 ,%0,pChi_12)
VSTORE(6,%0,pChi_20) VSTORE(7 ,%0,pChi_21) VSTORE(8 ,%0,pChi_22)
VSTORE(9,%0,pChi_30) VSTORE(10,%0,pChi_31) VSTORE(11,%0,pChi_32)
: : "r" ((uint64_t)&tmp) : "memory" );
*/
asm (
VSTORE(0,%0,pChi_00) VSTORE(1 ,%0,pChi_01) VSTORE(2 ,%0,pChi_02)
VSTORE(3,%0,pChi_10) VSTORE(4 ,%0,pChi_11) VSTORE(5 ,%0,pChi_12)
VSTORE(6,%0,pChi_20) VSTORE(7 ,%0,pChi_21) VSTORE(8 ,%0,pChi_22)
VSTORE(9,%0,pChi_30) VSTORE(10,%0,pChi_31) VSTORE(11,%0,pChi_32)
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
// if ( 1 || (site==0) ) {
// std::cout<<site << " s1 "<<s1<<"\n\t"<<tmp << "\n't" << chi[lexa] <<"\n\t"<<tmp-chi[lexa]<<std::endl;
// }
}
}
}
#undef Chi_00
#undef Chi_01
#undef Chi_02
#undef Chi_10
#undef Chi_11
#undef Chi_12
#undef Chi_20
#undef Chi_21
#undef Chi_22
#undef Chi_30
#undef Chi_31
#undef Chi_32
#undef BCAST0
#undef BCAST1
#undef BCAST2
#undef BCAST3
#undef BCAST4
#undef BCAST5
#undef BCAST6
#undef BCAST7
#undef BCAST8
#undef BCAST9
#undef BCAST10
#undef BCAST11
#endif
};
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
{
int Ls=this->Ls;
int LLs = psi._grid->_rdimensions[0];
int vol = psi._grid->oSites()/LLs;
chi.checkerboard=psi.checkerboard;
Vector<iSinglet<Simd> > Matp;
Vector<iSinglet<Simd> > Matm;
Vector<iSinglet<Simd> > *_Matp;
Vector<iSinglet<Simd> > *_Matm;
// MooeeInternalCompute(dag,inv,Matp,Matm);
if ( inv && dag ) {
_Matp = &MatpInvDag;
_Matm = &MatmInvDag;
}
if ( inv && (!dag) ) {
_Matp = &MatpInv;
_Matm = &MatmInv;
}
if ( !inv ) {
MooeeInternalCompute(dag,inv,Matp,Matm);
_Matp = &Matp;
_Matm = &Matm;
}
assert(_Matp->size()==Ls*LLs);
MooeeInvCalls++;
MooeeInvTime-=usecond();
if ( switcheroo<Coeff_t>::iscomplex() ) {
parallel_for(auto site=0;site<vol;site++){
MooeeInternalZAsm(psi,chi,LLs,site,*_Matp,*_Matm);
}
} else {
parallel_for(auto site=0;site<vol;site++){
MooeeInternalAsm(psi,chi,LLs,site,*_Matp,*_Matm);
}
}
MooeeInvTime+=usecond();
}
INSTANTIATE_DPERP(DomainWallVec5dImplD);
INSTANTIATE_DPERP(DomainWallVec5dImplF);
INSTANTIATE_DPERP(ZDomainWallVec5dImplD);
INSTANTIATE_DPERP(ZDomainWallVec5dImplF);
INSTANTIATE_DPERP(DomainWallVec5dImplDF);
INSTANTIATE_DPERP(DomainWallVec5dImplFH);
INSTANTIATE_DPERP(ZDomainWallVec5dImplDF);
INSTANTIATE_DPERP(ZDomainWallVec5dImplFH);
template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<DomainWallVec5dImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<DomainWallVec5dImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
}}

View File

@ -1,323 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/ContinuedFractionFermion5D.h>
namespace Grid {
namespace QCD {
template<class Impl>
void ContinuedFractionFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale)
{
SetCoefficientsZolotarev(1.0/scale,zdata);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata)
{
// How to check Ls matches??
// std::cout<<GridLogMessage << Ls << " Ls"<<std::endl;
// std::cout<<GridLogMessage << zdata->n << " - n"<<std::endl;
// std::cout<<GridLogMessage << zdata->da << " -da "<<std::endl;
// std::cout<<GridLogMessage << zdata->db << " -db"<<std::endl;
// std::cout<<GridLogMessage << zdata->dn << " -dn"<<std::endl;
// std::cout<<GridLogMessage << zdata->dd << " -dd"<<std::endl;
int Ls = this->Ls;
assert(zdata->db==Ls);// Beta has Ls coeffs
R=(1+this->mass)/(1-this->mass);
Beta.resize(Ls);
cc.resize(Ls);
cc_d.resize(Ls);
sqrt_cc.resize(Ls);
for(int i=0; i < Ls ; i++){
Beta[i] = zdata -> beta[i];
cc[i] = 1.0/Beta[i];
cc_d[i]=sqrt(cc[i]);
}
cc_d[Ls-1]=1.0;
for(int i=0; i < Ls-1 ; i++){
sqrt_cc[i]= sqrt(cc[i]*cc[i+1]);
}
sqrt_cc[Ls-2]=sqrt(cc[Ls-2]);
ZoloHiInv =1.0/zolo_hi;
dw_diag = (4.0-this->M5)*ZoloHiInv;
See.resize(Ls);
Aee.resize(Ls);
int sign=1;
for(int s=0;s<Ls;s++){
Aee[s] = sign * Beta[s] * dw_diag;
sign = - sign;
}
Aee[Ls-1] += R;
See[0] = Aee[0];
for(int s=1;s<Ls;s++){
See[s] = Aee[s] - 1.0/See[s-1];
}
for(int s=0;s<Ls;s++){
std::cout<<GridLogMessage <<"s = "<<s<<" Beta "<<Beta[s]<<" Aee "<<Aee[s] <<" See "<<See[s] <<std::endl;
}
}
template<class Impl>
RealD ContinuedFractionFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;
FermionField D(psi._grid);
this->DW(psi,D,DaggerNo);
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==0 ) {
ag5xpby_ssp(chi,cc[0]*Beta[0]*sign*ZoloHiInv,D,sqrt_cc[0],psi,s,s+1); // Multiplies Dw by G5 so Hw
} else if ( s==(Ls-1) ){
RealD R=(1.0+mass)/(1.0-mass);
ag5xpby_ssp(chi,Beta[s]*ZoloHiInv,D,sqrt_cc[s-1],psi,s,s-1);
ag5xpby_ssp(chi,R,psi,1.0,chi,s,s);
} else {
ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*ZoloHiInv,D,sqrt_cc[s],psi,s,s+1);
axpby_ssp(chi,1.0,chi,sqrt_cc[s-1],psi,s,s-1);
}
sign=-sign;
}
return norm2(chi);
}
template<class Impl>
RealD ContinuedFractionFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
{
// This matrix is already hermitian. (g5 Dw) = Dw dag g5 = (g5 Dw)dag
// The rest of matrix is symmetric.
// Can ignore "dag"
return M(psi,chi);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
int Ls = this->Ls;
this->DhopDir(psi,chi,dir,disp); // Dslash on diagonal. g5 Dslash is hermitian
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(chi,Beta[s]*ZoloHiInv,chi,0.0,chi,s,s);
} else {
ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*ZoloHiInv,chi,0.0,chi,s,s);
}
sign=-sign;
}
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;
// Apply 4d dslash
if ( psi.checkerboard == Odd ) {
this->DhopEO(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
} else {
this->DhopOE(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
}
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(chi,Beta[s]*ZoloHiInv,chi,0.0,chi,s,s);
} else {
ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*ZoloHiInv,chi,0.0,chi,s,s);
}
sign=-sign;
}
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
{
this->Meooe(psi,chi);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==0 ) {
ag5xpby_ssp(chi,cc[0]*Beta[0]*sign*dw_diag,psi,sqrt_cc[0],psi,s,s+1); // Multiplies Dw by G5 so Hw
} else if ( s==(Ls-1) ){
// Drop the CC here.
double R=(1+mass)/(1-mass);
ag5xpby_ssp(chi,Beta[s]*dw_diag,psi,sqrt_cc[s-1],psi,s,s-1);
ag5xpby_ssp(chi,R,psi,1.0,chi,s,s);
} else {
ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*dw_diag,psi,sqrt_cc[s],psi,s,s+1);
axpby_ssp(chi,1.0,chi,sqrt_cc[s-1],psi,s,s-1);
}
sign=-sign;
}
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
{
this->Mooee(psi,chi);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;
// Apply Linv
axpby_ssp(chi,1.0/cc_d[0],psi,0.0,psi,0,0);
for(int s=1;s<Ls;s++){
axpbg5y_ssp(chi,1.0/cc_d[s],psi,-1.0/See[s-1],chi,s,s-1);
}
// Apply Dinv
for(int s=0;s<Ls;s++){
ag5xpby_ssp(chi,1.0/See[s],chi,0.0,chi,s,s); //only appearance of See[0]
}
// Apply Uinv = (Linv)^T
axpby_ssp(chi,1.0/cc_d[Ls-1],chi,0.0,chi,Ls-1,Ls-1);
for(int s=Ls-2;s>=0;s--){
axpbg5y_ssp(chi,1.0/cc_d[s],chi,-1.0*cc_d[s+1]/See[s]/cc_d[s],chi,s,s+1);
}
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
{
this->MooeeInv(psi,chi);
}
// force terms; five routines; default to Dhop on diagonal
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V._grid);
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(D,Beta[s]*ZoloHiInv,U,0.0,U,s,s);
} else {
ag5xpby_ssp(D,cc[s]*Beta[s]*sign*ZoloHiInv,U,0.0,U,s,s);
}
sign=-sign;
}
this->DhopDeriv(mat,D,V,DaggerNo);
};
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V._grid);
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(D,Beta[s]*ZoloHiInv,U,0.0,U,s,s);
} else {
ag5xpby_ssp(D,cc[s]*Beta[s]*sign*ZoloHiInv,U,0.0,U,s,s);
}
sign=-sign;
}
this->DhopDerivOE(mat,D,V,DaggerNo);
};
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V._grid);
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(D,Beta[s]*ZoloHiInv,U,0.0,U,s,s);
} else {
ag5xpby_ssp(D,cc[s]*Beta[s]*sign*ZoloHiInv,U,0.0,U,s,s);
}
sign=-sign;
}
this->DhopDerivEO(mat,D,V,DaggerNo);
};
// Constructors
template<class Impl>
ContinuedFractionFermion5D<Impl>::ContinuedFractionFermion5D(
GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5,const ImplParams &p) :
WilsonFermion5D<Impl>(_Umu,
FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid,M5,p),
mass(_mass)
{
int Ls = this->Ls;
assert((Ls&0x1)==1); // Odd Ls required
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d)
{
int Ls = this->Ls;
conformable(solution5d._grid,this->FermionGrid());
conformable(exported4d._grid,this->GaugeGrid());
ExtractSlice(exported4d, solution5d, Ls-1, Ls-1);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d)
{
int Ls = this->Ls;
conformable(imported5d._grid,this->FermionGrid());
conformable(input4d._grid ,this->GaugeGrid());
FermionField tmp(this->FermionGrid());
tmp=zero;
InsertSlice(input4d, tmp, Ls-1, Ls-1);
tmp=Gamma(Gamma::Algebra::Gamma5)*tmp;
this->Dminus(tmp,imported5d);
}
FermOpTemplateInstantiate(ContinuedFractionFermion5D);
}
}

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,46 +24,44 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_CONTINUED_FRACTION_H
#define GRID_QCD_CONTINUED_FRACTION_H
#include <Grid/qcd/action/fermion/WilsonFermion5D.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class ContinuedFractionFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class ContinuedFractionFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
// override multiply
virtual RealD M (const FermionField &in, FermionField &out);
virtual RealD Mdag (const FermionField &in, FermionField &out);
// override multiply
virtual RealD M (const FermionField &in, FermionField &out);
virtual RealD Mdag (const FermionField &in, FermionField &out);
// half checkerboard operaions
virtual void Meooe (const FermionField &in, FermionField &out);
virtual void MeooeDag (const FermionField &in, FermionField &out);
virtual void Mooee (const FermionField &in, FermionField &out);
virtual void MooeeDag (const FermionField &in, FermionField &out);
virtual void MooeeInv (const FermionField &in, FermionField &out);
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
// half checkerboard operaions
virtual void Meooe (const FermionField &in, FermionField &out);
virtual void MeooeDag (const FermionField &in, FermionField &out);
virtual void Mooee (const FermionField &in, FermionField &out);
virtual void MooeeDag (const FermionField &in, FermionField &out);
virtual void MooeeInv (const FermionField &in, FermionField &out);
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// virtual void Instantiatable(void)=0;
virtual void Instantiatable(void) =0;
// virtual void Instantiatable(void)=0;
virtual void Instantiatable(void) =0;
// Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
// Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
///////////////////////////////////////////////////////////////
// Physical surface field utilities
@ -73,35 +71,34 @@ namespace Grid {
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d);
// Constructors
ContinuedFractionFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5,const ImplParams &p= ImplParams());
// Constructors
ContinuedFractionFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5,const ImplParams &p= ImplParams());
protected:
protected:
void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale);
void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata);;
void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale);
void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata);;
// Cont frac
RealD dw_diag;
RealD mass;
RealD R;
RealD ZoloHiInv;
std::vector<double> Beta;
std::vector<double> cc;;
std::vector<double> cc_d;;
std::vector<double> sqrt_cc;
std::vector<double> See;
std::vector<double> Aee;
// Cont frac
RealD dw_diag;
RealD mass;
RealD R;
RealD ZoloHiInv;
Vector<double> Beta;
Vector<double> cc;;
Vector<double> cc_d;;
Vector<double> sqrt_cc;
Vector<double> See;
Vector<double> Aee;
};
};
}
}
NAMESPACE_END(Grid);
#endif

View File

@ -1,438 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
namespace Grid {
namespace QCD {
template<class Impl>
DomainWallEOFAFermion<Impl>::DomainWallEOFAFermion(
GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3,
RealD _shift, int _pm, RealD _M5, const ImplParams &p) :
AbstractEOFAFermion<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3,
_shift, _pm, _M5, 1.0, 0.0, p)
{
RealD eps = 1.0;
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);
assert(zdata->n == this->Ls);
std::cout << GridLogMessage << "DomainWallEOFAFermion with Ls=" << this->Ls << std::endl;
this->SetCoefficientsTanh(zdata, 1.0, 0.0);
Approx::zolotarev_free(zdata);
}
/***************************************************************
* Additional EOFA operators only called outside the inverter.
* Since speed is not essential, simple axpby-style
* implementations should be fine.
***************************************************************/
template<class Impl>
void DomainWallEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din, int sign, int dag)
{
int Ls = this->Ls;
Din = zero;
if((sign == 1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, Ls-1, 0); }
else if((sign == -1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); }
else if((sign == 1 ) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, Ls-1); }
else if((sign == -1) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); }
}
// This is just the identity for DWF
template<class Impl>
void DomainWallEOFAFermion<Impl>::Dtilde(const FermionField& psi, FermionField& chi){ chi = psi; }
// This is just the identity for DWF
template<class Impl>
void DomainWallEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& chi){ chi = psi; }
/*****************************************************************************************************/
template<class Impl>
RealD DomainWallEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
FermionField Din(psi._grid);
this->Meooe5D(psi, Din);
this->DW(Din, chi, DaggerNo);
axpby(chi, 1.0, 1.0, chi, psi);
this->M5D(psi, chi);
return(norm2(chi));
}
template<class Impl>
RealD DomainWallEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
FermionField Din(psi._grid);
this->DW(psi, Din, DaggerYes);
this->MeooeDag5D(Din, chi);
this->M5Ddag(psi, chi);
axpby(chi, 1.0, 1.0, chi, psi);
return(norm2(chi));
}
/********************************************************************
* Performance critical fermion operators called inside the inverter
********************************************************************/
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
int pm = this->pm;
RealD shift = this->shift;
RealD mq1 = this->mq1;
RealD mq2 = this->mq2;
RealD mq3 = this->mq3;
// coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} )
Coeff_t shiftp(0.0), shiftm(0.0);
if(shift != 0.0){
if(pm == 1){ shiftp = shift*(mq3-mq2); }
else{ shiftm = -shift*(mq3-mq2); }
}
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftm;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = mq1 + shiftp;
#if(0)
std::cout << GridLogMessage << "DomainWallEOFAFermion::M5D(FF&,FF&):" << std::endl;
for(int i=0; i<diag.size(); ++i){
std::cout << GridLogMessage << "diag[" << i << "] =" << diag[i] << std::endl;
}
for(int i=0; i<upper.size(); ++i){
std::cout << GridLogMessage << "upper[" << i << "] =" << upper[i] << std::endl;
}
for(int i=0; i<lower.size(); ++i){
std::cout << GridLogMessage << "lower[" << i << "] =" << lower[i] << std::endl;
}
#endif
this->M5D(psi, chi, chi, lower, diag, upper);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
int pm = this->pm;
RealD shift = this->shift;
RealD mq1 = this->mq1;
RealD mq2 = this->mq2;
RealD mq3 = this->mq3;
// coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} )
Coeff_t shiftp(0.0), shiftm(0.0);
if(shift != 0.0){
if(pm == 1){ shiftp = shift*(mq3-mq2); }
else{ shiftm = -shift*(mq3-mq2); }
}
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftp;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = mq1 + shiftm;
#if(0)
std::cout << GridLogMessage << "DomainWallEOFAFermion::M5Ddag(FF&,FF&):" << std::endl;
for(int i=0; i<diag.size(); ++i){
std::cout << GridLogMessage << "diag[" << i << "] =" << diag[i] << std::endl;
}
for(int i=0; i<upper.size(); ++i){
std::cout << GridLogMessage << "upper[" << i << "] =" << upper[i] << std::endl;
}
for(int i=0; i<lower.size(); ++i){
std::cout << GridLogMessage << "lower[" << i << "] =" << lower[i] << std::endl;
}
#endif
this->M5Ddag(psi, chi, chi, lower, diag, upper);
}
// half checkerboard operations
template<class Impl>
void DomainWallEOFAFermion<Impl>::Mooee(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
std::vector<Coeff_t> diag = this->bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
upper[s] = -this->cee[s];
lower[s] = -this->cee[s];
}
upper[Ls-1] = this->dm;
lower[0] = this->dp;
this->M5D(psi, psi, chi, lower, diag, upper);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeDag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
std::vector<Coeff_t> diag = this->bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
upper[s] = -this->cee[s];
lower[s] = -this->cee[s];
}
upper[Ls-1] = this->dp;
lower[0] = this->dm;
this->M5Ddag(psi, psi, chi, lower, diag, upper);
}
/****************************************************************************************/
//Zolo
template<class Impl>
void DomainWallEOFAFermion<Impl>::SetCoefficientsInternal(RealD zolo_hi, std::vector<Coeff_t>& gamma, RealD b, RealD c)
{
int Ls = this->Ls;
int pm = this->pm;
RealD mq1 = this->mq1;
RealD mq2 = this->mq2;
RealD mq3 = this->mq3;
RealD shift = this->shift;
////////////////////////////////////////////////////////
// Constants for the preconditioned matrix Cayley form
////////////////////////////////////////////////////////
this->bs.resize(Ls);
this->cs.resize(Ls);
this->aee.resize(Ls);
this->aeo.resize(Ls);
this->bee.resize(Ls);
this->beo.resize(Ls);
this->cee.resize(Ls);
this->ceo.resize(Ls);
for(int i=0; i<Ls; ++i){
this->bee[i] = 4.0 - this->M5 + 1.0;
this->cee[i] = 1.0;
}
for(int i=0; i<Ls; ++i){
this->aee[i] = this->cee[i];
this->bs[i] = this->beo[i] = 1.0;
this->cs[i] = this->ceo[i] = 0.0;
}
//////////////////////////////////////////
// EOFA shift terms
//////////////////////////////////////////
if(pm == 1){
this->dp = mq1*this->cee[0] + shift*(mq3-mq2);
this->dm = mq1*this->cee[Ls-1];
} else if(this->pm == -1) {
this->dp = mq1*this->cee[0];
this->dm = mq1*this->cee[Ls-1] - shift*(mq3-mq2);
} else {
this->dp = mq1*this->cee[0];
this->dm = mq1*this->cee[Ls-1];
}
//////////////////////////////////////////
// LDU decomposition of eeoo
//////////////////////////////////////////
this->dee.resize(Ls+1);
this->lee.resize(Ls);
this->leem.resize(Ls);
this->uee.resize(Ls);
this->ueem.resize(Ls);
for(int i=0; i<Ls; ++i){
if(i < Ls-1){
this->lee[i] = -this->cee[i+1]/this->bee[i]; // sub-diag entry on the ith column
this->leem[i] = this->dm/this->bee[i];
for(int j=0; j<i; j++){ this->leem[i] *= this->aee[j]/this->bee[j]; }
this->dee[i] = this->bee[i];
this->uee[i] = -this->aee[i]/this->bee[i]; // up-diag entry on the ith row
this->ueem[i] = this->dp / this->bee[0];
for(int j=1; j<=i; j++){ this->ueem[i] *= this->cee[j]/this->bee[j]; }
} else {
this->lee[i] = 0.0;
this->leem[i] = 0.0;
this->uee[i] = 0.0;
this->ueem[i] = 0.0;
}
}
{
Coeff_t delta_d = 1.0 / this->bee[0];
for(int j=1; j<Ls-1; j++){ delta_d *= this->cee[j] / this->bee[j]; }
this->dee[Ls-1] = this->bee[Ls-1] + this->cee[0] * this->dm * delta_d;
this->dee[Ls] = this->bee[Ls-1] + this->cee[Ls-1] * this->dp * delta_d;
}
int inv = 1;
this->MooeeInternalCompute(0, inv, this->MatpInv, this->MatmInv);
this->MooeeInternalCompute(1, inv, this->MatpInvDag, this->MatmInvDag);
}
// Recompute Cayley-form coefficients for different shift
template<class Impl>
void DomainWallEOFAFermion<Impl>::RefreshShiftCoefficients(RealD new_shift)
{
this->shift = new_shift;
Approx::zolotarev_data *zdata = Approx::higham(1.0, this->Ls);
this->SetCoefficientsTanh(zdata, 1.0, 0.0);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternalCompute(int dag, int inv,
Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
int Ls = this->Ls;
GridBase* grid = this->FermionRedBlackGrid();
int LLs = grid->_rdimensions[0];
if(LLs == Ls){ return; } // Not vectorised in 5th direction
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
for(int s=0; s<Ls; s++){
Pplus(s,s) = this->bee[s];
Pminus(s,s) = this->bee[s];
}
for(int s=0; s<Ls-1; s++){
Pminus(s,s+1) = -this->cee[s];
}
for(int s=0; s<Ls-1; s++){
Pplus(s+1,s) = -this->cee[s+1];
}
Pplus (0,Ls-1) = this->dp;
Pminus(Ls-1,0) = this->dm;
Eigen::MatrixXcd PplusMat ;
Eigen::MatrixXcd PminusMat;
#if(0)
std::cout << GridLogMessage << "Pplus:" << std::endl;
for(int s=0; s<Ls; ++s){
for(int ss=0; ss<Ls; ++ss){
std::cout << Pplus(s,ss) << "\t";
}
std::cout << std::endl;
}
std::cout << GridLogMessage << "Pminus:" << std::endl;
for(int s=0; s<Ls; ++s){
for(int ss=0; ss<Ls; ++ss){
std::cout << Pminus(s,ss) << "\t";
}
std::cout << std::endl;
}
#endif
if(inv) {
PplusMat = Pplus.inverse();
PminusMat = Pminus.inverse();
} else {
PplusMat = Pplus;
PminusMat = Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
typedef typename SiteHalfSpinor::scalar_type scalar_type;
const int Nsimd = Simd::Nsimd();
Matp.resize(Ls*LLs);
Matm.resize(Ls*LLs);
for(int s2=0; s2<Ls; s2++){
for(int s1=0; s1<LLs; s1++){
int istride = LLs;
int ostride = 1;
Simd Vp;
Simd Vm;
scalar_type *sp = (scalar_type*) &Vp;
scalar_type *sm = (scalar_type*) &Vm;
for(int l=0; l<Nsimd; l++){
if(switcheroo<Coeff_t>::iscomplex()) {
sp[l] = PplusMat (l*istride+s1*ostride,s2);
sm[l] = PminusMat(l*istride+s1*ostride,s2);
} else {
// if real
scalar_type tmp;
tmp = PplusMat (l*istride+s1*ostride,s2);
sp[l] = scalar_type(tmp.real(),tmp.real());
tmp = PminusMat(l*istride+s1*ostride,s2);
sm[l] = scalar_type(tmp.real(),tmp.real());
}
}
Matp[LLs*s2+s1] = Vp;
Matm[LLs*s2+s1] = Vm;
}}
}
FermOpTemplateInstantiate(DomainWallEOFAFermion);
GparityFermOpTemplateInstantiate(DomainWallEOFAFermion);
}}

View File

@ -26,90 +26,65 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_DOMAIN_WALL_EOFA_FERMION_H
#define GRID_QCD_DOMAIN_WALL_EOFA_FERMION_H
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/AbstractEOFAFermion.h>
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template<class Impl>
class DomainWallEOFAFermion : public AbstractEOFAFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
template<class Impl>
class DomainWallEOFAFermion : public AbstractEOFAFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
// Modified (0,Ls-1) and (Ls-1,0) elements of Mooee
// for red-black preconditioned Shamir EOFA
Coeff_t dm;
Coeff_t dp;
public:
// Modified (0,Ls-1) and (Ls-1,0) elements of Mooee
// for red-black preconditioned Shamir EOFA
Coeff_t dm;
Coeff_t dp;
virtual void Instantiatable(void) {};
virtual void Instantiatable(void) {};
// EOFA-specific operations
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag);
virtual void Dtilde (const FermionField& in, FermionField& out);
virtual void DtildeInv (const FermionField& in, FermionField& out);
// EOFA-specific operations
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag);
virtual void Dtilde (const FermionField& in, FermionField& out);
virtual void DtildeInv (const FermionField& in, FermionField& out);
// override multiply
virtual RealD M (const FermionField& in, FermionField& out);
virtual RealD Mdag (const FermionField& in, FermionField& out);
// override multiply
virtual RealD M (const FermionField& in, FermionField& out);
virtual RealD Mdag (const FermionField& in, FermionField& out);
// half checkerboard operations
virtual void Mooee (const FermionField& in, FermionField& out);
virtual void MooeeDag (const FermionField& in, FermionField& out);
virtual void MooeeInv (const FermionField& in, FermionField& out);
virtual void MooeeInvDag(const FermionField& in, FermionField& out);
// half checkerboard operations
virtual void Mooee (const FermionField& in, FermionField& out);
virtual void MooeeDag (const FermionField& in, FermionField& out);
virtual void MooeeInv (const FermionField& in, FermionField& out);
virtual void MooeeInvDag(const FermionField& in, FermionField& out);
virtual void M5D (const FermionField& psi, FermionField& chi);
virtual void M5Ddag (const FermionField& psi, FermionField& chi);
virtual void M5D (const FermionField& psi, FermionField& chi);
virtual void M5Ddag (const FermionField& psi, FermionField& chi);
/////////////////////////////////////////////////////
// Instantiate different versions depending on Impl
/////////////////////////////////////////////////////
void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
/////////////////////////////////////////////////////
// Instantiate different versions depending on Impl
/////////////////////////////////////////////////////
void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,
Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper);
void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,
Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper);
void MooeeInternal(const FermionField& in, FermionField& out, int dag, int inv);
virtual void RefreshShiftCoefficients(RealD new_shift);
void MooeeInternalCompute(int dag, int inv, Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
// Constructors
DomainWallEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm,
RealD _M5, const ImplParams& p=ImplParams());
void MooeeInternalAsm(const FermionField& in, FermionField& out, int LLs, int site,
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
protected:
void SetCoefficientsInternal(RealD zolo_hi, Vector<Coeff_t>& gamma, RealD b, RealD c);
};
void MooeeInternalZAsm(const FermionField& in, FermionField& out, int LLs, int site,
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
NAMESPACE_END(Grid);
virtual void RefreshShiftCoefficients(RealD new_shift);
// Constructors
DomainWallEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm,
RealD _M5, const ImplParams& p=ImplParams());
protected:
void SetCoefficientsInternal(RealD zolo_hi, std::vector<Coeff_t>& gamma, RealD b, RealD c);
};
}}
#define INSTANTIATE_DPERP_DWF_EOFA(A)\
template void DomainWallEOFAFermion<A>::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, \
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
template void DomainWallEOFAFermion<A>::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, \
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
template void DomainWallEOFAFermion<A>::MooeeInv(const FermionField& psi, FermionField& chi); \
template void DomainWallEOFAFermion<A>::MooeeInvDag(const FermionField& psi, FermionField& chi);
#undef DOMAIN_WALL_EOFA_DPERP_DENSE
#define DOMAIN_WALL_EOFA_DPERP_CACHE
#undef DOMAIN_WALL_EOFA_DPERP_LINALG
#define DOMAIN_WALL_EOFA_DPERP_VEC
#endif

View File

@ -1,248 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
namespace Grid {
namespace QCD {
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus fowards
// Pplus backwards..
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
int Ls = this->Ls;
GridBase* grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
for(int s=0; s<Ls; s++){
auto tmp = psi._odata[0];
if(s==0) {
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5m(tmp, psi._odata[ss+0]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
}
}
this->M5Dtime += usecond();
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
int Ls = this->Ls;
GridBase* grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard=psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
auto tmp = psi._odata[0];
for(int s=0; s<Ls; s++){
if(s==0) {
spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5p(tmp, psi._odata[ss+0]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
}
}
this->M5Dtime += usecond();
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
GridBase* grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard;
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
auto tmp1 = psi._odata[0];
auto tmp2 = psi._odata[0];
// flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops
// Apply (L^{\prime})^{-1}
chi[ss] = psi[ss]; // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
spProj5p(tmp1, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp1;
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp1, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp1;
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp1, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls])*tmp1;
}
spProj5m(tmp2, chi[ss+Ls-1]);
chi[ss+Ls-1] = (1.0/this->dee[Ls])*tmp1 + (1.0/this->dee[Ls-1])*tmp2;
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
spProj5m(tmp1, chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1;
}
}
this->MooeeInvTime += usecond();
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
GridBase* grid = psi._grid;
int Ls = this->Ls;
assert(psi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
std::vector<Coeff_t> ueec(Ls);
std::vector<Coeff_t> deec(Ls+1);
std::vector<Coeff_t> leec(Ls);
std::vector<Coeff_t> ueemc(Ls);
std::vector<Coeff_t> leemc(Ls);
for(int s=0; s<ueec.size(); s++){
ueec[s] = conjugate(this->uee[s]);
deec[s] = conjugate(this->dee[s]);
leec[s] = conjugate(this->lee[s]);
ueemc[s] = conjugate(this->ueem[s]);
leemc[s] = conjugate(this->leem[s]);
}
deec[Ls] = conjugate(this->dee[Ls]);
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
auto tmp1 = psi._odata[0];
auto tmp2 = psi._odata[0];
// Apply (U^{\prime})^{-dagger}
chi[ss] = psi[ss];
for(int s=1; s<Ls; s++){
spProj5m(tmp1, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - ueec[s-1]*tmp1;
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp1, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - ueemc[s]*tmp1;
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp1, chi[ss+Ls-1]);
chi[ss+s] = (1.0/deec[s])*chi[ss+s] - (leemc[s]/deec[Ls-1])*tmp1;
}
spProj5p(tmp2, chi[ss+Ls-1]);
chi[ss+Ls-1] = (1.0/deec[Ls-1])*tmp1 + (1.0/deec[Ls])*tmp2;
// Apply L^{-dagger}
for(int s=Ls-2; s>=0; s--){
spProj5p(tmp1, chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - leec[s]*tmp1;
}
}
this->MooeeInvTime += usecond();
}
#ifdef DOMAIN_WALL_EOFA_DPERP_CACHE
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
#endif
}}

View File

@ -1,159 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
namespace Grid {
namespace QCD {
/*
* Dense matrix versions of routines
*/
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{
int Ls = this->Ls;
int LLs = psi._grid->_rdimensions[0];
int vol = psi._grid->oSites()/LLs;
chi.checkerboard = psi.checkerboard;
assert(Ls==LLs);
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
for(int s=0;s<Ls;s++){
Pplus(s,s) = this->bee[s];
Pminus(s,s) = this->bee[s];
}
for(int s=0; s<Ls-1; s++){
Pminus(s,s+1) = -this->cee[s];
}
for(int s=0; s<Ls-1; s++){
Pplus(s+1,s) = -this->cee[s+1];
}
Pplus (0,Ls-1) = this->dp;
Pminus(Ls-1,0) = this->dm;
Eigen::MatrixXd PplusMat ;
Eigen::MatrixXd PminusMat;
if(inv) {
PplusMat = Pplus.inverse();
PminusMat = Pminus.inverse();
} else {
PplusMat = Pplus;
PminusMat = Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
// For the non-vectorised s-direction this is simple
for(auto site=0; site<vol; site++){
SiteSpinor SiteChi;
SiteHalfSpinor SitePplus;
SiteHalfSpinor SitePminus;
for(int s1=0; s1<Ls; s1++){
SiteChi = zero;
for(int s2=0; s2<Ls; s2++){
int lex2 = s2 + Ls*site;
if(PplusMat(s1,s2) != 0.0){
spProj5p(SitePplus,psi[lex2]);
accumRecon5p(SiteChi, PplusMat(s1,s2)*SitePplus);
}
if(PminusMat(s1,s2) != 0.0){
spProj5m(SitePminus, psi[lex2]);
accumRecon5m(SiteChi, PminusMat(s1,s2)*SitePminus);
}
}
chi[s1+Ls*site] = SiteChi*0.5;
}
}
}
#ifdef DOMAIN_WALL_EOFA_DPERP_DENSE
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
template void DomainWallEOFAFermion<GparityWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<GparityWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
template void DomainWallEOFAFermion<GparityWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<GparityWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
#endif
}}

View File

@ -1,168 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
namespace Grid {
namespace QCD {
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus fowards
// Pplus backwards
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard = psi.checkerboard;
int Ls = this->Ls;
FermionField tmp(psi._grid);
// Apply (L^{\prime})^{-1}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls], chi, s, Ls-1);
}
axpby_ssp_pminus(tmp, czero, chi, one/this->dee[Ls-1], chi, Ls-1, Ls-1);
axpby_ssp_pplus(chi, one, tmp, one/this->dee[Ls], chi, Ls-1, Ls-1);
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls]
}
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard = psi.checkerboard;
int Ls = this->Ls;
FermionField tmp(psi._grid);
// Apply (U^{\prime})^{-dagger}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
}
axpby_ssp_pminus(tmp, czero, chi, one/conjugate(this->dee[Ls-1]), chi, Ls-1, Ls-1);
axpby_ssp_pplus(chi, one, tmp, one/conjugate(this->dee[Ls]), chi, Ls-1, Ls-1);
// Apply L^{-dagger}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls]
}
}
#ifdef DOMAIN_WALL_EOFA_DPERP_LINALG
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
#endif
}}

View File

@ -1,605 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
namespace Grid {
namespace QCD {
/*
* Dense matrix versions of routines
*/
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
GridBase* grid = psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
const int nsimd = Simd::Nsimd();
Vector<iSinglet<Simd> > u(LLs);
Vector<iSinglet<Simd> > l(LLs);
Vector<iSinglet<Simd> > d(LLs);
assert(Ls/LLs == nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
scalar_type* l_p = (scalar_type*) &l[0];
scalar_type* d_p = (scalar_type*) &d[0];
for(int o=0;o<LLs;o++){ // outer
for(int i=0;i<nsimd;i++){ //inner
int s = o + i*LLs;
int ss = o*nsimd + i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
this->M5Dcalls++;
this->M5Dtime -= usecond();
assert(Nc == 3);
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0; v<LLs; v++){
int vp = (v+1)%LLs;
int vm = (v+LLs-1)%LLs;
spProj5m(hp, psi[ss+vp]);
spProj5p(hm, psi[ss+vm]);
if (vp <= v){ rotate(hp, hp, 1); }
if (vm >= v){ rotate(hm, hm, nsimd-1); }
hp = 0.5*hp;
hm = 0.5*hm;
spRecon5m(fp, hp);
spRecon5p(fm, hm);
chi[ss+v] = d[v]*phi[ss+v];
chi[ss+v] = chi[ss+v] + u[v]*fp;
chi[ss+v] = chi[ss+v] + l[v]*fm;
}
#else
for(int v=0; v<LLs; v++){
vprefetch(psi[ss+v+LLs]);
int vp = (v==LLs-1) ? 0 : v+1;
int vm = (v==0) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(2)(0);
Simd hp_01 = psi[ss+vp]()(2)(1);
Simd hp_02 = psi[ss+vp]()(2)(2);
Simd hp_10 = psi[ss+vp]()(3)(0);
Simd hp_11 = psi[ss+vp]()(3)(1);
Simd hp_12 = psi[ss+vp]()(3)(2);
Simd hm_00 = psi[ss+vm]()(0)(0);
Simd hm_01 = psi[ss+vm]()(0)(1);
Simd hm_02 = psi[ss+vm]()(0)(2);
Simd hm_10 = psi[ss+vm]()(1)(0);
Simd hm_11 = psi[ss+vm]()(1)(1);
Simd hm_12 = psi[ss+vm]()(1)(2);
if(vp <= v){
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if(vm >= v){
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
// Can force these to real arithmetic and save 2x.
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
vstream(chi[ss+v]()(0)(0), p_00);
vstream(chi[ss+v]()(0)(1), p_01);
vstream(chi[ss+v]()(0)(2), p_02);
vstream(chi[ss+v]()(1)(0), p_10);
vstream(chi[ss+v]()(1)(1), p_11);
vstream(chi[ss+v]()(1)(2), p_12);
vstream(chi[ss+v]()(2)(0), p_20);
vstream(chi[ss+v]()(2)(1), p_21);
vstream(chi[ss+v]()(2)(2), p_22);
vstream(chi[ss+v]()(3)(0), p_30);
vstream(chi[ss+v]()(3)(1), p_31);
vstream(chi[ss+v]()(3)(2), p_32);
}
#endif
}
this->M5Dtime += usecond();
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
GridBase* grid = psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
int nsimd = Simd::Nsimd();
Vector<iSinglet<Simd> > u(LLs);
Vector<iSinglet<Simd> > l(LLs);
Vector<iSinglet<Simd> > d(LLs);
assert(Ls/LLs == nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
scalar_type* l_p = (scalar_type*) &l[0];
scalar_type* d_p = (scalar_type*) &d[0];
for(int o=0; o<LLs; o++){ // outer
for(int i=0; i<nsimd; i++){ //inner
int s = o + i*LLs;
int ss = o*nsimd + i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0; v<LLs; v++){
int vp = (v+1)%LLs;
int vm = (v+LLs-1)%LLs;
spProj5p(hp, psi[ss+vp]);
spProj5m(hm, psi[ss+vm]);
if(vp <= v){ rotate(hp, hp, 1); }
if(vm >= v){ rotate(hm, hm, nsimd-1); }
hp = hp*0.5;
hm = hm*0.5;
spRecon5p(fp, hp);
spRecon5m(fm, hm);
chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
chi[ss+v] = chi[ss+v] +l[v]*fm;
}
#else
for(int v=0; v<LLs; v++){
vprefetch(psi[ss+v+LLs]);
int vp = (v == LLs-1) ? 0 : v+1;
int vm = (v == 0 ) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(0)(0);
Simd hp_01 = psi[ss+vp]()(0)(1);
Simd hp_02 = psi[ss+vp]()(0)(2);
Simd hp_10 = psi[ss+vp]()(1)(0);
Simd hp_11 = psi[ss+vp]()(1)(1);
Simd hp_12 = psi[ss+vp]()(1)(2);
Simd hm_00 = psi[ss+vm]()(2)(0);
Simd hm_01 = psi[ss+vm]()(2)(1);
Simd hm_02 = psi[ss+vm]()(2)(2);
Simd hm_10 = psi[ss+vm]()(3)(0);
Simd hm_11 = psi[ss+vm]()(3)(1);
Simd hm_12 = psi[ss+vm]()(3)(2);
if (vp <= v){
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if(vm >= v){
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
vstream(chi[ss+v]()(0)(0), p_00);
vstream(chi[ss+v]()(0)(1), p_01);
vstream(chi[ss+v]()(0)(2), p_02);
vstream(chi[ss+v]()(1)(0), p_10);
vstream(chi[ss+v]()(1)(1), p_11);
vstream(chi[ss+v]()(1)(2), p_12);
vstream(chi[ss+v]()(2)(0), p_20);
vstream(chi[ss+v]()(2)(1), p_21);
vstream(chi[ss+v]()(2)(2), p_22);
vstream(chi[ss+v]()(3)(0), p_30);
vstream(chi[ss+v]()(3)(1), p_31);
vstream(chi[ss+v]()(3)(2), p_32);
}
#endif
}
this->M5Dtime += usecond();
}
#ifdef AVX512
#include<simd/Intel512common.h>
#include<simd/Intel512avx.h>
#include<simd/Intel512single.h>
#endif
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
#ifndef AVX512
{
SiteHalfSpinor BcastP;
SiteHalfSpinor BcastM;
SiteHalfSpinor SiteChiP;
SiteHalfSpinor SiteChiM;
// Ls*Ls * 2 * 12 * vol flops
for(int s1=0; s1<LLs; s1++){
for(int s2=0; s2<LLs; s2++){
for(int l=0; l < Simd::Nsimd(); l++){ // simd lane
int s = s2 + l*LLs;
int lex = s2 + LLs*site;
if( s2==0 && l==0 ){
SiteChiP=zero;
SiteChiM=zero;
}
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
vbroadcast(BcastP()(sp)(co), psi[lex]()(sp)(co), l);
}}
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
vbroadcast(BcastM()(sp)(co), psi[lex]()(sp+2)(co), l);
}}
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
SiteChiP()(sp)(co) = real_madd(Matp[LLs*s+s1]()()(), BcastP()(sp)(co), SiteChiP()(sp)(co)); // 1100 us.
SiteChiM()(sp)(co) = real_madd(Matm[LLs*s+s1]()()(), BcastM()(sp)(co), SiteChiM()(sp)(co)); // each found by commenting out
}}
}}
{
int lex = s1 + LLs*site;
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
}}
}
}
}
#else
{
// pointers
// MASK_REGS;
#define Chi_00 %%zmm1
#define Chi_01 %%zmm2
#define Chi_02 %%zmm3
#define Chi_10 %%zmm4
#define Chi_11 %%zmm5
#define Chi_12 %%zmm6
#define Chi_20 %%zmm7
#define Chi_21 %%zmm8
#define Chi_22 %%zmm9
#define Chi_30 %%zmm10
#define Chi_31 %%zmm11
#define Chi_32 %%zmm12
#define BCAST0 %%zmm13
#define BCAST1 %%zmm14
#define BCAST2 %%zmm15
#define BCAST3 %%zmm16
#define BCAST4 %%zmm17
#define BCAST5 %%zmm18
#define BCAST6 %%zmm19
#define BCAST7 %%zmm20
#define BCAST8 %%zmm21
#define BCAST9 %%zmm22
#define BCAST10 %%zmm23
#define BCAST11 %%zmm24
int incr = LLs*LLs*sizeof(iSinglet<Simd>);
for(int s1=0; s1<LLs; s1++){
for(int s2=0; s2<LLs; s2++){
int lex = s2 + LLs*site;
uint64_t a0 = (uint64_t) &Matp[LLs*s2+s1]; // should be cacheable
uint64_t a1 = (uint64_t) &Matm[LLs*s2+s1];
uint64_t a2 = (uint64_t) &psi[lex];
for(int l=0; l<Simd::Nsimd(); l++){ // simd lane
if((s2+l)==0) {
asm(
VPREFETCH1(0,%2) VPREFETCH1(0,%1)
VPREFETCH1(12,%2) VPREFETCH1(13,%2)
VPREFETCH1(14,%2) VPREFETCH1(15,%2)
VBCASTCDUP(0,%2,BCAST0)
VBCASTCDUP(1,%2,BCAST1)
VBCASTCDUP(2,%2,BCAST2)
VBCASTCDUP(3,%2,BCAST3)
VBCASTCDUP(4,%2,BCAST4) VMULMEM(0,%0,BCAST0,Chi_00)
VBCASTCDUP(5,%2,BCAST5) VMULMEM(0,%0,BCAST1,Chi_01)
VBCASTCDUP(6,%2,BCAST6) VMULMEM(0,%0,BCAST2,Chi_02)
VBCASTCDUP(7,%2,BCAST7) VMULMEM(0,%0,BCAST3,Chi_10)
VBCASTCDUP(8,%2,BCAST8) VMULMEM(0,%0,BCAST4,Chi_11)
VBCASTCDUP(9,%2,BCAST9) VMULMEM(0,%0,BCAST5,Chi_12)
VBCASTCDUP(10,%2,BCAST10) VMULMEM(0,%1,BCAST6,Chi_20)
VBCASTCDUP(11,%2,BCAST11) VMULMEM(0,%1,BCAST7,Chi_21)
VMULMEM(0,%1,BCAST8,Chi_22)
VMULMEM(0,%1,BCAST9,Chi_30)
VMULMEM(0,%1,BCAST10,Chi_31)
VMULMEM(0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
} else {
asm(
VBCASTCDUP(0,%2,BCAST0) VMADDMEM(0,%0,BCAST0,Chi_00)
VBCASTCDUP(1,%2,BCAST1) VMADDMEM(0,%0,BCAST1,Chi_01)
VBCASTCDUP(2,%2,BCAST2) VMADDMEM(0,%0,BCAST2,Chi_02)
VBCASTCDUP(3,%2,BCAST3) VMADDMEM(0,%0,BCAST3,Chi_10)
VBCASTCDUP(4,%2,BCAST4) VMADDMEM(0,%0,BCAST4,Chi_11)
VBCASTCDUP(5,%2,BCAST5) VMADDMEM(0,%0,BCAST5,Chi_12)
VBCASTCDUP(6,%2,BCAST6) VMADDMEM(0,%1,BCAST6,Chi_20)
VBCASTCDUP(7,%2,BCAST7) VMADDMEM(0,%1,BCAST7,Chi_21)
VBCASTCDUP(8,%2,BCAST8) VMADDMEM(0,%1,BCAST8,Chi_22)
VBCASTCDUP(9,%2,BCAST9) VMADDMEM(0,%1,BCAST9,Chi_30)
VBCASTCDUP(10,%2,BCAST10) VMADDMEM(0,%1,BCAST10,Chi_31)
VBCASTCDUP(11,%2,BCAST11) VMADDMEM(0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
}
a0 = a0 + incr;
a1 = a1 + incr;
a2 = a2 + sizeof(typename Simd::scalar_type);
}
}
{
int lexa = s1+LLs*site;
asm (
VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01) VSTORE(2 ,%0,Chi_02)
VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11) VSTORE(5 ,%0,Chi_12)
VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21) VSTORE(8 ,%0,Chi_22)
VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31) VSTORE(11,%0,Chi_32)
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
}
}
}
#undef Chi_00
#undef Chi_01
#undef Chi_02
#undef Chi_10
#undef Chi_11
#undef Chi_12
#undef Chi_20
#undef Chi_21
#undef Chi_22
#undef Chi_30
#undef Chi_31
#undef Chi_32
#undef BCAST0
#undef BCAST1
#undef BCAST2
#undef BCAST3
#undef BCAST4
#undef BCAST5
#undef BCAST6
#undef BCAST7
#undef BCAST8
#undef BCAST9
#undef BCAST10
#undef BCAST11
#endif
};
// Z-mobius version
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, FermionField& chi,
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
std::cout << "Error: zMobius not implemented for EOFA" << std::endl;
exit(-1);
};
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{
int Ls = this->Ls;
int LLs = psi._grid->_rdimensions[0];
int vol = psi._grid->oSites()/LLs;
chi.checkerboard = psi.checkerboard;
Vector<iSinglet<Simd> > Matp;
Vector<iSinglet<Simd> > Matm;
Vector<iSinglet<Simd> > *_Matp;
Vector<iSinglet<Simd> > *_Matm;
// MooeeInternalCompute(dag,inv,Matp,Matm);
if(inv && dag){
_Matp = &this->MatpInvDag;
_Matm = &this->MatmInvDag;
}
if(inv && (!dag)){
_Matp = &this->MatpInv;
_Matm = &this->MatmInv;
}
if(!inv){
MooeeInternalCompute(dag, inv, Matp, Matm);
_Matp = &Matp;
_Matm = &Matm;
}
assert(_Matp->size() == Ls*LLs);
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
if(switcheroo<Coeff_t>::iscomplex()){
parallel_for(auto site=0; site<vol; site++){
MooeeInternalZAsm(psi, chi, LLs, site, *_Matp, *_Matm);
}
} else {
parallel_for(auto site=0; site<vol; site++){
MooeeInternalAsm(psi, chi, LLs, site, *_Matp, *_Matm);
}
}
this->MooeeInvTime += usecond();
}
#ifdef DOMAIN_WALL_EOFA_DPERP_VEC
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplD);
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplF);
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplD);
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplF);
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplDF);
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplFH);
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplDF);
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplFH);
template void DomainWallEOFAFermion<DomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<DomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<DomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<DomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
#endif
}}

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -25,34 +25,33 @@ Author: Vera Guelpers <V.M.Guelpers@soton.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_DOMAIN_WALL_FERMION_H
#define GRID_QCD_DOMAIN_WALL_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class DomainWallFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class DomainWallFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary, std::vector<double> twist, bool fiveD) {
FermionField in_k(in.Grid());
FermionField prop_k(in.Grid());
void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary, std::vector<double> twist, bool fiveD) {
FermionField in_k(in._grid);
FermionField prop_k(in._grid);
FFT theFFT((GridCartesian *) in._grid);
FFT theFFT((GridCartesian *) in.Grid());
//phase for boundary condition
ComplexField coor(in._grid);
ComplexField ph(in._grid); ph = zero;
FermionField in_buf(in._grid); in_buf = zero;
ComplexField coor(in.Grid());
ComplexField ph(in.Grid()); ph = Zero();
FermionField in_buf(in.Grid()); in_buf = Zero();
typedef typename Simd::scalar_type Scalar;
Scalar ci(0.0,1.0);
assert(twist.size() == Nd);//check that twist is Nd
assert(boundary.size() == Nd);//check that boundary conditions is Nd
@ -63,13 +62,12 @@ namespace Grid {
// Shift coordinate lattice index by 1 to account for 5th dimension.
LatticeCoordinate(coor, nu + shift);
double boundary_phase = ::acos(real(boundary[nu]));
ph = ph + boundary_phase*coor*((1./(in._grid->_fdimensions[nu+shift])));
ph = ph + boundary_phase*coor*((1./(in.Grid()->_fdimensions[nu+shift])));
//momenta for propagator shifted by twist+boundary
twist[nu] = twist[nu] + boundary_phase/((2.0*M_PI));
}
in_buf = exp(ci*ph*(-1.0))*in;
if(fiveD){//FFT only on temporal and spatial dimensions
std::vector<int> mask(Nd+1,1); mask[0] = 0;
theFFT.FFT_dim_mask(in_k,in_buf,mask,FFT::forward);
@ -82,7 +80,7 @@ namespace Grid {
theFFT.FFT_all_dim(out,prop_k,FFT::backward);
}
//phase for boundary condition
out = out * exp(ci*ph);
out = out * exp(Scalar(2.0*M_PI)*ci*ph);
};
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist) {
@ -105,38 +103,37 @@ namespace Grid {
FreePropagator(in,out,mass,boundary,twist,fiveD);
};
virtual void Instantiatable(void) {};
// Constructors
DomainWallFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,const ImplParams &p= ImplParams()) :
virtual void Instantiatable(void) {};
// Constructors
DomainWallFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,const ImplParams &p= ImplParams()) :
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
RealD eps = 1.0;
{
RealD eps = 1.0;
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
assert(zdata->n==this->Ls);
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
assert(zdata->n==this->Ls);
std::cout<<GridLogMessage << "DomainWallFermion with Ls="<<this->Ls<<std::endl;
// Call base setter
this->SetCoefficientsTanh(zdata,1.0,0.0);
Approx::zolotarev_free(zdata);
}
};
// std::cout<<GridLogMessage << "DomainWallFermion with Ls="<<this->Ls<<std::endl;
// Call base setter
this->SetCoefficientsTanh(zdata,1.0,0.0);
Approx::zolotarev_free(zdata);
}
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -0,0 +1,213 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
template<class S,class Representation = FundamentalRepresentation, class Options=CoeffReal>
class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Representation::Dimension> > {
public:
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl);
static const int Dimension = Representation::Dimension;
static const bool isFundamental = Representation::isFundamental;
static const bool LsVectorised=true;
static const int Nhcs = Options::Nhcs;
typedef typename Options::_Coeff_t Coeff_t;
typedef typename Options::template PrecisionMapper<Simd>::LowerPrecVector SimdL;
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
template <typename vtype> using iImplPropagator = iScalar<iMatrix<iMatrix<vtype, Dimension>, Ns> >;
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
template <typename vtype> using iImplHalfCommSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhcs> >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nd>;
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Dimension> > >;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplPropagator<Simd> SitePropagator;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplHalfCommSpinor<SimdL> SiteHalfCommSpinor;
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SitePropagator> PropagatorField;
/////////////////////////////////////////////////
// Make the doubled gauge field a *scalar*
/////////////////////////////////////////////////
typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar
typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar
typedef iImplGaugeLink<typename Simd::scalar_type> SiteScalarGaugeLink; // scalar
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfCommSpinor,SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonImplParams ImplParams;
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor,ImplParams> StencilImpl;
typedef typename StencilImpl::View_type StencilView;
ImplParams Params;
DomainWallVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){};
template <class ref>
static accelerator_inline void loadLinkElement(Simd &reg, ref &memory)
{
vsplat(reg, memory);
}
template<class _Spinor>
static accelerator_inline void multLink(_Spinor &phi, const SiteDoubledGaugeField &U,
const _Spinor &chi, int mu, StencilEntry *SE,
StencilView &St)
{
#ifdef GPU_VEC
// Gauge link is scalarised
mult(&phi(), &U(mu), &chi());
#else
SiteGaugeLink UU;
for (int i = 0; i < Dimension; i++) {
for (int j = 0; j < Dimension; j++) {
vsplat(UU()()(i, j), U(mu)()(i, j));
}
}
mult(&phi(), &UU(), &chi());
#endif
}
inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,const GaugeField &Umu)
{
SiteScalarGaugeField ScalarUmu;
SiteDoubledGaugeField ScalarUds;
GaugeLinkField U(Umu.Grid());
GaugeField Uadj(Umu.Grid());
for (int mu = 0; mu < Nd; mu++) {
U = PeekIndex<LorentzIndex>(Umu, mu);
U = adj(Cshift(U, mu, -1));
PokeIndex<LorentzIndex>(Uadj, U, mu);
}
for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
Coordinate lcoor;
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
peekLocalSite(ScalarUmu, Umu, lcoor);
for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu);
peekLocalSite(ScalarUmu, Uadj, lcoor);
for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu);
pokeLocalSite(ScalarUds, Uds, lcoor);
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,FermionField &A, int mu)
{
assert(0);
}
inline void outerProductImpl(PropagatorField &mat, const FermionField &Btilde, const FermionField &A){
assert(0);
}
inline void TraceSpinImpl(GaugeLinkField &mat, PropagatorField&P) {
assert(0);
}
inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds){
assert(0);
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
assert(0);
// Following lines to be revised after Peter's addition of half prec
// missing put lane...
/*
typedef decltype(traceIndex<SpinIndex>(outerProduct(Btilde[0], Atilde[0]))) result_type;
unsigned int LLs = Btilde.Grid()->_rdimensions[0];
conformable(Atilde.Grid(),Btilde.Grid());
GridBase* grid = mat.Grid();
GridBase* Bgrid = Btilde.Grid();
unsigned int dimU = grid->Nd();
unsigned int dimF = Bgrid->Nd();
GaugeLinkField tmp(grid);
tmp = Zero();
// FIXME
// Current implementation works, thread safe, probably suboptimal
// Passing through the local coordinate for grid transformation
// the force grid is in general very different from the Ls vectorized grid
for (int so = 0; so < grid->oSites(); so++) {
std::vector<typename result_type::scalar_object> vres(Bgrid->Nsimd());
std::vector<int> ocoor; grid->oCoorFromOindex(ocoor,so);
for (int si = 0; si < tmp.Grid()->iSites(); si++){
typename result_type::scalar_object scalar_object; scalar_object = Zero();
std::vector<int> local_coor;
std::vector<int> icoor; grid->iCoorFromIindex(icoor,si);
grid->InOutCoorToLocalCoor(ocoor, icoor, local_coor);
for (int s = 0; s < LLs; s++) {
std::vector<int> slocal_coor(dimF);
slocal_coor[0] = s;
for (int s4d = 1; s4d< dimF; s4d++) slocal_coor[s4d] = local_coor[s4d-1];
int sF = Bgrid->oIndexReduced(slocal_coor);
assert(sF < Bgrid->oSites());
extract(traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF])), vres);
// sum across the 5d dimension
for (auto v : vres) scalar_object += v;
}
tmp[so].putlane(scalar_object, si);
}
}
PokeIndex<LorentzIndex>(mat, tmp, mu);
*/
}
};
typedef DomainWallVec5dImpl<vComplex ,FundamentalRepresentation, CoeffReal> DomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,FundamentalRepresentation, CoeffReal> DomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,FundamentalRepresentation, CoeffReal> DomainWallVec5dImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,FundamentalRepresentation, CoeffRealHalfComms> DomainWallVec5dImplRL; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,FundamentalRepresentation, CoeffRealHalfComms> DomainWallVec5dImplFH; // Float
typedef DomainWallVec5dImpl<vComplexD,FundamentalRepresentation, CoeffRealHalfComms> DomainWallVec5dImplDF; // Double
typedef DomainWallVec5dImpl<vComplex ,FundamentalRepresentation,CoeffComplex> ZDomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,FundamentalRepresentation,CoeffComplex> ZDomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,FundamentalRepresentation,CoeffComplex> ZDomainWallVec5dImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,FundamentalRepresentation,CoeffComplexHalfComms> ZDomainWallVec5dImplRL; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,FundamentalRepresentation,CoeffComplexHalfComms> ZDomainWallVec5dImplFH; // Float
typedef DomainWallVec5dImpl<vComplexD,FundamentalRepresentation,CoeffComplexHalfComms> ZDomainWallVec5dImplDF; // Double
NAMESPACE_END(Grid);

View File

@ -23,10 +23,9 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_FERMION_H
#define GRID_QCD_FERMION_H
*************************************************************************************/
/* END LEGAL */
#pragma once
////////////////////////////////////////////////////////////////////////////////////////////////////
// Explicit explicit template instantiation is still required in the .cc files
@ -50,12 +49,17 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
////////////////////////////////////////////
#include <Grid/qcd/action/fermion/WilsonFermion.h> // 4d wilson like
#include <Grid/qcd/action/fermion/WilsonTMFermion.h> // 4d wilson like
NAMESPACE_CHECK(Wilson);
#include <Grid/qcd/action/fermion/WilsonTMFermion.h> // 4d wilson like
NAMESPACE_CHECK(WilsonTM);
#include <Grid/qcd/action/fermion/WilsonCloverFermion.h> // 4d wilson clover fermions
NAMESPACE_CHECK(WilsonClover);
#include <Grid/qcd/action/fermion/WilsonFermion5D.h> // 5d base used by all 5d overlap types
NAMESPACE_CHECK(Wilson5D);
#include <Grid/qcd/action/fermion/ImprovedStaggeredFermion.h>
#include <Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h>
NAMESPACE_CHECK(Staggered);
#include <Grid/qcd/action/fermion/CayleyFermion5D.h> // Cayley types
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
@ -63,7 +67,8 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
#include <Grid/qcd/action/fermion/MobiusFermion.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
#include <Grid/qcd/action/fermion/ZMobiusFermion.h>
#include <Grid/qcd/action/fermion/SchurDiagTwoKappa.h>
NAMESPACE_CHECK(DomainWall);
#include <Grid/qcd/action/fermion/ScaledShamirFermion.h>
#include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h>
#include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h>
@ -75,6 +80,7 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
#include <Grid/qcd/action/fermion/PartialFractionFermion5D.h> // Partial fraction
#include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
#include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h>
NAMESPACE_CHECK(Overlap);
///////////////////////////////////////////////////////////////////////////////
// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code
///////////////////////////////////////////////////////////////////////////////
@ -84,14 +90,17 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
// Fourier accelerated Pauli Villars inverse support
///////////////////////////////////////////////////////////////////////////////
#include <Grid/qcd/action/fermion/WilsonTMFermion5D.h>
NAMESPACE_CHECK(WilsonTM5);
////////////////////////////////////////////////////////////////////////////////
// Move this group to a DWF specific tools/algorithms subdir?
////////////////////////////////////////////////////////////////////////////////
#include <Grid/qcd/action/fermion/SchurDiagTwoKappa.h>
#include <Grid/qcd/action/fermion/FourierAcceleratedPV.h>
#include <Grid/qcd/action/fermion/PauliVillarsInverters.h>
#include <Grid/qcd/action/fermion/Reconstruct5Dprop.h>
#include <Grid/qcd/action/fermion/MADWF.h>
NAMESPACE_CHECK(DWFutils);
////////////////////////////////////////////////////////////////////////////////////////////////////
// More maintainable to maintain the following typedef list centrally, as more "impl" targets
@ -99,8 +108,7 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
////////////////////////////////////////////////////////////////////////////////////////////////////
// Cayley 5d
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
typedef WilsonFermion<WilsonImplR> WilsonFermionR;
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
@ -186,46 +194,6 @@ typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH;
typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF;
// Ls vectorised
typedef DomainWallFermion<DomainWallVec5dImplR> DomainWallFermionVec5dR;
typedef DomainWallFermion<DomainWallVec5dImplF> DomainWallFermionVec5dF;
typedef DomainWallFermion<DomainWallVec5dImplD> DomainWallFermionVec5dD;
typedef DomainWallFermion<DomainWallVec5dImplRL> DomainWallFermionVec5dRL;
typedef DomainWallFermion<DomainWallVec5dImplFH> DomainWallFermionVec5dFH;
typedef DomainWallFermion<DomainWallVec5dImplDF> DomainWallFermionVec5dDF;
typedef DomainWallEOFAFermion<DomainWallVec5dImplR> DomainWallEOFAFermionVec5dR;
typedef DomainWallEOFAFermion<DomainWallVec5dImplF> DomainWallEOFAFermionVec5dF;
typedef DomainWallEOFAFermion<DomainWallVec5dImplD> DomainWallEOFAFermionVec5dD;
typedef DomainWallEOFAFermion<DomainWallVec5dImplRL> DomainWallEOFAFermionVec5dRL;
typedef DomainWallEOFAFermion<DomainWallVec5dImplFH> DomainWallEOFAFermionVec5dFH;
typedef DomainWallEOFAFermion<DomainWallVec5dImplDF> DomainWallEOFAFermionVec5dDF;
typedef MobiusFermion<DomainWallVec5dImplR> MobiusFermionVec5dR;
typedef MobiusFermion<DomainWallVec5dImplF> MobiusFermionVec5dF;
typedef MobiusFermion<DomainWallVec5dImplD> MobiusFermionVec5dD;
typedef MobiusFermion<DomainWallVec5dImplRL> MobiusFermionVec5dRL;
typedef MobiusFermion<DomainWallVec5dImplFH> MobiusFermionVec5dFH;
typedef MobiusFermion<DomainWallVec5dImplDF> MobiusFermionVec5dDF;
typedef MobiusEOFAFermion<DomainWallVec5dImplR> MobiusEOFAFermionVec5dR;
typedef MobiusEOFAFermion<DomainWallVec5dImplF> MobiusEOFAFermionVec5dF;
typedef MobiusEOFAFermion<DomainWallVec5dImplD> MobiusEOFAFermionVec5dD;
typedef MobiusEOFAFermion<DomainWallVec5dImplRL> MobiusEOFAFermionVec5dRL;
typedef MobiusEOFAFermion<DomainWallVec5dImplFH> MobiusEOFAFermionVec5dFH;
typedef MobiusEOFAFermion<DomainWallVec5dImplDF> MobiusEOFAFermionVec5dDF;
typedef ZMobiusFermion<ZDomainWallVec5dImplR> ZMobiusFermionVec5dR;
typedef ZMobiusFermion<ZDomainWallVec5dImplF> ZMobiusFermionVec5dF;
typedef ZMobiusFermion<ZDomainWallVec5dImplD> ZMobiusFermionVec5dD;
typedef ZMobiusFermion<ZDomainWallVec5dImplRL> ZMobiusFermionVec5dRL;
typedef ZMobiusFermion<ZDomainWallVec5dImplFH> ZMobiusFermionVec5dFH;
typedef ZMobiusFermion<ZDomainWallVec5dImplDF> ZMobiusFermionVec5dDF;
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD;
@ -318,12 +286,13 @@ typedef ImprovedStaggeredFermion5D<StaggeredImplR> ImprovedStaggeredFermion5DR;
typedef ImprovedStaggeredFermion5D<StaggeredImplF> ImprovedStaggeredFermion5DF;
typedef ImprovedStaggeredFermion5D<StaggeredImplD> ImprovedStaggeredFermion5DD;
#ifndef GRID_NVCC
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplR> ImprovedStaggeredFermionVec5dR;
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplF> ImprovedStaggeredFermionVec5dF;
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplD> ImprovedStaggeredFermionVec5dD;
#endif
}}
NAMESPACE_END(Grid);
////////////////////
// Scalar QED actions
@ -332,4 +301,4 @@ typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplD> ImprovedStaggeredFermion
#include <Grid/qcd/action/scalar/Scalar.h>
#include <Grid/qcd/action/gauge/Photon.h>
#endif

View File

@ -36,58 +36,13 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
// Fermion prereqs
////////////////////////////////////////////
#include <Grid/qcd/action/fermion/WilsonCompressor.h> //used by all wilson type fermions
NAMESPACE_CHECK(Compressor);
#include <Grid/qcd/action/fermion/FermionOperatorImpl.h>
NAMESPACE_CHECK(FermionOperatorImpl);
#include <Grid/qcd/action/fermion/FermionOperator.h>
NAMESPACE_CHECK(FermionOperator);
#include <Grid/qcd/action/fermion/WilsonKernels.h> //used by all wilson type fermions
#include <Grid/qcd/action/fermion/StaggeredKernels.h> //used by all wilson type fermions
#define FermOpStaggeredTemplateInstantiate(A) \
template class A<StaggeredImplF>; \
template class A<StaggeredImplD>;
#define FermOpStaggeredVec5dTemplateInstantiate(A) \
template class A<StaggeredVec5dImplF>; \
template class A<StaggeredVec5dImplD>;
#define FermOp4dVecTemplateInstantiate(A) \
template class A<WilsonImplF>; \
template class A<WilsonImplD>; \
template class A<ZWilsonImplF>; \
template class A<ZWilsonImplD>; \
template class A<GparityWilsonImplF>; \
template class A<GparityWilsonImplD>; \
template class A<WilsonImplFH>; \
template class A<WilsonImplDF>; \
template class A<ZWilsonImplFH>; \
template class A<ZWilsonImplDF>; \
template class A<GparityWilsonImplFH>; \
template class A<GparityWilsonImplDF>;
#define AdjointFermOpTemplateInstantiate(A) \
template class A<WilsonAdjImplF>; \
template class A<WilsonAdjImplD>;
#define TwoIndexFermOpTemplateInstantiate(A) \
template class A<WilsonTwoIndexSymmetricImplF>; \
template class A<WilsonTwoIndexSymmetricImplD>; \
template class A<WilsonTwoIndexAntiSymmetricImplF>; \
template class A<WilsonTwoIndexAntiSymmetricImplD>;
#define FermOp5dVecTemplateInstantiate(A) \
template class A<DomainWallVec5dImplF>; \
template class A<DomainWallVec5dImplD>; \
template class A<ZDomainWallVec5dImplF>; \
template class A<ZDomainWallVec5dImplD>; \
template class A<DomainWallVec5dImplFH>; \
template class A<DomainWallVec5dImplDF>; \
template class A<ZDomainWallVec5dImplFH>; \
template class A<ZDomainWallVec5dImplDF>;
#define FermOpTemplateInstantiate(A) \
FermOp4dVecTemplateInstantiate(A) \
FermOp5dVecTemplateInstantiate(A)
#define GparityFermOpTemplateInstantiate(A)
NAMESPACE_CHECK(Kernels);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -26,86 +26,87 @@ Author: Vera Guelpers <V.M.Guelpers@soton.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_FERMION_OPERATOR_H
#define GRID_QCD_FERMION_OPERATOR_H
*************************************************************************************/
/* END LEGAL */
#pragma once
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
////////////////////////////////////////////////////////////////
// Allow to select between gauge representation rank bc's, flavours etc.
// and single/double precision.
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
// Allow to select between gauge representation rank bc's, flavours etc.
// and single/double precision.
////////////////////////////////////////////////////////////////
template<class Impl>
class FermionOperator : public CheckerBoardedSparseMatrixBase<typename Impl::FermionField>, public Impl
{
public:
template<class Impl>
class FermionOperator : public CheckerBoardedSparseMatrixBase<typename Impl::FermionField>, public Impl
{
public:
INHERIT_IMPL_TYPES(Impl);
INHERIT_IMPL_TYPES(Impl);
FermionOperator(const ImplParams &p= ImplParams()) : Impl(p) {};
virtual ~FermionOperator(void) = default;
FermionOperator(const ImplParams &p= ImplParams()) : Impl(p) {};
virtual ~FermionOperator(void) = default;
virtual FermionField &tmp(void) = 0;
virtual FermionField &tmp(void) = 0;
GridBase * Grid(void) { return FermionGrid(); }; // this is all the linalg routines need to know
GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); };
GridBase * Grid(void) { return FermionGrid(); }; // this is all the linalg routines need to know
GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); };
virtual GridBase *FermionGrid(void) =0;
virtual GridBase *FermionRedBlackGrid(void) =0;
virtual GridBase *GaugeGrid(void) =0;
virtual GridBase *GaugeRedBlackGrid(void) =0;
virtual GridBase *FermionGrid(void) =0;
virtual GridBase *FermionRedBlackGrid(void) =0;
virtual GridBase *GaugeGrid(void) =0;
virtual GridBase *GaugeRedBlackGrid(void) =0;
// override multiply
virtual RealD M (const FermionField &in, FermionField &out)=0;
virtual RealD Mdag (const FermionField &in, FermionField &out)=0;
// override multiply
virtual RealD M (const FermionField &in, FermionField &out)=0;
virtual RealD Mdag (const FermionField &in, FermionField &out)=0;
// half checkerboard operaions
virtual void Meooe (const FermionField &in, FermionField &out)=0;
virtual void MeooeDag (const FermionField &in, FermionField &out)=0;
virtual void Mooee (const FermionField &in, FermionField &out)=0;
virtual void MooeeDag (const FermionField &in, FermionField &out)=0;
virtual void MooeeInv (const FermionField &in, FermionField &out)=0;
virtual void MooeeInvDag (const FermionField &in, FermionField &out)=0;
// half checkerboard operaions
virtual void Meooe (const FermionField &in, FermionField &out)=0;
virtual void MeooeDag (const FermionField &in, FermionField &out)=0;
virtual void Mooee (const FermionField &in, FermionField &out)=0;
virtual void MooeeDag (const FermionField &in, FermionField &out)=0;
virtual void MooeeInv (const FermionField &in, FermionField &out)=0;
virtual void MooeeInvDag (const FermionField &in, FermionField &out)=0;
// non-hermitian hopping term; half cb or both
virtual void Dhop (const FermionField &in, FermionField &out,int dag)=0;
virtual void DhopOE(const FermionField &in, FermionField &out,int dag)=0;
virtual void DhopEO(const FermionField &in, FermionField &out,int dag)=0;
virtual void DhopDir(const FermionField &in, FermionField &out,int dir,int disp)=0; // implemented by WilsonFermion and WilsonFermion5D
// non-hermitian hopping term; half cb or both
virtual void Dhop (const FermionField &in, FermionField &out,int dag)=0;
virtual void DhopOE(const FermionField &in, FermionField &out,int dag)=0;
virtual void DhopEO(const FermionField &in, FermionField &out,int dag)=0;
virtual void DhopDir(const FermionField &in, FermionField &out,int dir,int disp)=0; // implemented by WilsonFermion and WilsonFermion5D
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDeriv(mat,U,V,dag);};
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivOE(mat,U,V,dag);};
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivEO(mat,U,V,dag);};
virtual void MooDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=zero;}; // Clover can override these
virtual void MeeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=zero;};
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDeriv(mat,U,V,dag);};
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivOE(mat,U,V,dag);};
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivEO(mat,U,V,dag);};
virtual void MooDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=Zero();}; // Clover can override these
virtual void MeeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=Zero();};
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);};
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist) {
FFT theFFT((GridCartesian *) in._grid);
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist)
{
FFT theFFT((GridCartesian *) in.Grid());
FermionField in_k(in._grid);
FermionField prop_k(in._grid);
typedef typename Simd::scalar_type Scalar;
FermionField in_k(in.Grid());
FermionField prop_k(in.Grid());
//phase for boundary condition
ComplexField coor(in._grid);
ComplexField ph(in._grid); ph = zero;
FermionField in_buf(in._grid); in_buf = zero;
ComplexField coor(in.Grid());
ComplexField ph(in.Grid()); ph = Zero();
FermionField in_buf(in.Grid()); in_buf = Zero();
Scalar ci(0.0,1.0);
assert(twist.size() == Nd);//check that twist is Nd
assert(boundary.size() == Nd);//check that boundary conditions is Nd
@ -113,7 +114,7 @@ namespace Grid {
{
LatticeCoordinate(coor, nu);
double boundary_phase = ::acos(real(boundary[nu]));
ph = ph + boundary_phase*coor*((1./(in._grid->_fdimensions[nu])));
ph = ph + boundary_phase*coor*((1./(in.Grid()->_fdimensions[nu])));
//momenta for propagator shifted by twist+boundary
twist[nu] = twist[nu] + boundary_phase/((2.0*M_PI));
}
@ -124,43 +125,42 @@ namespace Grid {
theFFT.FFT_all_dim(out,prop_k,FFT::backward);
//phase for boundary condition
out = out * exp(ci*ph);
out = out * exp(Scalar(2.0*M_PI)*ci*ph);
};
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) {
std::vector<Complex> boundary;
for(int i=0;i<Nd;i++) boundary.push_back(1);//default: periodic boundary conditions
std::vector<double> twist(Nd,0.0); //default: periodic boundarys in all directions
FreePropagator(in,out,mass,boundary,twist);
std::vector<Complex> boundary;
for(int i=0;i<Nd;i++) boundary.push_back(1);//default: periodic boundary conditions
std::vector<double> twist(Nd,0.0); //default: periodic boundarys in all directions
FreePropagator(in,out,mass,boundary,twist);
};
///////////////////////////////////////////////
// Updates gauge field during HMC
///////////////////////////////////////////////
virtual void ImportGauge(const GaugeField & _U)=0;
///////////////////////////////////////////////
// Updates gauge field during HMC
///////////////////////////////////////////////
virtual void ImportGauge(const GaugeField & _U)=0;
//////////////////////////////////////////////////////////////////////
// Conserved currents, either contract at sink or insert sequentially.
//////////////////////////////////////////////////////////////////////
virtual void ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu)=0;
virtual void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx)=0;
//////////////////////////////////////////////////////////////////////
// Conserved currents, either contract at sink or insert sequentially.
//////////////////////////////////////////////////////////////////////
virtual void ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu)=0;
virtual void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx)=0;
// Only reimplemented in Wilson5D
// Default to just a zero correlation function
virtual void ContractJ5q(FermionField &q_in ,ComplexField &J5q) { J5q=zero; };
virtual void ContractJ5q(PropagatorField &q_in,ComplexField &J5q) { J5q=zero; };
virtual void ContractJ5q(FermionField &q_in ,ComplexField &J5q) { J5q=Zero(); };
virtual void ContractJ5q(PropagatorField &q_in,ComplexField &J5q) { J5q=Zero(); };
///////////////////////////////////////////////
// Physical field import/export
@ -183,9 +183,7 @@ namespace Grid {
{
exported=solution;
};
};
};
}
}
NAMESPACE_END(Grid);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -28,8 +28,8 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
*************************************************************************************/
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template<typename M>
void get_real_const_bc(M& m, RealD& _b, RealD& _c) {
@ -63,8 +63,8 @@ class FourierAcceleratedPV {
: dwfPV(_dwfPV), Umu(_Umu), cg(_cg), group_in_s(_group_in_s)
{
assert( dwfPV.FermionGrid()->_fdimensions[0] % (2*group_in_s) == 0);
grid5D = QCD::SpaceTimeGrid::makeFiveDimGrid(2*group_in_s, (GridCartesian*)Umu._grid);
gridRB5D = QCD::SpaceTimeGrid::makeFiveDimRedBlackGrid(2*group_in_s, (GridCartesian*)Umu._grid);
grid5D = SpaceTimeGrid::makeFiveDimGrid(2*group_in_s, (GridCartesian*)Umu.Grid());
gridRB5D = SpaceTimeGrid::makeFiveDimRedBlackGrid(2*group_in_s, (GridCartesian*)Umu.Grid());
}
void rotatePV(const Vi& _src, Vi& dst, bool forward) const {
@ -72,13 +72,13 @@ class FourierAcceleratedPV {
GridStopWatch gsw1, gsw2;
typedef typename Vi::scalar_type Coeff_t;
int Ls = dst._grid->_fdimensions[0];
int Ls = dst.Grid()->_fdimensions[0];
Vi _tmp(dst._grid);
Vi _tmp(dst.Grid());
double phase = M_PI / (double)Ls;
Coeff_t bzero(0.0,0.0);
FFT theFFT((GridCartesian*)dst._grid);
FFT theFFT((GridCartesian*)dst.Grid());
if (!forward) {
gsw1.Start();
@ -115,7 +115,7 @@ class FourierAcceleratedPV {
std::cout << GridLogMessage << "Fourier-Accelerated Outer Pauli Villars"<<std::endl;
typedef typename Vi::scalar_type Coeff_t;
int Ls = _dst._grid->_fdimensions[0];
int Ls = _dst.Grid()->_fdimensions[0];
GridStopWatch gswT;
gswT.Start();
@ -126,12 +126,12 @@ class FourierAcceleratedPV {
// U(true) Rightinv TMinv U(false) = Minv
Vi _src_diag(_dst._grid);
Vi _src_diag(_dst.Grid());
Vi _src_diag_slice(dwfPV.GaugeGrid());
Vi _dst_diag_slice(dwfPV.GaugeGrid());
Vi _src_diag_slices(grid5D);
Vi _dst_diag_slices(grid5D);
Vi _dst_diag(_dst._grid);
Vi _dst_diag(_dst.Grid());
rotatePV(_src,_src_diag,false);
@ -163,7 +163,7 @@ class FourierAcceleratedPV {
for (int sidx=0;sidx<group_in_s;sidx++) {
int s = sgroup*group_in_s + sidx;
int sprime = Ls-s-1;
// int sprime = Ls-s-1;
RealD phase = M_PI / (RealD)Ls * (2.0 * s + 1.0);
RealD cosp = ::cos(phase);
@ -196,7 +196,7 @@ class FourierAcceleratedPV {
GridStopWatch gsw;
gsw.Start();
_dst_diag_slices = zero; // zero guess
_dst_diag_slices = Zero(); // zero guess
sol(tm,_src_diag_slices,_dst_diag_slices);
gsw.Stop();
std::cout << GridLogMessage << "Solve[sgroup=" << sgroup << "] completed in " << gsw.Elapsed() << ", " << gswA.Elapsed() << std::endl;
@ -212,7 +212,7 @@ class FourierAcceleratedPV {
// now rotate with inverse of
Coeff_t pA = b + c*cosp;
Coeff_t pB = - Coeff_t(0.0,1.0)*c*sinp;
Coeff_t pB = - Coeff_t(0.0,1.0)*Coeff_t(c*sinp);
Coeff_t pABden = pA*pA - pB*pB;
// (pA + pB * G5) * (pA - pB*G5) = (pA^2 - pB^2)
@ -234,4 +234,5 @@ class FourierAcceleratedPV {
}
};
}}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,321 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
template <class S, class Representation = FundamentalRepresentation, class Options=CoeffReal>
class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Representation::Dimension> > {
public:
static const int Dimension = Representation::Dimension;
static const bool isFundamental = Representation::isFundamental;
static const int Nhcs = Options::Nhcs;
static const bool LsVectorised=false;
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Dimension> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl);
typedef typename Options::_Coeff_t Coeff_t;
typedef typename Options::template PrecisionMapper<Simd>::LowerPrecVector SimdL;
template <typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Dimension>, Ns>, Ngp>;
template <typename vtype> using iImplPropagator = iVector<iMatrix<iMatrix<vtype, Dimension>, Ns>, Ngp>;
template <typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Dimension>, Nhs>, Ngp>;
template <typename vtype> using iImplHalfCommSpinor = iVector<iVector<iVector<vtype, Dimension>, Nhcs>, Ngp>;
template <typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>, Ngp>;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplPropagator<Simd> SitePropagator;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplHalfCommSpinor<SimdL> SiteHalfCommSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SitePropagator> PropagatorField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef GparityWilsonImplParams ImplParams;
typedef WilsonCompressor<SiteHalfCommSpinor,SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor, ImplParams> StencilImpl;
typedef typename StencilImpl::View_type StencilView;
ImplParams Params;
GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
// provide the multiply by link that is differentiated between Gparity (with
// flavour index) and non-Gparity
template<class _Spinor>
static accelerator_inline void multLink(_Spinor &phi,
const SiteDoubledGaugeField &U,
const _Spinor &chi,
int mu)
{
assert(0);
}
template<class _Spinor>
static accelerator_inline void multLink(_Spinor &phi,
const SiteDoubledGaugeField &U,
const _Spinor &chi,
int mu,
StencilEntry *SE,
StencilView &St)
{
int direction = St._directions[mu];
int distance = St._distances[mu];
int ptype = St._permute_type[mu];
int sl = St._simd_layout[direction];
Coordinate icoor;
#ifdef __CUDA_ARCH__
_Spinor tmp;
const int Nsimd =SiteDoubledGaugeField::Nsimd();
int s = SIMTlane(Nsimd);
St.iCoorFromIindex(icoor,s);
int mmu = mu % Nd;
if ( SE->_around_the_world && St.parameters.twists[mmu] ) {
int permute_lane = (sl==1)
|| ((distance== 1)&&(icoor[direction]==1))
|| ((distance==-1)&&(icoor[direction]==0));
if ( permute_lane ) {
tmp(0) = chi(1);
tmp(1) = chi(0);
} else {
tmp(0) = chi(0);
tmp(1) = chi(1);
}
auto UU0=coalescedRead(U(0)(mu));
auto UU1=coalescedRead(U(1)(mu));
mult(&phi(0),&UU0,&tmp(0));
mult(&phi(1),&UU1,&tmp(1));
} else {
auto UU0=coalescedRead(U(0)(mu));
auto UU1=coalescedRead(U(1)(mu));
mult(&phi(0),&UU0,&chi(0));
mult(&phi(1),&UU1,&chi(1));
}
#else
typedef _Spinor vobj;
typedef typename SiteHalfSpinor::scalar_object sobj;
typedef typename SiteHalfSpinor::vector_type vector_type;
vobj vtmp;
sobj stmp;
const int Nsimd =vector_type::Nsimd();
// Fixme X.Y.Z.T hardcode in stencil
int mmu = mu % Nd;
// assert our assumptions
assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code
assert((sl == 1) || (sl == 2));
if ( SE->_around_the_world && St.parameters.twists[mmu] ) {
if ( sl == 2 ) {
ExtractBuffer<sobj> vals(Nsimd);
extract(chi,vals);
for(int s=0;s<Nsimd;s++){
St.iCoorFromIindex(icoor,s);
assert((icoor[direction]==0)||(icoor[direction]==1));
int permute_lane;
if ( distance == 1) {
permute_lane = icoor[direction]?1:0;
} else {
permute_lane = icoor[direction]?0:1;
}
if ( permute_lane ) {
stmp(0) = vals[s](1);
stmp(1) = vals[s](0);
vals[s] = stmp;
}
}
merge(vtmp,vals);
} else {
vtmp(0) = chi(1);
vtmp(1) = chi(0);
}
mult(&phi(0),&U(0)(mu),&vtmp(0));
mult(&phi(1),&U(1)(mu),&vtmp(1));
} else {
mult(&phi(0),&U(0)(mu),&chi(0));
mult(&phi(1),&U(1)(mu),&chi(1));
}
#endif
}
template <class ref>
static accelerator_inline void loadLinkElement(Simd &reg, ref &memory)
{
reg = memory;
}
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
{
conformable(Uds.Grid(),GaugeGrid);
conformable(Umu.Grid(),GaugeGrid);
GaugeLinkField Utmp (GaugeGrid);
GaugeLinkField U (GaugeGrid);
GaugeLinkField Uconj(GaugeGrid);
Lattice<iScalar<vInteger> > coor(GaugeGrid);
for(int mu=0;mu<Nd;mu++){
LatticeCoordinate(coor,mu);
U = PeekIndex<LorentzIndex>(Umu,mu);
Uconj = conjugate(U);
// This phase could come from a simple bc 1,1,-1,1 ..
int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
if ( Params.twists[mu] ) {
Uconj = where(coor==neglink,-Uconj,Uconj);
}
auto U_v = U.View();
auto Uds_v = Uds.View();
auto Uconj_v = Uconj.View();
auto Utmp_v= Utmp.View();
thread_foreach(ss,U_v,{
Uds_v[ss](0)(mu) = U_v[ss]();
Uds_v[ss](1)(mu) = Uconj_v[ss]();
});
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
Uconj = adj(Cshift(Uconj,mu,-1));
Utmp = U;
if ( Params.twists[mu] ) {
Utmp = where(coor==0,Uconj,Utmp);
}
thread_foreach(ss,Utmp_v,{
Uds_v[ss](0)(mu+4) = Utmp_v[ss]();
});
Utmp = Uconj;
if ( Params.twists[mu] ) {
Utmp = where(coor==0,U,Utmp);
}
thread_foreach(ss,Utmp_v,{
Uds_v[ss](1)(mu+4) = Utmp_v[ss]();
});
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) {
// DhopDir provides U or Uconj depending on coor/flavour.
GaugeLinkField link(mat.Grid());
// use lorentz for flavour as hack.
auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
auto link_v = link.View();
auto tmp_v = tmp.View();
thread_foreach(ss,tmp_v,{
link_v[ss]() = tmp_v[ss](0, 0) + conjugate(tmp_v[ss](1, 1));
});
PokeIndex<LorentzIndex>(mat, link, mu);
return;
}
inline void outerProductImpl(PropagatorField &mat, const FermionField &Btilde, const FermionField &A){
//mat = outerProduct(Btilde, A);
assert(0);
}
inline void TraceSpinImpl(GaugeLinkField &mat, PropagatorField&P) {
assert(0);
/*
auto tmp = TraceIndex<SpinIndex>(P);
parallel_for(auto ss = tmp.begin(); ss < tmp.end(); ss++) {
mat[ss]() = tmp[ss](0, 0) + conjugate(tmp[ss](1, 1));
}
*/
}
inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds){
assert(0);
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
int Ls = Btilde.Grid()->_fdimensions[0];
GaugeLinkField tmp(mat.Grid());
tmp = Zero();
auto tmp_v = tmp.View();
auto Atilde_v = Atilde.View();
auto Btilde_v = Btilde.View();
thread_for(ss,tmp.Grid()->oSites(),{
for (int s = 0; s < Ls; s++) {
int sF = s + Ls * ss;
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde_v[sF], Atilde_v[sF]));
tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
}
});
PokeIndex<LorentzIndex>(mat, tmp, mu);
return;
}
};
typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffReal> GparityWilsonImplR; // Real.. whichever prec
typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffReal> GparityWilsonImplF; // Float
typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffReal> GparityWilsonImplD; // Double
typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplRL; // Real.. whichever prec
typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplFH; // Float
typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplDF; // Double
NAMESPACE_END(Grid);

View File

@ -25,16 +25,14 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
/* END LEGAL */
#ifndef GRID_QCD_IMPR_STAG_FERMION_H
#define GRID_QCD_IMPR_STAG_FERMION_H
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
class ImprovedStaggeredFermionStatic {
public:
public:
static const std::vector<int> directions;
static const std::vector<int> displacements;
static const int npoint = 16;
@ -42,7 +40,7 @@ class ImprovedStaggeredFermionStatic {
template <class Impl>
class ImprovedStaggeredFermion : public StaggeredKernels<Impl>, public ImprovedStaggeredFermionStatic {
public:
public:
INHERIT_IMPL_TYPES(Impl);
typedef StaggeredKernels<Impl> Kernels;
@ -139,7 +137,7 @@ class ImprovedStaggeredFermion : public StaggeredKernels<Impl>, public ImprovedS
// DoubleStore impl dependent
void ImportGauge (const GaugeField &_Uthin ) { assert(0); }
void ImportGauge (const GaugeField &_Uthin ,const GaugeField &_Ufat);
void ImportGauge(const GaugeField &_Uthin, const GaugeField &_Ufat);
void ImportGaugeSimple(const GaugeField &_UUU ,const GaugeField &_U);
void ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U);
DoubledGaugeField &GetU(void) { return Umu ; } ;
@ -151,7 +149,7 @@ class ImprovedStaggeredFermion : public StaggeredKernels<Impl>, public ImprovedS
///////////////////////////////////////////////////////////////
// protected:
public:
public:
// any other parameters of action ???
virtual int isTrivialEE(void) { return 1; };
virtual RealD Mass(void) { return mass; }
@ -188,11 +186,11 @@ class ImprovedStaggeredFermion : public StaggeredKernels<Impl>, public ImprovedS
PropagatorField &q_out,
Current curr_type,
unsigned int mu);
void SeqConservedCurrent(PropagatorField &q_in,
void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx);
};
@ -200,6 +198,6 @@ class ImprovedStaggeredFermion : public StaggeredKernels<Impl>, public ImprovedS
typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF;
typedef ImprovedStaggeredFermion<StaggeredImplD> ImprovedStaggeredFermionD;
}
}
NAMESPACE_END(Grid);
#endif

View File

@ -1,5 +1,5 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -25,101 +25,99 @@ Author: AzusaYamaguchi <ayamaguc@staffmail.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_IMPROVED_STAGGERED_FERMION_5D_H
#define GRID_QCD_IMPROVED_STAGGERED_FERMION_5D_H
*************************************************************************************/
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////////////////////////////////////////
// This is the 4d red black case appropriate to support
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// This is the 4d red black case appropriate to support
////////////////////////////////////////////////////////////////////////////////
class ImprovedStaggeredFermion5DStatic {
public:
// S-direction is INNERMOST and takes no part in the parity.
static const std::vector<int> directions;
static const std::vector<int> displacements;
const int npoint = 16;
};
class ImprovedStaggeredFermion5DStatic {
public:
// S-direction is INNERMOST and takes no part in the parity.
static const std::vector<int> directions;
static const std::vector<int> displacements;
const int npoint = 16;
};
template<class Impl>
class ImprovedStaggeredFermion5D : public StaggeredKernels<Impl>, public ImprovedStaggeredFermion5DStatic
{
public:
INHERIT_IMPL_TYPES(Impl);
typedef StaggeredKernels<Impl> Kernels;
template<class Impl>
class ImprovedStaggeredFermion5D : public StaggeredKernels<Impl>, public ImprovedStaggeredFermion5DStatic
{
public:
INHERIT_IMPL_TYPES(Impl);
typedef StaggeredKernels<Impl> Kernels;
FermionField _tmp;
FermionField &tmp(void) { return _tmp; }
FermionField _tmp;
FermionField &tmp(void) { return _tmp; }
////////////////////////////////////////
// Performance monitoring
////////////////////////////////////////
void Report(void);
void ZeroCounters(void);
double DhopTotalTime;
double DhopCalls;
double DhopCommTime;
double DhopComputeTime;
////////////////////////////////////////
// Performance monitoring
////////////////////////////////////////
void Report(void);
void ZeroCounters(void);
double DhopTotalTime;
double DhopCalls;
double DhopCommTime;
double DhopComputeTime;
double DhopComputeTime2;
double DhopFaceTime;
///////////////////////////////////////////////////////////////
// Implement the abstract base
///////////////////////////////////////////////////////////////
GridBase *GaugeGrid(void) { return _FourDimGrid ;}
GridBase *GaugeRedBlackGrid(void) { return _FourDimRedBlackGrid ;}
GridBase *FermionGrid(void) { return _FiveDimGrid;}
GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;}
///////////////////////////////////////////////////////////////
// Implement the abstract base
///////////////////////////////////////////////////////////////
GridBase *GaugeGrid(void) { return _FourDimGrid ;}
GridBase *GaugeRedBlackGrid(void) { return _FourDimRedBlackGrid ;}
GridBase *FermionGrid(void) { return _FiveDimGrid;}
GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;}
// full checkerboard operations; leave unimplemented as abstract for now
RealD M (const FermionField &in, FermionField &out);
RealD Mdag (const FermionField &in, FermionField &out);
// full checkerboard operations; leave unimplemented as abstract for now
RealD M (const FermionField &in, FermionField &out);
RealD Mdag (const FermionField &in, FermionField &out);
// half checkerboard operations
void Meooe (const FermionField &in, FermionField &out);
void Mooee (const FermionField &in, FermionField &out);
void MooeeInv (const FermionField &in, FermionField &out);
// half checkerboard operations
void Meooe (const FermionField &in, FermionField &out);
void Mooee (const FermionField &in, FermionField &out);
void MooeeInv (const FermionField &in, FermionField &out);
void MeooeDag (const FermionField &in, FermionField &out);
void MooeeDag (const FermionField &in, FermionField &out);
void MooeeInvDag (const FermionField &in, FermionField &out);
void MeooeDag (const FermionField &in, FermionField &out);
void MooeeDag (const FermionField &in, FermionField &out);
void MooeeInvDag (const FermionField &in, FermionField &out);
void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
// These can be overridden by fancy 5d chiral action
void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// These can be overridden by fancy 5d chiral action
void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// Implement hopping term non-hermitian hopping term; half cb or both
void Dhop (const FermionField &in, FermionField &out,int dag);
void DhopOE(const FermionField &in, FermionField &out,int dag);
void DhopEO(const FermionField &in, FermionField &out,int dag);
// Implement hopping term non-hermitian hopping term; half cb or both
void Dhop (const FermionField &in, FermionField &out,int dag);
void DhopOE(const FermionField &in, FermionField &out,int dag);
void DhopEO(const FermionField &in, FermionField &out,int dag);
///////////////////////////////////////////////////////////////
// New methods added
///////////////////////////////////////////////////////////////
void DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
DoubledGaugeField & UUU,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag);
///////////////////////////////////////////////////////////////
// New methods added
///////////////////////////////////////////////////////////////
void DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
DoubledGaugeField & UUU,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag);
void DhopInternal(StencilImpl & st,
LebesgueOrder &lo,
DoubledGaugeField &U,
DoubledGaugeField &UUU,
const FermionField &in,
FermionField &out,
int dag);
void DhopInternal(StencilImpl & st,
LebesgueOrder &lo,
DoubledGaugeField &U,
DoubledGaugeField &UUU,
const FermionField &in,
FermionField &out,
int dag);
void DhopInternalOverlappedComms(StencilImpl & st,
LebesgueOrder &lo,
@ -138,17 +136,17 @@ namespace QCD {
int dag);
// Constructors
// Constructors
////////////////////////////////////////////////////////////////////////////////////////////////
// Grid internal interface -- Thin link and fat link, with coefficients
////////////////////////////////////////////////////////////////////////////////////////////////
ImprovedStaggeredFermion5D(GaugeField &_Uthin,
GaugeField &_Ufat,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
double _mass,
ImprovedStaggeredFermion5D(GaugeField &_Uthin,
GaugeField &_Ufat,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
double _mass,
RealD _c1, RealD _c2,RealD _u0,
const ImplParams &p= ImplParams());
////////////////////////////////////////////////////////////////////////////////////////////////
@ -160,11 +158,11 @@ namespace QCD {
GridRedBlackCartesian &FourDimRedBlackGrid,
double _mass,
RealD _c1=1.0, RealD _c2=1.0,RealD _u0=1.0,
const ImplParams &p= ImplParams());
const ImplParams &p= ImplParams());
// DoubleStore gauge field in operator
void ImportGauge (const GaugeField &_Uthin ) { assert(0); }
void ImportGauge (const GaugeField &_Uthin ,const GaugeField &_Ufat);
void ImportGauge(const GaugeField &_Uthin,const GaugeField &_Ufat);
void ImportGaugeSimple(const GaugeField &_UUU,const GaugeField &_U);
void ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U);
// Give a reference; can be used to do an assignment or copy back out after import
@ -173,62 +171,61 @@ namespace QCD {
DoubledGaugeField &GetUUU(void) { return UUUmu; };
void CopyGaugeCheckerboards(void);
///////////////////////////////////////////////////////////////
// Data members require to support the functionality
///////////////////////////////////////////////////////////////
public:
///////////////////////////////////////////////////////////////
// Data members require to support the functionality
///////////////////////////////////////////////////////////////
public:
virtual int isTrivialEE(void) { return 1; };
virtual RealD Mass(void) { return mass; }
GridBase *_FourDimGrid;
GridBase *_FourDimRedBlackGrid;
GridBase *_FiveDimGrid;
GridBase *_FiveDimRedBlackGrid;
GridBase *_FourDimGrid;
GridBase *_FourDimRedBlackGrid;
GridBase *_FiveDimGrid;
GridBase *_FiveDimRedBlackGrid;
RealD mass;
RealD c1;
RealD c2;
RealD u0;
int Ls;
RealD mass;
RealD c1;
RealD c2;
RealD u0;
int Ls;
//Defines the stencils for even and odd
StencilImpl Stencil;
StencilImpl StencilEven;
StencilImpl StencilOdd;
//Defines the stencils for even and odd
StencilImpl Stencil;
StencilImpl StencilEven;
StencilImpl StencilOdd;
// Copy of the gauge field , with even and odd subsets
DoubledGaugeField Umu;
DoubledGaugeField UmuEven;
DoubledGaugeField UmuOdd;
// Copy of the gauge field , with even and odd subsets
DoubledGaugeField Umu;
DoubledGaugeField UmuEven;
DoubledGaugeField UmuOdd;
DoubledGaugeField UUUmu;
DoubledGaugeField UUUmuEven;
DoubledGaugeField UUUmuOdd;
DoubledGaugeField UUUmu;
DoubledGaugeField UUUmuEven;
DoubledGaugeField UUUmuOdd;
LebesgueOrder Lebesgue;
LebesgueOrder LebesgueEvenOdd;
LebesgueOrder Lebesgue;
LebesgueOrder LebesgueEvenOdd;
// Comms buffer
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
// Comms buffer
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
///////////////////////////////////////////////////////////////
// Conserved current utilities
///////////////////////////////////////////////////////////////
void ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu);
void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
///////////////////////////////////////////////////////////////
// Conserved current utilities
///////////////////////////////////////////////////////////////
void ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu);
void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx);
};
};
}}
NAMESPACE_END(Grid);
#endif

View File

@ -27,8 +27,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template <class Fieldi, class Fieldo,IfNotSame<Fieldi,Fieldo> X=0>
inline void convert(const Fieldi &from,Fieldo &to)
@ -109,7 +108,7 @@ class MADWF
std::cout << GridLogMessage << " b " <<norm2(b)<<std::endl;
defect = b;
sol5=zero;
sol5=Zero();
for (int i=0;i<maxiter;i++) {
///////////////////////////////////////
@ -122,7 +121,7 @@ class MADWF
////////////////////////////////////////////////
// Solve the inner system with surface term c0
////////////////////////////////////////////////
ci = zero;
ci = Zero();
convert(c0,c0i); // Possible precison change
InsertSlice(c0i,ci,0, 0);
@ -190,4 +189,4 @@ class MADWF
};
}}
NAMESPACE_END(Grid);

View File

@ -1,502 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
namespace Grid {
namespace QCD {
template<class Impl>
MobiusEOFAFermion<Impl>::MobiusEOFAFermion(
GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3,
RealD _shift, int _pm, RealD _M5,
RealD _b, RealD _c, const ImplParams &p) :
AbstractEOFAFermion<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3,
_shift, _pm, _M5, _b, _c, p)
{
int Ls = this->Ls;
RealD eps = 1.0;
Approx::zolotarev_data *zdata = Approx::higham(eps, this->Ls);
assert(zdata->n == this->Ls);
std::cout << GridLogMessage << "MobiusEOFAFermion (b=" << _b <<
",c=" << _c << ") with Ls=" << Ls << std::endl;
this->SetCoefficientsTanh(zdata, _b, _c);
std::cout << GridLogMessage << "EOFA parameters: (mq1=" << _mq1 <<
",mq2=" << _mq2 << ",mq3=" << _mq3 << ",shift=" << _shift <<
",pm=" << _pm << ")" << std::endl;
Approx::zolotarev_free(zdata);
if(_shift != 0.0){
SetCoefficientsPrecondShiftOps();
} else {
Mooee_shift.resize(Ls, 0.0);
MooeeInv_shift_lc.resize(Ls, 0.0);
MooeeInv_shift_norm.resize(Ls, 0.0);
MooeeInvDag_shift_lc.resize(Ls, 0.0);
MooeeInvDag_shift_norm.resize(Ls, 0.0);
}
}
/****************************************************************
* Additional EOFA operators only called outside the inverter.
* Since speed is not essential, simple axpby-style
* implementations should be fine.
***************************************************************/
template<class Impl>
void MobiusEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din, int sign, int dag)
{
int Ls = this->Ls;
RealD alpha = this->alpha;
Din = zero;
if((sign == 1) && (dag == 0)) { // \Omega_{+}
for(int s=0; s<Ls; ++s){
axpby_ssp(Din, 0.0, psi, 2.0*std::pow(1.0-alpha,Ls-s-1)/std::pow(1.0+alpha,Ls-s), psi, s, 0);
}
} else if((sign == -1) && (dag == 0)) { // \Omega_{-}
for(int s=0; s<Ls; ++s){
axpby_ssp(Din, 0.0, psi, 2.0*std::pow(1.0-alpha,s)/std::pow(1.0+alpha,s+1), psi, s, 0);
}
} else if((sign == 1 ) && (dag == 1)) { // \Omega_{+}^{\dagger}
for(int sp=0; sp<Ls; ++sp){
axpby_ssp(Din, 1.0, Din, 2.0*std::pow(1.0-alpha,Ls-sp-1)/std::pow(1.0+alpha,Ls-sp), psi, 0, sp);
}
} else if((sign == -1) && (dag == 1)) { // \Omega_{-}^{\dagger}
for(int sp=0; sp<Ls; ++sp){
axpby_ssp(Din, 1.0, Din, 2.0*std::pow(1.0-alpha,sp)/std::pow(1.0+alpha,sp+1), psi, 0, sp);
}
}
}
// This is the operator relating the usual Ddwf to TWQCD's EOFA Dirac operator (arXiv:1706.05843, Eqn. 6).
// It also relates the preconditioned and unpreconditioned systems described in Appendix B.2.
template<class Impl>
void MobiusEOFAFermion<Impl>::Dtilde(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
RealD b = 0.5 * ( 1.0 + this->alpha );
RealD c = 0.5 * ( 1.0 - this->alpha );
RealD mq1 = this->mq1;
for(int s=0; s<Ls; ++s){
if(s == 0) {
axpby_ssp_pminus(chi, b, psi, -c, psi, s, s+1);
axpby_ssp_pplus (chi, 1.0, chi, mq1*c, psi, s, Ls-1);
} else if(s == (Ls-1)) {
axpby_ssp_pminus(chi, b, psi, mq1*c, psi, s, 0);
axpby_ssp_pplus (chi, 1.0, chi, -c, psi, s, s-1);
} else {
axpby_ssp_pminus(chi, b, psi, -c, psi, s, s+1);
axpby_ssp_pplus (chi, 1.0, chi, -c, psi, s, s-1);
}
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
RealD m = this->mq1;
RealD c = 0.5 * this->alpha;
RealD d = 0.5;
RealD DtInv_p(0.0), DtInv_m(0.0);
RealD N = std::pow(c+d,Ls) + m*std::pow(c-d,Ls);
FermionField tmp(this->FermionGrid());
for(int s=0; s<Ls; ++s){
for(int sp=0; sp<Ls; ++sp){
DtInv_p = m * std::pow(-1.0,s-sp+1) * std::pow(c-d,Ls+s-sp) / std::pow(c+d,s-sp+1) / N;
DtInv_p += (s < sp) ? 0.0 : std::pow(-1.0,s-sp) * std::pow(c-d,s-sp) / std::pow(c+d,s-sp+1);
DtInv_m = m * std::pow(-1.0,sp-s+1) * std::pow(c-d,Ls+sp-s) / std::pow(c+d,sp-s+1) / N;
DtInv_m += (s > sp) ? 0.0 : std::pow(-1.0,sp-s) * std::pow(c-d,sp-s) / std::pow(c+d,sp-s+1);
if(sp == 0){
axpby_ssp_pplus (tmp, 0.0, tmp, DtInv_p, psi, s, sp);
axpby_ssp_pminus(tmp, 0.0, tmp, DtInv_m, psi, s, sp);
} else {
axpby_ssp_pplus (tmp, 1.0, tmp, DtInv_p, psi, s, sp);
axpby_ssp_pminus(tmp, 1.0, tmp, DtInv_m, psi, s, sp);
}
}}
}
/*****************************************************************************************************/
template<class Impl>
RealD MobiusEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
FermionField Din(psi._grid);
this->Meooe5D(psi, Din);
this->DW(Din, chi, DaggerNo);
axpby(chi, 1.0, 1.0, chi, psi);
this->M5D(psi, chi);
return(norm2(chi));
}
template<class Impl>
RealD MobiusEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
FermionField Din(psi._grid);
this->DW(psi, Din, DaggerYes);
this->MeooeDag5D(Din, chi);
this->M5Ddag(psi, chi);
axpby(chi, 1.0, 1.0, chi, psi);
return(norm2(chi));
}
/********************************************************************
* Performance critical fermion operators called inside the inverter
********************************************************************/
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = this->mq1;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = this->mq1;
// no shift term
if(this->shift == 0.0){ this->M5D(psi, chi, chi, lower, diag, upper); }
// fused M + shift operation
else{ this->M5D_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); }
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = this->mq1;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = this->mq1;
// no shift term
if(this->shift == 0.0){ this->M5Ddag(psi, chi, chi, lower, diag, upper); }
// fused M + shift operation
else{ this->M5Ddag_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); }
}
// half checkerboard operations
template<class Impl>
void MobiusEOFAFermion<Impl>::Mooee(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
// coefficients of Mooee
std::vector<Coeff_t> diag = this->bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
upper[s] = -this->cee[s];
lower[s] = -this->cee[s];
}
upper[Ls-1] *= -this->mq1;
lower[0] *= -this->mq1;
// no shift term
if(this->shift == 0.0){ this->M5D(psi, psi, chi, lower, diag, upper); }
// fused M + shift operation
else { this->M5D_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); }
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeDag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
// coefficients of MooeeDag
std::vector<Coeff_t> diag = this->bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
if(s==0) {
upper[s] = -this->cee[s+1];
lower[s] = this->mq1*this->cee[Ls-1];
} else if(s==(Ls-1)) {
upper[s] = this->mq1*this->cee[0];
lower[s] = -this->cee[s-1];
} else {
upper[s] = -this->cee[s+1];
lower[s] = -this->cee[s-1];
}
}
// no shift term
if(this->shift == 0.0){ this->M5Ddag(psi, psi, chi, lower, diag, upper); }
// fused M + shift operation
else{ this->M5Ddag_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); }
}
/****************************************************************************************/
// Computes coefficients for applying Cayley preconditioned shift operators
// (Mooee + \Delta) --> Mooee_shift
// (Mooee + \Delta)^{-1} --> MooeeInv_shift_lc, MooeeInv_shift_norm
// (Mooee + \Delta)^{-dag} --> MooeeInvDag_shift_lc, MooeeInvDag_shift_norm
// For the latter two cases, the operation takes the form
// [ (Mooee + \Delta)^{-1} \psi ]_{i} = Mooee_{ij} \psi_{j} +
// ( MooeeInv_shift_norm )_{i} ( \sum_{j} [ MooeeInv_shift_lc ]_{j} P_{pm} \psi_{j} )
template<class Impl>
void MobiusEOFAFermion<Impl>::SetCoefficientsPrecondShiftOps()
{
int Ls = this->Ls;
int pm = this->pm;
RealD alpha = this->alpha;
RealD k = this->k;
RealD mq1 = this->mq1;
RealD shift = this->shift;
// Initialize
Mooee_shift.resize(Ls);
MooeeInv_shift_lc.resize(Ls);
MooeeInv_shift_norm.resize(Ls);
MooeeInvDag_shift_lc.resize(Ls);
MooeeInvDag_shift_norm.resize(Ls);
// Construct Mooee_shift
int idx(0);
Coeff_t N = ( (pm == 1) ? 1.0 : -1.0 ) * (2.0*shift*k) *
( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) );
for(int s=0; s<Ls; ++s){
idx = (pm == 1) ? (s) : (Ls-1-s);
Mooee_shift[idx] = N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1);
}
// Tridiagonal solve for MooeeInvDag_shift_lc
{
Coeff_t m(0.0);
std::vector<Coeff_t> d = Mooee_shift;
std::vector<Coeff_t> u(Ls,0.0);
std::vector<Coeff_t> y(Ls,0.0);
std::vector<Coeff_t> q(Ls,0.0);
if(pm == 1){ u[0] = 1.0; }
else{ u[Ls-1] = 1.0; }
// Tridiagonal matrix algorithm + Sherman-Morrison formula
//
// We solve
// ( Mooee' + u \otimes v ) MooeeInvDag_shift_lc = Mooee_shift
// where Mooee' is the tridiagonal part of Mooee_{+}, and
// u = (1,0,...,0) and v = (0,...,0,mq1*cee[0]) are chosen
// so that the outer-product u \otimes v gives the (0,Ls-1)
// entry of Mooee_{+}.
//
// We do this as two solves: Mooee'*y = d and Mooee'*q = u,
// and then construct the solution to the original system
// MooeeInvDag_shift_lc = y - <v,y> / ( 1 + <v,q> ) q
if(pm == 1){
for(int s=1; s<Ls; ++s){
m = -this->cee[s] / this->bee[s-1];
d[s] -= m*d[s-1];
u[s] -= m*u[s-1];
}
}
y[Ls-1] = d[Ls-1] / this->bee[Ls-1];
q[Ls-1] = u[Ls-1] / this->bee[Ls-1];
for(int s=Ls-2; s>=0; --s){
if(pm == 1){
y[s] = d[s] / this->bee[s];
q[s] = u[s] / this->bee[s];
} else {
y[s] = ( d[s] + this->cee[s]*y[s+1] ) / this->bee[s];
q[s] = ( u[s] + this->cee[s]*q[s+1] ) / this->bee[s];
}
}
// Construct MooeeInvDag_shift_lc
for(int s=0; s<Ls; ++s){
if(pm == 1){
MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[0]*y[Ls-1] /
(1.0+mq1*this->cee[0]*q[Ls-1]) * q[s];
} else {
MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[Ls-1]*y[0] /
(1.0+mq1*this->cee[Ls-1]*q[0]) * q[s];
}
}
// Compute remaining coefficients
N = (pm == 1) ? (1.0 + MooeeInvDag_shift_lc[Ls-1]) : (1.0 + MooeeInvDag_shift_lc[0]);
for(int s=0; s<Ls; ++s){
// MooeeInv_shift_lc
if(pm == 1){ MooeeInv_shift_lc[s] = std::pow(this->bee[s],s) * std::pow(this->cee[s],Ls-1-s); }
else{ MooeeInv_shift_lc[s] = std::pow(this->bee[s],Ls-1-s) * std::pow(this->cee[s],s); }
// MooeeInv_shift_norm
MooeeInv_shift_norm[s] = -MooeeInvDag_shift_lc[s] /
( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N;
// MooeeInvDag_shift_norm
if(pm == 1){ MooeeInvDag_shift_norm[s] = -std::pow(this->bee[s],s) * std::pow(this->cee[s],Ls-1-s) /
( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N; }
else{ MooeeInvDag_shift_norm[s] = -std::pow(this->bee[s],Ls-1-s) * std::pow(this->cee[s],s) /
( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N; }
}
}
}
// Recompute coefficients for a different value of shift constant
template<class Impl>
void MobiusEOFAFermion<Impl>::RefreshShiftCoefficients(RealD new_shift)
{
this->shift = new_shift;
if(new_shift != 0.0){
SetCoefficientsPrecondShiftOps();
} else {
int Ls = this->Ls;
Mooee_shift.resize(Ls,0.0);
MooeeInv_shift_lc.resize(Ls,0.0);
MooeeInv_shift_norm.resize(Ls,0.0);
MooeeInvDag_shift_lc.resize(Ls,0.0);
MooeeInvDag_shift_norm.resize(Ls,0.0);
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternalCompute(int dag, int inv,
Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
int Ls = this->Ls;
GridBase* grid = this->FermionRedBlackGrid();
int LLs = grid->_rdimensions[0];
if(LLs == Ls){ return; } // Not vectorised in 5th direction
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
for(int s=0; s<Ls; s++){
Pplus(s,s) = this->bee[s];
Pminus(s,s) = this->bee[s];
}
for(int s=0; s<Ls-1; s++){
Pminus(s,s+1) = -this->cee[s];
Pplus(s+1,s) = -this->cee[s+1];
}
Pplus (0,Ls-1) = this->mq1*this->cee[0];
Pminus(Ls-1,0) = this->mq1*this->cee[Ls-1];
if(this->shift != 0.0){
RealD c = 0.5 * this->alpha;
RealD d = 0.5;
RealD N = this->shift * this->k * ( std::pow(c+d,Ls) + this->mq1*std::pow(c-d,Ls) );
if(this->pm == 1) {
for(int s=0; s<Ls; ++s){
Pplus(s,Ls-1) += N * std::pow(-1.0,s) * std::pow(c-d,s) / std::pow(c+d,Ls+s+1);
}
} else {
for(int s=0; s<Ls; ++s){
Pminus(s,0) += N * std::pow(-1.0,s+1) * std::pow(c-d,Ls-1-s) / std::pow(c+d,2*Ls-s);
}
}
}
Eigen::MatrixXcd PplusMat ;
Eigen::MatrixXcd PminusMat;
if(inv) {
PplusMat = Pplus.inverse();
PminusMat = Pminus.inverse();
} else {
PplusMat = Pplus;
PminusMat = Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
typedef typename SiteHalfSpinor::scalar_type scalar_type;
const int Nsimd = Simd::Nsimd();
Matp.resize(Ls*LLs);
Matm.resize(Ls*LLs);
for(int s2=0; s2<Ls; s2++){
for(int s1=0; s1<LLs; s1++){
int istride = LLs;
int ostride = 1;
Simd Vp;
Simd Vm;
scalar_type *sp = (scalar_type*) &Vp;
scalar_type *sm = (scalar_type*) &Vm;
for(int l=0; l<Nsimd; l++){
if(switcheroo<Coeff_t>::iscomplex()) {
sp[l] = PplusMat (l*istride+s1*ostride,s2);
sm[l] = PminusMat(l*istride+s1*ostride,s2);
} else {
// if real
scalar_type tmp;
tmp = PplusMat (l*istride+s1*ostride,s2);
sp[l] = scalar_type(tmp.real(),tmp.real());
tmp = PminusMat(l*istride+s1*ostride,s2);
sm[l] = scalar_type(tmp.real(),tmp.real());
}
}
Matp[LLs*s2+s1] = Vp;
Matm[LLs*s2+s1] = Vm;
}}
}
FermOpTemplateInstantiate(MobiusEOFAFermion);
GparityFermOpTemplateInstantiate(MobiusEOFAFermion);
}}

View File

@ -26,108 +26,79 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
/* END LEGAL */
#ifndef GRID_QCD_MOBIUS_EOFA_FERMION_H
#define GRID_QCD_MOBIUS_EOFA_FERMION_H
#include <Grid/qcd/action/fermion/AbstractEOFAFermion.h>
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template<class Impl>
class MobiusEOFAFermion : public AbstractEOFAFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
template<class Impl>
class MobiusEOFAFermion : public AbstractEOFAFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
// Shift operator coefficients for red-black preconditioned Mobius EOFA
std::vector<Coeff_t> Mooee_shift;
std::vector<Coeff_t> MooeeInv_shift_lc;
std::vector<Coeff_t> MooeeInv_shift_norm;
std::vector<Coeff_t> MooeeInvDag_shift_lc;
std::vector<Coeff_t> MooeeInvDag_shift_norm;
public:
// Shift operator coefficients for red-black preconditioned Mobius EOFA
Vector<Coeff_t> Mooee_shift;
Vector<Coeff_t> MooeeInv_shift_lc;
Vector<Coeff_t> MooeeInv_shift_norm;
Vector<Coeff_t> MooeeInvDag_shift_lc;
Vector<Coeff_t> MooeeInvDag_shift_norm;
virtual void Instantiatable(void) {};
virtual void Instantiatable(void) {};
// EOFA-specific operations
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag);
virtual void Dtilde (const FermionField& in, FermionField& out);
virtual void DtildeInv (const FermionField& in, FermionField& out);
// EOFA-specific operations
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag);
virtual void Dtilde (const FermionField& in, FermionField& out);
virtual void DtildeInv (const FermionField& in, FermionField& out);
// override multiply
virtual RealD M (const FermionField& in, FermionField& out);
virtual RealD Mdag (const FermionField& in, FermionField& out);
// override multiply
virtual RealD M (const FermionField& in, FermionField& out);
virtual RealD Mdag (const FermionField& in, FermionField& out);
// half checkerboard operations
virtual void Mooee (const FermionField& in, FermionField& out);
virtual void MooeeDag (const FermionField& in, FermionField& out);
virtual void MooeeInv (const FermionField& in, FermionField& out);
virtual void MooeeInv_shift (const FermionField& in, FermionField& out);
virtual void MooeeInvDag (const FermionField& in, FermionField& out);
virtual void MooeeInvDag_shift(const FermionField& in, FermionField& out);
// half checkerboard operations
virtual void Mooee (const FermionField& in, FermionField& out);
virtual void MooeeDag (const FermionField& in, FermionField& out);
virtual void MooeeInv (const FermionField& in, FermionField& out);
virtual void MooeeInv_shift (const FermionField& in, FermionField& out);
virtual void MooeeInvDag (const FermionField& in, FermionField& out);
virtual void MooeeInvDag_shift(const FermionField& in, FermionField& out);
virtual void M5D (const FermionField& psi, FermionField& chi);
virtual void M5Ddag (const FermionField& psi, FermionField& chi);
virtual void M5D (const FermionField& psi, FermionField& chi);
virtual void M5Ddag (const FermionField& psi, FermionField& chi);
/////////////////////////////////////////////////////
// Instantiate different versions depending on Impl
/////////////////////////////////////////////////////
void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
/////////////////////////////////////////////////////
// Instantiate different versions depending on Impl
/////////////////////////////////////////////////////
void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,
Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper);
void M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs);
void M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi,
Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper,
Vector<Coeff_t>& shift_coeffs);
void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,
Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper);
void M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs);
void M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi,
Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper,
Vector<Coeff_t>& shift_coeffs);
void MooeeInternal(const FermionField& in, FermionField& out, int dag, int inv);
virtual void RefreshShiftCoefficients(RealD new_shift);
void MooeeInternalCompute(int dag, int inv, Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
// Constructors
MobiusEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm,
RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams());
void MooeeInternalAsm(const FermionField& in, FermionField& out, int LLs, int site,
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
protected:
void SetCoefficientsPrecondShiftOps(void);
};
void MooeeInternalZAsm(const FermionField& in, FermionField& out, int LLs, int site,
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
virtual void RefreshShiftCoefficients(RealD new_shift);
// Constructors
MobiusEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm,
RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams());
protected:
void SetCoefficientsPrecondShiftOps(void);
};
}}
#define INSTANTIATE_DPERP_MOBIUS_EOFA(A)\
template void MobiusEOFAFermion<A>::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, \
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
template void MobiusEOFAFermion<A>::M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper, std::vector<Coeff_t>& shift_coeffs); \
template void MobiusEOFAFermion<A>::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, \
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
template void MobiusEOFAFermion<A>::M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper, std::vector<Coeff_t>& shift_coeffs); \
template void MobiusEOFAFermion<A>::MooeeInv(const FermionField& psi, FermionField& chi); \
template void MobiusEOFAFermion<A>::MooeeInv_shift(const FermionField& psi, FermionField& chi); \
template void MobiusEOFAFermion<A>::MooeeInvDag(const FermionField& psi, FermionField& chi); \
template void MobiusEOFAFermion<A>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi);
#undef MOBIUS_EOFA_DPERP_DENSE
#define MOBIUS_EOFA_DPERP_CACHE
#undef MOBIUS_EOFA_DPERP_LINALG
#define MOBIUS_EOFA_DPERP_VEC
NAMESPACE_END(Grid);
#endif

View File

@ -1,429 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermioncache.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
namespace Grid {
namespace QCD {
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
{
int Ls = this->Ls;
GridBase *grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
for(int s=0; s<Ls; s++){
auto tmp = psi._odata[0];
if(s==0){
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5m(tmp, psi._odata[ss+0]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
}
}
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
std::vector<Coeff_t> &shift_coeffs)
{
int Ls = this->Ls;
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
GridBase *grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
for(int s=0; s<Ls; s++){
auto tmp = psi._odata[0];
if(s==0){
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5m(tmp, psi._odata[ss+0]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+shift_s]); }
else{ spProj5m(tmp, psi._odata[ss+shift_s]); }
chi[ss+s] = chi[ss+s] + shift_coeffs[s]*tmp;
}
}
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
{
int Ls = this->Ls;
GridBase *grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp = psi._odata[0];
for(int s=0; s<Ls; s++){
if(s==0) {
spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5p(tmp, psi._odata[ss+0]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
}
}
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
std::vector<Coeff_t> &shift_coeffs)
{
int Ls = this->Ls;
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
GridBase *grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
chi[ss+Ls-1] = zero;
auto tmp = psi._odata[0];
for(int s=0; s<Ls; s++){
if(s==0) {
spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5p(tmp, psi._odata[ss+0]);
chi[ss+s] = chi[ss+s] + diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+s]); }
else{ spProj5m(tmp, psi._odata[ss+s]); }
chi[ss+shift_s] = chi[ss+shift_s] + shift_coeffs[s]*tmp;
}
}
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
{
if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
GridBase *grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard;
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp = psi._odata[0];
// Apply (L^{\prime})^{-1}
chi[ss] = psi[ss]; // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
spProj5p(tmp, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp;
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp;
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp;
}
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
spProj5m(tmp, chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - this->uee[s]*tmp;
}
}
this->MooeeInvTime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi, FermionField &chi)
{
GridBase *grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard;
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp1 = psi._odata[0];
auto tmp2 = psi._odata[0];
auto tmp2_spProj = psi._odata[0];
// Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2
chi[ss] = psi[ss]; // chi[0]=psi[0]
tmp2 = MooeeInv_shift_lc[0]*psi[ss];
for(int s=1; s<Ls; s++){
spProj5p(tmp1, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp1;
tmp2 = tmp2 + MooeeInv_shift_lc[s]*psi[ss+s];
}
if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
else{ spProj5m(tmp2_spProj, tmp2); }
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp1, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp1;
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp1, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp1;
}
// chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
spProj5m(tmp1, chi[ss+Ls-1]);
chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
// Apply U^{-1} and add shift term
for(int s=Ls-2; s>=0; s--){
chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1;
spProj5m(tmp1, chi[ss+s]);
chi[ss+s] = chi[ss+s] + MooeeInv_shift_norm[s]*tmp2_spProj;
}
}
this->MooeeInvTime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi, FermionField &chi)
{
if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
GridBase *grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard;
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp = psi._odata[0];
// Apply (U^{\prime})^{-dag}
chi[ss] = psi[ss];
for(int s=1; s<Ls; s++){
spProj5m(tmp, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp;
}
// U_m^{-\dag}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp;
}
// L_m^{-\dag} D^{-dag}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp;
}
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
// Apply L^{-dag}
for(int s=Ls-2; s>=0; s--){
spProj5p(tmp, chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - this->lee[s]*tmp;
}
}
this->MooeeInvTime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi, FermionField &chi)
{
GridBase *grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard;
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp1 = psi._odata[0];
auto tmp2 = psi._odata[0];
auto tmp2_spProj = psi._odata[0];
// Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2
chi[ss] = psi[ss];
tmp2 = MooeeInvDag_shift_lc[0]*psi[ss];
for(int s=1; s<Ls; s++){
spProj5m(tmp1, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp1;
tmp2 = tmp2 + MooeeInvDag_shift_lc[s]*psi[ss+s];
}
if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
else{ spProj5m(tmp2_spProj, tmp2); }
// U_m^{-\dag}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp1, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp1;
}
// L_m^{-\dag} D^{-dag}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp1, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp1;
}
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
spProj5p(tmp1, chi[ss+Ls-1]);
chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInvDag_shift_norm[Ls-1]*tmp2_spProj;
// Apply L^{-dag}
for(int s=Ls-2; s>=0; s--){
chi[ss+s] = chi[ss+s] - this->lee[s]*tmp1;
spProj5p(tmp1, chi[ss+s]);
chi[ss+s] = chi[ss+s] + MooeeInvDag_shift_norm[s]*tmp2_spProj;
}
}
this->MooeeInvTime += usecond();
}
#ifdef MOBIUS_EOFA_DPERP_CACHE
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
#endif
}}

View File

@ -1,184 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermiondense.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
namespace Grid {
namespace QCD {
/*
* Dense matrix versions of routines
*/
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{
int Ls = this->Ls;
int LLs = psi._grid->_rdimensions[0];
int vol = psi._grid->oSites()/LLs;
int pm = this->pm;
RealD shift = this->shift;
RealD alpha = this->alpha;
RealD k = this->k;
RealD mq1 = this->mq1;
chi.checkerboard = psi.checkerboard;
assert(Ls==LLs);
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
for(int s=0;s<Ls;s++){
Pplus(s,s) = this->bee[s];
Pminus(s,s) = this->bee[s];
}
for(int s=0; s<Ls-1; s++){
Pminus(s,s+1) = -this->cee[s];
}
for(int s=0; s<Ls-1; s++){
Pplus(s+1,s) = -this->cee[s+1];
}
Pplus (0,Ls-1) = mq1*this->cee[0];
Pminus(Ls-1,0) = mq1*this->cee[Ls-1];
if(shift != 0.0){
Coeff_t N = 2.0 * ( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) );
for(int s=0; s<Ls; ++s){
if(pm == 1){ Pplus(s,Ls-1) += shift * k * N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1); }
else{ Pminus(Ls-1-s,Ls-1) -= shift * k * N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1); }
}
}
Eigen::MatrixXd PplusMat ;
Eigen::MatrixXd PminusMat;
if(inv){
PplusMat = Pplus.inverse();
PminusMat = Pminus.inverse();
} else {
PplusMat = Pplus;
PminusMat = Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
// For the non-vectorised s-direction this is simple
for(auto site=0; site<vol; site++){
SiteSpinor SiteChi;
SiteHalfSpinor SitePplus;
SiteHalfSpinor SitePminus;
for(int s1=0; s1<Ls; s1++){
SiteChi = zero;
for(int s2=0; s2<Ls; s2++){
int lex2 = s2 + Ls*site;
if(PplusMat(s1,s2) != 0.0){
spProj5p(SitePplus,psi[lex2]);
accumRecon5p(SiteChi, PplusMat(s1,s2)*SitePplus);
}
if(PminusMat(s1,s2) != 0.0){
spProj5m(SitePminus, psi[lex2]);
accumRecon5m(SiteChi, PminusMat(s1,s2)*SitePminus);
}
}
chi[s1+Ls*site] = SiteChi*0.5;
}
}
}
#ifdef MOBIUS_EOFA_DPERP_DENSE
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
template void MobiusEOFAFermion<GparityWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<GparityWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
template void MobiusEOFAFermion<GparityWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<GparityWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
#endif
}}

View File

@ -1,290 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionssp.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
namespace Grid {
namespace QCD {
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus fowards
// Pplus backwards
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
}
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); }
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); }
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
}
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); }
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); }
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard = psi.checkerboard;
int Ls = this->Ls;
// Apply (L^{\prime})^{-1}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1);
}
axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1);
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls]
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard = psi.checkerboard;
int Ls = this->Ls;
FermionField tmp(psi._grid);
// Apply (L^{\prime})^{-1}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
axpby_ssp(tmp, czero, tmp, this->MooeeInv_shift_lc[0], psi, 0, 0);
for(int s=1; s<Ls; s++){
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
axpby_ssp(tmp, one, tmp, this->MooeeInv_shift_lc[s], psi, 0, s);
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1);
}
axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1);
// Apply U^{-1} and add shift term
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); }
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls]
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); }
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard = psi.checkerboard;
int Ls = this->Ls;
// Apply (U^{\prime})^{-dagger}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
}
axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1);
// Apply L^{-dagger}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls]
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard = psi.checkerboard;
int Ls = this->Ls;
FermionField tmp(psi._grid);
// Apply (U^{\prime})^{-dagger} and accumulate (MooeeInvDag_shift_lc)_{j} \psi_{j} in tmp[0]
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
axpby_ssp(tmp, czero, tmp, this->MooeeInvDag_shift_lc[0], psi, 0, 0);
for(int s=1; s<Ls; s++){
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
axpby_ssp(tmp, one, tmp, this->MooeeInvDag_shift_lc[s], psi, 0, s);
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
}
axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1);
// Apply L^{-dagger} and add shift
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); }
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls]
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); }
}
}
#ifdef MOBIUS_EOFA_DPERP_LINALG
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
#endif
}}

View File

@ -1,983 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionvec.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
namespace Grid {
namespace QCD {
/*
* Dense matrix versions of routines
*/
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
GridBase* grid = psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
const int nsimd = Simd::Nsimd();
Vector<iSinglet<Simd>> u(LLs);
Vector<iSinglet<Simd>> l(LLs);
Vector<iSinglet<Simd>> d(LLs);
assert(Ls/LLs == nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
scalar_type* l_p = (scalar_type*) &l[0];
scalar_type* d_p = (scalar_type*) &d[0];
for(int o=0; o<LLs; o++){ // outer
for(int i=0; i<nsimd; i++){ //inner
int s = o + i*LLs;
int ss = o*nsimd + i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
this->M5Dcalls++;
this->M5Dtime -= usecond();
assert(Nc == 3);
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0; v<LLs; v++){
int vp = (v+1)%LLs;
int vm = (v+LLs-1)%LLs;
spProj5m(hp, psi[ss+vp]);
spProj5p(hm, psi[ss+vm]);
if (vp <= v){ rotate(hp, hp, 1); }
if (vm >= v){ rotate(hm, hm, nsimd-1); }
hp = 0.5*hp;
hm = 0.5*hm;
spRecon5m(fp, hp);
spRecon5p(fm, hm);
chi[ss+v] = d[v]*phi[ss+v];
chi[ss+v] = chi[ss+v] + u[v]*fp;
chi[ss+v] = chi[ss+v] + l[v]*fm;
}
#else
for(int v=0; v<LLs; v++){
vprefetch(psi[ss+v+LLs]);
int vp = (v == LLs-1) ? 0 : v+1;
int vm = (v == 0) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(2)(0);
Simd hp_01 = psi[ss+vp]()(2)(1);
Simd hp_02 = psi[ss+vp]()(2)(2);
Simd hp_10 = psi[ss+vp]()(3)(0);
Simd hp_11 = psi[ss+vp]()(3)(1);
Simd hp_12 = psi[ss+vp]()(3)(2);
Simd hm_00 = psi[ss+vm]()(0)(0);
Simd hm_01 = psi[ss+vm]()(0)(1);
Simd hm_02 = psi[ss+vm]()(0)(2);
Simd hm_10 = psi[ss+vm]()(1)(0);
Simd hm_11 = psi[ss+vm]()(1)(1);
Simd hm_12 = psi[ss+vm]()(1)(2);
if(vp <= v){
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if(vm >= v){
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
// Can force these to real arithmetic and save 2x.
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
vstream(chi[ss+v]()(0)(0), p_00);
vstream(chi[ss+v]()(0)(1), p_01);
vstream(chi[ss+v]()(0)(2), p_02);
vstream(chi[ss+v]()(1)(0), p_10);
vstream(chi[ss+v]()(1)(1), p_11);
vstream(chi[ss+v]()(1)(2), p_12);
vstream(chi[ss+v]()(2)(0), p_20);
vstream(chi[ss+v]()(2)(1), p_21);
vstream(chi[ss+v]()(2)(2), p_22);
vstream(chi[ss+v]()(3)(0), p_30);
vstream(chi[ss+v]()(3)(1), p_31);
vstream(chi[ss+v]()(3)(2), p_32);
}
#endif
}
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs)
{
#if 0
this->M5D(psi, phi, chi, lower, diag, upper);
// FIXME: possible gain from vectorizing shift operation as well?
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); }
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); }
}
#else
GridBase* grid = psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
const int nsimd = Simd::Nsimd();
Vector<iSinglet<Simd>> u(LLs);
Vector<iSinglet<Simd>> l(LLs);
Vector<iSinglet<Simd>> d(LLs);
Vector<iSinglet<Simd>> s(LLs);
assert(Ls/LLs == nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
scalar_type* l_p = (scalar_type*) &l[0];
scalar_type* d_p = (scalar_type*) &d[0];
scalar_type* s_p = (scalar_type*) &s[0];
for(int o=0; o<LLs; o++){ // outer
for(int i=0; i<nsimd; i++){ //inner
int s = o + i*LLs;
int ss = o*nsimd + i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
s_p[ss] = shift_coeffs[s];
}}
this->M5Dcalls++;
this->M5Dtime -= usecond();
assert(Nc == 3);
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
int vs = (this->pm == 1) ? LLs-1 : 0;
Simd hs_00 = (this->pm == 1) ? psi[ss+vs]()(2)(0) : psi[ss+vs]()(0)(0);
Simd hs_01 = (this->pm == 1) ? psi[ss+vs]()(2)(1) : psi[ss+vs]()(0)(1);
Simd hs_02 = (this->pm == 1) ? psi[ss+vs]()(2)(2) : psi[ss+vs]()(0)(2);
Simd hs_10 = (this->pm == 1) ? psi[ss+vs]()(3)(0) : psi[ss+vs]()(1)(0);
Simd hs_11 = (this->pm == 1) ? psi[ss+vs]()(3)(1) : psi[ss+vs]()(1)(1);
Simd hs_12 = (this->pm == 1) ? psi[ss+vs]()(3)(2) : psi[ss+vs]()(1)(2);
for(int v=0; v<LLs; v++){
vprefetch(psi[ss+v+LLs]);
int vp = (v == LLs-1) ? 0 : v+1;
int vm = (v == 0) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(2)(0);
Simd hp_01 = psi[ss+vp]()(2)(1);
Simd hp_02 = psi[ss+vp]()(2)(2);
Simd hp_10 = psi[ss+vp]()(3)(0);
Simd hp_11 = psi[ss+vp]()(3)(1);
Simd hp_12 = psi[ss+vp]()(3)(2);
Simd hm_00 = psi[ss+vm]()(0)(0);
Simd hm_01 = psi[ss+vm]()(0)(1);
Simd hm_02 = psi[ss+vm]()(0)(2);
Simd hm_10 = psi[ss+vm]()(1)(0);
Simd hm_11 = psi[ss+vm]()(1)(1);
Simd hm_12 = psi[ss+vm]()(1)(2);
if(vp <= v){
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if(this->pm == 1 && vs <= v){
hs_00.v = Optimization::Rotate::tRotate<2>(hs_00.v);
hs_01.v = Optimization::Rotate::tRotate<2>(hs_01.v);
hs_02.v = Optimization::Rotate::tRotate<2>(hs_02.v);
hs_10.v = Optimization::Rotate::tRotate<2>(hs_10.v);
hs_11.v = Optimization::Rotate::tRotate<2>(hs_11.v);
hs_12.v = Optimization::Rotate::tRotate<2>(hs_12.v);
}
if(vm >= v){
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
if(this->pm == -1 && vs >= v){
hs_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_00.v);
hs_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_01.v);
hs_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_02.v);
hs_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_10.v);
hs_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_11.v);
hs_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_12.v);
}
// Can force these to real arithmetic and save 2x.
Simd p_00 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00);
Simd p_01 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01);
Simd p_02 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02);
Simd p_10 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10);
Simd p_11 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11);
Simd p_12 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12);
Simd p_20 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
Simd p_21 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
Simd p_22 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
Simd p_30 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
Simd p_31 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
Simd p_32 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
vstream(chi[ss+v]()(0)(0), p_00);
vstream(chi[ss+v]()(0)(1), p_01);
vstream(chi[ss+v]()(0)(2), p_02);
vstream(chi[ss+v]()(1)(0), p_10);
vstream(chi[ss+v]()(1)(1), p_11);
vstream(chi[ss+v]()(1)(2), p_12);
vstream(chi[ss+v]()(2)(0), p_20);
vstream(chi[ss+v]()(2)(1), p_21);
vstream(chi[ss+v]()(2)(2), p_22);
vstream(chi[ss+v]()(3)(0), p_30);
vstream(chi[ss+v]()(3)(1), p_31);
vstream(chi[ss+v]()(3)(2), p_32);
}
}
this->M5Dtime += usecond();
#endif
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
GridBase* grid = psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
int nsimd = Simd::Nsimd();
Vector<iSinglet<Simd>> u(LLs);
Vector<iSinglet<Simd>> l(LLs);
Vector<iSinglet<Simd>> d(LLs);
assert(Ls/LLs == nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
scalar_type* l_p = (scalar_type*) &l[0];
scalar_type* d_p = (scalar_type*) &d[0];
for(int o=0; o<LLs; o++){ // outer
for(int i=0; i<nsimd; i++){ //inner
int s = o + i*LLs;
int ss = o*nsimd + i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0; v<LLs; v++){
int vp = (v+1)%LLs;
int vm = (v+LLs-1)%LLs;
spProj5p(hp, psi[ss+vp]);
spProj5m(hm, psi[ss+vm]);
if(vp <= v){ rotate(hp, hp, 1); }
if(vm >= v){ rotate(hm, hm, nsimd-1); }
hp = hp*0.5;
hm = hm*0.5;
spRecon5p(fp, hp);
spRecon5m(fm, hm);
chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
chi[ss+v] = chi[ss+v] +l[v]*fm;
}
#else
for(int v=0; v<LLs; v++){
vprefetch(psi[ss+v+LLs]);
int vp = (v == LLs-1) ? 0 : v+1;
int vm = (v == 0 ) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(0)(0);
Simd hp_01 = psi[ss+vp]()(0)(1);
Simd hp_02 = psi[ss+vp]()(0)(2);
Simd hp_10 = psi[ss+vp]()(1)(0);
Simd hp_11 = psi[ss+vp]()(1)(1);
Simd hp_12 = psi[ss+vp]()(1)(2);
Simd hm_00 = psi[ss+vm]()(2)(0);
Simd hm_01 = psi[ss+vm]()(2)(1);
Simd hm_02 = psi[ss+vm]()(2)(2);
Simd hm_10 = psi[ss+vm]()(3)(0);
Simd hm_11 = psi[ss+vm]()(3)(1);
Simd hm_12 = psi[ss+vm]()(3)(2);
if (vp <= v){
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if(vm >= v){
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
vstream(chi[ss+v]()(0)(0), p_00);
vstream(chi[ss+v]()(0)(1), p_01);
vstream(chi[ss+v]()(0)(2), p_02);
vstream(chi[ss+v]()(1)(0), p_10);
vstream(chi[ss+v]()(1)(1), p_11);
vstream(chi[ss+v]()(1)(2), p_12);
vstream(chi[ss+v]()(2)(0), p_20);
vstream(chi[ss+v]()(2)(1), p_21);
vstream(chi[ss+v]()(2)(2), p_22);
vstream(chi[ss+v]()(3)(0), p_30);
vstream(chi[ss+v]()(3)(1), p_31);
vstream(chi[ss+v]()(3)(2), p_32);
}
#endif
}
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs)
{
#if 0
this->M5Ddag(psi, phi, chi, lower, diag, upper);
// FIXME: possible gain from vectorizing shift operation as well?
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); }
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); }
}
#else
GridBase* grid = psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
int nsimd = Simd::Nsimd();
Vector<iSinglet<Simd>> u(LLs);
Vector<iSinglet<Simd>> l(LLs);
Vector<iSinglet<Simd>> d(LLs);
Vector<iSinglet<Simd>> s(LLs);
assert(Ls/LLs == nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
scalar_type* l_p = (scalar_type*) &l[0];
scalar_type* d_p = (scalar_type*) &d[0];
scalar_type* s_p = (scalar_type*) &s[0];
for(int o=0; o<LLs; o++){ // outer
for(int i=0; i<nsimd; i++){ //inner
int s = o + i*LLs;
int ss = o*nsimd + i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
s_p[ss] = shift_coeffs[s];
}}
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
int vs = (this->pm == 1) ? LLs-1 : 0;
Simd hs_00 = (this->pm == 1) ? psi[ss+vs]()(0)(0) : psi[ss+vs]()(2)(0);
Simd hs_01 = (this->pm == 1) ? psi[ss+vs]()(0)(1) : psi[ss+vs]()(2)(1);
Simd hs_02 = (this->pm == 1) ? psi[ss+vs]()(0)(2) : psi[ss+vs]()(2)(2);
Simd hs_10 = (this->pm == 1) ? psi[ss+vs]()(1)(0) : psi[ss+vs]()(3)(0);
Simd hs_11 = (this->pm == 1) ? psi[ss+vs]()(1)(1) : psi[ss+vs]()(3)(1);
Simd hs_12 = (this->pm == 1) ? psi[ss+vs]()(1)(2) : psi[ss+vs]()(3)(2);
for(int v=0; v<LLs; v++){
vprefetch(psi[ss+v+LLs]);
int vp = (v == LLs-1) ? 0 : v+1;
int vm = (v == 0 ) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(0)(0);
Simd hp_01 = psi[ss+vp]()(0)(1);
Simd hp_02 = psi[ss+vp]()(0)(2);
Simd hp_10 = psi[ss+vp]()(1)(0);
Simd hp_11 = psi[ss+vp]()(1)(1);
Simd hp_12 = psi[ss+vp]()(1)(2);
Simd hm_00 = psi[ss+vm]()(2)(0);
Simd hm_01 = psi[ss+vm]()(2)(1);
Simd hm_02 = psi[ss+vm]()(2)(2);
Simd hm_10 = psi[ss+vm]()(3)(0);
Simd hm_11 = psi[ss+vm]()(3)(1);
Simd hm_12 = psi[ss+vm]()(3)(2);
if (vp <= v){
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if(this->pm == 1 && vs <= v){
hs_00.v = Optimization::Rotate::tRotate<2>(hs_00.v);
hs_01.v = Optimization::Rotate::tRotate<2>(hs_01.v);
hs_02.v = Optimization::Rotate::tRotate<2>(hs_02.v);
hs_10.v = Optimization::Rotate::tRotate<2>(hs_10.v);
hs_11.v = Optimization::Rotate::tRotate<2>(hs_11.v);
hs_12.v = Optimization::Rotate::tRotate<2>(hs_12.v);
}
if(vm >= v){
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
if(this->pm == -1 && vs >= v){
hs_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_00.v);
hs_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_01.v);
hs_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_02.v);
hs_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_10.v);
hs_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_11.v);
hs_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_12.v);
}
Simd p_00 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
Simd p_01 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
Simd p_02 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
Simd p_10 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
Simd p_11 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
Simd p_12 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
Simd p_20 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00);
Simd p_21 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01);
Simd p_22 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02);
Simd p_30 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10);
Simd p_31 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11);
Simd p_32 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12);
vstream(chi[ss+v]()(0)(0), p_00);
vstream(chi[ss+v]()(0)(1), p_01);
vstream(chi[ss+v]()(0)(2), p_02);
vstream(chi[ss+v]()(1)(0), p_10);
vstream(chi[ss+v]()(1)(1), p_11);
vstream(chi[ss+v]()(1)(2), p_12);
vstream(chi[ss+v]()(2)(0), p_20);
vstream(chi[ss+v]()(2)(1), p_21);
vstream(chi[ss+v]()(2)(2), p_22);
vstream(chi[ss+v]()(3)(0), p_30);
vstream(chi[ss+v]()(3)(1), p_31);
vstream(chi[ss+v]()(3)(2), p_32);
}
}
this->M5Dtime += usecond();
#endif
}
#ifdef AVX512
#include<simd/Intel512common.h>
#include<simd/Intel512avx.h>
#include<simd/Intel512single.h>
#endif
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
#ifndef AVX512
{
SiteHalfSpinor BcastP;
SiteHalfSpinor BcastM;
SiteHalfSpinor SiteChiP;
SiteHalfSpinor SiteChiM;
// Ls*Ls * 2 * 12 * vol flops
for(int s1=0; s1<LLs; s1++){
for(int s2=0; s2<LLs; s2++){
for(int l=0; l < Simd::Nsimd(); l++){ // simd lane
int s = s2 + l*LLs;
int lex = s2 + LLs*site;
if( s2==0 && l==0 ){
SiteChiP=zero;
SiteChiM=zero;
}
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
vbroadcast(BcastP()(sp)(co), psi[lex]()(sp)(co), l);
}}
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
vbroadcast(BcastM()(sp)(co), psi[lex]()(sp+2)(co), l);
}}
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
SiteChiP()(sp)(co) = real_madd(Matp[LLs*s+s1]()()(), BcastP()(sp)(co), SiteChiP()(sp)(co)); // 1100 us.
SiteChiM()(sp)(co) = real_madd(Matm[LLs*s+s1]()()(), BcastM()(sp)(co), SiteChiM()(sp)(co)); // each found by commenting out
}}
}}
{
int lex = s1 + LLs*site;
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
}}
}
}
}
#else
{
// pointers
// MASK_REGS;
#define Chi_00 %%zmm1
#define Chi_01 %%zmm2
#define Chi_02 %%zmm3
#define Chi_10 %%zmm4
#define Chi_11 %%zmm5
#define Chi_12 %%zmm6
#define Chi_20 %%zmm7
#define Chi_21 %%zmm8
#define Chi_22 %%zmm9
#define Chi_30 %%zmm10
#define Chi_31 %%zmm11
#define Chi_32 %%zmm12
#define BCAST0 %%zmm13
#define BCAST1 %%zmm14
#define BCAST2 %%zmm15
#define BCAST3 %%zmm16
#define BCAST4 %%zmm17
#define BCAST5 %%zmm18
#define BCAST6 %%zmm19
#define BCAST7 %%zmm20
#define BCAST8 %%zmm21
#define BCAST9 %%zmm22
#define BCAST10 %%zmm23
#define BCAST11 %%zmm24
int incr = LLs*LLs*sizeof(iSinglet<Simd>);
for(int s1=0; s1<LLs; s1++){
for(int s2=0; s2<LLs; s2++){
int lex = s2 + LLs*site;
uint64_t a0 = (uint64_t) &Matp[LLs*s2+s1]; // should be cacheable
uint64_t a1 = (uint64_t) &Matm[LLs*s2+s1];
uint64_t a2 = (uint64_t) &psi[lex];
for(int l=0; l<Simd::Nsimd(); l++){ // simd lane
if((s2+l)==0) {
asm(
VPREFETCH1(0,%2) VPREFETCH1(0,%1)
VPREFETCH1(12,%2) VPREFETCH1(13,%2)
VPREFETCH1(14,%2) VPREFETCH1(15,%2)
VBCASTCDUP(0,%2,BCAST0)
VBCASTCDUP(1,%2,BCAST1)
VBCASTCDUP(2,%2,BCAST2)
VBCASTCDUP(3,%2,BCAST3)
VBCASTCDUP(4,%2,BCAST4) VMULMEM(0,%0,BCAST0,Chi_00)
VBCASTCDUP(5,%2,BCAST5) VMULMEM(0,%0,BCAST1,Chi_01)
VBCASTCDUP(6,%2,BCAST6) VMULMEM(0,%0,BCAST2,Chi_02)
VBCASTCDUP(7,%2,BCAST7) VMULMEM(0,%0,BCAST3,Chi_10)
VBCASTCDUP(8,%2,BCAST8) VMULMEM(0,%0,BCAST4,Chi_11)
VBCASTCDUP(9,%2,BCAST9) VMULMEM(0,%0,BCAST5,Chi_12)
VBCASTCDUP(10,%2,BCAST10) VMULMEM(0,%1,BCAST6,Chi_20)
VBCASTCDUP(11,%2,BCAST11) VMULMEM(0,%1,BCAST7,Chi_21)
VMULMEM(0,%1,BCAST8,Chi_22)
VMULMEM(0,%1,BCAST9,Chi_30)
VMULMEM(0,%1,BCAST10,Chi_31)
VMULMEM(0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
} else {
asm(
VBCASTCDUP(0,%2,BCAST0) VMADDMEM(0,%0,BCAST0,Chi_00)
VBCASTCDUP(1,%2,BCAST1) VMADDMEM(0,%0,BCAST1,Chi_01)
VBCASTCDUP(2,%2,BCAST2) VMADDMEM(0,%0,BCAST2,Chi_02)
VBCASTCDUP(3,%2,BCAST3) VMADDMEM(0,%0,BCAST3,Chi_10)
VBCASTCDUP(4,%2,BCAST4) VMADDMEM(0,%0,BCAST4,Chi_11)
VBCASTCDUP(5,%2,BCAST5) VMADDMEM(0,%0,BCAST5,Chi_12)
VBCASTCDUP(6,%2,BCAST6) VMADDMEM(0,%1,BCAST6,Chi_20)
VBCASTCDUP(7,%2,BCAST7) VMADDMEM(0,%1,BCAST7,Chi_21)
VBCASTCDUP(8,%2,BCAST8) VMADDMEM(0,%1,BCAST8,Chi_22)
VBCASTCDUP(9,%2,BCAST9) VMADDMEM(0,%1,BCAST9,Chi_30)
VBCASTCDUP(10,%2,BCAST10) VMADDMEM(0,%1,BCAST10,Chi_31)
VBCASTCDUP(11,%2,BCAST11) VMADDMEM(0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
}
a0 = a0 + incr;
a1 = a1 + incr;
a2 = a2 + sizeof(typename Simd::scalar_type);
}
}
{
int lexa = s1+LLs*site;
asm (
VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01) VSTORE(2 ,%0,Chi_02)
VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11) VSTORE(5 ,%0,Chi_12)
VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21) VSTORE(8 ,%0,Chi_22)
VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31) VSTORE(11,%0,Chi_32)
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
}
}
}
#undef Chi_00
#undef Chi_01
#undef Chi_02
#undef Chi_10
#undef Chi_11
#undef Chi_12
#undef Chi_20
#undef Chi_21
#undef Chi_22
#undef Chi_30
#undef Chi_31
#undef Chi_32
#undef BCAST0
#undef BCAST1
#undef BCAST2
#undef BCAST3
#undef BCAST4
#undef BCAST5
#undef BCAST6
#undef BCAST7
#undef BCAST8
#undef BCAST9
#undef BCAST10
#undef BCAST11
#endif
};
// Z-mobius version
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, FermionField& chi,
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
std::cout << "Error: zMobius not implemented for EOFA" << std::endl;
exit(-1);
};
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{
int Ls = this->Ls;
int LLs = psi._grid->_rdimensions[0];
int vol = psi._grid->oSites()/LLs;
chi.checkerboard = psi.checkerboard;
Vector<iSinglet<Simd>> Matp;
Vector<iSinglet<Simd>> Matm;
Vector<iSinglet<Simd>>* _Matp;
Vector<iSinglet<Simd>>* _Matm;
// MooeeInternalCompute(dag,inv,Matp,Matm);
if(inv && dag){
_Matp = &this->MatpInvDag;
_Matm = &this->MatmInvDag;
}
if(inv && (!dag)){
_Matp = &this->MatpInv;
_Matm = &this->MatmInv;
}
if(!inv){
MooeeInternalCompute(dag, inv, Matp, Matm);
_Matp = &Matp;
_Matm = &Matm;
}
assert(_Matp->size() == Ls*LLs);
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
if(switcheroo<Coeff_t>::iscomplex()){
parallel_for(auto site=0; site<vol; site++){
MooeeInternalZAsm(psi, chi, LLs, site, *_Matp, *_Matm);
}
} else {
parallel_for(auto site=0; site<vol; site++){
MooeeInternalAsm(psi, chi, LLs, site, *_Matp, *_Matm);
}
}
this->MooeeInvTime += usecond();
}
#ifdef MOBIUS_EOFA_DPERP_VEC
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplFH);
template void MobiusEOFAFermion<DomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<DomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<DomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<DomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
#endif
}}

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,57 +24,54 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_MOBIUS_FERMION_H
#define GRID_QCD_MOBIUS_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class MobiusFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class MobiusFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
MobiusFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD b, RealD c,const ImplParams &p= ImplParams()) :
virtual void Instantiatable(void) {};
// Constructors
MobiusFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD b, RealD c,const ImplParams &p= ImplParams()) :
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
RealD eps = 1.0;
{
RealD eps = 1.0;
std::cout<<GridLogMessage << "MobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" Tanh approx"<<std::endl;
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
assert(zdata->n==this->Ls);
std::cout<<GridLogMessage << "MobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" Tanh approx"<<std::endl;
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
assert(zdata->n==this->Ls);
// Call base setter
this->SetCoefficientsTanh(zdata,b,c);
// Call base setter
this->SetCoefficientsTanh(zdata,b,c);
Approx::zolotarev_free(zdata);
Approx::zolotarev_free(zdata);
}
};
}
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,58 +24,55 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H
#define GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class MobiusZolotarevFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class MobiusZolotarevFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
MobiusZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD b, RealD c,
RealD lo, RealD hi,const ImplParams &p= ImplParams()) :
virtual void Instantiatable(void) {};
// Constructors
MobiusZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD b, RealD c,
RealD lo, RealD hi,const ImplParams &p= ImplParams()) :
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
RealD eps = lo/hi;
{
RealD eps = lo/hi;
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,this->Ls,0);
assert(zdata->n==this->Ls);
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,this->Ls,0);
assert(zdata->n==this->Ls);
std::cout<<GridLogMessage << "MobiusZolotarevFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" Zolotarev range ["<<lo<<","<<hi<<"]"<<std::endl;
std::cout<<GridLogMessage << "MobiusZolotarevFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" Zolotarev range ["<<lo<<","<<hi<<"]"<<std::endl;
// Call base setter
this->SetCoefficientsZolotarev(hi,zdata,b,c);
// Call base setter
this->SetCoefficientsZolotarev(hi,zdata,b,c);
Approx::zolotarev_free(zdata);
}
};
Approx::zolotarev_free(zdata);
}
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,46 +24,44 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
#define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class OverlapWilsonCayleyTanhFermion : public MobiusFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class OverlapWilsonCayleyTanhFermion : public MobiusFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) {
this->MomentumSpacePropagatorHw(out,in,_m,twist);
};
};
// Constructors
OverlapWilsonCayleyTanhFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD scale,const ImplParams &p= ImplParams()) :
// Constructors
OverlapWilsonCayleyTanhFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD scale,const ImplParams &p= ImplParams()) :
// b+c=scale, b-c = 0 <=> b =c = scale/2
MobiusFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,0.5*scale,0.5*scale,p)
{
}
};
// b+c=scale, b-c = 0 <=> b =c = scale/2
MobiusFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,0.5*scale,0.5*scale,p)
{
}
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,45 +24,42 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H
#define OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class OverlapWilsonCayleyZolotarevFermion : public MobiusZolotarevFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class OverlapWilsonCayleyZolotarevFermion : public MobiusZolotarevFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
// Constructors
// Constructors
OverlapWilsonCayleyZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo, RealD hi,const ImplParams &p= ImplParams()) :
// b+c=1.0, b-c = 0 <=> b =c = 1/2
MobiusZolotarevFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,0.5,0.5,lo,hi,p)
OverlapWilsonCayleyZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo, RealD hi,const ImplParams &p= ImplParams()) :
// b+c=1.0, b-c = 0 <=> b =c = 1/2
MobiusZolotarevFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,0.5,0.5,lo,hi,p)
{}
{}
};
};
}
}
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,48 +24,47 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H
#define OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class OverlapWilsonContFracTanhFermion : public ContinuedFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class OverlapWilsonContFracTanhFermion : public ContinuedFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonContFracTanhFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD scale,const ImplParams &p= ImplParams()) :
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonContFracTanhFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD scale,const ImplParams &p= ImplParams()) :
// b+c=scale, b-c = 0 <=> b =c = scale/2
ContinuedFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((this->Ls&0x1)==1); // Odd Ls required
int nrational=this->Ls-1;// Even rational order
Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham
this->SetCoefficientsTanh(zdata,scale);
Approx::zolotarev_free(zdata);
}
};
// b+c=scale, b-c = 0 <=> b =c = scale/2
ContinuedFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((this->Ls&0x1)==1); // Odd Ls required
int nrational=this->Ls-1;// Even rational order
Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham
this->SetCoefficientsTanh(zdata,scale);
Approx::zolotarev_free(zdata);
}
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,51 +24,49 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H
#define OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class OverlapWilsonContFracZolotarevFermion : public ContinuedFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
template<class Impl>
class OverlapWilsonContFracZolotarevFermion : public ContinuedFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonContFracZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo,RealD hi,const ImplParams &p= ImplParams()):
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonContFracZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo,RealD hi,const ImplParams &p= ImplParams()):
// b+c=scale, b-c = 0 <=> b =c = scale/2
ContinuedFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((this->Ls&0x1)==1); // Odd Ls required
// b+c=scale, b-c = 0 <=> b =c = scale/2
ContinuedFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((this->Ls&0x1)==1); // Odd Ls required
int nrational=this->Ls;// Odd rational order
RealD eps = lo/hi;
int nrational=this->Ls;// Odd rational order
RealD eps = lo/hi;
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0);
this->SetCoefficientsZolotarev(hi,zdata);
Approx::zolotarev_free(zdata);
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0);
this->SetCoefficientsZolotarev(hi,zdata);
Approx::zolotarev_free(zdata);
}
};
}
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,48 +24,46 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H
#define OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class OverlapWilsonPartialFractionTanhFermion : public PartialFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class OverlapWilsonPartialFractionTanhFermion : public PartialFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonPartialFractionTanhFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD scale,const ImplParams &p= ImplParams()) :
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonPartialFractionTanhFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD scale,const ImplParams &p= ImplParams()) :
// b+c=scale, b-c = 0 <=> b =c = scale/2
PartialFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((this->Ls&0x1)==1); // Odd Ls required
int nrational=this->Ls-1;// Even rational order
Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham
this->SetCoefficientsTanh(zdata,scale);
Approx::zolotarev_free(zdata);
}
};
// b+c=scale, b-c = 0 <=> b =c = scale/2
PartialFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((this->Ls&0x1)==1); // Odd Ls required
int nrational=this->Ls-1;// Even rational order
Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham
this->SetCoefficientsTanh(zdata,scale);
Approx::zolotarev_free(zdata);
}
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,51 +24,50 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H
#define OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class OverlapWilsonPartialFractionZolotarevFermion : public PartialFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
template<class Impl>
class OverlapWilsonPartialFractionZolotarevFermion : public PartialFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonPartialFractionZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo,RealD hi,const ImplParams &p= ImplParams()):
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonPartialFractionZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo,RealD hi,const ImplParams &p= ImplParams()):
// b+c=scale, b-c = 0 <=> b =c = scale/2
PartialFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((this->Ls&0x1)==1); // Odd Ls required
// b+c=scale, b-c = 0 <=> b =c = scale/2
PartialFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((this->Ls&0x1)==1); // Odd Ls required
int nrational=this->Ls;// Odd rational order
RealD eps = lo/hi;
int nrational=this->Ls;// Odd rational order
RealD eps = lo/hi;
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0);
this->SetCoefficientsZolotarev(hi,zdata);
Approx::zolotarev_free(zdata);
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0);
this->SetCoefficientsZolotarev(hi,zdata);
Approx::zolotarev_free(zdata);
}
};
}
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,459 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/PartialFractionFermion5D.h>
namespace Grid {
namespace QCD {
template<class Impl>
void PartialFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
// this does both dag and undag but is trivial; make a common helper routing
int sign = 1;
int Ls = this->Ls;
this->DhopDir(psi,chi,dir,disp);
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(chi,-scale,chi,0.0,chi,s,s);
ag5xpby_ssp(chi, scale,chi,0.0,chi,s+1,s+1);
}
ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::Meooe_internal(const FermionField &psi, FermionField &chi,int dag)
{
int Ls = this->Ls;
int sign = dag ? (-1) : 1;
if ( psi.checkerboard == Odd ) {
this->DhopEO(psi,chi,DaggerNo);
} else {
this->DhopOE(psi,chi,DaggerNo);
}
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(chi,-scale,chi,0.0,chi,s,s);
ag5xpby_ssp(chi, scale,chi,0.0,chi,s+1,s+1);
}
ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::Mooee_internal(const FermionField &psi, FermionField &chi,int dag)
{
// again dag and undag are trivially related
int sign = dag ? (-1) : 1;
int Ls = this->Ls;
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
RealD pp = p[nblock-1-b];
RealD qq = q[nblock-1-b];
// Do each 2x2 block aligned at s and multiplies Dw site diagonal by G5 so Hw
ag5xpby_ssp(chi,-dw_diag*scale,psi,amax*sqrt(qq)*scale,psi, s ,s+1);
ag5xpby_ssp(chi, dw_diag*scale,psi,amax*sqrt(qq)*scale,psi, s+1,s);
axpby_ssp (chi, 1.0, chi,sqrt(amax*pp)*scale*sign,psi,s+1,Ls-1);
}
{
RealD R=(1+mass)/(1-mass);
//R g5 psi[Ls-1] + p[0] H
ag5xpbg5y_ssp(chi,R*scale,psi,p[nblock]*scale*dw_diag/amax,psi,Ls-1,Ls-1);
for(int b=0;b<nblock;b++){
int s = 2*b+1;
RealD pp = p[nblock-1-b];
axpby_ssp(chi,1.0,chi,-sqrt(amax*pp)*scale*sign,psi,Ls-1,s);
}
}
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeInv_internal(const FermionField &psi, FermionField &chi,int dag)
{
int sign = dag ? (-1) : 1;
int Ls = this->Ls;
FermionField tmp(psi._grid);
///////////////////////////////////////////////////////////////////////////////////////
//Linv
///////////////////////////////////////////////////////////////////////////////////////
int nblock=(Ls-1)/2;
axpy(chi,0.0,psi,psi); // Identity piece
for(int b=0;b<nblock;b++){
int s = 2*b;
RealD pp = p[nblock-1-b];
RealD qq = q[nblock-1-b];
RealD coeff1=sign*sqrt(amax*amax*amax*pp*qq) / ( dw_diag*dw_diag + amax*amax* qq);
RealD coeff2=sign*sqrt(amax*pp)*dw_diag / ( dw_diag*dw_diag + amax*amax* qq); // Implicit g5 here
axpby_ssp (chi,1.0,chi,coeff1,psi,Ls-1,s);
axpbg5y_ssp(chi,1.0,chi,coeff2,psi,Ls-1,s+1);
}
///////////////////////////////////////////////////////////////////////////////////////
//Dinv (note D isn't really diagonal -- just diagonal enough that we can still invert)
// Compute Seeinv (coeff of gamma5)
///////////////////////////////////////////////////////////////////////////////////////
RealD R=(1+mass)/(1-mass);
RealD Seeinv = R + p[nblock]*dw_diag/amax;
for(int b=0;b<nblock;b++){
Seeinv += p[nblock-1-b]*dw_diag/amax / ( dw_diag*dw_diag/amax/amax + q[nblock-1-b]);
}
Seeinv = 1.0/Seeinv;
for(int b=0;b<nblock;b++){
int s = 2*b;
RealD pp = p[nblock-1-b];
RealD qq = q[nblock-1-b];
RealD coeff1=dw_diag / ( dw_diag*dw_diag + amax*amax* qq); // Implicit g5 here
RealD coeff2=amax*sqrt(qq) / ( dw_diag*dw_diag + amax*amax* qq);
ag5xpby_ssp (tmp,-coeff1,chi,coeff2,chi,s,s+1);
ag5xpby_ssp (tmp, coeff1,chi,coeff2,chi,s+1,s);
}
ag5xpby_ssp (tmp, Seeinv,chi,0.0,chi,Ls-1,Ls-1);
///////////////////////////////////////////////////////////////////////////////////////
// Uinv
///////////////////////////////////////////////////////////////////////////////////////
for(int b=0;b<nblock;b++){
int s = 2*b;
RealD pp = p[nblock-1-b];
RealD qq = q[nblock-1-b];
RealD coeff1=-sign*sqrt(amax*amax*amax*pp*qq) / ( dw_diag*dw_diag + amax*amax* qq);
RealD coeff2=-sign*sqrt(amax*pp)*dw_diag / ( dw_diag*dw_diag + amax*amax* qq); // Implicit g5 here
axpby_ssp (chi,1.0/scale,tmp,coeff1/scale,tmp,s,Ls-1);
axpbg5y_ssp(chi,1.0/scale,tmp,coeff2/scale,tmp,s+1,Ls-1);
}
axpby_ssp (chi, 1.0/scale,tmp,0.0,tmp,Ls-1,Ls-1);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::M_internal(const FermionField &psi, FermionField &chi,int dag)
{
FermionField D(psi._grid);
int Ls = this->Ls;
int sign = dag ? (-1) : 1;
// For partial frac Hw case (b5=c5=1) chroma quirkily computes
//
// Conventions for partfrac appear to be a mess.
// Tony's Nara lectures have
//
// BlockDiag( H/p_i 1 | 1 )
// ( 1 p_i H / q_i^2 | 0 )
// ---------------------------------
// ( -1 0 | R +p0 H )
//
//Chroma ( -2H 2sqrt(q_i) | 0 )
// (2 sqrt(q_i) 2H | 2 sqrt(p_i) )
// ---------------------------------
// ( 0 -2 sqrt(p_i) | 2 R gamma_5 + p0 2H
//
// Edwards/Joo/Kennedy/Wenger
//
// Here, the "beta's" selected by chroma to scale the unphysical bulk constraint fields
// incorporate the approx scale factor. This is obtained by propagating the
// scale on "H" out to the off diagonal elements as follows:
//
// BlockDiag( H/p_i 1 | 1 )
// ( 1 p_i H / q_i^2 | 0 )
// ---------------------------------
// ( -1 0 | R + p_0 H )
//
// becomes:
// BlockDiag( H/ sp_i 1 | 1 )
// ( 1 sp_i H / s^2q_i^2 | 0 )
// ---------------------------------
// ( -1 0 | R + p_0/s H )
//
//
// This is implemented in Chroma by
// p0' = p0/approxMax
// p_i' = p_i*approxMax
// q_i' = q_i*approxMax*approxMax
//
// After the equivalence transform is applied the matrix becomes
//
//Chroma ( -2H sqrt(q'_i) | 0 )
// (sqrt(q'_i) 2H | sqrt(p'_i) )
// ---------------------------------
// ( 0 -sqrt(p'_i) | 2 R gamma_5 + p'0 2H
//
// = ( -2H sqrt(q_i)amax | 0 )
// (sqrt(q_i)amax 2H | sqrt(p_i*amax) )
// ---------------------------------
// ( 0 -sqrt(p_i)*amax | 2 R gamma_5 + p0/amax 2H
//
this->DW(psi,D,DaggerNo);
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
double pp = p[nblock-1-b];
double qq = q[nblock-1-b];
// Do each 2x2 block aligned at s and
ag5xpby_ssp(chi,-1.0*scale,D,amax*sqrt(qq)*scale,psi, s ,s+1); // Multiplies Dw by G5 so Hw
ag5xpby_ssp(chi, 1.0*scale,D,amax*sqrt(qq)*scale,psi, s+1,s);
// Pick up last column
axpby_ssp (chi, 1.0, chi,sqrt(amax*pp)*scale*sign,psi,s+1,Ls-1);
}
{
double R=(1+this->mass)/(1-this->mass);
//R g5 psi[Ls] + p[0] H
ag5xpbg5y_ssp(chi,R*scale,psi,p[nblock]*scale/amax,D,Ls-1,Ls-1);
for(int b=0;b<nblock;b++){
int s = 2*b+1;
double pp = p[nblock-1-b];
axpby_ssp(chi,1.0,chi,-sqrt(amax*pp)*scale*sign,psi,Ls-1,s);
}
}
}
template<class Impl>
RealD PartialFractionFermion5D<Impl>::M (const FermionField &in, FermionField &out)
{
M_internal(in,out,DaggerNo);
return norm2(out);
}
template<class Impl>
RealD PartialFractionFermion5D<Impl>::Mdag (const FermionField &in, FermionField &out)
{
M_internal(in,out,DaggerYes);
return norm2(out);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::Meooe (const FermionField &in, FermionField &out)
{
Meooe_internal(in,out,DaggerNo);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MeooeDag (const FermionField &in, FermionField &out)
{
Meooe_internal(in,out,DaggerYes);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::Mooee (const FermionField &in, FermionField &out)
{
Mooee_internal(in,out,DaggerNo);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeDag (const FermionField &in, FermionField &out)
{
Mooee_internal(in,out,DaggerYes);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeInv (const FermionField &in, FermionField &out)
{
MooeeInv_internal(in,out,DaggerNo);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeInvDag (const FermionField &in, FermionField &out)
{
MooeeInv_internal(in,out,DaggerYes);
}
// force terms; five routines; default to Dhop on diagonal
template<class Impl>
void PartialFractionFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V._grid);
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(D,-scale,U,0.0,U,s,s);
ag5xpby_ssp(D, scale,U,0.0,U,s+1,s+1);
}
ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
this->DhopDeriv(mat,D,V,DaggerNo);
};
template<class Impl>
void PartialFractionFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V._grid);
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(D,-scale,U,0.0,U,s,s);
ag5xpby_ssp(D, scale,U,0.0,U,s+1,s+1);
}
ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
this->DhopDerivOE(mat,D,V,DaggerNo);
};
template<class Impl>
void PartialFractionFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V._grid);
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(D,-scale,U,0.0,U,s,s);
ag5xpby_ssp(D, scale,U,0.0,U,s+1,s+1);
}
ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
this->DhopDerivEO(mat,D,V,DaggerNo);
};
template<class Impl>
void PartialFractionFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale){
SetCoefficientsZolotarev(1.0/scale,zdata);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata){
// check on degree matching
// std::cout<<GridLogMessage << Ls << " Ls"<<std::endl;
// std::cout<<GridLogMessage << zdata->n << " - n"<<std::endl;
// std::cout<<GridLogMessage << zdata->da << " -da "<<std::endl;
// std::cout<<GridLogMessage << zdata->db << " -db"<<std::endl;
// std::cout<<GridLogMessage << zdata->dn << " -dn"<<std::endl;
// std::cout<<GridLogMessage << zdata->dd << " -dd"<<std::endl;
int Ls = this->Ls;
assert(Ls == (2*zdata->da -1) );
// Part frac
// RealD R;
R=(1+mass)/(1-mass);
dw_diag = (4.0-this->M5);
// std::vector<RealD> p;
// std::vector<RealD> q;
p.resize(zdata->da);
q.resize(zdata->dd);
for(int n=0;n<zdata->da;n++){
p[n] = zdata -> alpha[n];
}
for(int n=0;n<zdata->dd;n++){
q[n] = -zdata -> ap[n];
}
scale= part_frac_chroma_convention ? 2.0 : 1.0; // Chroma conventions annoy me
amax=zolo_hi;
}
template<class Impl>
void PartialFractionFermion5D<Impl>::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d)
{
int Ls = this->Ls;
conformable(solution5d._grid,this->FermionGrid());
conformable(exported4d._grid,this->GaugeGrid());
ExtractSlice(exported4d, solution5d, Ls-1, Ls-1);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d)
{
int Ls = this->Ls;
conformable(imported5d._grid,this->FermionGrid());
conformable(input4d._grid ,this->GaugeGrid());
FermionField tmp(this->FermionGrid());
tmp=zero;
InsertSlice(input4d, tmp, Ls-1, Ls-1);
tmp=Gamma(Gamma::Algebra::Gamma5)*tmp;
this->Dminus(tmp,imported5d);
}
// Constructors
template<class Impl>
PartialFractionFermion5D<Impl>::PartialFractionFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5,
const ImplParams &p) :
WilsonFermion5D<Impl>(_Umu,
FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid,M5,p),
mass(_mass)
{
int Ls = this->Ls;
assert((Ls&0x1)==1); // Odd Ls required
int nrational=Ls-1;
Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);
// NB: chroma uses a cast to "float" for the zolotarev range(!?).
// this creates a real difference in the operator which I do not like but we can replicate here
// to demonstrate compatibility
// RealD eps = (zolo_lo / zolo_hi);
// zdata = bfm_zolotarev(eps,nrational,0);
SetCoefficientsTanh(zdata,1.0);
Approx::zolotarev_free(zdata);
}
FermOpTemplateInstantiate(PartialFractionFermion5D);
}
}

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,51 +24,49 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_PARTIAL_FRACTION_H
#define GRID_QCD_PARTIAL_FRACTION_H
#include <Grid/qcd/action/fermion/WilsonFermion5D.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class PartialFractionFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
template<class Impl>
class PartialFractionFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
const int part_frac_chroma_convention=1;
const int part_frac_chroma_convention=1;
void Meooe_internal(const FermionField &in, FermionField &out,int dag);
void Mooee_internal(const FermionField &in, FermionField &out,int dag);
void MooeeInv_internal(const FermionField &in, FermionField &out,int dag);
void M_internal(const FermionField &in, FermionField &out,int dag);
void Meooe_internal(const FermionField &in, FermionField &out,int dag);
void Mooee_internal(const FermionField &in, FermionField &out,int dag);
void MooeeInv_internal(const FermionField &in, FermionField &out,int dag);
void M_internal(const FermionField &in, FermionField &out,int dag);
// override multiply
virtual RealD M (const FermionField &in, FermionField &out);
virtual RealD Mdag (const FermionField &in, FermionField &out);
// override multiply
virtual RealD M (const FermionField &in, FermionField &out);
virtual RealD Mdag (const FermionField &in, FermionField &out);
// half checkerboard operaions
virtual void Meooe (const FermionField &in, FermionField &out);
virtual void MeooeDag (const FermionField &in, FermionField &out);
virtual void Mooee (const FermionField &in, FermionField &out);
virtual void MooeeDag (const FermionField &in, FermionField &out);
virtual void MooeeInv (const FermionField &in, FermionField &out);
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
// half checkerboard operaions
virtual void Meooe (const FermionField &in, FermionField &out);
virtual void MeooeDag (const FermionField &in, FermionField &out);
virtual void Mooee (const FermionField &in, FermionField &out);
virtual void MooeeDag (const FermionField &in, FermionField &out);
virtual void MooeeInv (const FermionField &in, FermionField &out);
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void Instantiatable(void) =0; // ensure no make-eee
virtual void Instantiatable(void) =0; // ensure no make-eee
// Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
// Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
///////////////////////////////////////////////////////////////
// Physical surface field utilities
@ -76,32 +74,30 @@ namespace Grid {
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d);
// Constructors
PartialFractionFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5,const ImplParams &p= ImplParams());
// Constructors
PartialFractionFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5,const ImplParams &p= ImplParams());
protected:
protected:
virtual void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale);
virtual void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata);
virtual void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale);
virtual void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata);
// Part frac
RealD mass;
RealD dw_diag;
RealD R;
RealD amax;
RealD scale;
std::vector<double> p;
std::vector<double> q;
// Part frac
RealD mass;
RealD dw_diag;
RealD R;
RealD amax;
RealD scale;
Vector<double> p;
Vector<double> q;
};
};
}
}
NAMESPACE_END(Grid);
#endif

View File

@ -27,8 +27,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template<class Field>
class PauliVillarsSolverUnprec
@ -90,6 +89,4 @@ class PauliVillarsSolverFourierAccel
};
};
}
}
NAMESPACE_END(Grid);

View File

@ -27,8 +27,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template<class Field,class PVinverter> class Reconstruct5DfromPhysical {
private:
@ -131,5 +130,5 @@ template<class Field,class PVinverter> class Reconstruct5DfromPhysical {
}
};
}
}
NAMESPACE_END(Grid);

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,46 +24,43 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_SCALED_SHAMIR_FERMION_H
#define GRID_QCD_SCALED_SHAMIR_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class ScaledShamirFermion : public MobiusFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
template<class Impl>
class ScaledShamirFermion : public MobiusFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
// Constructors
ScaledShamirFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
// RealD scale):
RealD scale,const ImplParams &p= ImplParams()) :
// Constructors
ScaledShamirFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
// RealD scale):
RealD scale,const ImplParams &p= ImplParams()) :
// b+c=scale, b-c = 1 <=> 2b = scale+1; 2c = scale-1
MobiusFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0),p)
// FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0))
{
}
};
// b+c=scale, b-c = 1 <=> 2b = scale+1; 2c = scale-1
MobiusFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0),p)
// FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0))
{
}
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,40 +24,40 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef _SCHUR_DIAG_TWO_KAPPA_H
#define _SCHUR_DIAG_TWO_KAPPA_H
*************************************************************************************/
/* END LEGAL */
#pragma once
namespace Grid {
NAMESPACE_BEGIN(Grid);
// This is specific to (Z)mobius fermions
template<class Matrix, class Field>
class KappaSimilarityTransform {
public:
INHERIT_IMPL_TYPES(Matrix);
std::vector<Coeff_t> kappa, kappaDag, kappaInv, kappaInvDag;
// This is specific to (Z)mobius fermions
template<class Matrix, class Field>
class KappaSimilarityTransform {
public:
INHERIT_IMPL_TYPES(Matrix);
Vector<Coeff_t> kappa, kappaDag, kappaInv, kappaInvDag;
KappaSimilarityTransform (Matrix &zmob) {
for (int i=0;i<(int)zmob.bs.size();i++) {
Coeff_t k = 1.0 / ( 2.0 * (zmob.bs[i] *(4 - zmob.M5) + 1.0) );
kappa.push_back( k );
kappaDag.push_back( conj(k) );
kappaInv.push_back( 1.0 / k );
kappaInvDag.push_back( 1.0 / conj(k) );
}
KappaSimilarityTransform (Matrix &zmob) {
for (int i=0;i<(int)zmob.bs.size();i++) {
Coeff_t k = 1.0 / ( 2.0 * (zmob.bs[i] *(4 - zmob.M5) + 1.0) );
kappa.push_back( k );
kappaDag.push_back( conj(k) );
kappaInv.push_back( 1.0 / k );
kappaInvDag.push_back( 1.0 / conj(k) );
}
}
template<typename vobj>
void sscale(const Lattice<vobj>& in, Lattice<vobj>& out, Coeff_t* s) {
GridBase *grid=out._grid;
out.checkerboard = in.checkerboard;
void sscale(const Lattice<vobj>& in, Lattice<vobj>& out, Coeff_t* s) {
GridBase *grid=out.Grid();
out.Checkerboard() = in.Checkerboard();
assert(grid->_simd_layout[0] == 1); // should be fine for ZMobius for now
int Ls = grid->_rdimensions[0];
parallel_for(int ss=0;ss<grid->oSites();ss++){
vobj tmp = s[ss % Ls]*in._odata[ss];
vstream(out._odata[ss],tmp);
}
thread_for(ss, grid->oSites(),
{
vobj tmp = s[ss % Ls]*in[ss];
vstream(out[ss],tmp);
});
}
RealD sscale_norm(const Field& in, Field& out, Coeff_t* s) {
@ -70,33 +70,33 @@ namespace Grid {
virtual RealD MInv (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInv[0]);}
virtual RealD MInvDag (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInvDag[0]);}
};
};
template<class Matrix,class Field>
class SchurDiagTwoKappaOperator : public SchurOperatorBase<Field> {
public:
KappaSimilarityTransform<Matrix, Field> _S;
SchurDiagTwoOperator<Matrix, Field> _Mat;
template<class Matrix,class Field>
class SchurDiagTwoKappaOperator : public SchurOperatorBase<Field> {
public:
KappaSimilarityTransform<Matrix, Field> _S;
SchurDiagTwoOperator<Matrix, Field> _Mat;
SchurDiagTwoKappaOperator (Matrix &Mat): _S(Mat), _Mat(Mat) {};
SchurDiagTwoKappaOperator (Matrix &Mat): _S(Mat), _Mat(Mat) {};
virtual RealD Mpc (const Field &in, Field &out) {
Field tmp(in._grid);
virtual RealD Mpc (const Field &in, Field &out) {
Field tmp(in.Grid());
_S.MInv(in,out);
_Mat.Mpc(out,tmp);
return _S.M(tmp,out);
_S.MInv(in,out);
_Mat.Mpc(out,tmp);
return _S.M(tmp,out);
}
virtual RealD MpcDag (const Field &in, Field &out){
Field tmp(in._grid);
}
virtual RealD MpcDag (const Field &in, Field &out){
Field tmp(in.Grid());
_S.MDag(in,out);
_Mat.MpcDag(out,tmp);
return _S.MInvDag(tmp,out);
}
};
_S.MDag(in,out);
_Mat.MpcDag(out,tmp);
return _S.MInvDag(tmp,out);
}
};
NAMESPACE_END(Grid);
}
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,46 +24,43 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H
#define GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class ShamirZolotarevFermion : public MobiusZolotarevFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
template<class Impl>
class ShamirZolotarevFermion : public MobiusZolotarevFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
// Constructors
// Constructors
ShamirZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo, RealD hi,const ImplParams &p= ImplParams()) :
ShamirZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo, RealD hi,const ImplParams &p= ImplParams()) :
// b+c = 1; b-c = 1 => b=1, c=0
MobiusZolotarevFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,1.0,0.0,lo,hi,p)
// b+c = 1; b-c = 1 => b=1, c=0
MobiusZolotarevFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,1.0,0.0,lo,hi,p)
{}
{}
};
};
}
}
NAMESPACE_END(Grid);
#endif

View File

@ -0,0 +1,175 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
template <class S, class Representation = FundamentalRepresentation >
class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > >
{
public:
typedef RealD _Coeff_t ;
static const int Dimension = Representation::Dimension;
static const bool isFundamental = Representation::isFundamental;
static const bool LsVectorised=false;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
//Necessary?
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
typedef _Coeff_t Coeff_t;
INHERIT_GIMPL_TYPES(Gimpl);
template <typename vtype> using iImplSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >;
template <typename vtype> using iImplHalfSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
template <typename vtype> using iImplPropagator = iScalar<iScalar<iMatrix<vtype, Dimension> > >;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef iImplPropagator<Simd> SitePropagator;
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef Lattice<SitePropagator> PropagatorField;
typedef StaggeredImplParams ImplParams;
typedef SimpleCompressor<SiteSpinor> Compressor;
typedef CartesianStencil<SiteSpinor, SiteSpinor, ImplParams> StencilImpl;
typedef typename StencilImpl::View_type StencilView;
ImplParams Params;
StaggeredImpl(const ImplParams &p = ImplParams()) : Params(p){};
static accelerator_inline void multLink(SiteSpinor &phi,
const SiteDoubledGaugeField &U,
const SiteSpinor &chi,
int mu)
{
mult(&phi(), &U(mu), &chi());
}
static accelerator_inline void multLinkAdd(SiteSpinor &phi,
const SiteDoubledGaugeField &U,
const SiteSpinor &chi,
int mu)
{
mac(&phi(), &U(mu), &chi());
}
template <class ref>
static accelerator_inline void loadLinkElement(Simd &reg, ref &memory)
{
reg = memory;
}
inline void InsertGaugeField(DoubledGaugeField &U_ds,
const GaugeLinkField &U,int mu)
{
PokeIndex<LorentzIndex>(U_ds, U, mu);
}
inline void DoubleStore(GridBase *GaugeGrid,
DoubledGaugeField &UUUds, // for Naik term
DoubledGaugeField &Uds,
const GaugeField &Uthin,
const GaugeField &Ufat) {
conformable(Uds.Grid(), GaugeGrid);
conformable(Uthin.Grid(), GaugeGrid);
conformable(Ufat.Grid(), GaugeGrid);
GaugeLinkField U(GaugeGrid);
GaugeLinkField UU(GaugeGrid);
GaugeLinkField UUU(GaugeGrid);
GaugeLinkField Udag(GaugeGrid);
GaugeLinkField UUUdag(GaugeGrid);
for (int mu = 0; mu < Nd; mu++) {
// Staggered Phase.
Lattice<iScalar<vInteger> > coor(GaugeGrid);
Lattice<iScalar<vInteger> > x(GaugeGrid); LatticeCoordinate(x,0);
Lattice<iScalar<vInteger> > y(GaugeGrid); LatticeCoordinate(y,1);
Lattice<iScalar<vInteger> > z(GaugeGrid); LatticeCoordinate(z,2);
Lattice<iScalar<vInteger> > t(GaugeGrid); LatticeCoordinate(t,3);
Lattice<iScalar<vInteger> > lin_z(GaugeGrid); lin_z=x+y;
Lattice<iScalar<vInteger> > lin_t(GaugeGrid); lin_t=x+y+z;
ComplexField phases(GaugeGrid); phases=1.0;
if ( mu == 1 ) phases = where( mod(x ,2)==(Integer)0, phases,-phases);
if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases);
if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases);
// 1 hop based on fat links
U = PeekIndex<LorentzIndex>(Ufat, mu);
Udag = adj( Cshift(U, mu, -1));
U = U *phases;
Udag = Udag *phases;
InsertGaugeField(Uds,U,mu);
InsertGaugeField(Uds,Udag,mu+4);
// PokeIndex<LorentzIndex>(Uds, U, mu);
// PokeIndex<LorentzIndex>(Uds, Udag, mu + 4);
// 3 hop based on thin links. Crazy huh ?
U = PeekIndex<LorentzIndex>(Uthin, mu);
UU = Gimpl::CovShiftForward(U,mu,U);
UUU= Gimpl::CovShiftForward(U,mu,UU);
UUUdag = adj( Cshift(UUU, mu, -3));
UUU = UUU *phases;
UUUdag = UUUdag *phases;
InsertGaugeField(UUUds,UUU,mu);
InsertGaugeField(UUUds,UUUdag,mu+4);
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
GaugeLinkField link(mat.Grid());
link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
PokeIndex<LorentzIndex>(mat,link,mu);
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
assert (0);
// Must never hit
}
};
typedef StaggeredImpl<vComplex, FundamentalRepresentation > StaggeredImplR; // Real.. whichever prec
typedef StaggeredImpl<vComplexF, FundamentalRepresentation > StaggeredImplF; // Float
typedef StaggeredImpl<vComplexD, FundamentalRepresentation > StaggeredImplD; // Double
NAMESPACE_END(Grid);

View File

@ -26,11 +26,9 @@ See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_STAGGERED_KERNELS_H
#define GRID_QCD_STAGGERED_KERNELS_H
#pragma once
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid)
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Helper routines that implement Staggered stencil for a single site.
@ -51,72 +49,69 @@ template<class Impl> class StaggeredKernels : public FermionOperator<Impl> , pub
public:
void DhopDir(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out, int dir,int disp);
void DhopDirKernel(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp);
///////////////////////////////////////////////////////////////////////////////////////
// Generic Nc kernels
///////////////////////////////////////////////////////////////////////////////////////
void DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag);
const FermionFieldView &in, FermionFieldView &out,int dag);
void DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag);
const FermionFieldView &in, FermionFieldView &out,int dag);
void DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag);
const FermionFieldView &in, FermionFieldView &out,int dag);
///////////////////////////////////////////////////////////////////////////////////////
// Nc=3 specific kernels
///////////////////////////////////////////////////////////////////////////////////////
void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,DoubledGaugeField &UUU,
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag);
const FermionFieldView &in, FermionFieldView &out,int dag);
void DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,DoubledGaugeField &UUU,
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag);
const FermionFieldView &in, FermionFieldView &out,int dag);
void DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,DoubledGaugeField &UUU,
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag);
const FermionFieldView &in, FermionFieldView &out,int dag);
///////////////////////////////////////////////////////////////////////////////////////
// Asm Nc=3 specific kernels
///////////////////////////////////////////////////////////////////////////////////////
void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,DoubledGaugeField &UUU,
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag);
const FermionFieldView &in, FermionFieldView &out,int dag);
///////////////////////////////////////////////////////////////////////////////////////////////////
// Generic interface; fan out to right routine
///////////////////////////////////////////////////////////////////////////////////////////////////
void DhopSite(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out, int interior=1,int exterior=1);
const FermionFieldView &in, FermionFieldView &out, int interior=1,int exterior=1);
void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out, int interior=1,int exterior=1);
const FermionFieldView &in, FermionFieldView &out, int interior=1,int exterior=1);
void DhopSite(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out, int dag, int interior,int exterior);
const FermionFieldView &in, FermionFieldView &out, int dag, int interior,int exterior);
public:
StaggeredKernels(const ImplParams &p = ImplParams());
};
}}
#endif
NAMESPACE_END(Grid);

View File

@ -0,0 +1,203 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
template <class S, class Representation = FundamentalRepresentation >
class StaggeredVec5dImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
public:
static const int Dimension = Representation::Dimension;
static const bool isFundamental = Representation::isFundamental;
static const bool LsVectorised=true;
typedef RealD Coeff_t ;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
//Necessary?
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
INHERIT_GIMPL_TYPES(Gimpl);
template <typename vtype> using iImplSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >;
template <typename vtype> using iImplHalfSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nd>;
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Dimension> > >;
template <typename vtype> using iImplPropagator = iScalar<iScalar<iMatrix<vtype, Dimension> > >;
// Make the doubled gauge field a *scalar*
typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar
typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar
typedef iImplGaugeLink<typename Simd::scalar_type> SiteScalarGaugeLink; // scalar
typedef iImplPropagator<Simd> SitePropagator;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef Lattice<SitePropagator> PropagatorField;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef Lattice<SiteSpinor> FermionField;
typedef StaggeredImplParams ImplParams;
typedef SimpleCompressor<SiteSpinor> Compressor;
typedef CartesianStencil<SiteSpinor, SiteSpinor, ImplParams> StencilImpl;
typedef typename StencilImpl::View_type StencilView;
ImplParams Params;
StaggeredVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){};
template <class ref>
static accelerator_inline void loadLinkElement(Simd &reg, ref &memory)
{
vsplat(reg, memory);
}
static accelerator_inline void multLink(SiteHalfSpinor &phi,
const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi,
int mu)
{
SiteGaugeLink UU;
for (int i = 0; i < Dimension; i++) {
for (int j = 0; j < Dimension; j++) {
vsplat(UU()()(i, j), U(mu)()(i, j));
}
}
mult(&phi(), &UU(), &chi());
}
static accelerator_inline void multLinkAdd(SiteHalfSpinor &phi,
const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi,
int mu)
{
SiteGaugeLink UU;
for (int i = 0; i < Dimension; i++) {
for (int j = 0; j < Dimension; j++) {
vsplat(UU()()(i, j), U(mu)()(i, j));
}
}
mac(&phi(), &UU(), &chi());
}
inline void InsertGaugeField(DoubledGaugeField &U_ds,const GaugeLinkField &U,int mu)
{
GridBase *GaugeGrid = U_ds.Grid();
thread_for(lidx, GaugeGrid->lSites(),{
SiteScalarGaugeLink ScalarU;
SiteDoubledGaugeField ScalarUds;
Coordinate lcoor;
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
peekLocalSite(ScalarUds, U_ds, lcoor);
peekLocalSite(ScalarU, U, lcoor);
ScalarUds(mu) = ScalarU();
});
}
inline void DoubleStore(GridBase *GaugeGrid,
DoubledGaugeField &UUUds, // for Naik term
DoubledGaugeField &Uds,
const GaugeField &Uthin,
const GaugeField &Ufat)
{
GridBase * InputGrid = Uthin.Grid();
conformable(InputGrid,Ufat.Grid());
GaugeLinkField U(InputGrid);
GaugeLinkField UU(InputGrid);
GaugeLinkField UUU(InputGrid);
GaugeLinkField Udag(InputGrid);
GaugeLinkField UUUdag(InputGrid);
for (int mu = 0; mu < Nd; mu++) {
// Staggered Phase.
Lattice<iScalar<vInteger> > coor(InputGrid);
Lattice<iScalar<vInteger> > x(InputGrid); LatticeCoordinate(x,0);
Lattice<iScalar<vInteger> > y(InputGrid); LatticeCoordinate(y,1);
Lattice<iScalar<vInteger> > z(InputGrid); LatticeCoordinate(z,2);
Lattice<iScalar<vInteger> > t(InputGrid); LatticeCoordinate(t,3);
Lattice<iScalar<vInteger> > lin_z(InputGrid); lin_z=x+y;
Lattice<iScalar<vInteger> > lin_t(InputGrid); lin_t=x+y+z;
ComplexField phases(InputGrid); phases=1.0;
if ( mu == 1 ) phases = where( mod(x ,2)==(Integer)0, phases,-phases);
if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases);
if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases);
// 1 hop based on fat links
U = PeekIndex<LorentzIndex>(Ufat, mu);
Udag = adj( Cshift(U, mu, -1));
U = U *phases;
Udag = Udag *phases;
InsertGaugeField(Uds,U,mu);
InsertGaugeField(Uds,Udag,mu+4);
// 3 hop based on thin links. Crazy huh ?
U = PeekIndex<LorentzIndex>(Uthin, mu);
UU = Gimpl::CovShiftForward(U,mu,U);
UUU= Gimpl::CovShiftForward(U,mu,UU);
UUUdag = adj( Cshift(UUU, mu, -3));
UUU = UUU *phases;
UUUdag = UUUdag *phases;
InsertGaugeField(UUUds,UUU,mu);
InsertGaugeField(UUUds,UUUdag,mu+4);
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
assert(0);
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
assert (0);
}
};
typedef StaggeredVec5dImpl<vComplex, FundamentalRepresentation > StaggeredVec5dImplR; // Real.. whichever prec
typedef StaggeredVec5dImpl<vComplexF, FundamentalRepresentation > StaggeredVec5dImplF; // Float
typedef StaggeredVec5dImpl<vComplexD, FundamentalRepresentation > StaggeredVec5dImplD; // Double
NAMESPACE_END(Grid);

View File

@ -27,15 +27,11 @@
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_WILSON_CLOVER_FERMION_H
#define GRID_QCD_WILSON_CLOVER_FERMION_H
#pragma once
#include <Grid/Grid.h>
namespace Grid
{
namespace QCD
{
NAMESPACE_BEGIN(Grid);
///////////////////////////////////////////////////////////////////
// Wilson Clover
@ -131,22 +127,22 @@ public:
// Derivative parts unpreconditioned pseudofermions
void MDeriv(GaugeField &force, const FermionField &X, const FermionField &Y, int dag)
{
conformable(X._grid, Y._grid);
conformable(X._grid, force._grid);
GaugeLinkField force_mu(force._grid), lambda(force._grid);
GaugeField clover_force(force._grid);
PropagatorField Lambda(force._grid);
conformable(X.Grid(), Y.Grid());
conformable(X.Grid(), force.Grid());
GaugeLinkField force_mu(force.Grid()), lambda(force.Grid());
GaugeField clover_force(force.Grid());
PropagatorField Lambda(force.Grid());
// Guido: Here we are hitting some performance issues:
// need to extract the components of the DoubledGaugeField
// for each call
// Possible solution
// Create a vector object to store them? (cons: wasting space)
std::vector<GaugeLinkField> U(Nd, this->Umu._grid);
std::vector<GaugeLinkField> U(Nd, this->Umu.Grid());
Impl::extractLinkField(U, this->Umu);
force = zero;
force = Zero();
// Derivative of the Wilson hopping term
this->DhopDeriv(force, X, Y, dag);
@ -179,10 +175,10 @@ public:
*/
int count = 0;
clover_force = zero;
clover_force = Zero();
for (int mu = 0; mu < 4; mu++)
{
force_mu = zero;
force_mu = Zero();
for (int nu = 0; nu < 4; nu++)
{
if (mu == nu)
@ -212,8 +208,8 @@ public:
// Computing C_{\mu \nu}(x) as in Eq.(B.39) in Zbigniew Sroczynski's PhD thesis
GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu)
{
conformable(lambda._grid, U[0]._grid);
GaugeLinkField out(lambda._grid), tmp(lambda._grid);
conformable(lambda.Grid(), U[0].Grid());
GaugeLinkField out(lambda.Grid()), tmp(lambda.Grid());
// insertion in upper staple
// please check redundancy of shift operations
@ -266,102 +262,113 @@ private:
// using the DeGrand-Rossi basis for the gamma matrices
CloverFieldType fillCloverYZ(const GaugeLinkField &F)
{
CloverFieldType T(F._grid);
T = zero;
PARALLEL_FOR_LOOP
for (int i = 0; i < CloverTerm._grid->oSites(); i++)
CloverFieldType T(F.Grid());
T = Zero();
auto T_v = T.View();
auto F_v = F.View();
thread_for(i, CloverTerm.Grid()->oSites(),
{
T._odata[i]()(0, 1) = timesMinusI(F._odata[i]()());
T._odata[i]()(1, 0) = timesMinusI(F._odata[i]()());
T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()());
T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()());
}
T_v[i]()(0, 1) = timesMinusI(F_v[i]()());
T_v[i]()(1, 0) = timesMinusI(F_v[i]()());
T_v[i]()(2, 3) = timesMinusI(F_v[i]()());
T_v[i]()(3, 2) = timesMinusI(F_v[i]()());
});
return T;
}
CloverFieldType fillCloverXZ(const GaugeLinkField &F)
{
CloverFieldType T(F._grid);
T = zero;
PARALLEL_FOR_LOOP
for (int i = 0; i < CloverTerm._grid->oSites(); i++)
CloverFieldType T(F.Grid());
T = Zero();
auto T_v = T.View();
auto F_v = F.View();
thread_for(i, CloverTerm.Grid()->oSites(),
{
T._odata[i]()(0, 1) = -F._odata[i]()();
T._odata[i]()(1, 0) = F._odata[i]()();
T._odata[i]()(2, 3) = -F._odata[i]()();
T._odata[i]()(3, 2) = F._odata[i]()();
}
T_v[i]()(0, 1) = -F_v[i]()();
T_v[i]()(1, 0) = F_v[i]()();
T_v[i]()(2, 3) = -F_v[i]()();
T_v[i]()(3, 2) = F_v[i]()();
});
return T;
}
CloverFieldType fillCloverXY(const GaugeLinkField &F)
{
CloverFieldType T(F._grid);
T = zero;
PARALLEL_FOR_LOOP
for (int i = 0; i < CloverTerm._grid->oSites(); i++)
{
CloverFieldType T(F.Grid());
T = Zero();
T._odata[i]()(0, 0) = timesMinusI(F._odata[i]()());
T._odata[i]()(1, 1) = timesI(F._odata[i]()());
T._odata[i]()(2, 2) = timesMinusI(F._odata[i]()());
T._odata[i]()(3, 3) = timesI(F._odata[i]()());
}
auto T_v = T.View();
auto F_v = F.View();
thread_for(i, CloverTerm.Grid()->oSites(),
{
T_v[i]()(0, 0) = timesMinusI(F_v[i]()());
T_v[i]()(1, 1) = timesI(F_v[i]()());
T_v[i]()(2, 2) = timesMinusI(F_v[i]()());
T_v[i]()(3, 3) = timesI(F_v[i]()());
});
return T;
}
CloverFieldType fillCloverXT(const GaugeLinkField &F)
{
CloverFieldType T(F._grid);
T = zero;
PARALLEL_FOR_LOOP
for (int i = 0; i < CloverTerm._grid->oSites(); i++)
CloverFieldType T(F.Grid());
T = Zero();
auto T_v = T.View();
auto F_v = F.View();
thread_for(i, CloverTerm.Grid()->oSites(),
{
T._odata[i]()(0, 1) = timesI(F._odata[i]()());
T._odata[i]()(1, 0) = timesI(F._odata[i]()());
T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()());
T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()());
}
T_v[i]()(0, 1) = timesI(F_v[i]()());
T_v[i]()(1, 0) = timesI(F_v[i]()());
T_v[i]()(2, 3) = timesMinusI(F_v[i]()());
T_v[i]()(3, 2) = timesMinusI(F_v[i]()());
});
return T;
}
CloverFieldType fillCloverYT(const GaugeLinkField &F)
{
CloverFieldType T(F._grid);
T = zero;
PARALLEL_FOR_LOOP
for (int i = 0; i < CloverTerm._grid->oSites(); i++)
CloverFieldType T(F.Grid());
T = Zero();
auto T_v = T.View();
auto F_v = F.View();
thread_for(i, CloverTerm.Grid()->oSites(),
{
T._odata[i]()(0, 1) = -(F._odata[i]()());
T._odata[i]()(1, 0) = (F._odata[i]()());
T._odata[i]()(2, 3) = (F._odata[i]()());
T._odata[i]()(3, 2) = -(F._odata[i]()());
}
T_v[i]()(0, 1) = -(F_v[i]()());
T_v[i]()(1, 0) = (F_v[i]()());
T_v[i]()(2, 3) = (F_v[i]()());
T_v[i]()(3, 2) = -(F_v[i]()());
});
return T;
}
CloverFieldType fillCloverZT(const GaugeLinkField &F)
{
CloverFieldType T(F._grid);
T = zero;
PARALLEL_FOR_LOOP
for (int i = 0; i < CloverTerm._grid->oSites(); i++)
CloverFieldType T(F.Grid());
T = Zero();
auto T_v = T.View();
auto F_v = F.View();
thread_for(i, CloverTerm.Grid()->oSites(),
{
T._odata[i]()(0, 0) = timesI(F._odata[i]()());
T._odata[i]()(1, 1) = timesMinusI(F._odata[i]()());
T._odata[i]()(2, 2) = timesMinusI(F._odata[i]()());
T._odata[i]()(3, 3) = timesI(F._odata[i]()());
}
T_v[i]()(0, 0) = timesI(F_v[i]()());
T_v[i]()(1, 1) = timesMinusI(F_v[i]()());
T_v[i]()(2, 2) = timesMinusI(F_v[i]()());
T_v[i]()(3, 3) = timesI(F_v[i]()());
});
return T;
}
};
}
}
NAMESPACE_END(Grid);
#endif // GRID_QCD_WILSON_CLOVER_FERMION_H

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -25,13 +25,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_WILSON_COMPRESSOR_H
#define GRID_QCD_WILSON_COMPRESSOR_H
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////////////////////////////////////////
// optimised versions supporting half precision too
@ -43,9 +42,9 @@ class WilsonCompressorTemplate;
template<class _HCspinor,class _Hspinor,class _Spinor, class projector>
class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
typename std::enable_if<std::is_same<_HCspinor,_Hspinor>::value>::type >
typename std::enable_if<std::is_same<_HCspinor,_Hspinor>::value>::type >
{
public:
public:
int mu,dag;
@ -62,15 +61,16 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
typedef typename SiteHalfSpinor::vector_type vComplexHigh;
constexpr static int Nw=sizeof(SiteHalfSpinor)/sizeof(vComplexHigh);
inline int CommDatumSize(void) {
accelerator_inline int CommDatumSize(void) {
return sizeof(SiteHalfCommSpinor);
}
/*****************************************************/
/* Compress includes precision change if mpi data is not same */
/*****************************************************/
inline void Compress(SiteHalfSpinor * __restrict__ buf,Integer o,const SiteSpinor &in) {
SiteHalfSpinor tmp;
template<class _SiteHalfSpinor, class _SiteSpinor>
accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) {
_SiteHalfSpinor tmp;
projector::Proj(tmp,in,mu,dag);
vstream(buf[o],tmp);
}
@ -78,10 +78,10 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
/*****************************************************/
/* Exchange includes precision change if mpi data is not same */
/*****************************************************/
inline void Exchange(SiteHalfSpinor * __restrict__ mp,
const SiteHalfSpinor * __restrict__ vp0,
const SiteHalfSpinor * __restrict__ vp1,
Integer type,Integer o){
accelerator_inline void Exchange(SiteHalfSpinor *mp,
const SiteHalfSpinor * __restrict__ vp0,
const SiteHalfSpinor * __restrict__ vp1,
Integer type,Integer o){
SiteHalfSpinor tmp1;
SiteHalfSpinor tmp2;
exchange(tmp1,tmp2,vp0[o],vp1[o],type);
@ -92,19 +92,21 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
/*****************************************************/
/* Have a decompression step if mpi data is not same */
/*****************************************************/
inline void Decompress(SiteHalfSpinor * __restrict__ out,
SiteHalfSpinor * __restrict__ in, Integer o) {
accelerator_inline void Decompress(SiteHalfSpinor * __restrict__ out,
SiteHalfSpinor * __restrict__ in, Integer o) {
assert(0);
}
/*****************************************************/
/* Compress Exchange */
/*****************************************************/
inline void CompressExchange(SiteHalfSpinor * __restrict__ out0,
SiteHalfSpinor * __restrict__ out1,
const SiteSpinor * __restrict__ in,
Integer j,Integer k, Integer m,Integer type){
SiteHalfSpinor temp1, temp2,temp3,temp4;
accelerator_inline void CompressExchange(SiteHalfSpinor * __restrict__ out0,
SiteHalfSpinor * __restrict__ out1,
const SiteSpinor * __restrict__ in,
Integer j,Integer k, Integer m,Integer type)
{
SiteHalfSpinor temp1, temp2;
SiteHalfSpinor temp3, temp4;
projector::Proj(temp1,in[k],mu,dag);
projector::Proj(temp2,in[m],mu,dag);
exchange(temp3,temp4,temp1,temp2,type);
@ -115,15 +117,15 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
/*****************************************************/
/* Pass the info to the stencil */
/*****************************************************/
inline bool DecompressionStep(void) { return false; }
accelerator_inline bool DecompressionStep(void) { return false; }
};
template<class _HCspinor,class _Hspinor,class _Spinor, class projector>
class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type >
typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type >
{
public:
public:
int mu,dag;
@ -140,15 +142,16 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
typedef typename SiteHalfSpinor::vector_type vComplexHigh;
constexpr static int Nw=sizeof(SiteHalfSpinor)/sizeof(vComplexHigh);
inline int CommDatumSize(void) {
accelerator_inline int CommDatumSize(void) {
return sizeof(SiteHalfCommSpinor);
}
/*****************************************************/
/* Compress includes precision change if mpi data is not same */
/*****************************************************/
inline void Compress(SiteHalfSpinor *buf,Integer o,const SiteSpinor &in) {
SiteHalfSpinor hsp;
template<class _SiteHalfSpinor, class _SiteSpinor>
accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) {
_SiteHalfSpinor hsp;
SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf;
projector::Proj(hsp,in,mu,dag);
precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw);
@ -157,7 +160,7 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
/*****************************************************/
/* Exchange includes precision change if mpi data is not same */
/*****************************************************/
inline void Exchange(SiteHalfSpinor *mp,
accelerator_inline void Exchange(SiteHalfSpinor *mp,
SiteHalfSpinor *vp0,
SiteHalfSpinor *vp1,
Integer type,Integer o){
@ -172,8 +175,7 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
/*****************************************************/
/* Have a decompression step if mpi data is not same */
/*****************************************************/
inline void Decompress(SiteHalfSpinor *out,
SiteHalfSpinor *in, Integer o){
accelerator_inline void Decompress(SiteHalfSpinor *out, SiteHalfSpinor *in, Integer o){
SiteHalfCommSpinor *hin=(SiteHalfCommSpinor *)in;
precisionChange((vComplexHigh *)&out[o],(vComplexLow *)&hin[o],Nw);
}
@ -181,7 +183,7 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
/*****************************************************/
/* Compress Exchange */
/*****************************************************/
inline void CompressExchange(SiteHalfSpinor *out0,
accelerator_inline void CompressExchange(SiteHalfSpinor *out0,
SiteHalfSpinor *out1,
const SiteSpinor *in,
Integer j,Integer k, Integer m,Integer type){
@ -198,19 +200,19 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
/*****************************************************/
/* Pass the info to the stencil */
/*****************************************************/
inline bool DecompressionStep(void) { return true; }
accelerator_inline bool DecompressionStep(void) { return true; }
};
#define DECLARE_PROJ(Projector,Compressor,spProj) \
class Projector { \
public: \
template<class hsp,class fsp> \
static void Proj(hsp &result,const fsp &in,int mu,int dag){ \
spProj(result,in); \
} \
template<class hsp,class fsp> \
static accelerator void Proj(hsp &result,const fsp &in,int mu,int dag){ \
spProj(result,in); \
} \
}; \
template<typename HCS,typename HS,typename S> using Compressor = WilsonCompressorTemplate<HCS,HS,S,Projector>;
template<typename HCS,typename HS,typename S> using Compressor = WilsonCompressorTemplate<HCS,HS,S,Projector>;
DECLARE_PROJ(WilsonXpProjector,WilsonXpCompressor,spProjXp);
DECLARE_PROJ(WilsonYpProjector,WilsonYpCompressor,spProjYp);
@ -222,9 +224,9 @@ DECLARE_PROJ(WilsonZmProjector,WilsonZmCompressor,spProjZm);
DECLARE_PROJ(WilsonTmProjector,WilsonTmCompressor,spProjTm);
class WilsonProjector {
public:
public:
template<class hsp,class fsp>
static void Proj(hsp &result,const fsp &in,int mu,int dag){
static accelerator void Proj(hsp &result,const fsp &in,int mu,int dag){
int mudag=dag? mu : (mu+Nd)%(2*Nd);
switch(mudag) {
case Xp: spProjXp(result,in); break;
@ -243,9 +245,14 @@ template<typename HCS,typename HS,typename S> using WilsonCompressor = WilsonCom
// Fast comms buffer manipulation which should inline right through (avoid direction
// dependent logic that prevents inlining
template<class vobj,class cobj>
class WilsonStencil : public CartesianStencil<vobj,cobj> {
template<class vobj,class cobj,class Parameters>
class WilsonStencil : public CartesianStencil<vobj,cobj,Parameters> {
public:
typedef CartesianStencil<vobj,cobj,Parameters> Base;
typedef typename Base::View_type View_type;
typedef typename Base::StencilVector StencilVector;
double timer0;
double timer1;
double timer2;
@ -274,16 +281,40 @@ public:
if ( timer4 ) std::cout << GridLogMessage << " timer4 " <<timer4 <<std::endl;
}
std::vector<int> surface_list;
WilsonStencil(GridBase *grid,
int npoints,
int checkerboard,
const std::vector<int> &directions,
const std::vector<int> &distances)
: CartesianStencil<vobj,cobj> (grid,npoints,checkerboard,directions,distances)
const std::vector<int> &distances,Parameters p)
: CartesianStencil<vobj,cobj,Parameters> (grid,npoints,checkerboard,directions,distances,p)
{
ZeroCountersi();
surface_list.resize(0);
this->same_node.resize(npoints);
};
void BuildSurfaceList(int Ls,int vol4){
// find same node for SHM
// Here we know the distance is 1 for WilsonStencil
for(int point=0;point<this->_npoints;point++){
this->same_node[point] = this->SameNode(point);
}
for(int site = 0 ;site< vol4;site++){
int local = 1;
for(int point=0;point<this->_npoints;point++){
if( (!this->GetNodeLocal(site*Ls,point)) && (!this->same_node[point]) ){
local = 0;
}
}
if(local == 0) {
surface_list.push_back(site);
}
}
}
template < class compressor>
void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress)
@ -292,8 +323,6 @@ public:
this->HaloExchangeOptGather(source,compress);
double t1=usecond();
// Asynchronous MPI calls multidirectional, Isend etc...
// this->CommunicateBegin(reqs);
// this->CommunicateComplete(reqs);
// Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways.
this->Communicate();
double t2=usecond(); timer1 += t2-t1;
@ -327,7 +356,7 @@ public:
this->_grid->StencilBarrier();
this->mpi3synctime_g+=usecond();
assert(source._grid==this->_grid);
assert(source.Grid()==this->_grid);
this->halogtime-=usecond();
this->u_comm_offset=0;
@ -365,9 +394,10 @@ public:
this->face_table_computed=1;
assert(this->u_comm_offset==this->_unified_buffer_size);
this->halogtime+=usecond();
accelerator_barrier();
}
};
};
}} // namespace close
NAMESPACE_END(Grid);
#endif

View File

@ -27,16 +27,13 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_WILSON_FERMION_H
#define GRID_QCD_WILSON_FERMION_H
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
class WilsonFermionStatic {
public:
public:
static int HandOptDslash; // these are a temporary hack
static int MortonOrder;
static const std::vector<int> directions;
@ -60,8 +57,9 @@ class WilsonFermionStatic {
};
template <class Impl>
class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
public:
class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic
{
public:
INHERIT_IMPL_TYPES(Impl);
typedef WilsonKernels<Impl> Kernels;
@ -138,10 +136,10 @@ class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
// Constructor
WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
GridRedBlackCartesian &Hgrid, RealD _mass,
GridRedBlackCartesian &Hgrid, RealD _mass,
const ImplParams &p = ImplParams(),
const WilsonAnisotropyCoefficients &anis = WilsonAnisotropyCoefficients() );
// DoubleStore impl dependent
void ImportGauge(const GaugeField &_Umu);
@ -150,7 +148,7 @@ class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
///////////////////////////////////////////////////////////////
// protected:
public:
public:
virtual RealD Mass(void) { return mass; }
virtual int isTrivialEE(void) { return 1; };
RealD mass;
@ -171,7 +169,7 @@ class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
LebesgueOrder Lebesgue;
LebesgueOrder LebesgueEvenOdd;
WilsonAnisotropyCoefficients anisotropyCoeff;
///////////////////////////////////////////////////////////////
@ -182,11 +180,11 @@ class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
PropagatorField &q_out,
Current curr_type,
unsigned int mu);
void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx);
};
@ -194,7 +192,6 @@ class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
NAMESPACE_END(Grid);
}
}
#endif

View File

@ -1,5 +1,5 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -26,216 +26,215 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_WILSON_FERMION_5D_H
#define GRID_QCD_WILSON_FERMION_5D_H
#include <Grid/perfmon/Stat.h>
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////////////////////////////////////////
// This is the 4d red black case appropriate to support
//
// parity = (x+y+z+t)|2;
// generalised five dim fermions like mobius, zolotarev etc..
//
// i.e. even even contains fifth dim hopping term.
//
// [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ]
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// This is the 4d red black case appropriate to support
//
// parity = (x+y+z+t)|2;
// generalised five dim fermions like mobius, zolotarev etc..
//
// i.e. even even contains fifth dim hopping term.
//
// [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ]
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// This is the 4d red black case appropriate to support
//
// parity = (x+y+z+t)|2;
// generalised five dim fermions like mobius, zolotarev etc..
//
// i.e. even even contains fifth dim hopping term.
//
// [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ]
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// This is the 4d red black case appropriate to support
//
// parity = (x+y+z+t)|2;
// generalised five dim fermions like mobius, zolotarev etc..
//
// i.e. even even contains fifth dim hopping term.
//
// [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ]
////////////////////////////////////////////////////////////////////////////////
class WilsonFermion5DStatic {
public:
// S-direction is INNERMOST and takes no part in the parity.
static const std::vector<int> directions;
static const std::vector<int> displacements;
const int npoint = 8;
};
class WilsonFermion5DStatic {
public:
// S-direction is INNERMOST and takes no part in the parity.
static const std::vector<int> directions;
static const std::vector<int> displacements;
static constexpr int npoint = 8;
};
template<class Impl>
class WilsonFermion5D : public WilsonKernels<Impl>, public WilsonFermion5DStatic
{
public:
INHERIT_IMPL_TYPES(Impl);
typedef WilsonKernels<Impl> Kernels;
PmuStat stat;
template<class Impl>
class WilsonFermion5D : public WilsonKernels<Impl>, public WilsonFermion5DStatic
{
public:
INHERIT_IMPL_TYPES(Impl);
typedef WilsonKernels<Impl> Kernels;
PmuStat stat;
FermionField _tmp;
FermionField &tmp(void) { return _tmp; }
FermionField _tmp;
FermionField &tmp(void) { return _tmp; }
void Report(void);
void ZeroCounters(void);
double DhopCalls;
double DhopCommTime;
double DhopComputeTime;
double DhopComputeTime2;
double DhopFaceTime;
double DhopTotalTime;
void Report(void);
void ZeroCounters(void);
double DhopCalls;
double DhopCommTime;
double DhopComputeTime;
double DhopComputeTime2;
double DhopFaceTime;
double DhopTotalTime;
double DerivCalls;
double DerivCommTime;
double DerivComputeTime;
double DerivDhopComputeTime;
double DerivCalls;
double DerivCommTime;
double DerivComputeTime;
double DerivDhopComputeTime;
///////////////////////////////////////////////////////////////
// Implement the abstract base
///////////////////////////////////////////////////////////////
GridBase *GaugeGrid(void) { return _FourDimGrid ;}
GridBase *GaugeRedBlackGrid(void) { return _FourDimRedBlackGrid ;}
GridBase *FermionGrid(void) { return _FiveDimGrid;}
GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;}
///////////////////////////////////////////////////////////////
// Implement the abstract base
///////////////////////////////////////////////////////////////
GridBase *GaugeGrid(void) { return _FourDimGrid ;}
GridBase *GaugeRedBlackGrid(void) { return _FourDimRedBlackGrid ;}
GridBase *FermionGrid(void) { return _FiveDimGrid;}
GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;}
// full checkerboard operations; leave unimplemented as abstract for now
virtual RealD M (const FermionField &in, FermionField &out){assert(0); return 0.0;};
virtual RealD Mdag (const FermionField &in, FermionField &out){assert(0); return 0.0;};
// full checkerboard operations; leave unimplemented as abstract for now
virtual RealD M (const FermionField &in, FermionField &out){assert(0); return 0.0;};
virtual RealD Mdag (const FermionField &in, FermionField &out){assert(0); return 0.0;};
// half checkerboard operations; leave unimplemented as abstract for now
virtual void Meooe (const FermionField &in, FermionField &out){assert(0);};
virtual void Mooee (const FermionField &in, FermionField &out){assert(0);};
virtual void MooeeInv (const FermionField &in, FermionField &out){assert(0);};
// half checkerboard operations; leave unimplemented as abstract for now
virtual void Meooe (const FermionField &in, FermionField &out){assert(0);};
virtual void Mooee (const FermionField &in, FermionField &out){assert(0);};
virtual void MooeeInv (const FermionField &in, FermionField &out){assert(0);};
virtual void MeooeDag (const FermionField &in, FermionField &out){assert(0);};
virtual void MooeeDag (const FermionField &in, FermionField &out){assert(0);};
virtual void MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
virtual void MeooeDag (const FermionField &in, FermionField &out){assert(0);};
virtual void MooeeDag (const FermionField &in, FermionField &out){assert(0);};
virtual void MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
// These can be overridden by fancy 5d chiral action
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// These can be overridden by fancy 5d chiral action
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
// Implement hopping term non-hermitian hopping term; half cb or both
// Implement s-diagonal DW
void DW (const FermionField &in, FermionField &out,int dag);
void Dhop (const FermionField &in, FermionField &out,int dag);
void DhopOE(const FermionField &in, FermionField &out,int dag);
void DhopEO(const FermionField &in, FermionField &out,int dag);
// Implement hopping term non-hermitian hopping term; half cb or both
// Implement s-diagonal DW
void DW (const FermionField &in, FermionField &out,int dag);
void Dhop (const FermionField &in, FermionField &out,int dag);
void DhopOE(const FermionField &in, FermionField &out,int dag);
void DhopEO(const FermionField &in, FermionField &out,int dag);
// add a DhopComm
// -- suboptimal interface will presently trigger multiple comms.
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
// add a DhopComm
// -- suboptimal interface will presently trigger multiple comms.
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
///////////////////////////////////////////////////////////////
// New methods added
///////////////////////////////////////////////////////////////
void DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag);
///////////////////////////////////////////////////////////////
// New methods added
///////////////////////////////////////////////////////////////
void DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag);
void DhopInternal(StencilImpl & st,
LebesgueOrder &lo,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out,
int dag);
void DhopInternal(StencilImpl & st,
LebesgueOrder &lo,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out,
int dag);
void DhopInternalOverlappedComms(StencilImpl & st,
LebesgueOrder &lo,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out,
int dag);
void DhopInternalOverlappedComms(StencilImpl & st,
LebesgueOrder &lo,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out,
int dag);
void DhopInternalSerialComms(StencilImpl & st,
LebesgueOrder &lo,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out,
int dag);
void DhopInternalSerialComms(StencilImpl & st,
LebesgueOrder &lo,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out,
int dag);
// Constructors
WilsonFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
double _M5,const ImplParams &p= ImplParams());
// Constructors
WilsonFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
double _M5,const ImplParams &p= ImplParams());
// Constructors
/*
WilsonFermion5D(int simd,
GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
double _M5,const ImplParams &p= ImplParams());
*/
// Constructors
/*
WilsonFermion5D(int simd,
GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
double _M5,const ImplParams &p= ImplParams());
*/
// DoubleStore
void ImportGauge(const GaugeField &_Umu);
// DoubleStore
void ImportGauge(const GaugeField &_Umu);
///////////////////////////////////////////////////////////////
// Data members require to support the functionality
///////////////////////////////////////////////////////////////
public:
///////////////////////////////////////////////////////////////
// Data members require to support the functionality
///////////////////////////////////////////////////////////////
public:
// Add these to the support from Wilson
GridBase *_FourDimGrid;
GridBase *_FourDimRedBlackGrid;
GridBase *_FiveDimGrid;
GridBase *_FiveDimRedBlackGrid;
// Add these to the support from Wilson
GridBase *_FourDimGrid;
GridBase *_FourDimRedBlackGrid;
GridBase *_FiveDimGrid;
GridBase *_FiveDimRedBlackGrid;
double M5;
int Ls;
double M5;
int Ls;
//Defines the stencils for even and odd
StencilImpl Stencil;
StencilImpl StencilEven;
StencilImpl StencilOdd;
//Defines the stencils for even and odd
StencilImpl Stencil;
StencilImpl StencilEven;
StencilImpl StencilOdd;
// Copy of the gauge field , with even and odd subsets
DoubledGaugeField Umu;
DoubledGaugeField UmuEven;
DoubledGaugeField UmuOdd;
// Copy of the gauge field , with even and odd subsets
DoubledGaugeField Umu;
DoubledGaugeField UmuEven;
DoubledGaugeField UmuOdd;
LebesgueOrder Lebesgue;
LebesgueOrder LebesgueEvenOdd;
LebesgueOrder Lebesgue;
LebesgueOrder LebesgueEvenOdd;
// Comms buffer
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
// Comms buffer
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
///////////////////////////////////////////////////////////////
// Conserved current utilities
///////////////////////////////////////////////////////////////
void ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu);
void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx);
///////////////////////////////////////////////////////////////
// Conserved current utilities
///////////////////////////////////////////////////////////////
void ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu);
void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx);
void ContractJ5q(PropagatorField &q_in,ComplexField &J5q);
void ContractJ5q(FermionField &q_in,ComplexField &J5q);
void ContractJ5q(PropagatorField &q_in,ComplexField &J5q);
void ContractJ5q(FermionField &q_in,ComplexField &J5q);
};
};
}}
NAMESPACE_END(Grid);
#endif

View File

@ -0,0 +1,226 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////////////////////////
// Single flavour four spinors with colour index
/////////////////////////////////////////////////////////////////////////////
template <class S, class Representation = FundamentalRepresentation,class Options = CoeffReal >
class WilsonImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
public:
static const int Dimension = Representation::Dimension;
static const bool isFundamental = Representation::isFundamental;
static const bool LsVectorised=false;
static const int Nhcs = Options::Nhcs;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl);
//Necessary?
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
typedef typename Options::_Coeff_t Coeff_t;
typedef typename Options::template PrecisionMapper<Simd>::LowerPrecVector SimdL;
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
template <typename vtype> using iImplPropagator = iScalar<iMatrix<iMatrix<vtype, Dimension>, Ns> >;
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
template <typename vtype> using iImplHalfCommSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhcs> >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplPropagator<Simd> SitePropagator;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplHalfCommSpinor<SimdL> SiteHalfCommSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SitePropagator> PropagatorField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfCommSpinor,SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonImplParams ImplParams;
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor,ImplParams> StencilImpl;
typedef typename StencilImpl::View_type StencilView;
ImplParams Params;
WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){
assert(Params.boundary_phases.size() == Nd);
};
template<class _Spinor>
static accelerator_inline void multLink(_Spinor &phi,
const SiteDoubledGaugeField &U,
const _Spinor &chi,
int mu)
{
auto UU = coalescedRead(U(mu));
mult(&phi(), &UU, &chi());
}
template<class _Spinor>
static accelerator_inline void multLink(_Spinor &phi,
const SiteDoubledGaugeField &U,
const _Spinor &chi,
int mu,
StencilEntry *SE,
StencilView &St)
{
multLink(phi,U,chi,mu);
}
template <class ref>
static accelerator_inline void loadLinkElement(Simd &reg, ref &memory)
{
reg = memory;
}
inline void DoubleStore(GridBase *GaugeGrid,
DoubledGaugeField &Uds,
const GaugeField &Umu)
{
typedef typename Simd::scalar_type scalar_type;
conformable(Uds.Grid(), GaugeGrid);
conformable(Umu.Grid(), GaugeGrid);
GaugeLinkField U(GaugeGrid);
GaugeLinkField tmp(GaugeGrid);
Lattice<iScalar<vInteger> > coor(GaugeGrid);
////////////////////////////////////////////////////
// apply any boundary phase or twists
////////////////////////////////////////////////////
for (int mu = 0; mu < Nd; mu++) {
////////// boundary phase /////////////
auto pha = Params.boundary_phases[mu];
scalar_type phase( real(pha),imag(pha) );
int L = GaugeGrid->GlobalDimensions()[mu];
int Lmu = L - 1;
LatticeCoordinate(coor, mu);
U = PeekIndex<LorentzIndex>(Umu, mu);
// apply any twists
RealD theta = Params.twist_n_2pi_L[mu] * 2*M_PI / L;
if ( theta != 0.0) {
scalar_type twphase(::cos(theta),::sin(theta));
U = twphase*U;
std::cout << GridLogMessage << " Twist ["<<mu<<"] "<< Params.twist_n_2pi_L[mu]<< " phase"<<phase <<std::endl;
}
tmp = where(coor == Lmu, phase * U, U);
PokeIndex<LorentzIndex>(Uds, tmp, mu);
U = adj(Cshift(U, mu, -1));
U = where(coor == 0, conjugate(phase) * U, U);
PokeIndex<LorentzIndex>(Uds, U, mu + 4);
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
GaugeLinkField link(mat.Grid());
link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
PokeIndex<LorentzIndex>(mat,link,mu);
}
inline void outerProductImpl(PropagatorField &mat, const FermionField &B, const FermionField &A){
mat = outerProduct(B,A);
}
inline void TraceSpinImpl(GaugeLinkField &mat, PropagatorField&P) {
mat = TraceIndex<SpinIndex>(P);
}
inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds){
for (int mu = 0; mu < Nd; mu++)
mat[mu] = PeekIndex<LorentzIndex>(Uds, mu);
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
int Ls=Btilde.Grid()->_fdimensions[0];
GaugeLinkField tmp(mat.Grid());
tmp = Zero();
auto tmp_v = tmp.View();
auto Btilde_v = Btilde.View();
auto Atilde_v = Atilde.View();
thread_for(sss,tmp.Grid()->oSites(),{
int sU=sss;
for(int s=0;s<Ls;s++){
int sF = s+Ls*sU;
tmp_v[sU] = tmp_v[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde_v[sF],Atilde_v[sF])); // ordering here
}
});
PokeIndex<LorentzIndex>(mat,tmp,mu);
}
};
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffReal > WilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRL; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDF; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRL; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDF; // Double
typedef WilsonImpl<vComplex, AdjointRepresentation, CoeffReal > WilsonAdjImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF; // Float
typedef WilsonImpl<vComplexD, AdjointRepresentation, CoeffReal > WilsonAdjImplD; // Double
typedef WilsonImpl<vComplex, TwoIndexSymmetricRepresentation, CoeffReal > WilsonTwoIndexSymmetricImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, TwoIndexSymmetricRepresentation, CoeffReal > WilsonTwoIndexSymmetricImplF; // Float
typedef WilsonImpl<vComplexD, TwoIndexSymmetricRepresentation, CoeffReal > WilsonTwoIndexSymmetricImplD; // Double
typedef WilsonImpl<vComplex, TwoIndexAntiSymmetricRepresentation, CoeffReal > WilsonTwoIndexAntiSymmetricImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, TwoIndexAntiSymmetricRepresentation, CoeffReal > WilsonTwoIndexAntiSymmetricImplF; // Float
typedef WilsonImpl<vComplexD, TwoIndexAntiSymmetricRepresentation, CoeffReal > WilsonTwoIndexAntiSymmetricImplD; // Double
NAMESPACE_END(Grid);

View File

@ -1,455 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
namespace QCD {
int WilsonKernelsStatic::Opt = WilsonKernelsStatic::OptGeneric;
int WilsonKernelsStatic::Comms = WilsonKernelsStatic::CommsAndCompute;
template <class Impl>
WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
////////////////////////////////////////////
// Generic implementation; move to different file?
////////////////////////////////////////////
#define GENERIC_STENCIL_LEG(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if (SE->_is_local) { \
chi_p = &chi; \
if (SE->_permute) { \
spProj(tmp, in._odata[SE->_offset]); \
permute(chi, tmp, ptype); \
} else { \
spProj(chi, in._odata[SE->_offset]); \
} \
} else { \
chi_p = &buf[SE->_offset]; \
} \
Impl::multLink(Uchi, U._odata[sU], *chi_p, Dir, SE, st); \
Recon(result, Uchi);
#define GENERIC_STENCIL_LEG_INT(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if (SE->_is_local) { \
chi_p = &chi; \
if (SE->_permute) { \
spProj(tmp, in._odata[SE->_offset]); \
permute(chi, tmp, ptype); \
} else { \
spProj(chi, in._odata[SE->_offset]); \
} \
} else if ( st.same_node[Dir] ) { \
chi_p = &buf[SE->_offset]; \
} \
if (SE->_is_local || st.same_node[Dir] ) { \
Impl::multLink(Uchi, U._odata[sU], *chi_p, Dir, SE, st); \
Recon(result, Uchi); \
}
#define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
chi_p = &buf[SE->_offset]; \
Impl::multLink(Uchi, U._odata[sU], *chi_p, Dir, SE, st); \
Recon(result, Uchi); \
nmu++; \
}
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \
if (gamma == Dir) { \
if (SE->_is_local && SE->_permute) { \
spProj(tmp, in._odata[SE->_offset]); \
permute(chi, tmp, ptype); \
} else if (SE->_is_local) { \
spProj(chi, in._odata[SE->_offset]); \
} else { \
chi = buf[SE->_offset]; \
} \
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); \
Recon(result, Uchi); \
}
////////////////////////////////////////////////////////////////////
// All legs kernels ; comms then compute
////////////////////////////////////////////////////////////////////
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
SiteHalfSpinor Uchi;
SiteSpinor result;
StencilEntry *SE;
int ptype;
GENERIC_STENCIL_LEG(Xp,spProjXp,spReconXp);
GENERIC_STENCIL_LEG(Yp,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG(Zp,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG(Tp,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG(Xm,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG(Ym,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG(Zm,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG(Tm,spProjTm,accumReconTm);
vstream(out._odata[sF], result);
};
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
SiteHalfSpinor Uchi;
SiteSpinor result;
StencilEntry *SE;
int ptype;
GENERIC_STENCIL_LEG(Xm,spProjXp,spReconXp);
GENERIC_STENCIL_LEG(Ym,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG(Zm,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG(Tm,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG(Xp,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG(Yp,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG(Zp,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG(Tp,spProjTm,accumReconTm);
vstream(out._odata[sF], result);
};
////////////////////////////////////////////////////////////////////
// Interior kernels
////////////////////////////////////////////////////////////////////
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
SiteHalfSpinor Uchi;
SiteSpinor result;
StencilEntry *SE;
int ptype;
result=zero;
GENERIC_STENCIL_LEG_INT(Xp,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_INT(Yp,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_INT(Zp,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_INT(Tp,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_INT(Xm,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_INT(Ym,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_INT(Zm,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_INT(Tm,spProjTm,accumReconTm);
vstream(out._odata[sF], result);
};
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
SiteHalfSpinor Uchi;
SiteSpinor result;
StencilEntry *SE;
int ptype;
result=zero;
GENERIC_STENCIL_LEG_INT(Xm,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_INT(Ym,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_INT(Zm,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_INT(Tm,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_INT(Xp,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_INT(Yp,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_INT(Zp,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_INT(Tp,spProjTm,accumReconTm);
vstream(out._odata[sF], result);
};
////////////////////////////////////////////////////////////////////
// Exterior kernels
////////////////////////////////////////////////////////////////////
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
SiteHalfSpinor Uchi;
SiteSpinor result;
StencilEntry *SE;
int ptype;
int nmu=0;
result=zero;
GENERIC_STENCIL_LEG_EXT(Xp,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_EXT(Yp,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_EXT(Zp,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_EXT(Tp,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_EXT(Xm,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_EXT(Ym,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_EXT(Zm,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_EXT(Tm,spProjTm,accumReconTm);
if ( nmu ) {
out._odata[sF] = out._odata[sF] + result;
}
};
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
SiteHalfSpinor Uchi;
SiteSpinor result;
StencilEntry *SE;
int ptype;
int nmu=0;
result=zero;
GENERIC_STENCIL_LEG_EXT(Xm,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_EXT(Ym,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_EXT(Zm,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_EXT(Tm,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_EXT(Xp,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_EXT(Yp,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_EXT(Zp,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_EXT(Tp,spProjTm,accumReconTm);
if ( nmu ) {
out._odata[sF] = out._odata[sF] + result;
}
};
template <class Impl>
void WilsonKernels<Impl>::DhopDir( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteSpinor result;
SiteHalfSpinor Uchi;
StencilEntry *SE;
int ptype;
SE = st.GetEntry(ptype, dir, sF);
GENERIC_DHOPDIR_LEG(Xp,spProjXp,spReconXp);
GENERIC_DHOPDIR_LEG(Yp,spProjYp,spReconYp);
GENERIC_DHOPDIR_LEG(Zp,spProjZp,spReconZp);
GENERIC_DHOPDIR_LEG(Tp,spProjTp,spReconTp);
GENERIC_DHOPDIR_LEG(Xm,spProjXm,spReconXm);
GENERIC_DHOPDIR_LEG(Ym,spProjYm,spReconYm);
GENERIC_DHOPDIR_LEG(Zm,spProjZm,spReconZm);
GENERIC_DHOPDIR_LEG(Tm,spProjTm,spReconTm);
vstream(out._odata[sF], result);
}
/*******************************************************************************
* Conserved current utilities for Wilson fermions, for contracting propagators
* to make a conserved current sink or inserting the conserved current
* sequentially. Common to both 4D and 5D.
******************************************************************************/
// N.B. Functions below assume a -1/2 factor within U.
#define WilsonCurrentFwd(expr, mu) ((expr - Gamma::gmu[mu]*expr))
#define WilsonCurrentBwd(expr, mu) ((expr + Gamma::gmu[mu]*expr))
/*******************************************************************************
* Name: ContractConservedCurrentSiteFwd
* Operation: (1/2) * q2[x] * U(x) * (g[mu] - 1) * q1[x + mu]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in_1 shifted in +ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd(
const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeField &U,
unsigned int sU,
unsigned int mu,
bool switch_sign)
{
SitePropagator result, tmp;
Gamma g5(Gamma::Algebra::Gamma5);
Impl::multLinkProp(tmp, U._odata[sU], q_in_1, mu);
result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu);
if (switch_sign)
{
q_out -= result;
}
else
{
q_out += result;
}
}
/*******************************************************************************
* Name: ContractConservedCurrentSiteBwd
* Operation: (1/2) * q2[x + mu] * U^dag(x) * (g[mu] + 1) * q1[x]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in_2 shifted in +ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd(
const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeField &U,
unsigned int sU,
unsigned int mu,
bool switch_sign)
{
SitePropagator result, tmp;
Gamma g5(Gamma::Algebra::Gamma5);
Impl::multLinkProp(tmp, U._odata[sU], q_in_1, mu + Nd);
result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu);
if (switch_sign)
{
q_out += result;
}
else
{
q_out -= result;
}
}
// G-parity requires more specialised implementation.
#define NO_CURR_SITE(Impl) \
template <> \
void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd( \
const SitePropagator &q_in_1, \
const SitePropagator &q_in_2, \
SitePropagator &q_out, \
DoubledGaugeField &U, \
unsigned int sU, \
unsigned int mu, \
bool switch_sign) \
{ \
assert(0); \
} \
template <> \
void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd( \
const SitePropagator &q_in_1, \
const SitePropagator &q_in_2, \
SitePropagator &q_out, \
DoubledGaugeField &U, \
unsigned int mu, \
unsigned int sU, \
bool switch_sign) \
{ \
assert(0); \
}
NO_CURR_SITE(GparityWilsonImplF);
NO_CURR_SITE(GparityWilsonImplD);
NO_CURR_SITE(GparityWilsonImplFH);
NO_CURR_SITE(GparityWilsonImplDF);
/*******************************************************************************
* Name: SeqConservedCurrentSiteFwd
* Operation: (1/2) * U(x) * (g[mu] - 1) * q[x + mu]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in shifted in +ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeField &U,
unsigned int sU,
unsigned int mu,
vInteger t_mask,
bool switch_sign)
{
SitePropagator result;
Impl::multLinkProp(result, U._odata[sU], q_in, mu);
result = WilsonCurrentFwd(result, mu);
// Zero any unwanted timeslice entries.
result = predicatedWhere(t_mask, result, 0.*result);
if (switch_sign)
{
q_out -= result;
}
else
{
q_out += result;
}
}
/*******************************************************************************
* Name: SeqConservedCurrentSiteFwd
* Operation: (1/2) * U^dag(x) * (g[mu] + 1) * q[x - mu]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in shifted in -ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeField &U,
unsigned int sU,
unsigned int mu,
vInteger t_mask,
bool switch_sign)
{
SitePropagator result;
Impl::multLinkProp(result, U._odata[sU], q_in, mu + Nd);
result = WilsonCurrentBwd(result, mu);
// Zero any unwanted timeslice entries.
result = predicatedWhere(t_mask, result, 0.*result);
if (switch_sign)
{
q_out += result;
}
else
{
q_out -= result;
}
}
FermOpTemplateInstantiate(WilsonKernels);
AdjointFermOpTemplateInstantiate(WilsonKernels);
TwoIndexFermOpTemplateInstantiate(WilsonKernels);
}}

View File

@ -27,19 +27,17 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_DHOP_H
#define GRID_QCD_DHOP_H
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Helper routines that implement Wilson stencil for a single site.
// Common to both the WilsonFermion and WilsonFermion5D
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Helper routines that implement Wilson stencil for a single site.
// Common to both the WilsonFermion and WilsonFermion5D
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class WilsonKernelsStatic {
public:
public:
enum { OptGeneric, OptHandUnroll, OptInlineAsm };
enum { CommsAndCompute, CommsThenCompute };
static int Opt;
@ -47,235 +45,123 @@ class WilsonKernelsStatic {
};
template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic {
public:
public:
INHERIT_IMPL_TYPES(Impl);
typedef FermionOperator<Impl> Base;
public:
template <bool EnableBool = true>
typename std::enable_if<Impl::isFundamental==true && Nc == 3 &&EnableBool, void>::type
DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1)
{
bgq_l1p_optimisation(1);
switch(Opt) {
#if defined(AVX512) || defined (QPX)
case OptInlineAsm:
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSite (st,lo,U,buf,sF,sU,Ls,Ns,in,out);
else if (interior) WilsonKernels<Impl>::AsmDhopSiteInt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
else if (exterior) WilsonKernels<Impl>::AsmDhopSiteExt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
else assert(0);
break;
#endif
case OptHandUnroll:
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSite(st,lo,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::HandDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::HandDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
sF++;
}
sU++;
}
break;
case OptGeneric:
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
else assert(0);
sF++;
}
sU++;
}
break;
default:
assert(0);
}
bgq_l1p_optimisation(0);
}
template <bool EnableBool = true>
typename std::enable_if<(Impl::isFundamental==false || (Impl::isFundamental==true && Nc != 3)) && EnableBool, void>::type
DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1 ) {
// no kernel choice
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
else assert(0);
sF++;
}
sU++;
}
}
template <bool EnableBool = true>
typename std::enable_if<Impl::isFundamental==true && Nc == 3 && EnableBool,void>::type
DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1)
{
bgq_l1p_optimisation(1);
switch(Opt) {
#if defined(AVX512) || defined (QPX)
case OptInlineAsm:
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSiteDag (st,lo,U,buf,sF,sU,Ls,Ns,in,out);
else if (interior) WilsonKernels<Impl>::AsmDhopSiteDagInt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
else if (exterior) WilsonKernels<Impl>::AsmDhopSiteDagExt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
else assert(0);
break;
#endif
case OptHandUnroll:
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::HandDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::HandDhopSiteDagExt(st,lo,U,buf,sF,sU,in,out);
else assert(0);
sF++;
}
sU++;
}
break;
case OptGeneric:
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::GenericDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteDagExt(st,lo,U,buf,sF,sU,in,out);
else assert(0);
sF++;
}
sU++;
}
break;
default:
assert(0);
}
bgq_l1p_optimisation(0);
}
static void DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ;
template <bool EnableBool = true>
typename std::enable_if<(Impl::isFundamental==false || (Impl::isFundamental==true && Nc != 3)) && EnableBool,void>::type
DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1) {
static void DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ;
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::GenericDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteDagExt(st,lo,U,buf,sF,sU,in,out);
else assert(0);
sF++;
}
sU++;
}
}
static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma);
void DhopDir(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma);
//////////////////////////////////////////////////////////////////////////////
// Utilities for inserting Wilson conserved current.
//////////////////////////////////////////////////////////////////////////////
void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
static void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeField &U,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
bool switch_sign = false);
void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1,
static void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeField &U,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
bool switch_sign = false);
void SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
static void SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeField &U,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
vInteger t_mask,
vPredicate t_mask,
bool switch_sign = false);
void SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
static void SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeField &U,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
vInteger t_mask,
vPredicate t_mask,
bool switch_sign = false);
private:
// Specialised variants
void GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
static accelerator void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
void GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
// Specialised variants
static accelerator void GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
void GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
static accelerator void GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator void GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
void GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void AsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
void AsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
void AsmDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
void AsmDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
void AsmDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
void AsmDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
void HandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void HandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
static accelerator void GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator void GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
void HandDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void HandDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void HandDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void HandDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
public:
static accelerator void GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
WilsonKernels(const ImplParams &p = ImplParams());
static void AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
static void AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
static void AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
static void AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
static void AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
static void AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
// Keep Hand unrolled temporarily
static accelerator void HandDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator void HandDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator void HandDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator void HandDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator void HandDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator void HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
public:
WilsonKernels(const ImplParams &p = ImplParams()) : Base(p){};
};
}}
NAMESPACE_END(Grid);
#endif

View File

@ -1,127 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernelsAsm.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
namespace QCD {
///////////////////////////////////////////////////////////
// Default to no assembler implementation
///////////////////////////////////////////////////////////
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
#include <Grid/qcd/action/fermion/WilsonKernelsAsmAvx512.h>
#include <Grid/qcd/action/fermion/WilsonKernelsAsmQPX.h>
#define INSTANTIATE_ASM(A)\
template void WilsonKernels<A>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
\
template void WilsonKernels<A>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
\
template void WilsonKernels<A>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
\
template void WilsonKernels<A>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
INSTANTIATE_ASM(WilsonImplF);
INSTANTIATE_ASM(WilsonImplD);
INSTANTIATE_ASM(ZWilsonImplF);
INSTANTIATE_ASM(ZWilsonImplD);
INSTANTIATE_ASM(GparityWilsonImplF);
INSTANTIATE_ASM(GparityWilsonImplD);
INSTANTIATE_ASM(DomainWallVec5dImplF);
INSTANTIATE_ASM(DomainWallVec5dImplD);
INSTANTIATE_ASM(ZDomainWallVec5dImplF);
INSTANTIATE_ASM(ZDomainWallVec5dImplD);
INSTANTIATE_ASM(WilsonImplFH);
INSTANTIATE_ASM(WilsonImplDF);
INSTANTIATE_ASM(ZWilsonImplFH);
INSTANTIATE_ASM(ZWilsonImplDF);
INSTANTIATE_ASM(GparityWilsonImplFH);
INSTANTIATE_ASM(GparityWilsonImplDF);
INSTANTIATE_ASM(DomainWallVec5dImplFH);
INSTANTIATE_ASM(DomainWallVec5dImplDF);
INSTANTIATE_ASM(ZDomainWallVec5dImplFH);
INSTANTIATE_ASM(ZDomainWallVec5dImplDF);
}}

View File

@ -1,650 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernelsAsmAvx512.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#if defined(AVX512)
///////////////////////////////////////////////////////////
// If we are AVX512 specialise the single precision routine
///////////////////////////////////////////////////////////
#include <simd/Intel512wilson.h>
#include <simd/Intel512single.h>
static Vector<vComplexF> signsF;
template<typename vtype>
int setupSigns(Vector<vtype>& signs ){
Vector<vtype> bother(2);
signs = bother;
vrsign(signs[0]);
visign(signs[1]);
return 1;
}
static int signInitF = setupSigns(signsF);
#define MAYBEPERM(A,perm) if (perm) { A ; }
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
#define COMPLEX_SIGNS(isigns) vComplexF *isigns = &signsF[0];
/////////////////////////////////////////////////////////////////
// XYZT vectorised, undag Kernel, single
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef MAYBEPERM
#undef MULT_2SPIN
#define MAYBEPERM(A,B)
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
/////////////////////////////////////////////////////////////////
// Ls vectorised, undag Kernel, single
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
#undef MULT_2SPIN
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LSNOPF(ptr,pf)
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// Ls vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef COMPLEX_SIGNS
#undef MAYBEPERM
#undef MULT_2SPIN
///////////////////////////////////////////////////////////
// If we are AVX512 specialise the double precision routine
///////////////////////////////////////////////////////////
#include <simd/Intel512double.h>
static Vector<vComplexD> signsD;
static int signInitD = setupSigns(signsD);
#define MAYBEPERM(A,perm) if (perm) { A ; }
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
#define COMPLEX_SIGNS(isigns) vComplexD *isigns = &signsD[0];
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
/////////////////////////////////////////////////////////////////
// XYZT vectorised, undag Kernel, single
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef MAYBEPERM
#undef MULT_2SPIN
#define MAYBEPERM(A,B)
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
/////////////////////////////////////////////////////////////////
// Ls vectorised, undag Kernel, single
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
#undef MULT_2SPIN
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LSNOPF(ptr,pf)
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// Ls vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef COMPLEX_SIGNS
#undef MAYBEPERM
#undef MULT_2SPIN
#endif //AVX512

View File

@ -1,99 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc
Copyright (C) 2015
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/WilsonTMFermion.h>
namespace Grid {
namespace QCD {
/*
* BF sequence
*
void bfmbase<Float>::MooeeInv(Fermion_t psi,
Fermion_t chi,
int dag, int cb)
double m = this->mass;
double tm = this->twistedmass;
double mtil = 4.0+this->mass;
double sq = mtil*mtil + tm*tm;
double a = mtil/sq;
double b = -tm /sq;
if(dag) b=-b;
axpibg5x(chi,psi,a,b);
void bfmbase<Float>::Mooee(Fermion_t psi,
Fermion_t chi,
int dag,int cb)
double a = 4.0+this->mass;
double b = this->twistedmass;
if(dag) b=-b;
axpibg5x(chi,psi,a,b);
*/
template<class Impl>
void WilsonTMFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
RealD a = 4.0+this->mass;
RealD b = this->mu;
out.checkerboard = in.checkerboard;
axpibg5x(out,in,a,b);
}
template<class Impl>
void WilsonTMFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
RealD a = 4.0+this->mass;
RealD b = -this->mu;
out.checkerboard = in.checkerboard;
axpibg5x(out,in,a,b);
}
template<class Impl>
void WilsonTMFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
RealD m = this->mass;
RealD tm = this->mu;
RealD mtil = 4.0+this->mass;
RealD sq = mtil*mtil+tm*tm;
RealD a = mtil/sq;
RealD b = -tm /sq;
axpibg5x(out,in,a,b);
}
template<class Impl>
void WilsonTMFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out) {
RealD m = this->mass;
RealD tm = this->mu;
RealD mtil = 4.0+this->mass;
RealD sq = mtil*mtil+tm*tm;
RealD a = mtil/sq;
RealD b = tm /sq;
axpibg5x(out,in,a,b);
}
FermOpTemplateInstantiate(WilsonTMFermion);
}
}

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -23,55 +23,52 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_WILSON_TM_FERMION_H
#define GRID_QCD_WILSON_TM_FERMION_H
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/WilsonFermion.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class WilsonTMFermion : public WilsonFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class WilsonTMFermion : public WilsonFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
WilsonTMFermion(GaugeField &_Umu,
GridCartesian &Fgrid,
GridRedBlackCartesian &Hgrid,
RealD _mass,
RealD _mu,
const ImplParams &p= ImplParams()
) :
WilsonFermion<Impl>(_Umu,
Fgrid,
Hgrid,
_mass,p)
virtual void Instantiatable(void) {};
// Constructors
WilsonTMFermion(GaugeField &_Umu,
GridCartesian &Fgrid,
GridRedBlackCartesian &Hgrid,
RealD _mass,
RealD _mu,
const ImplParams &p= ImplParams()
) :
WilsonFermion<Impl>(_Umu,
Fgrid,
Hgrid,
_mass,p)
{
mu = _mu;
}
{
mu = _mu;
}
// allow override for twisted mass and clover
virtual void Mooee(const FermionField &in, FermionField &out) ;
virtual void MooeeDag(const FermionField &in, FermionField &out) ;
virtual void MooeeInv(const FermionField &in, FermionField &out) ;
virtual void MooeeInvDag(const FermionField &in, FermionField &out) ;
// allow override for twisted mass and clover
virtual void Mooee(const FermionField &in, FermionField &out) ;
virtual void MooeeDag(const FermionField &in, FermionField &out) ;
virtual void MooeeInv(const FermionField &in, FermionField &out) ;
virtual void MooeeInvDag(const FermionField &in, FermionField &out) ;
private:
RealD mu; // TwistedMass parameter
private:
RealD mu; // TwistedMass parameter
};
};
NAMESPACE_END(Grid);
}}
#endif

View File

@ -30,126 +30,123 @@ Author: paboyle <paboyle@ph.ed.ac.uk> ; NB Christoph did similar in GPT
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/WilsonFermion.h>
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template<class Impl>
class WilsonTMFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
WilsonTMFermion5D(GaugeField &_Umu,
GridCartesian &Fgrid,
GridRedBlackCartesian &Frbgrid,
GridCartesian &Ugrid,
GridRedBlackCartesian &Urbgrid,
const std::vector<RealD> _mass,
const std::vector<RealD> _mu,
const ImplParams &p= ImplParams()
) :
WilsonFermion5D<Impl>(_Umu,
Fgrid,
Frbgrid,
Ugrid,
Urbgrid,
4.0,p)
{
update(_mass,_mu);
}
virtual void Meooe(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
this->DhopEO(in, out, DaggerNo);
} else {
this->DhopOE(in, out, DaggerNo);
}
}
virtual void MeooeDag(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
this->DhopEO(in, out, DaggerYes);
} else {
this->DhopOE(in, out, DaggerYes);
}
}
// allow override for twisted mass and clover
virtual void Mooee(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
//axpibg5x(out,in,a,b); // out = a*in + b*i*G5*in
for (int s=0;s<(int)this->mass.size();s++) {
ComplexD a = 4.0+this->mass[s];
ComplexD b(0.0,this->mu[s]);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual void MooeeDag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
for (int s=0;s<(int)this->mass.size();s++) {
ComplexD a = 4.0+this->mass[s];
ComplexD b(0.0,-this->mu[s]);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual void MooeeInv(const FermionField &in, FermionField &out) {
for (int s=0;s<(int)this->mass.size();s++) {
RealD m = this->mass[s];
RealD tm = this->mu[s];
RealD mtil = 4.0+this->mass[s];
RealD sq = mtil*mtil+tm*tm;
ComplexD a = mtil/sq;
ComplexD b(0.0, -tm /sq);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual void MooeeInvDag(const FermionField &in, FermionField &out) {
for (int s=0;s<(int)this->mass.size();s++) {
RealD m = this->mass[s];
RealD tm = this->mu[s];
RealD mtil = 4.0+this->mass[s];
RealD sq = mtil*mtil+tm*tm;
ComplexD a = mtil/sq;
ComplexD b(0.0,tm /sq);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual RealD M(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
this->Dhop(in, out, DaggerNo);
FermionField tmp(out._grid);
for (int s=0;s<(int)this->mass.size();s++) {
ComplexD a = 4.0+this->mass[s];
ComplexD b(0.0,this->mu[s]);
axpbg5y_ssp(tmp,a,in,b,in,s,s);
}
return axpy_norm(out, 1.0, tmp, out);
}
// needed for fast PV
void update(const std::vector<RealD>& _mass, const std::vector<RealD>& _mu) {
assert(_mass.size() == _mu.size());
assert(_mass.size() == this->FermionGrid()->_fdimensions[0]);
this->mass = _mass;
this->mu = _mu;
}
private:
std::vector<RealD> mu;
std::vector<RealD> mass;
};
template<class Impl>
class WilsonTMFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
WilsonTMFermion5D(GaugeField &_Umu,
GridCartesian &Fgrid,
GridRedBlackCartesian &Frbgrid,
GridCartesian &Ugrid,
GridRedBlackCartesian &Urbgrid,
const std::vector<RealD> _mass,
const std::vector<RealD> _mu,
const ImplParams &p= ImplParams()
) :
WilsonFermion5D<Impl>(_Umu,
Fgrid,
Frbgrid,
Ugrid,
Urbgrid,
4.0,p)
typedef WilsonTMFermion5D<WilsonImplF> WilsonTMFermion5DF;
typedef WilsonTMFermion5D<WilsonImplD> WilsonTMFermion5DD;
{
update(_mass,_mu);
}
virtual void Meooe(const FermionField &in, FermionField &out) {
if (in.Checkerboard() == Odd) {
this->DhopEO(in, out, DaggerNo);
} else {
this->DhopOE(in, out, DaggerNo);
}
}
virtual void MeooeDag(const FermionField &in, FermionField &out) {
if (in.Checkerboard() == Odd) {
this->DhopEO(in, out, DaggerYes);
} else {
this->DhopOE(in, out, DaggerYes);
}
}
// allow override for twisted mass and clover
virtual void Mooee(const FermionField &in, FermionField &out) {
out.Checkerboard() = in.Checkerboard();
//axpibg5x(out,in,a,b); // out = a*in + b*i*G5*in
for (int s=0;s<(int)this->mass.size();s++) {
ComplexD a = 4.0+this->mass[s];
ComplexD b(0.0,this->mu[s]);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual void MooeeDag(const FermionField &in, FermionField &out) {
out.Checkerboard() = in.Checkerboard();
for (int s=0;s<(int)this->mass.size();s++) {
ComplexD a = 4.0+this->mass[s];
ComplexD b(0.0,-this->mu[s]);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual void MooeeInv(const FermionField &in, FermionField &out) {
for (int s=0;s<(int)this->mass.size();s++) {
RealD m = this->mass[s];
RealD tm = this->mu[s];
RealD mtil = 4.0+this->mass[s];
RealD sq = mtil*mtil+tm*tm;
ComplexD a = mtil/sq;
ComplexD b(0.0, -tm /sq);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual void MooeeInvDag(const FermionField &in, FermionField &out) {
for (int s=0;s<(int)this->mass.size();s++) {
RealD m = this->mass[s];
RealD tm = this->mu[s];
RealD mtil = 4.0+this->mass[s];
RealD sq = mtil*mtil+tm*tm;
ComplexD a = mtil/sq;
ComplexD b(0.0,tm /sq);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual RealD M(const FermionField &in, FermionField &out) {
out.Checkerboard() = in.Checkerboard();
this->Dhop(in, out, DaggerNo);
FermionField tmp(out.Grid());
for (int s=0;s<(int)this->mass.size();s++) {
ComplexD a = 4.0+this->mass[s];
ComplexD b(0.0,this->mu[s]);
axpbg5y_ssp(tmp,a,in,b,in,s,s);
}
return axpy_norm(out, 1.0, tmp, out);
}
// needed for fast PV
void update(const std::vector<RealD>& _mass, const std::vector<RealD>& _mu) {
assert(_mass.size() == _mu.size());
assert(_mass.size() == this->FermionGrid()->_fdimensions[0]);
this->mass = _mass;
this->mu = _mu;
}
private:
std::vector<RealD> mu;
std::vector<RealD> mass;
};
typedef WilsonTMFermion5D<WilsonImplF> WilsonTMFermion5DF;
typedef WilsonTMFermion5D<WilsonImplD> WilsonTMFermion5DD;
}}
NAMESPACE_END(Grid);

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,56 +24,50 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_ZMOBIUS_FERMION_H
#define GRID_QCD_ZMOBIUS_FERMION_H
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class ZMobiusFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class ZMobiusFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
ZMobiusFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
std::vector<ComplexD> &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) :
virtual void Instantiatable(void) {};
// Constructors
ZMobiusFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
std::vector<ComplexD> &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) :
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
RealD eps = 1.0;
std::cout<<GridLogMessage << "ZMobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" gamma passed in"<<std::endl;
std::vector<Coeff_t> zgamma(this->Ls);
for(int s=0;s<this->Ls;s++){
zgamma[s] = gamma[s];
}
// Call base setter
this->SetCoefficientsInternal(1.0,zgamma,b,c);
}
};
{
// RealD eps = 1.0;
std::cout<<GridLogMessage << "ZMobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" gamma passed in"<<std::endl;
Vector<Coeff_t> zgamma(this->Ls);
for(int s=0;s<this->Ls;s++){
zgamma[s] = gamma[s];
}
// Call base setter
this->SetCoefficientsInternal(1.0,zgamma,b,c);
}
}
#endif
};
NAMESPACE_END(Grid);

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -26,19 +26,19 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
namespace Grid {
namespace QCD {
/*
* Dense matrix versions of routines
*/
NAMESPACE_BEGIN(Grid);
/*
* Dense matrix versions of routines
*/
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
{
@ -54,10 +54,10 @@ template<class Impl>
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
{
int Ls=this->Ls;
int LLs = psi._grid->_rdimensions[0];
int vol = psi._grid->oSites()/LLs;
int LLs = psi.Grid()->_rdimensions[0];
int vol = psi.Grid()->oSites()/LLs;
chi.checkerboard=psi.checkerboard;
chi.Checkerboard()=psi.Checkerboard();
assert(Ls==LLs);
@ -96,15 +96,14 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
}
// For the non-vectorised s-direction this is simple
for(auto site=0;site<vol;site++){
thread_loop( (auto site=0;site<vol;site++), {
SiteSpinor SiteChi;
SiteHalfSpinor SitePplus;
SiteHalfSpinor SitePminus;
for(int s1=0;s1<Ls;s1++){
SiteChi =zero;
SiteChi =Zero();
for(int s2=0;s2<Ls;s2++){
int lex2 = s2+Ls*site;
@ -120,7 +119,7 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
}
chi[s1+Ls*site] = SiteChi*0.5;
}
}
});
}
#ifdef CAYLEY_DPERP_DENSE
@ -153,4 +152,4 @@ template void CayleyFermion5D<ZWilsonImplFH>::MooeeInternal(const FermionField &
template void CayleyFermion5D<ZWilsonImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
#endif
}}
NAMESPACE_END(Grid);

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -26,26 +26,24 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus fowards
// Pplus backwards
// Pminus fowards
// Pplus backwards
template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
Vector<Coeff_t> &lower,
Vector<Coeff_t> &diag,
Vector<Coeff_t> &upper)
{
Coeff_t one(1.0);
int Ls=this->Ls;
@ -66,9 +64,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
Vector<Coeff_t> &lower,
Vector<Coeff_t> &diag,
Vector<Coeff_t> &upper)
{
Coeff_t one(1.0);
int Ls=this->Ls;
@ -91,7 +89,7 @@ void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard=psi.checkerboard;
chi.Checkerboard()=psi.Checkerboard();
int Ls=this->Ls;
// Apply (L^{\prime})^{-1}
axpby_ssp (chi,one,psi, czero,psi,0,0); // chi[0]=psi[0]
@ -120,7 +118,7 @@ void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard=psi.checkerboard;
chi.Checkerboard()=psi.Checkerboard();
int Ls=this->Ls;
// Apply (U^{\prime})^{-dagger}
axpby_ssp (chi,one,psi, czero,psi,0,0); // chi[0]=psi[0]
@ -145,20 +143,19 @@ void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &
#ifdef CAYLEY_DPERP_LINALG
INSTANTIATE_DPERP(WilsonImplF);
INSTANTIATE_DPERP(WilsonImplD);
INSTANTIATE_DPERP(GparityWilsonImplF);
INSTANTIATE_DPERP(GparityWilsonImplD);
INSTANTIATE_DPERP(ZWilsonImplF);
INSTANTIATE_DPERP(ZWilsonImplD);
INSTANTIATE_DPERP(WilsonImplF);
INSTANTIATE_DPERP(WilsonImplD);
INSTANTIATE_DPERP(GparityWilsonImplF);
INSTANTIATE_DPERP(GparityWilsonImplD);
INSTANTIATE_DPERP(ZWilsonImplF);
INSTANTIATE_DPERP(ZWilsonImplD);
INSTANTIATE_DPERP(WilsonImplFH);
INSTANTIATE_DPERP(WilsonImplDF);
INSTANTIATE_DPERP(GparityWilsonImplFH);
INSTANTIATE_DPERP(GparityWilsonImplDF);
INSTANTIATE_DPERP(ZWilsonImplFH);
INSTANTIATE_DPERP(ZWilsonImplDF);
INSTANTIATE_DPERP(WilsonImplFH);
INSTANTIATE_DPERP(WilsonImplDF);
INSTANTIATE_DPERP(GparityWilsonImplFH);
INSTANTIATE_DPERP(GparityWilsonImplDF);
INSTANTIATE_DPERP(ZWilsonImplFH);
INSTANTIATE_DPERP(ZWilsonImplDF);
#endif
}
}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,158 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
NAMESPACE_BEGIN(Grid);
/*
* Dense matrix versions of routines
*/
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{
int Ls = this->Ls;
int LLs = psi.Grid()->_rdimensions[0];
int vol = psi.Grid()->oSites()/LLs;
chi.Checkerboard() = psi.Checkerboard();
assert(Ls==LLs);
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
for(int s=0;s<Ls;s++){
Pplus(s,s) = this->bee[s];
Pminus(s,s) = this->bee[s];
}
for(int s=0; s<Ls-1; s++){
Pminus(s,s+1) = -this->cee[s];
}
for(int s=0; s<Ls-1; s++){
Pplus(s+1,s) = -this->cee[s+1];
}
Pplus (0,Ls-1) = this->dp;
Pminus(Ls-1,0) = this->dm;
Eigen::MatrixXd PplusMat ;
Eigen::MatrixXd PminusMat;
if(inv) {
PplusMat = Pplus.inverse();
PminusMat = Pminus.inverse();
} else {
PplusMat = Pplus;
PminusMat = Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
// For the non-vectorised s-direction this is simple
for(auto site=0; site<vol; site++){
SiteSpinor SiteChi;
SiteHalfSpinor SitePplus;
SiteHalfSpinor SitePminus;
for(int s1=0; s1<Ls; s1++){
SiteChi = Zero();
for(int s2=0; s2<Ls; s2++){
int lex2 = s2 + Ls*site;
if(PplusMat(s1,s2) != 0.0){
spProj5p(SitePplus,psi[lex2]);
accumRecon5p(SiteChi, PplusMat(s1,s2)*SitePplus);
}
if(PminusMat(s1,s2) != 0.0){
spProj5m(SitePminus, psi[lex2]);
accumRecon5m(SiteChi, PminusMat(s1,s2)*SitePminus);
}
}
chi[s1+Ls*site] = SiteChi*0.5;
}
}
}
#ifdef DOMAIN_WALL_EOFA_DPERP_DENSE
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
template void DomainWallEOFAFermion<GparityWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<GparityWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
template void DomainWallEOFAFermion<GparityWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<GparityWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
#endif
NAMESPACE_END(Grid);

View File

@ -0,0 +1,167 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
NAMESPACE_BEGIN(Grid);
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus fowards
// Pplus backwards
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.Checkerboard() = psi.Checkerboard();
int Ls = this->Ls;
FermionField tmp(psi.Grid());
// Apply (L^{\prime})^{-1}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls], chi, s, Ls-1);
}
axpby_ssp_pminus(tmp, czero, chi, one/this->dee[Ls-1], chi, Ls-1, Ls-1);
axpby_ssp_pplus(chi, one, tmp, one/this->dee[Ls], chi, Ls-1, Ls-1);
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls]
}
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.Checkerboard() = psi.Checkerboard();
int Ls = this->Ls;
FermionField tmp(psi.Grid());
// Apply (U^{\prime})^{-dagger}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
}
axpby_ssp_pminus(tmp, czero, chi, one/conjugate(this->dee[Ls-1]), chi, Ls-1, Ls-1);
axpby_ssp_pplus(chi, one, tmp, one/conjugate(this->dee[Ls]), chi, Ls-1, Ls-1);
// Apply L^{-dagger}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls]
}
}
#ifdef DOMAIN_WALL_EOFA_DPERP_LINALG
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
#endif
NAMESPACE_END(Grid);

View File

@ -0,0 +1,183 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermiondense.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
NAMESPACE_BEGIN(Grid);
/*
* Dense matrix versions of routines
*/
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{
int Ls = this->Ls;
int LLs = psi.Grid()->_rdimensions[0];
int vol = psi.Grid()->oSites()/LLs;
int pm = this->pm;
RealD shift = this->shift;
RealD alpha = this->alpha;
RealD k = this->k;
RealD mq1 = this->mq1;
chi.Checkerboard() = psi.Checkerboard();
assert(Ls==LLs);
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
for(int s=0;s<Ls;s++){
Pplus(s,s) = this->bee[s];
Pminus(s,s) = this->bee[s];
}
for(int s=0; s<Ls-1; s++){
Pminus(s,s+1) = -this->cee[s];
}
for(int s=0; s<Ls-1; s++){
Pplus(s+1,s) = -this->cee[s+1];
}
Pplus (0,Ls-1) = mq1*this->cee[0];
Pminus(Ls-1,0) = mq1*this->cee[Ls-1];
if(shift != 0.0){
Coeff_t N = 2.0 * ( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) );
for(int s=0; s<Ls; ++s){
if(pm == 1){ Pplus(s,Ls-1) += shift * k * N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1); }
else{ Pminus(Ls-1-s,Ls-1) -= shift * k * N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1); }
}
}
Eigen::MatrixXd PplusMat ;
Eigen::MatrixXd PminusMat;
if(inv){
PplusMat = Pplus.inverse();
PminusMat = Pminus.inverse();
} else {
PplusMat = Pplus;
PminusMat = Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
// For the non-vectorised s-direction this is simple
for(auto site=0; site<vol; site++){
SiteSpinor SiteChi;
SiteHalfSpinor SitePplus;
SiteHalfSpinor SitePminus;
for(int s1=0; s1<Ls; s1++){
SiteChi = Zero();
for(int s2=0; s2<Ls; s2++){
int lex2 = s2 + Ls*site;
if(PplusMat(s1,s2) != 0.0){
spProj5p(SitePplus,psi[lex2]);
accumRecon5p(SiteChi, PplusMat(s1,s2)*SitePplus);
}
if(PminusMat(s1,s2) != 0.0){
spProj5m(SitePminus, psi[lex2]);
accumRecon5m(SiteChi, PminusMat(s1,s2)*SitePminus);
}
}
chi[s1+Ls*site] = SiteChi*0.5;
}
}
}
#ifdef MOBIUS_EOFA_DPERP_DENSE
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
template void MobiusEOFAFermion<GparityWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<GparityWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
template void MobiusEOFAFermion<GparityWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<GparityWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
#endif
NAMESPACE_END(Grid);

View File

@ -0,0 +1,289 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionssp.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
NAMESPACE_BEGIN(Grid);
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus fowards
// Pplus backwards
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi,
FermionField& chi, Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper,
Vector<Coeff_t>& shift_coeffs)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
}
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); }
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); }
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi,
FermionField& chi, Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper,
Vector<Coeff_t>& shift_coeffs)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
}
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); }
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); }
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.Checkerboard() = psi.Checkerboard();
int Ls = this->Ls;
// Apply (L^{\prime})^{-1}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1);
}
axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1);
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls]
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.Checkerboard() = psi.Checkerboard();
int Ls = this->Ls;
FermionField tmp(psi.Grid());
// Apply (L^{\prime})^{-1}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
axpby_ssp(tmp, czero, tmp, this->MooeeInv_shift_lc[0], psi, 0, 0);
for(int s=1; s<Ls; s++){
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
axpby_ssp(tmp, one, tmp, this->MooeeInv_shift_lc[s], psi, 0, s);
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1);
}
axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1);
// Apply U^{-1} and add shift term
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); }
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls]
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); }
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.Checkerboard() = psi.Checkerboard();
int Ls = this->Ls;
// Apply (U^{\prime})^{-dagger}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
}
axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1);
// Apply L^{-dagger}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls]
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.Checkerboard() = psi.Checkerboard();
int Ls = this->Ls;
FermionField tmp(psi.Grid());
// Apply (U^{\prime})^{-dagger} and accumulate (MooeeInvDag_shift_lc)_{j} \psi_{j} in tmp[0]
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
axpby_ssp(tmp, czero, tmp, this->MooeeInvDag_shift_lc[0], psi, 0, 0);
for(int s=1; s<Ls; s++){
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
axpby_ssp(tmp, one, tmp, this->MooeeInvDag_shift_lc[s], psi, 0, s);
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
}
axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1);
// Apply L^{-dagger} and add shift
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); }
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls]
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); }
}
}
#ifdef MOBIUS_EOFA_DPERP_LINALG
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
#endif
NAMESPACE_END(Grid);

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -23,13 +23,12 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef G5_HERMITIAN_LINOP
#define G5_HERMITIAN_LINOP
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////////////////////////////
// Wrap an already herm matrix
@ -46,12 +45,12 @@ public:
HermOp(in,out);
}
void OpDiag (const Field &in, Field &out) {
Field tmp(in._grid);
Field tmp(in.Grid());
_Mat.Mdiag(in,tmp);
G5R5(out,tmp);
}
void OpDir (const Field &in, Field &out,int dir,int disp) {
Field tmp(in._grid);
Field tmp(in.Grid());
_Mat.Mdir(in,tmp,dir,disp);
G5R5(out,tmp);
}
@ -68,7 +67,7 @@ public:
n2=real(dot);
}
void HermOp(const Field &in, Field &out){
Field tmp(in._grid);
Field tmp(in.Grid());
_Mat.M(in,tmp);
G5R5(out,tmp);
}
@ -80,7 +79,7 @@ class Gamma5HermitianLinearOperator : public LinearOperatorBase<Field> {
Matrix &_Mat;
Gamma g5;
public:
Gamma5HermitianLinearOperator(Matrix &Mat): _Mat(Mat), g5(Gamma::Algebra::Gamma5) {};
Gamma5HermitianLinearOperator(Matrix &Mat): _Mat(Mat), g5(Gamma::Algebra::Gamma5) {};
void Op (const Field &in, Field &out){
HermOp(in,out);
}
@ -88,12 +87,12 @@ public:
HermOp(in,out);
}
void OpDiag (const Field &in, Field &out) {
Field tmp(in._grid);
Field tmp(in.Grid());
_Mat.Mdiag(in,tmp);
out=g5*tmp;
}
void OpDir (const Field &in, Field &out,int dir,int disp) {
Field tmp(in._grid);
Field tmp(in.Grid());
_Mat.Mdir(in,tmp,dir,disp);
out=g5*tmp;
}
@ -110,12 +109,11 @@ public:
n2=real(dot);
}
void HermOp(const Field &in, Field &out){
Field tmp(in._grid);
Field tmp(in.Grid());
_Mat.M(in,tmp);
out=g5*tmp;
}
};
}}
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -26,31 +26,30 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template<class Impl>
CayleyFermion5D<Impl>::CayleyFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,const ImplParams &p) :
WilsonFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_M5,p),
mass(_mass)
{
}
template<class Impl>
CayleyFermion5D<Impl>::CayleyFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,const ImplParams &p) :
WilsonFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_M5,p),
mass(_mass)
{
}
///////////////////////////////////////////////////////////////
// Physical surface field utilities
@ -61,8 +60,8 @@ void CayleyFermion5D<Impl>::ExportPhysicalFermionSolution(const FermionField &so
int Ls = this->Ls;
FermionField tmp(this->FermionGrid());
tmp = solution5d;
conformable(solution5d._grid,this->FermionGrid());
conformable(exported4d._grid,this->GaugeGrid());
conformable(solution5d.Grid(),this->FermionGrid());
conformable(exported4d.Grid(),this->GaugeGrid());
axpby_ssp_pminus(tmp, 0., solution5d, 1., solution5d, 0, 0);
axpby_ssp_pplus (tmp, 1., tmp , 1., solution5d, 0, Ls-1);
ExtractSlice(exported4d, tmp, 0, 0);
@ -71,7 +70,7 @@ template<class Impl>
void CayleyFermion5D<Impl>::P(const FermionField &psi, FermionField &chi)
{
int Ls= this->Ls;
chi=zero;
chi=Zero();
for(int s=0;s<Ls;s++){
axpby_ssp_pminus(chi,1.0,chi,1.0,psi,s,s);
axpby_ssp_pplus (chi,1.0,chi,1.0,psi,s,(s+1)%Ls);
@ -81,7 +80,7 @@ template<class Impl>
void CayleyFermion5D<Impl>::Pdag(const FermionField &psi, FermionField &chi)
{
int Ls= this->Ls;
chi=zero;
chi=Zero();
for(int s=0;s<Ls;s++){
axpby_ssp_pminus(chi,1.0,chi,1.0,psi,s,s);
axpby_ssp_pplus (chi,1.0,chi,1.0,psi,s,(s-1+Ls)%Ls);
@ -93,8 +92,8 @@ void CayleyFermion5D<Impl>::ExportPhysicalFermionSource(const FermionField &solu
int Ls = this->Ls;
FermionField tmp(this->FermionGrid());
tmp = solution5d;
conformable(solution5d._grid,this->FermionGrid());
conformable(exported4d._grid,this->GaugeGrid());
conformable(solution5d.Grid(),this->FermionGrid());
conformable(exported4d.Grid(),this->GaugeGrid());
axpby_ssp_pplus (tmp, 0., solution5d, 1., solution5d, 0, 0);
axpby_ssp_pminus(tmp, 1., tmp , 1., solution5d, 0, Ls-1);
ExtractSlice(exported4d, tmp, 0, 0);
@ -104,9 +103,9 @@ void CayleyFermion5D<Impl>::ImportUnphysicalFermion(const FermionField &input4d,
{
int Ls = this->Ls;
FermionField tmp(this->FermionGrid());
conformable(imported5d._grid,this->FermionGrid());
conformable(input4d._grid ,this->GaugeGrid());
tmp = zero;
conformable(imported5d.Grid(),this->FermionGrid());
conformable(input4d.Grid() ,this->GaugeGrid());
tmp = Zero();
InsertSlice(input4d, tmp, 0 , 0);
InsertSlice(input4d, tmp, Ls-1, 0);
axpby_ssp_pplus (tmp, 0., tmp, 1., tmp, 0, 0);
@ -119,9 +118,9 @@ void CayleyFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &inpu
{
int Ls = this->Ls;
FermionField tmp(this->FermionGrid());
conformable(imported5d._grid,this->FermionGrid());
conformable(input4d._grid ,this->GaugeGrid());
tmp = zero;
conformable(imported5d.Grid(),this->FermionGrid());
conformable(input4d.Grid() ,this->GaugeGrid());
tmp = Zero();
InsertSlice(input4d, tmp, 0 , 0);
InsertSlice(input4d, tmp, Ls-1, 0);
axpby_ssp_pplus (tmp, 0., tmp, 1., tmp, 0, 0);
@ -156,7 +155,7 @@ void CayleyFermion5D<Impl>::DminusDag(const FermionField &psi, FermionField &chi
template<class Impl> void CayleyFermion5D<Impl>::CayleyReport(void)
{
this->Report();
std::vector<int> latt = GridDefaultLatt();
Coordinate latt = GridDefaultLatt();
RealD volume = this->Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
RealD NP = this->_FourDimGrid->_Nprocessors;
if ( M5Dcalls > 0 ) {
@ -164,10 +163,16 @@ template<class Impl> void CayleyFermion5D<Impl>::CayleyReport(void)
std::cout << GridLogMessage << "CayleyFermion5D Number of M5D Calls : " << M5Dcalls << std::endl;
std::cout << GridLogMessage << "CayleyFermion5D ComputeTime/Calls : " << M5Dtime / M5Dcalls << " us" << std::endl;
// Flops = 6.0*(Nc*Ns) *Ls*vol
RealD mflops = 6.0*12*volume*M5Dcalls/M5Dtime/2; // 2 for red black counting
// Flops = 10.0*(Nc*Ns) *Ls*vol
RealD mflops = 10.0*(Nc*Ns)*volume*M5Dcalls/M5Dtime/2; // 2 for red black counting
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
// Bytes = sizeof(Real) * (Nc*Ns*Nreim) * Ls * vol * (read+write) (/2 for red black counting)
// read = 2 ( psi[ss+s+1] and psi[ss+s-1] count as 1 )
// write = 1
RealD Gbytes = sizeof(Real) * (Nc*Ns*2) * volume * 3 /2. * 1.e-9;
std::cout << GridLogMessage << "Average bandwidth (GB/s) : " << Gbytes/M5Dtime*M5Dcalls*1.e6 << std::endl;
}
if ( MooeeInvCalls > 0 ) {
@ -175,11 +180,16 @@ template<class Impl> void CayleyFermion5D<Impl>::CayleyReport(void)
std::cout << GridLogMessage << "#### MooeeInv calls report " << std::endl;
std::cout << GridLogMessage << "CayleyFermion5D Number of MooeeInv Calls : " << MooeeInvCalls << std::endl;
std::cout << GridLogMessage << "CayleyFermion5D ComputeTime/Calls : " << MooeeInvTime / MooeeInvCalls << " us" << std::endl;
#ifdef GRID_NVCC
RealD mflops = ( -16.*Nc*Ns+this->Ls*(1.+18.*Nc*Ns) )*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black counting
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
#else
// Flops = MADD * Ls *Ls *4dvol * spin/colour/complex
RealD mflops = 2.0*24*this->Ls*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black counting
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
#endif
}
}
@ -198,18 +208,18 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<Coeff_t> diag (Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1]=mass;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] =mass;
Vector<Coeff_t> diag (Ls,1.0);
Vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1]=mass;
Vector<Coeff_t> lower(Ls,-1.0); lower[0] =mass;
M5D(psi,chi,chi,lower,diag,upper);
}
template<class Impl>
void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din)
{
int Ls=this->Ls;
std::vector<Coeff_t> diag = bs;
std::vector<Coeff_t> upper= cs;
std::vector<Coeff_t> lower= cs;
Vector<Coeff_t> diag = bs;
Vector<Coeff_t> upper= cs;
Vector<Coeff_t> lower= cs;
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5D(psi,psi,Din,lower,diag,upper);
@ -218,9 +228,9 @@ void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &D
template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<Coeff_t> diag = beo;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
Vector<Coeff_t> diag = beo;
Vector<Coeff_t> upper(Ls);
Vector<Coeff_t> lower(Ls);
for(int i=0;i<Ls;i++) {
upper[i]=-ceo[i];
lower[i]=-ceo[i];
@ -233,9 +243,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<Coeff_t> diag = bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
Vector<Coeff_t> diag = bee;
Vector<Coeff_t> upper(Ls);
Vector<Coeff_t> lower(Ls);
for(int i=0;i<Ls;i++) {
upper[i]=-cee[i];
lower[i]=-cee[i];
@ -248,9 +258,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<Coeff_t> diag = bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
Vector<Coeff_t> diag = bee;
Vector<Coeff_t> upper(Ls);
Vector<Coeff_t> lower(Ls);
for (int s=0;s<Ls;s++){
// Assemble the 5d matrix
@ -278,9 +288,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0);
std::vector<Coeff_t> lower(Ls,-1.0);
Vector<Coeff_t> diag(Ls,1.0);
Vector<Coeff_t> upper(Ls,-1.0);
Vector<Coeff_t> lower(Ls,-1.0);
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5Ddag(psi,chi,chi,lower,diag,upper);
@ -290,9 +300,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din)
{
int Ls=this->Ls;
std::vector<Coeff_t> diag =bs;
std::vector<Coeff_t> upper=cs;
std::vector<Coeff_t> lower=cs;
Vector<Coeff_t> diag =bs;
Vector<Coeff_t> upper=cs;
Vector<Coeff_t> lower=cs;
for (int s=0;s<Ls;s++){
if ( s== 0 ) {
@ -315,9 +325,7 @@ void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField
template<class Impl>
RealD CayleyFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
FermionField Din(psi._grid);
FermionField Din(psi.Grid());
// Assemble Din
Meooe5D(psi,Din);
@ -337,7 +345,7 @@ RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
//D1+ D1- P- -> D1+^dag P+ D2-^dag
//D2- P+ D2+ P-D1-^dag D2+dag
FermionField Din(psi._grid);
FermionField Din(psi.Grid());
// Apply Dw
this->DW(psi,Din,DaggerYes);
@ -353,11 +361,9 @@ RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
template<class Impl>
void CayleyFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
Meooe5D(psi,this->tmp());
if ( psi.checkerboard == Odd ) {
if ( psi.Checkerboard() == Odd ) {
this->DhopEO(this->tmp(),chi,DaggerNo);
} else {
this->DhopOE(this->tmp(),chi,DaggerNo);
@ -368,7 +374,7 @@ template<class Impl>
void CayleyFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
{
// Apply 4d dslash
if ( psi.checkerboard == Odd ) {
if ( psi.Checkerboard() == Odd ) {
this->DhopEO(psi,this->tmp(),DaggerYes);
} else {
this->DhopOE(psi,this->tmp(),DaggerYes);
@ -386,7 +392,7 @@ void CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,in
template<class Impl>
void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
FermionField Din(V._grid);
FermionField Din(V.Grid());
if ( dag == DaggerNo ) {
// U d/du [D_w D5] V = U d/du DW D5 V
@ -401,7 +407,7 @@ void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const
template<class Impl>
void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
FermionField Din(V._grid);
FermionField Din(V.Grid());
if ( dag == DaggerNo ) {
// U d/du [D_w D5] V = U d/du DW D5 V
@ -416,7 +422,7 @@ void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const
template<class Impl>
void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
FermionField Din(V._grid);
FermionField Din(V.Grid());
if ( dag == DaggerNo ) {
// U d/du [D_w D5] V = U d/du DW D5 V
@ -433,7 +439,7 @@ void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
{
std::vector<Coeff_t> gamma(this->Ls);
Vector<Coeff_t> gamma(this->Ls);
for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
SetCoefficientsInternal(1.0,gamma,b,c);
}
@ -441,13 +447,13 @@ void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,Re
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
{
std::vector<Coeff_t> gamma(this->Ls);
Vector<Coeff_t> gamma(this->Ls);
for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
SetCoefficientsInternal(zolo_hi,gamma,b,c);
}
//Zolo
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c)
void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,Vector<Coeff_t> & gamma,RealD b,RealD c)
{
int Ls=this->Ls;
@ -568,12 +574,12 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
dee[Ls-1] += delta_d;
}
int inv=1;
this->MooeeInternalCompute(0,inv,MatpInv,MatmInv);
this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag);
// int inv=1;
// this->MooeeInternalCompute(0,inv,MatpInv,MatmInv);
// this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag);
}
#if 0
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInternalCompute(int dag, int inv,
Vector<iSinglet<Simd> > & Matp,
@ -628,35 +634,32 @@ void CayleyFermion5D<Impl>::MooeeInternalCompute(int dag, int inv,
Matm.resize(Ls*LLs);
for(int s2=0;s2<Ls;s2++){
for(int s1=0;s1<LLs;s1++){
int istride = LLs;
int ostride = 1;
Simd Vp;
Simd Vm;
scalar_type *sp = (scalar_type *)&Vp;
scalar_type *sm = (scalar_type *)&Vm;
for(int l=0;l<Nsimd;l++){
if ( switcheroo<Coeff_t>::iscomplex() ) {
sp[l] = PplusMat (l*istride+s1*ostride,s2);
sm[l] = PminusMat(l*istride+s1*ostride,s2);
} else {
// if real
scalar_type tmp;
tmp = PplusMat (l*istride+s1*ostride,s2);
sp[l] = scalar_type(tmp.real(),tmp.real());
tmp = PminusMat(l*istride+s1*ostride,s2);
sm[l] = scalar_type(tmp.real(),tmp.real());
for(int s1=0;s1<LLs;s1++){
int istride = LLs;
int ostride = 1;
Simd Vp;
Simd Vm;
scalar_type *sp = (scalar_type *)&Vp;
scalar_type *sm = (scalar_type *)&Vm;
for(int l=0;l<Nsimd;l++){
if ( switcheroo<Coeff_t>::iscomplex() ) {
sp[l] = PplusMat (l*istride+s1*ostride,s2);
sm[l] = PminusMat(l*istride+s1*ostride,s2);
} else {
// if real
scalar_type tmp;
tmp = PplusMat (l*istride+s1*ostride,s2);
sp[l] = scalar_type(tmp.real(),tmp.real());
tmp = PminusMat(l*istride+s1*ostride,s2);
sm[l] = scalar_type(tmp.real(),tmp.real());
}
}
}
Matp[LLs*s2+s1] = Vp;
Matm[LLs*s2+s1] = Vm;
}}
Matp[LLs*s2+s1] = Vp;
Matm[LLs*s2+s1] = Vm;
}}
}
FermOpTemplateInstantiate(CayleyFermion5D);
GparityFermOpTemplateInstantiate(CayleyFermion5D);
}}
#endif
NAMESPACE_END(Grid);

View File

@ -0,0 +1,235 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
NAMESPACE_BEGIN(Grid);
// Pminus fowards
// Pplus backwards..
template<class Impl>
void
CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
const FermionField &phi_i,
FermionField &chi_i,
Vector<Coeff_t> &lower,
Vector<Coeff_t> &diag,
Vector<Coeff_t> &upper)
{
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
int Ls =this->Ls;
// 10 = 3 complex mult + 2 complex add
// Flops = 10.0*(Nc*Ns) *Ls*vol (/2 for red black counting)
M5Dcalls++;
M5Dtime-=usecond();
uint64_t nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss= sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1, tmp2;
for(int s=0;s<Ls;s++){
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5m(tmp1,psi(idx_u));
spProj5p(tmp2,psi(idx_l));
coalescedWrite(chi[ss+s],diag[s]*phi(ss+s)+upper[s]*tmp1+lower[s]*tmp2);
}
});
M5Dtime+=usecond();
}
template<class Impl>
void
CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
const FermionField &phi_i,
FermionField &chi_i,
Vector<Coeff_t> &lower,
Vector<Coeff_t> &diag,
Vector<Coeff_t> &upper)
{
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
int Ls=this->Ls;
// Flops = 6.0*(Nc*Ns) *Ls*vol
M5Dcalls++;
M5Dtime-=usecond();
uint64_t nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss=sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1,tmp2;
for(int s=0;s<Ls;s++){
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5p(tmp1,psi(idx_u));
spProj5m(tmp2,psi(idx_l));
coalescedWrite(chi[ss+s],diag[s]*phi(ss+s)+upper[s]*tmp1+lower[s]*tmp2);
}
});
M5Dtime+=usecond();
}
template<class Impl>
void
CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi_i, FermionField &chi_i)
{
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
auto psi = psi_i.View();
auto chi = chi_i.View();
int Ls=this->Ls;
auto plee = & lee [0];
auto pdee = & dee [0];
auto puee = & uee [0];
auto pleem = & leem[0];
auto pueem = & ueem[0];
MooeeInvCalls++;
MooeeInvTime-=usecond();
uint64_t nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss=sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp;
// flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops
// Apply (L^{\prime})^{-1}
coalescedWrite(chi[ss],psi(ss)); // chi[0]=psi[0]
for(int s=1;s<Ls;s++){
spProj5p(tmp,chi(ss+s-1));
coalescedWrite(chi[ss+s] , psi(ss+s)-plee[s-1]*tmp);
}
// L_m^{-1}
for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -pleem[s]P_- chi
spProj5m(tmp,chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pleem[s]*tmp);
}
// U_m^{-1} D^{-1}
for (int s=0;s<Ls-1;s++){
// Chi[s] + 1/d chi[s]
spProj5p(tmp,chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s)-(pueem[s]/pdee[Ls-1])*tmp);
}
coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1));
// Apply U^{-1}
for (int s=Ls-2;s>=0;s--){
spProj5m(tmp,chi(ss+s+1));
coalescedWrite(chi[ss+s], chi(ss+s) - puee[s]*tmp);
}
});
MooeeInvTime+=usecond();
}
template<class Impl>
void
CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi_i, FermionField &chi_i)
{
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
int Ls=this->Ls;
auto psi = psi_i.View();
auto chi = chi_i.View();
auto plee = & lee [0];
auto pdee = & dee [0];
auto puee = & uee [0];
auto pleem = & leem[0];
auto pueem = & ueem[0];
assert(psi.Checkerboard() == psi.Checkerboard());
MooeeInvCalls++;
MooeeInvTime-=usecond();
uint64_t nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss=sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp;
// Apply (U^{\prime})^{-dagger}
coalescedWrite(chi[ss],psi(ss));
for (int s=1;s<Ls;s++){
spProj5m(tmp,chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s)-conjugate(puee[s-1])*tmp);
}
// U_m^{-\dagger}
for (int s=0;s<Ls-1;s++){
spProj5p(tmp,chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - conjugate(pueem[s])*tmp);
}
// L_m^{-\dagger} D^{-dagger}
for (int s=0;s<Ls-1;s++){
spProj5m(tmp,chi(ss+Ls-1));
coalescedWrite(chi[ss+s], conjugate(1.0/pdee[s])*chi(ss+s)-conjugate(pleem[s]/pdee[Ls-1])*tmp);
}
coalescedWrite(chi[ss+Ls-1], conjugate(1.0/pdee[Ls-1])*chi(ss+Ls-1));
// Apply L^{-dagger}
for (int s=Ls-2;s>=0;s--){
spProj5p(tmp,chi(ss+s+1));
coalescedWrite(chi[ss+s], chi(ss+s) - conjugate(plee[s])*tmp);
}
});
MooeeInvTime+=usecond();
}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,831 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
NAMESPACE_BEGIN(Grid);
/*
* Dense matrix versions of routines
*/
template<class Impl>
void
CayleyFermion5D<Impl>::MooeeInvDag(const FermionField &psi, FermionField &chi)
{
EnableIf<Impl::LsVectorised&&EnableBool,int> sfinae=0;
this->MooeeInternal(psi,chi,DaggerYes,InverseYes);
}
template<class Impl>
void
CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
{
EnableIf<Impl::LsVectorised&&EnableBool,int> sfinae=0;
this->MooeeInternal(psi,chi,DaggerNo,InverseYes);
}
template<class Impl>
void
CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
const FermionField &phi_i,
FermionField &chi_i,
Vector<Coeff_t> &lower,
Vector<Coeff_t> &diag,
Vector<Coeff_t> &upper)
{
EnableIf<Impl::LsVectorised&&EnableBool,int> sfinae=0;
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
const int nsimd= Simd::Nsimd();
Vector<iSinglet<Simd> > u(LLs);
Vector<iSinglet<Simd> > l(LLs);
Vector<iSinglet<Simd> > d(LLs);
assert(Ls/LLs==nsimd);
assert(phi.Checkerboard() == psi.Checkerboard());
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type * u_p = (scalar_type *)&u[0];
scalar_type * l_p = (scalar_type *)&l[0];
scalar_type * d_p = (scalar_type *)&d[0];
for(int o=0;o<LLs;o++){ // outer
for(int i=0;i<nsimd;i++){ //inner
int s = o+i*LLs;
int ss = o*nsimd+i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
M5Dcalls++;
M5Dtime-=usecond();
assert(Nc==3);
thread_loop( (int ss=0;ss<grid->oSites();ss+=LLs),{ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0;v<LLs;v++){
int vp=(v+1)%LLs;
int vm=(v+LLs-1)%LLs;
spProj5m(hp,psi[ss+vp]);
spProj5p(hm,psi[ss+vm]);
if ( vp<=v ) rotate(hp,hp,1);
if ( vm>=v ) rotate(hm,hm,nsimd-1);
hp=0.5*hp;
hm=0.5*hm;
spRecon5m(fp,hp);
spRecon5p(fm,hm);
chi[ss+v] = d[v]*phi[ss+v];
chi[ss+v] = chi[ss+v] +u[v]*fp;
chi[ss+v] = chi[ss+v] +l[v]*fm;
}
#else
for(int v=0;v<LLs;v++){
vprefetch(psi[ss+v+LLs]);
int vp= (v==LLs-1) ? 0 : v+1;
int vm= (v==0 ) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(2)(0);
Simd hp_01 = psi[ss+vp]()(2)(1);
Simd hp_02 = psi[ss+vp]()(2)(2);
Simd hp_10 = psi[ss+vp]()(3)(0);
Simd hp_11 = psi[ss+vp]()(3)(1);
Simd hp_12 = psi[ss+vp]()(3)(2);
Simd hm_00 = psi[ss+vm]()(0)(0);
Simd hm_01 = psi[ss+vm]()(0)(1);
Simd hm_02 = psi[ss+vm]()(0)(2);
Simd hm_10 = psi[ss+vm]()(1)(0);
Simd hm_11 = psi[ss+vm]()(1)(1);
Simd hm_12 = psi[ss+vm]()(1)(2);
if ( vp<=v ) {
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if ( vm>=v ) {
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
// Can force these to real arithmetic and save 2x.
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_12);
vstream(chi[ss+v]()(0)(0),p_00);
vstream(chi[ss+v]()(0)(1),p_01);
vstream(chi[ss+v]()(0)(2),p_02);
vstream(chi[ss+v]()(1)(0),p_10);
vstream(chi[ss+v]()(1)(1),p_11);
vstream(chi[ss+v]()(1)(2),p_12);
vstream(chi[ss+v]()(2)(0),p_20);
vstream(chi[ss+v]()(2)(1),p_21);
vstream(chi[ss+v]()(2)(2),p_22);
vstream(chi[ss+v]()(3)(0),p_30);
vstream(chi[ss+v]()(3)(1),p_31);
vstream(chi[ss+v]()(3)(2),p_32);
}
#endif
});
M5Dtime+=usecond();
}
template<class Impl>
void
CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
const FermionField &phi_i,
FermionField &chi_i,
Vector<Coeff_t> &lower,
Vector<Coeff_t> &diag,
Vector<Coeff_t> &upper)
{
EnableIf<Impl::LsVectorised&&EnableBool,int> sfinae=0;
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
auto psi=psi_i.View();
auto phi=phi_i.View();
auto chi=chi_i.View();
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
int nsimd= Simd::Nsimd();
Vector<iSinglet<Simd> > u(LLs);
Vector<iSinglet<Simd> > l(LLs);
Vector<iSinglet<Simd> > d(LLs);
assert(Ls/LLs==nsimd);
assert(phi.Checkerboard() == psi.Checkerboard());
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type * u_p = (scalar_type *)&u[0];
scalar_type * l_p = (scalar_type *)&l[0];
scalar_type * d_p = (scalar_type *)&d[0];
for(int o=0;o<LLs;o++){ // outer
for(int i=0;i<nsimd;i++){ //inner
int s = o+i*LLs;
int ss = o*nsimd+i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
M5Dcalls++;
M5Dtime-=usecond();
thread_loop( (int ss=0;ss<grid->oSites();ss+=LLs),{ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0;v<LLs;v++){
int vp=(v+1)%LLs;
int vm=(v+LLs-1)%LLs;
spProj5p(hp,psi[ss+vp]);
spProj5m(hm,psi[ss+vm]);
if ( vp<=v ) rotate(hp,hp,1);
if ( vm>=v ) rotate(hm,hm,nsimd-1);
hp=hp*0.5;
hm=hm*0.5;
spRecon5p(fp,hp);
spRecon5m(fm,hm);
chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
chi[ss+v] = chi[ss+v] +l[v]*fm;
}
#else
for(int v=0;v<LLs;v++){
vprefetch(psi[ss+v+LLs]);
int vp= (v==LLs-1) ? 0 : v+1;
int vm= (v==0 ) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(0)(0);
Simd hp_01 = psi[ss+vp]()(0)(1);
Simd hp_02 = psi[ss+vp]()(0)(2);
Simd hp_10 = psi[ss+vp]()(1)(0);
Simd hp_11 = psi[ss+vp]()(1)(1);
Simd hp_12 = psi[ss+vp]()(1)(2);
Simd hm_00 = psi[ss+vm]()(2)(0);
Simd hm_01 = psi[ss+vm]()(2)(1);
Simd hm_02 = psi[ss+vm]()(2)(2);
Simd hm_10 = psi[ss+vm]()(3)(0);
Simd hm_11 = psi[ss+vm]()(3)(1);
Simd hm_12 = psi[ss+vm]()(3)(2);
if ( vp<=v ) {
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if ( vm>=v ) {
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_12);
vstream(chi[ss+v]()(0)(0),p_00);
vstream(chi[ss+v]()(0)(1),p_01);
vstream(chi[ss+v]()(0)(2),p_02);
vstream(chi[ss+v]()(1)(0),p_10);
vstream(chi[ss+v]()(1)(1),p_11);
vstream(chi[ss+v]()(1)(2),p_12);
vstream(chi[ss+v]()(2)(0),p_20);
vstream(chi[ss+v]()(2)(1),p_21);
vstream(chi[ss+v]()(2)(2),p_22);
vstream(chi[ss+v]()(3)(0),p_30);
vstream(chi[ss+v]()(3)(1),p_31);
vstream(chi[ss+v]()(3)(2),p_32);
}
#endif
});
M5Dtime+=usecond();
}
#ifdef AVX512
#include <simd/Intel512common.h>
#include <simd/Intel512avx.h>
#include <simd/Intel512single.h>
#endif
template<class Impl>
void
CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi_i, FermionField &chi_i,
int LLs, int site,
Vector<iSinglet<Simd> > &Matp,
Vector<iSinglet<Simd> > &Matm)
{
EnableIf<Impl::LsVectorised&&EnableBool,int> sfinae=0;
auto psi = psi_i.View();
auto chi = chi_i.View();
#ifndef AVX512
{
SiteHalfSpinor BcastP;
SiteHalfSpinor BcastM;
SiteHalfSpinor SiteChiP;
SiteHalfSpinor SiteChiM;
// Ls*Ls * 2 * 12 * vol flops
for(int s1=0;s1<LLs;s1++){
for(int s2=0;s2<LLs;s2++){
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
int s=s2+l*LLs;
int lex=s2+LLs*site;
if ( s2==0 && l==0) {
SiteChiP=Zero();
SiteChiM=Zero();
}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vbroadcast(BcastP()(sp )(co),psi[lex]()(sp)(co),l);
}}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vbroadcast(BcastM()(sp )(co),psi[lex]()(sp+2)(co),l);
}}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
SiteChiP()(sp)(co)=real_madd(Matp[LLs*s+s1]()()(),BcastP()(sp)(co),SiteChiP()(sp)(co)); // 1100 us.
SiteChiM()(sp)(co)=real_madd(Matm[LLs*s+s1]()()(),BcastM()(sp)(co),SiteChiM()(sp)(co)); // each found by commenting out
}}
}}
{
int lex = s1+LLs*site;
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
}}
}
}
}
#else
{
// pointers
// MASK_REGS;
#define Chi_00 %%zmm1
#define Chi_01 %%zmm2
#define Chi_02 %%zmm3
#define Chi_10 %%zmm4
#define Chi_11 %%zmm5
#define Chi_12 %%zmm6
#define Chi_20 %%zmm7
#define Chi_21 %%zmm8
#define Chi_22 %%zmm9
#define Chi_30 %%zmm10
#define Chi_31 %%zmm11
#define Chi_32 %%zmm12
#define BCAST0 %%zmm13
#define BCAST1 %%zmm14
#define BCAST2 %%zmm15
#define BCAST3 %%zmm16
#define BCAST4 %%zmm17
#define BCAST5 %%zmm18
#define BCAST6 %%zmm19
#define BCAST7 %%zmm20
#define BCAST8 %%zmm21
#define BCAST9 %%zmm22
#define BCAST10 %%zmm23
#define BCAST11 %%zmm24
int incr=LLs*LLs*sizeof(iSinglet<Simd>);
for(int s1=0;s1<LLs;s1++){
for(int s2=0;s2<LLs;s2++){
int lex=s2+LLs*site;
uint64_t a0 = (uint64_t)&Matp[LLs*s2+s1]; // should be cacheable
uint64_t a1 = (uint64_t)&Matm[LLs*s2+s1];
uint64_t a2 = (uint64_t)&psi[lex];
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
if ( (s2+l)==0 ) {
asm (
VPREFETCH1(0,%2) VPREFETCH1(0,%1)
VPREFETCH1(12,%2) VPREFETCH1(13,%2)
VPREFETCH1(14,%2) VPREFETCH1(15,%2)
VBCASTCDUP(0,%2,BCAST0)
VBCASTCDUP(1,%2,BCAST1)
VBCASTCDUP(2,%2,BCAST2)
VBCASTCDUP(3,%2,BCAST3)
VBCASTCDUP(4,%2,BCAST4) VMULMEM (0,%0,BCAST0,Chi_00)
VBCASTCDUP(5,%2,BCAST5) VMULMEM (0,%0,BCAST1,Chi_01)
VBCASTCDUP(6,%2,BCAST6) VMULMEM (0,%0,BCAST2,Chi_02)
VBCASTCDUP(7,%2,BCAST7) VMULMEM (0,%0,BCAST3,Chi_10)
VBCASTCDUP(8,%2,BCAST8) VMULMEM (0,%0,BCAST4,Chi_11)
VBCASTCDUP(9,%2,BCAST9) VMULMEM (0,%0,BCAST5,Chi_12)
VBCASTCDUP(10,%2,BCAST10) VMULMEM (0,%1,BCAST6,Chi_20)
VBCASTCDUP(11,%2,BCAST11) VMULMEM (0,%1,BCAST7,Chi_21)
VMULMEM (0,%1,BCAST8,Chi_22)
VMULMEM (0,%1,BCAST9,Chi_30)
VMULMEM (0,%1,BCAST10,Chi_31)
VMULMEM (0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
} else {
asm (
VBCASTCDUP(0,%2,BCAST0) VMADDMEM (0,%0,BCAST0,Chi_00)
VBCASTCDUP(1,%2,BCAST1) VMADDMEM (0,%0,BCAST1,Chi_01)
VBCASTCDUP(2,%2,BCAST2) VMADDMEM (0,%0,BCAST2,Chi_02)
VBCASTCDUP(3,%2,BCAST3) VMADDMEM (0,%0,BCAST3,Chi_10)
VBCASTCDUP(4,%2,BCAST4) VMADDMEM (0,%0,BCAST4,Chi_11)
VBCASTCDUP(5,%2,BCAST5) VMADDMEM (0,%0,BCAST5,Chi_12)
VBCASTCDUP(6,%2,BCAST6) VMADDMEM (0,%1,BCAST6,Chi_20)
VBCASTCDUP(7,%2,BCAST7) VMADDMEM (0,%1,BCAST7,Chi_21)
VBCASTCDUP(8,%2,BCAST8) VMADDMEM (0,%1,BCAST8,Chi_22)
VBCASTCDUP(9,%2,BCAST9) VMADDMEM (0,%1,BCAST9,Chi_30)
VBCASTCDUP(10,%2,BCAST10) VMADDMEM (0,%1,BCAST10,Chi_31)
VBCASTCDUP(11,%2,BCAST11) VMADDMEM (0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
}
a0 = a0+incr;
a1 = a1+incr;
a2 = a2+sizeof(typename Simd::scalar_type);
}}
{
int lexa = s1+LLs*site;
asm (
VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01) VSTORE(2 ,%0,Chi_02)
VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11) VSTORE(5 ,%0,Chi_12)
VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21) VSTORE(8 ,%0,Chi_22)
VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31) VSTORE(11,%0,Chi_32)
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
}
}
}
#undef Chi_00
#undef Chi_01
#undef Chi_02
#undef Chi_10
#undef Chi_11
#undef Chi_12
#undef Chi_20
#undef Chi_21
#undef Chi_22
#undef Chi_30
#undef Chi_31
#undef Chi_32
#undef BCAST0
#undef BCAST1
#undef BCAST2
#undef BCAST3
#undef BCAST4
#undef BCAST5
#undef BCAST6
#undef BCAST7
#undef BCAST8
#undef BCAST9
#undef BCAST10
#undef BCAST11
#endif
};
// Z-mobius version
template<class Impl>
void
CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi_i, FermionField &chi_i,
int LLs, int site, Vector<iSinglet<Simd> > &Matp, Vector<iSinglet<Simd> > &Matm)
{
EnableIf<Impl::LsVectorised,int> sfinae=0;
#ifndef AVX512
{
auto psi = psi_i.View();
auto chi = chi_i.View();
SiteHalfSpinor BcastP;
SiteHalfSpinor BcastM;
SiteHalfSpinor SiteChiP;
SiteHalfSpinor SiteChiM;
// Ls*Ls * 2 * 12 * vol flops
for(int s1=0;s1<LLs;s1++){
for(int s2=0;s2<LLs;s2++){
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
int s=s2+l*LLs;
int lex=s2+LLs*site;
if ( s2==0 && l==0) {
SiteChiP=Zero();
SiteChiM=Zero();
}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vbroadcast(BcastP()(sp )(co),psi[lex]()(sp)(co),l);
}}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vbroadcast(BcastM()(sp )(co),psi[lex]()(sp+2)(co),l);
}}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
SiteChiP()(sp)(co)=SiteChiP()(sp)(co)+ Matp[LLs*s+s1]()()()*BcastP()(sp)(co);
SiteChiM()(sp)(co)=SiteChiM()(sp)(co)+ Matm[LLs*s+s1]()()()*BcastM()(sp)(co);
}}
}}
{
int lex = s1+LLs*site;
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
}}
}
}
}
#else
{
auto psi = psi_i.View();
auto chi = chi_i.View();
// pointers
// MASK_REGS;
#define Chi_00 %zmm0
#define Chi_01 %zmm1
#define Chi_02 %zmm2
#define Chi_10 %zmm3
#define Chi_11 %zmm4
#define Chi_12 %zmm5
#define Chi_20 %zmm6
#define Chi_21 %zmm7
#define Chi_22 %zmm8
#define Chi_30 %zmm9
#define Chi_31 %zmm10
#define Chi_32 %zmm11
#define pChi_00 %%zmm0
#define pChi_01 %%zmm1
#define pChi_02 %%zmm2
#define pChi_10 %%zmm3
#define pChi_11 %%zmm4
#define pChi_12 %%zmm5
#define pChi_20 %%zmm6
#define pChi_21 %%zmm7
#define pChi_22 %%zmm8
#define pChi_30 %%zmm9
#define pChi_31 %%zmm10
#define pChi_32 %%zmm11
#define BCAST_00 %zmm12
#define SHUF_00 %zmm13
#define BCAST_01 %zmm14
#define SHUF_01 %zmm15
#define BCAST_02 %zmm16
#define SHUF_02 %zmm17
#define BCAST_10 %zmm18
#define SHUF_10 %zmm19
#define BCAST_11 %zmm20
#define SHUF_11 %zmm21
#define BCAST_12 %zmm22
#define SHUF_12 %zmm23
#define Mp %zmm24
#define Mps %zmm25
#define Mm %zmm26
#define Mms %zmm27
#define N 8
int incr=LLs*LLs*sizeof(iSinglet<Simd>);
for(int s1=0;s1<LLs;s1++){
for(int s2=0;s2<LLs;s2++){
int lex=s2+LLs*site;
uint64_t a0 = (uint64_t)&Matp[LLs*s2+s1]; // should be cacheable
uint64_t a1 = (uint64_t)&Matm[LLs*s2+s1];
uint64_t a2 = (uint64_t)&psi[lex];
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
if ( (s2+l)==0 ) {
LOAD64(%r8,a0);
LOAD64(%r9,a1);
LOAD64(%r10,a2);
asm (
VLOAD(0,%r8,Mp)// i r
VLOAD(0,%r9,Mm)
VSHUF(Mp,Mps) // r i
VSHUF(Mm,Mms)
VPREFETCH1(12,%r10) VPREFETCH1(13,%r10)
VPREFETCH1(14,%r10) VPREFETCH1(15,%r10)
VMULIDUP(0*N,%r10,Mps,Chi_00)
VMULIDUP(1*N,%r10,Mps,Chi_01)
VMULIDUP(2*N,%r10,Mps,Chi_02)
VMULIDUP(3*N,%r10,Mps,Chi_10)
VMULIDUP(4*N,%r10,Mps,Chi_11)
VMULIDUP(5*N,%r10,Mps,Chi_12)
VMULIDUP(6*N ,%r10,Mms,Chi_20)
VMULIDUP(7*N ,%r10,Mms,Chi_21)
VMULIDUP(8*N ,%r10,Mms,Chi_22)
VMULIDUP(9*N ,%r10,Mms,Chi_30)
VMULIDUP(10*N,%r10,Mms,Chi_31)
VMULIDUP(11*N,%r10,Mms,Chi_32)
VMADDSUBRDUP(0*N,%r10,Mp,Chi_00)
VMADDSUBRDUP(1*N,%r10,Mp,Chi_01)
VMADDSUBRDUP(2*N,%r10,Mp,Chi_02)
VMADDSUBRDUP(3*N,%r10,Mp,Chi_10)
VMADDSUBRDUP(4*N,%r10,Mp,Chi_11)
VMADDSUBRDUP(5*N,%r10,Mp,Chi_12)
VMADDSUBRDUP(6*N ,%r10,Mm,Chi_20)
VMADDSUBRDUP(7*N ,%r10,Mm,Chi_21)
VMADDSUBRDUP(8*N ,%r10,Mm,Chi_22)
VMADDSUBRDUP(9*N ,%r10,Mm,Chi_30)
VMADDSUBRDUP(10*N,%r10,Mm,Chi_31)
VMADDSUBRDUP(11*N,%r10,Mm,Chi_32)
);
} else {
LOAD64(%r8,a0);
LOAD64(%r9,a1);
LOAD64(%r10,a2);
asm (
VLOAD(0,%r8,Mp)
VSHUF(Mp,Mps)
VLOAD(0,%r9,Mm)
VSHUF(Mm,Mms)
VMADDSUBIDUP(0*N,%r10,Mps,Chi_00) // Mri * Pii +- Cir
VMADDSUBIDUP(1*N,%r10,Mps,Chi_01)
VMADDSUBIDUP(2*N,%r10,Mps,Chi_02)
VMADDSUBIDUP(3*N,%r10,Mps,Chi_10)
VMADDSUBIDUP(4*N,%r10,Mps,Chi_11)
VMADDSUBIDUP(5*N,%r10,Mps,Chi_12)
VMADDSUBIDUP(6 *N,%r10,Mms,Chi_20)
VMADDSUBIDUP(7 *N,%r10,Mms,Chi_21)
VMADDSUBIDUP(8 *N,%r10,Mms,Chi_22)
VMADDSUBIDUP(9 *N,%r10,Mms,Chi_30)
VMADDSUBIDUP(10*N,%r10,Mms,Chi_31)
VMADDSUBIDUP(11*N,%r10,Mms,Chi_32)
VMADDSUBRDUP(0*N,%r10,Mp,Chi_00) // Cir = Mir * Prr +- ( Mri * Pii +- Cir)
VMADDSUBRDUP(1*N,%r10,Mp,Chi_01) // Ci = MiPr + Ci + MrPi ; Cr = MrPr - ( MiPi - Cr)
VMADDSUBRDUP(2*N,%r10,Mp,Chi_02)
VMADDSUBRDUP(3*N,%r10,Mp,Chi_10)
VMADDSUBRDUP(4*N,%r10,Mp,Chi_11)
VMADDSUBRDUP(5*N,%r10,Mp,Chi_12)
VMADDSUBRDUP(6 *N,%r10,Mm,Chi_20)
VMADDSUBRDUP(7 *N,%r10,Mm,Chi_21)
VMADDSUBRDUP(8 *N,%r10,Mm,Chi_22)
VMADDSUBRDUP(9 *N,%r10,Mm,Chi_30)
VMADDSUBRDUP(10*N,%r10,Mm,Chi_31)
VMADDSUBRDUP(11*N,%r10,Mm,Chi_32)
);
}
a0 = a0+incr;
a1 = a1+incr;
a2 = a2+sizeof(typename Simd::scalar_type);
}}
{
int lexa = s1+LLs*site;
/*
SiteSpinor tmp;
asm (
VSTORE(0,%0,pChi_00) VSTORE(1 ,%0,pChi_01) VSTORE(2 ,%0,pChi_02)
VSTORE(3,%0,pChi_10) VSTORE(4 ,%0,pChi_11) VSTORE(5 ,%0,pChi_12)
VSTORE(6,%0,pChi_20) VSTORE(7 ,%0,pChi_21) VSTORE(8 ,%0,pChi_22)
VSTORE(9,%0,pChi_30) VSTORE(10,%0,pChi_31) VSTORE(11,%0,pChi_32)
: : "r" ((uint64_t)&tmp) : "memory" );
*/
asm (
VSTORE(0,%0,pChi_00) VSTORE(1 ,%0,pChi_01) VSTORE(2 ,%0,pChi_02)
VSTORE(3,%0,pChi_10) VSTORE(4 ,%0,pChi_11) VSTORE(5 ,%0,pChi_12)
VSTORE(6,%0,pChi_20) VSTORE(7 ,%0,pChi_21) VSTORE(8 ,%0,pChi_22)
VSTORE(9,%0,pChi_30) VSTORE(10,%0,pChi_31) VSTORE(11,%0,pChi_32)
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
// if ( 1 || (site==0) ) {
// std::cout<<site << " s1 "<<s1<<"\n\t"<<tmp << "\n't" << chi[lexa] <<"\n\t"<<tmp-chi[lexa]<<std::endl;
// }
}
}
}
#undef Chi_00
#undef Chi_01
#undef Chi_02
#undef Chi_10
#undef Chi_11
#undef Chi_12
#undef Chi_20
#undef Chi_21
#undef Chi_22
#undef Chi_30
#undef Chi_31
#undef Chi_32
#undef BCAST0
#undef BCAST1
#undef BCAST2
#undef BCAST3
#undef BCAST4
#undef BCAST5
#undef BCAST6
#undef BCAST7
#undef BCAST8
#undef BCAST9
#undef BCAST10
#undef BCAST11
#endif
};
template<class Impl>
void
CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
{
EnableIf<Impl::LsVectorised,int> sfinae=0;
chi.Checkerboard()=psi.Checkerboard();
int Ls=this->Ls;
int LLs = psi.Grid()->_rdimensions[0];
int vol = psi.Grid()->oSites()/LLs;
Vector<iSinglet<Simd> > Matp;
Vector<iSinglet<Simd> > Matm;
Vector<iSinglet<Simd> > *_Matp;
Vector<iSinglet<Simd> > *_Matm;
// MooeeInternalCompute(dag,inv,Matp,Matm);
if ( inv && dag ) {
_Matp = &MatpInvDag;
_Matm = &MatmInvDag;
}
if ( inv && (!dag) ) {
_Matp = &MatpInv;
_Matm = &MatmInv;
}
if ( !inv ) {
MooeeInternalCompute(dag,inv,Matp,Matm);
_Matp = &Matp;
_Matm = &Matm;
}
assert(_Matp->size()==Ls*LLs);
MooeeInvCalls++;
MooeeInvTime-=usecond();
if ( switcheroo<Coeff_t>::iscomplex() ) {
thread_loop( (auto site=0;site<vol;site++),{
MooeeInternalZAsm(psi,chi,LLs,site,*_Matp,*_Matm);
});
} else {
thread_loop( (auto site=0;site<vol;site++),{
MooeeInternalAsm(psi,chi,LLs,site,*_Matp,*_Matm);
});
}
MooeeInvTime+=usecond();
}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,321 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/ContinuedFractionFermion5D.h>
#pragma once
NAMESPACE_BEGIN(Grid);
template<class Impl>
void ContinuedFractionFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale)
{
SetCoefficientsZolotarev(1.0/scale,zdata);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata)
{
// How to check Ls matches??
// std::cout<<GridLogMessage << Ls << " Ls"<<std::endl;
// std::cout<<GridLogMessage << zdata->n << " - n"<<std::endl;
// std::cout<<GridLogMessage << zdata->da << " -da "<<std::endl;
// std::cout<<GridLogMessage << zdata->db << " -db"<<std::endl;
// std::cout<<GridLogMessage << zdata->dn << " -dn"<<std::endl;
// std::cout<<GridLogMessage << zdata->dd << " -dd"<<std::endl;
int Ls = this->Ls;
assert(zdata->db==Ls);// Beta has Ls coeffs
R=(1+this->mass)/(1-this->mass);
Beta.resize(Ls);
cc.resize(Ls);
cc_d.resize(Ls);
sqrt_cc.resize(Ls);
for(int i=0; i < Ls ; i++){
Beta[i] = zdata -> beta[i];
cc[i] = 1.0/Beta[i];
cc_d[i]=std::sqrt(cc[i]);
}
cc_d[Ls-1]=1.0;
for(int i=0; i < Ls-1 ; i++){
sqrt_cc[i]= std::sqrt(cc[i]*cc[i+1]);
}
sqrt_cc[Ls-2]=std::sqrt(cc[Ls-2]);
ZoloHiInv =1.0/zolo_hi;
dw_diag = (4.0-this->M5)*ZoloHiInv;
See.resize(Ls);
Aee.resize(Ls);
int sign=1;
for(int s=0;s<Ls;s++){
Aee[s] = sign * Beta[s] * dw_diag;
sign = - sign;
}
Aee[Ls-1] += R;
See[0] = Aee[0];
for(int s=1;s<Ls;s++){
See[s] = Aee[s] - 1.0/See[s-1];
}
for(int s=0;s<Ls;s++){
std::cout<<GridLogMessage <<"s = "<<s<<" Beta "<<Beta[s]<<" Aee "<<Aee[s] <<" See "<<See[s] <<std::endl;
}
}
template<class Impl>
RealD ContinuedFractionFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;
FermionField D(psi.Grid());
this->DW(psi,D,DaggerNo);
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==0 ) {
ag5xpby_ssp(chi,cc[0]*Beta[0]*sign*ZoloHiInv,D,sqrt_cc[0],psi,s,s+1); // Multiplies Dw by G5 so Hw
} else if ( s==(Ls-1) ){
RealD R=(1.0+mass)/(1.0-mass);
ag5xpby_ssp(chi,Beta[s]*ZoloHiInv,D,sqrt_cc[s-1],psi,s,s-1);
ag5xpby_ssp(chi,R,psi,1.0,chi,s,s);
} else {
ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*ZoloHiInv,D,sqrt_cc[s],psi,s,s+1);
axpby_ssp(chi,1.0,chi,sqrt_cc[s-1],psi,s,s-1);
}
sign=-sign;
}
return norm2(chi);
}
template<class Impl>
RealD ContinuedFractionFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
{
// This matrix is already hermitian. (g5 Dw) = Dw dag g5 = (g5 Dw)dag
// The rest of matrix is symmetric.
// Can ignore "dag"
return M(psi,chi);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
int Ls = this->Ls;
this->DhopDir(psi,chi,dir,disp); // Dslash on diagonal. g5 Dslash is hermitian
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(chi,Beta[s]*ZoloHiInv,chi,0.0,chi,s,s);
} else {
ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*ZoloHiInv,chi,0.0,chi,s,s);
}
sign=-sign;
}
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;
// Apply 4d dslash
if ( psi.Checkerboard() == Odd ) {
this->DhopEO(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
} else {
this->DhopOE(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
}
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(chi,Beta[s]*ZoloHiInv,chi,0.0,chi,s,s);
} else {
ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*ZoloHiInv,chi,0.0,chi,s,s);
}
sign=-sign;
}
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
{
this->Meooe(psi,chi);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==0 ) {
ag5xpby_ssp(chi,cc[0]*Beta[0]*sign*dw_diag,psi,sqrt_cc[0],psi,s,s+1); // Multiplies Dw by G5 so Hw
} else if ( s==(Ls-1) ){
// Drop the CC here.
double R=(1+mass)/(1-mass);
ag5xpby_ssp(chi,Beta[s]*dw_diag,psi,sqrt_cc[s-1],psi,s,s-1);
ag5xpby_ssp(chi,R,psi,1.0,chi,s,s);
} else {
ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*dw_diag,psi,sqrt_cc[s],psi,s,s+1);
axpby_ssp(chi,1.0,chi,sqrt_cc[s-1],psi,s,s-1);
}
sign=-sign;
}
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
{
this->Mooee(psi,chi);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;
// Apply Linv
axpby_ssp(chi,1.0/cc_d[0],psi,0.0,psi,0,0);
for(int s=1;s<Ls;s++){
axpbg5y_ssp(chi,1.0/cc_d[s],psi,-1.0/See[s-1],chi,s,s-1);
}
// Apply Dinv
for(int s=0;s<Ls;s++){
ag5xpby_ssp(chi,1.0/See[s],chi,0.0,chi,s,s); //only appearance of See[0]
}
// Apply Uinv = (Linv)^T
axpby_ssp(chi,1.0/cc_d[Ls-1],chi,0.0,chi,Ls-1,Ls-1);
for(int s=Ls-2;s>=0;s--){
axpbg5y_ssp(chi,1.0/cc_d[s],chi,-1.0*cc_d[s+1]/See[s]/cc_d[s],chi,s,s+1);
}
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
{
this->MooeeInv(psi,chi);
}
// force terms; five routines; default to Dhop on diagonal
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V.Grid());
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(D,Beta[s]*ZoloHiInv,U,0.0,U,s,s);
} else {
ag5xpby_ssp(D,cc[s]*Beta[s]*sign*ZoloHiInv,U,0.0,U,s,s);
}
sign=-sign;
}
this->DhopDeriv(mat,D,V,DaggerNo);
};
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V.Grid());
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(D,Beta[s]*ZoloHiInv,U,0.0,U,s,s);
} else {
ag5xpby_ssp(D,cc[s]*Beta[s]*sign*ZoloHiInv,U,0.0,U,s,s);
}
sign=-sign;
}
this->DhopDerivOE(mat,D,V,DaggerNo);
};
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V.Grid());
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(D,Beta[s]*ZoloHiInv,U,0.0,U,s,s);
} else {
ag5xpby_ssp(D,cc[s]*Beta[s]*sign*ZoloHiInv,U,0.0,U,s,s);
}
sign=-sign;
}
this->DhopDerivEO(mat,D,V,DaggerNo);
};
// Constructors
template<class Impl>
ContinuedFractionFermion5D<Impl>::ContinuedFractionFermion5D(
GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5,const ImplParams &p) :
WilsonFermion5D<Impl>(_Umu,
FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid,M5,p),
mass(_mass)
{
int Ls = this->Ls;
assert((Ls&0x1)==1); // Odd Ls required
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d)
{
int Ls = this->Ls;
conformable(solution5d.Grid(),this->FermionGrid());
conformable(exported4d.Grid(),this->GaugeGrid());
ExtractSlice(exported4d, solution5d, Ls-1, Ls-1);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d)
{
int Ls = this->Ls;
conformable(imported5d.Grid(),this->FermionGrid());
conformable(input4d.Grid() ,this->GaugeGrid());
FermionField tmp(this->FermionGrid());
tmp=Zero();
InsertSlice(input4d, tmp, Ls-1, Ls-1);
tmp=Gamma(Gamma::Algebra::Gamma5)*tmp;
this->Dminus(tmp,imported5d);
}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,227 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
NAMESPACE_BEGIN(Grid);
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus fowards
// Pplus backwards..
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i,
Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
int Ls = this->Ls;
GridBase* grid = psi_i.Grid();
auto phi = phi_i.View();
auto psi = psi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
auto nloop=grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
auto ss=sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
for(int s=0; s<Ls; s++){
spinor tmp1, tmp2;
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5m(tmp1, psi(idx_u));
spProj5p(tmp2, psi(idx_l));
coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
}
});
this->M5Dtime += usecond();
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i,
Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
int Ls = this->Ls;
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
auto nloop=grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
typedef decltype(coalescedRead(psi[0])) spinor;
auto ss=sss*Ls;
for(int s=0; s<Ls; s++){
spinor tmp1, tmp2;
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5p(tmp1, psi(idx_u));
spProj5m(tmp2, psi(idx_l));
coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
}
});
this->M5Dtime += usecond();
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi_i, FermionField& chi_i)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
auto psi=psi_i.View();
auto chi=chi_i.View();
int Ls = this->Ls;
auto plee = & this->lee[0];
auto pdee = & this->dee[0];
auto puee = & this->uee[0];
auto pleem = & this->leem[0];
auto pueem = & this->ueem[0];
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
uint64_t nloop=grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
auto ss=sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1,tmp2;
// flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops
// Apply (L^{\prime})^{-1}
coalescedWrite(chi[ss],psi(ss)); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
spProj5p(tmp1, chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s) - plee[s-1]*tmp1);
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp1, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pleem[s]*tmp1);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pueem[s]/pdee[Ls])*tmp1);
}
spProj5m(tmp2, chi(ss+Ls-1));
coalescedWrite(chi[ss+Ls-1],(1.0/pdee[Ls])*tmp1 + (1.0/pdee[Ls-1])*tmp2);
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
spProj5m(tmp1, chi(ss+s+1));
coalescedWrite(chi[ss+s], chi(ss+s) - puee[s]*tmp1);
}
});
this->MooeeInvTime += usecond();
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi_i, FermionField& chi_i)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
auto psi = psi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls;
assert(psi.Checkerboard() == psi.Checkerboard());
Vector<Coeff_t> ueec(Ls);
Vector<Coeff_t> deec(Ls+1);
Vector<Coeff_t> leec(Ls);
Vector<Coeff_t> ueemc(Ls);
Vector<Coeff_t> leemc(Ls);
for(int s=0; s<ueec.size(); s++){
ueec[s] = conjugate(this->uee[s]);
deec[s] = conjugate(this->dee[s]);
leec[s] = conjugate(this->lee[s]);
ueemc[s] = conjugate(this->ueem[s]);
leemc[s] = conjugate(this->leem[s]);
}
deec[Ls] = conjugate(this->dee[Ls]);
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
auto nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1,tmp2;
auto ss=sss*Ls;
// Apply (U^{\prime})^{-dagger}
coalescedWrite(chi[ss], psi(ss));
for(int s=1; s<Ls; s++){
spProj5m(tmp1, chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s) - ueec[s-1]*tmp1);
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp1, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - ueemc[s]*tmp1);
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+s] ,(1.0/deec[s])*chi(ss+s) - (leemc[s]/deec[Ls-1])*tmp1);
}
spProj5p(tmp2, chi(ss+Ls-1));
coalescedWrite(chi[ss+Ls-1], (1.0/deec[Ls-1])*tmp1 + (1.0/deec[Ls])*tmp2);
// Apply L^{-dagger}
for(int s=Ls-2; s>=0; s--){
spProj5p(tmp1, chi(ss+s+1));
coalescedWrite(chi[ss+s],chi(ss+s) - leec[s]*tmp1);
}
});
this->MooeeInvTime += usecond();
}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,321 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
NAMESPACE_BEGIN(Grid);
template<class Impl>
DomainWallEOFAFermion<Impl>::DomainWallEOFAFermion(
GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3,
RealD _shift, int _pm, RealD _M5, const ImplParams &p) :
AbstractEOFAFermion<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3,
_shift, _pm, _M5, 1.0, 0.0, p)
{
RealD eps = 1.0;
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);
assert(zdata->n == this->Ls);
std::cout << GridLogMessage << "DomainWallEOFAFermion with Ls=" << this->Ls << std::endl;
this->SetCoefficientsTanh(zdata, 1.0, 0.0);
Approx::zolotarev_free(zdata);
}
/***************************************************************
* Additional EOFA operators only called outside the inverter.
* Since speed is not essential, simple axpby-style
* implementations should be fine.
***************************************************************/
template<class Impl>
void DomainWallEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din, int sign, int dag)
{
int Ls = this->Ls;
Din = Zero();
if((sign == 1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, Ls-1, 0); }
else if((sign == -1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); }
else if((sign == 1 ) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, Ls-1); }
else if((sign == -1) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); }
}
// This is just the identity for DWF
template<class Impl>
void DomainWallEOFAFermion<Impl>::Dtilde(const FermionField& psi, FermionField& chi){ chi = psi; }
// This is just the identity for DWF
template<class Impl>
void DomainWallEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& chi){ chi = psi; }
/*****************************************************************************************************/
template<class Impl>
RealD DomainWallEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
{
FermionField Din(psi.Grid());
this->Meooe5D(psi, Din);
this->DW(Din, chi, DaggerNo);
axpby(chi, 1.0, 1.0, chi, psi);
this->M5D(psi, chi);
return(norm2(chi));
}
template<class Impl>
RealD DomainWallEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
{
FermionField Din(psi.Grid());
this->DW(psi, Din, DaggerYes);
this->MeooeDag5D(Din, chi);
this->M5Ddag(psi, chi);
axpby(chi, 1.0, 1.0, chi, psi);
return(norm2(chi));
}
/********************************************************************
* Performance critical fermion operators called inside the inverter
********************************************************************/
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
int pm = this->pm;
RealD shift = this->shift;
RealD mq1 = this->mq1;
RealD mq2 = this->mq2;
RealD mq3 = this->mq3;
// coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} )
Coeff_t shiftp(0.0), shiftm(0.0);
if(shift != 0.0){
if(pm == 1){ shiftp = shift*(mq3-mq2); }
else{ shiftm = -shift*(mq3-mq2); }
}
Vector<Coeff_t> diag(Ls,1.0);
Vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftm;
Vector<Coeff_t> lower(Ls,-1.0); lower[0] = mq1 + shiftp;
#if(0)
std::cout << GridLogMessage << "DomainWallEOFAFermion::M5D(FF&,FF&):" << std::endl;
for(int i=0; i<diag.size(); ++i){
std::cout << GridLogMessage << "diag[" << i << "] =" << diag[i] << std::endl;
}
for(int i=0; i<upper.size(); ++i){
std::cout << GridLogMessage << "upper[" << i << "] =" << upper[i] << std::endl;
}
for(int i=0; i<lower.size(); ++i){
std::cout << GridLogMessage << "lower[" << i << "] =" << lower[i] << std::endl;
}
#endif
this->M5D(psi, chi, chi, lower, diag, upper);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
int pm = this->pm;
RealD shift = this->shift;
RealD mq1 = this->mq1;
RealD mq2 = this->mq2;
RealD mq3 = this->mq3;
// coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} )
Coeff_t shiftp(0.0), shiftm(0.0);
if(shift != 0.0){
if(pm == 1){ shiftp = shift*(mq3-mq2); }
else{ shiftm = -shift*(mq3-mq2); }
}
Vector<Coeff_t> diag(Ls,1.0);
Vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftp;
Vector<Coeff_t> lower(Ls,-1.0); lower[0] = mq1 + shiftm;
this->M5Ddag(psi, chi, chi, lower, diag, upper);
}
// half checkerboard operations
template<class Impl>
void DomainWallEOFAFermion<Impl>::Mooee(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
Vector<Coeff_t> diag = this->bee;
Vector<Coeff_t> upper(Ls);
Vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
upper[s] = -this->cee[s];
lower[s] = -this->cee[s];
}
upper[Ls-1] = this->dm;
lower[0] = this->dp;
this->M5D(psi, psi, chi, lower, diag, upper);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeDag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
Vector<Coeff_t> diag = this->bee;
Vector<Coeff_t> upper(Ls);
Vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
upper[s] = -this->cee[s];
lower[s] = -this->cee[s];
}
upper[Ls-1] = this->dp;
lower[0] = this->dm;
this->M5Ddag(psi, psi, chi, lower, diag, upper);
}
/****************************************************************************************/
//Zolo
template<class Impl>
void DomainWallEOFAFermion<Impl>::SetCoefficientsInternal(RealD zolo_hi, Vector<Coeff_t>& gamma, RealD b, RealD c)
{
int Ls = this->Ls;
int pm = this->pm;
RealD mq1 = this->mq1;
RealD mq2 = this->mq2;
RealD mq3 = this->mq3;
RealD shift = this->shift;
////////////////////////////////////////////////////////
// Constants for the preconditioned matrix Cayley form
////////////////////////////////////////////////////////
this->bs.resize(Ls);
this->cs.resize(Ls);
this->aee.resize(Ls);
this->aeo.resize(Ls);
this->bee.resize(Ls);
this->beo.resize(Ls);
this->cee.resize(Ls);
this->ceo.resize(Ls);
for(int i=0; i<Ls; ++i){
this->bee[i] = 4.0 - this->M5 + 1.0;
this->cee[i] = 1.0;
}
for(int i=0; i<Ls; ++i){
this->aee[i] = this->cee[i];
this->bs[i] = this->beo[i] = 1.0;
this->cs[i] = this->ceo[i] = 0.0;
}
//////////////////////////////////////////
// EOFA shift terms
//////////////////////////////////////////
if(pm == 1){
this->dp = mq1*this->cee[0] + shift*(mq3-mq2);
this->dm = mq1*this->cee[Ls-1];
} else if(this->pm == -1) {
this->dp = mq1*this->cee[0];
this->dm = mq1*this->cee[Ls-1] - shift*(mq3-mq2);
} else {
this->dp = mq1*this->cee[0];
this->dm = mq1*this->cee[Ls-1];
}
//////////////////////////////////////////
// LDU decomposition of eeoo
//////////////////////////////////////////
this->dee.resize(Ls+1);
this->lee.resize(Ls);
this->leem.resize(Ls);
this->uee.resize(Ls);
this->ueem.resize(Ls);
for(int i=0; i<Ls; ++i){
if(i < Ls-1){
this->lee[i] = -this->cee[i+1]/this->bee[i]; // sub-diag entry on the ith column
this->leem[i] = this->dm/this->bee[i];
for(int j=0; j<i; j++){ this->leem[i] *= this->aee[j]/this->bee[j]; }
this->dee[i] = this->bee[i];
this->uee[i] = -this->aee[i]/this->bee[i]; // up-diag entry on the ith row
this->ueem[i] = this->dp / this->bee[0];
for(int j=1; j<=i; j++){ this->ueem[i] *= this->cee[j]/this->bee[j]; }
} else {
this->lee[i] = 0.0;
this->leem[i] = 0.0;
this->uee[i] = 0.0;
this->ueem[i] = 0.0;
}
}
{
Coeff_t delta_d = 1.0 / this->bee[0];
for(int j=1; j<Ls-1; j++){ delta_d *= this->cee[j] / this->bee[j]; }
this->dee[Ls-1] = this->bee[Ls-1] + this->cee[0] * this->dm * delta_d;
this->dee[Ls] = this->bee[Ls-1] + this->cee[Ls-1] * this->dp * delta_d;
}
}
// Recompute Cayley-form coefficients for different shift
template<class Impl>
void DomainWallEOFAFermion<Impl>::RefreshShiftCoefficients(RealD new_shift)
{
this->shift = new_shift;
Approx::zolotarev_data *zdata = Approx::higham(1.0, this->Ls);
this->SetCoefficientsTanh(zdata, 1.0, 0.0);
}
NAMESPACE_END(Grid);

View File

@ -24,22 +24,17 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h>
#include <Grid/perfmon/PerfCount.h>
namespace Grid {
namespace QCD {
// S-direction is INNERMOST and takes no part in the parity.
const std::vector<int>
ImprovedStaggeredFermion5DStatic::directions({1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4});
const std::vector<int>
ImprovedStaggeredFermion5DStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, 3, 3, -3, -3, -3, -3});
#pragma once
// 5d lattice for DWF.
NAMESPACE_BEGIN(Grid);
// 5d lattice for DWF.
template<class Impl>
ImprovedStaggeredFermion5D<Impl>::ImprovedStaggeredFermion5D(GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
@ -53,9 +48,9 @@ ImprovedStaggeredFermion5D<Impl>::ImprovedStaggeredFermion5D(GridCartesian
_FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
_FourDimGrid (&FourDimGrid),
_FourDimRedBlackGrid(&FourDimRedBlackGrid),
Stencil (&FiveDimGrid,npoint,Even,directions,displacements),
StencilEven(&FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
StencilOdd (&FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd
Stencil (&FiveDimGrid,npoint,Even,directions,displacements,p),
StencilEven(&FiveDimRedBlackGrid,npoint,Even,directions,displacements,p), // source is Even
StencilOdd (&FiveDimRedBlackGrid,npoint,Odd ,directions,displacements,p), // source is Odd
mass(_mass),
c1(_c1),
c2(_c2),
@ -108,8 +103,8 @@ ImprovedStaggeredFermion5D<Impl>::ImprovedStaggeredFermion5D(GridCartesian
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
for(int d=0;d<4;d++){
assert(FourDimGrid._simd_layout[d]=1);
assert(FourDimRedBlackGrid._simd_layout[d]=1);
assert(FourDimGrid._simd_layout[d]==1);
assert(FourDimRedBlackGrid._simd_layout[d]==1);
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
}
@ -226,24 +221,27 @@ void ImprovedStaggeredFermion5D<Impl>::DhopDir(const FermionField &in, FermionFi
Compressor compressor;
Stencil.HaloExchange(in,compressor);
parallel_for(int ss=0;ss<Umu._grid->oSites();ss++){
auto Umu_v = Umu.View();
auto UUUmu_v = UUUmu.View();
auto in_v = in.View();
auto out_v = out.View();
thread_for( ss,Umu.Grid()->oSites(),{
for(int s=0;s<Ls;s++){
int sU=ss;
int sF = s+Ls*sU;
Kernels::DhopDir(Stencil, Umu, UUUmu, Stencil.CommBuf(), sF, sU, in, out, dir, disp);
Kernels::DhopDirKernel(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sF, sU, in_v, out_v, dir, disp);
}
}
});
};
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
DoubledGaugeField & UUU,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
DoubledGaugeField & U,
DoubledGaugeField & UUU,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
{
// No force terms in multi-rhs solver staggered
assert(0);
@ -251,18 +249,18 @@ void ImprovedStaggeredFermion5D<Impl>::DerivInternal(StencilImpl & st,
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DhopDeriv(GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
const FermionField &A,
const FermionField &B,
int dag)
{
assert(0);
}
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
const FermionField &A,
const FermionField &B,
int dag)
{
assert(0);
}
@ -270,9 +268,9 @@ void ImprovedStaggeredFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
const FermionField &A,
const FermionField &B,
int dag)
{
assert(0);
}
@ -301,8 +299,8 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl &
Compressor compressor;
int LLs = in._grid->_rdimensions[0];
int len = U._grid->oSites();
int LLs = in.Grid()->_rdimensions[0];
int len = U.Grid()->oSites();
DhopFaceTime-=usecond();
st.Prepare();
@ -328,7 +326,7 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl &
double start = usecond();
nthreads -= ncomms;
int ttid = tid - ncomms;
int n = U._grid->oSites(); // 4d vol
int n = U.Grid()->oSites(); // 4d vol
int chunk = n / nthreads;
int rem = n % nthreads;
int myblock, myn;
@ -341,17 +339,22 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl &
}
// do the compute
auto U_v = U.View();
auto UUU_v = UUU.View();
auto in_v = in.View();
auto out_v = out.View();
if (dag == DaggerYes) {
for (int ss = myblock; ss < myblock+myn; ++ss) {
int sU = ss;
// Interior = 1; Exterior = 0; must implement for staggered
Kernels::DhopSiteDag(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out,1,0); //<---------
Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,1,0); //<---------
}
} else {
for (int ss = myblock; ss < myblock+myn; ++ss) {
// Interior = 1; Exterior = 0;
int sU = ss;
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out,1,0); //<------------
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,1,0); //<------------
}
}
ptime = usecond() - start;
@ -372,18 +375,23 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl &
DhopFaceTime+=usecond();
DhopComputeTime2-=usecond();
auto U_v = U.View();
auto UUU_v = UUU.View();
auto in_v = in.View();
auto out_v = out.View();
if (dag == DaggerYes) {
int sz=st.surface_list.size();
parallel_for (int ss = 0; ss < sz; ss++) {
thread_for( ss,sz,{
int sU = st.surface_list[ss];
Kernels::DhopSiteDag(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out,0,1); //<----------
}
Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,0,1); //<----------
});
} else {
int sz=st.surface_list.size();
parallel_for (int ss = 0; ss < sz; ss++) {
thread_for( ss,sz,{
int sU = st.surface_list[ss];
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out,0,1);//<----------
}
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,0,1);//<----------
});
}
DhopComputeTime2+=usecond();
#else
@ -398,7 +406,7 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
const FermionField &in, FermionField &out,int dag)
{
Compressor compressor;
int LLs = in._grid->_rdimensions[0];
int LLs = in.Grid()->_rdimensions[0];
@ -410,16 +418,20 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
DhopComputeTime -= usecond();
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
auto U_v = U.View();
auto UUU_v = UUU.View();
auto in_v = in.View();
auto out_v = out.View();
if (dag == DaggerYes) {
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
thread_for( ss,U.Grid()->oSites(),{
int sU=ss;
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), LLs, sU,in, out);
}
Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), LLs, sU,in_v, out_v);
});
} else {
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
thread_for( ss,U.Grid()->oSites(),{
int sU=ss;
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out);
}
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v);
});
}
DhopComputeTime += usecond();
DhopTotalTime += usecond();
@ -432,50 +444,17 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
}
/*CHANGE END*/
/* ORG
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
DoubledGaugeField & U,DoubledGaugeField & UUU,
const FermionField &in, FermionField &out,int dag)
{
Compressor compressor;
int LLs = in._grid->_rdimensions[0];
DhopTotalTime -= usecond();
DhopCommTime -= usecond();
st.HaloExchange(in,compressor);
DhopCommTime += usecond();
DhopComputeTime -= usecond();
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
if (dag == DaggerYes) {
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU=ss;
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), LLs, sU,in, out);
}
} else {
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU=ss;
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out);
}
}
DhopComputeTime += usecond();
DhopTotalTime += usecond();
}
*/
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
{
DhopCalls+=1;
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
conformable(in._grid,out._grid); // drops the cb check
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
conformable(in.Grid(),out.Grid()); // drops the cb check
assert(in.checkerboard==Even);
out.checkerboard = Odd;
assert(in.Checkerboard()==Even);
out.Checkerboard() = Odd;
DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,UUUmuOdd,in,out,dag);
}
@ -483,11 +462,11 @@ template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
{
DhopCalls+=1;
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
conformable(in._grid,out._grid); // drops the cb check
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
conformable(in.Grid(),out.Grid()); // drops the cb check
assert(in.checkerboard==Odd);
out.checkerboard = Even;
assert(in.Checkerboard()==Odd);
out.Checkerboard() = Even;
DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,UUUmuEven,in,out,dag);
}
@ -495,10 +474,10 @@ template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
{
DhopCalls+=2;
conformable(in._grid,FermionGrid()); // verifies full grid
conformable(in._grid,out._grid);
conformable(in.Grid(),FermionGrid()); // verifies full grid
conformable(in.Grid(),out.Grid());
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
DhopInternal(Stencil,Lebesgue,Umu,UUUmu,in,out,dag);
}
@ -506,7 +485,7 @@ void ImprovedStaggeredFermion5D<Impl>::Dhop(const FermionField &in, FermionField
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::Report(void)
{
std::vector<int> latt = GridDefaultLatt();
Coordinate latt = GridDefaultLatt();
RealD volume = Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
RealD NP = _FourDimGrid->_Nprocessors;
RealD NN = _FourDimGrid->NodeCount();
@ -564,21 +543,21 @@ void ImprovedStaggeredFermion5D<Impl>::Mdir(const FermionField &in, FermionField
}
template <class Impl>
RealD ImprovedStaggeredFermion5D<Impl>::M(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
Dhop(in, out, DaggerNo);
return axpy_norm(out, mass, in, out);
}
template <class Impl>
RealD ImprovedStaggeredFermion5D<Impl>::Mdag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
Dhop(in, out, DaggerYes);
return axpy_norm(out, mass, in, out);
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::Meooe(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
if (in.Checkerboard() == Odd) {
DhopEO(in, out, DaggerNo);
} else {
DhopOE(in, out, DaggerNo);
@ -586,7 +565,7 @@ void ImprovedStaggeredFermion5D<Impl>::Meooe(const FermionField &in, FermionFiel
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
if (in.Checkerboard() == Odd) {
DhopEO(in, out, DaggerYes);
} else {
DhopOE(in, out, DaggerYes);
@ -595,27 +574,27 @@ void ImprovedStaggeredFermion5D<Impl>::MeooeDag(const FermionField &in, FermionF
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::Mooee(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
typename FermionField::scalar_type scal(mass);
out = scal * in;
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
Mooee(in, out);
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
out = (1.0 / (mass)) * in;
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::MooeeInvDag(const FermionField &in,
FermionField &out) {
out.checkerboard = in.checkerboard;
FermionField &out) {
out.Checkerboard() = in.Checkerboard();
MooeeInv(in, out);
}
@ -624,31 +603,28 @@ void ImprovedStaggeredFermion5D<Impl>::MooeeInvDag(const FermionField &in,
////////////////////////////////////////////////////////
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu)
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu)
{
assert(0);
assert(0);
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
void ImprovedStaggeredFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx)
{
assert(0);
assert(0);
}
FermOpStaggeredTemplateInstantiate(ImprovedStaggeredFermion5D);
FermOpStaggeredVec5dTemplateInstantiate(ImprovedStaggeredFermion5D);
}}
NAMESPACE_END(Grid);

View File

@ -28,40 +28,35 @@ directory
/* END LEGAL */
#include <Grid/Grid.h>
namespace Grid {
namespace QCD {
#pragma once
const std::vector<int>
ImprovedStaggeredFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
const std::vector<int>
ImprovedStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, 3, 3, -3, -3, -3, -3});
NAMESPACE_BEGIN(Grid);
/////////////////////////////////
// Constructor and gauge import
/////////////////////////////////
template <class Impl>
ImprovedStaggeredFermion<Impl>::ImprovedStaggeredFermion(GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid,
RealD _mass,
RealD _c1, RealD _c2,RealD _u0,
const ImplParams &p)
: Kernels(p),
_grid(&Fgrid),
_cbgrid(&Hgrid),
Stencil(&Fgrid, npoint, Even, directions, displacements),
StencilEven(&Hgrid, npoint, Even, directions, displacements), // source is Even
StencilOdd(&Hgrid, npoint, Odd, directions, displacements), // source is Odd
mass(_mass),
Lebesgue(_grid),
LebesgueEvenOdd(_cbgrid),
Umu(&Fgrid),
UmuEven(&Hgrid),
UmuOdd(&Hgrid),
UUUmu(&Fgrid),
UUUmuEven(&Hgrid),
UUUmuOdd(&Hgrid) ,
_tmp(&Hgrid)
: Kernels(p),
_grid(&Fgrid),
_cbgrid(&Hgrid),
Stencil(&Fgrid, npoint, Even, directions, displacements,p),
StencilEven(&Hgrid, npoint, Even, directions, displacements,p), // source is Even
StencilOdd(&Hgrid, npoint, Odd, directions, displacements,p), // source is Odd
mass(_mass),
Lebesgue(_grid),
LebesgueEvenOdd(_cbgrid),
Umu(&Fgrid),
UmuEven(&Hgrid),
UmuOdd(&Hgrid),
UUUmu(&Fgrid),
UUUmuEven(&Hgrid),
UUUmuOdd(&Hgrid) ,
_tmp(&Hgrid)
{
int vol4;
int LLs=1;
@ -85,17 +80,17 @@ ImprovedStaggeredFermion<Impl>::ImprovedStaggeredFermion(GaugeField &_Uthin, Gau
ImportGauge(_Uthin,_Ufat);
}
////////////////////////////////////////////////////////////
// Momentum space propagator should be
// https://arxiv.org/pdf/hep-lat/9712010.pdf
//
// mom space action.
// gamma_mu i ( c1 sin pmu + c2 sin 3 pmu ) + m
//
// must track through staggered flavour/spin reduction in literature to
// turn to free propagator for the one component chi field, a la page 4/5
// of above link to implmement fourier based solver.
////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////
// Momentum space propagator should be
// https://arxiv.org/pdf/hep-lat/9712010.pdf
//
// mom space action.
// gamma_mu i ( c1 sin pmu + c2 sin 3 pmu ) + m
//
// must track through staggered flavour/spin reduction in literature to
// turn to free propagator for the one component chi field, a la page 4/5
// of above link to implmement fourier based solver.
////////////////////////////////////////////////////////////
template <class Impl>
void ImprovedStaggeredFermion<Impl>::ImportGaugeSimple(const GaugeField &_Utriple,const GaugeField &_Ufat)
{
@ -177,21 +172,21 @@ void ImprovedStaggeredFermion<Impl>::ImportGauge(const GaugeField &_Uthin,const
template <class Impl>
RealD ImprovedStaggeredFermion<Impl>::M(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
Dhop(in, out, DaggerNo);
return axpy_norm(out, mass, in, out);
}
template <class Impl>
RealD ImprovedStaggeredFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
Dhop(in, out, DaggerYes);
return axpy_norm(out, mass, in, out);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
if (in.Checkerboard() == Odd) {
DhopEO(in, out, DaggerNo);
} else {
DhopOE(in, out, DaggerNo);
@ -199,7 +194,7 @@ void ImprovedStaggeredFermion<Impl>::Meooe(const FermionField &in, FermionField
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
if (in.Checkerboard() == Odd) {
DhopEO(in, out, DaggerYes);
} else {
DhopOE(in, out, DaggerYes);
@ -208,27 +203,27 @@ void ImprovedStaggeredFermion<Impl>::MeooeDag(const FermionField &in, FermionFie
template <class Impl>
void ImprovedStaggeredFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
typename FermionField::scalar_type scal(mass);
out = scal * in;
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
Mooee(in, out);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
out = (1.0 / (mass)) * in;
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::MooeeInvDag(const FermionField &in,
FermionField &out) {
out.checkerboard = in.checkerboard;
FermionField &out) {
out.Checkerboard() = in.Checkerboard();
MooeeInv(in, out);
}
@ -244,8 +239,8 @@ void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGauge
Compressor compressor;
FermionField Btilde(B._grid);
FermionField Atilde(B._grid);
FermionField Btilde(B.Grid());
FermionField Atilde(B.Grid());
Atilde = A;
st.HaloExchange(B, compressor);
@ -255,10 +250,13 @@ void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGauge
////////////////////////
// Call the single hop
////////////////////////
PARALLEL_FOR_LOOP
for (int sss = 0; sss < B._grid->oSites(); sss++) {
Kernels::DhopDir(st, U, UUU, st.CommBuf(), sss, sss, B, Btilde, mu,1);
}
auto U_v = U.View();
auto UUU_v = UUU.View();
auto B_v = B.View();
auto Btilde_v = Btilde.View();
thread_for(sss,B.Grid()->oSites(),{
Kernels::DhopDirKernel(st, U_v, UUU_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1);
});
// Force in three link terms
//
@ -288,11 +286,11 @@ void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGauge
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
conformable(U._grid, _grid);
conformable(U._grid, V._grid);
conformable(U._grid, mat._grid);
conformable(U.Grid(), _grid);
conformable(U.Grid(), V.Grid());
conformable(U.Grid(), mat.Grid());
mat.checkerboard = U.checkerboard;
mat.Checkerboard() = U.Checkerboard();
DerivInternal(Stencil, Umu, UUUmu, mat, U, V, dag);
}
@ -300,13 +298,13 @@ void ImprovedStaggeredFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionFie
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
conformable(U._grid, _cbgrid);
conformable(U._grid, V._grid);
conformable(U._grid, mat._grid);
conformable(U.Grid(), _cbgrid);
conformable(U.Grid(), V.Grid());
conformable(U.Grid(), mat.Grid());
assert(V.checkerboard == Even);
assert(U.checkerboard == Odd);
mat.checkerboard = Odd;
assert(V.Checkerboard() == Even);
assert(U.Checkerboard() == Odd);
mat.Checkerboard() = Odd;
DerivInternal(StencilEven, UmuOdd, UUUmuOdd, mat, U, V, dag);
}
@ -314,48 +312,51 @@ void ImprovedStaggeredFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionF
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
conformable(U._grid, _cbgrid);
conformable(U._grid, V._grid);
conformable(U._grid, mat._grid);
conformable(U.Grid(), _cbgrid);
conformable(U.Grid(), V.Grid());
conformable(U.Grid(), mat.Grid());
assert(V.checkerboard == Odd);
assert(U.checkerboard == Even);
mat.checkerboard = Even;
assert(V.Checkerboard() == Odd);
assert(U.Checkerboard() == Even);
mat.Checkerboard() = Even;
DerivInternal(StencilOdd, UmuEven, UUUmuEven, mat, U, V, dag);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag) {
void ImprovedStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
{
DhopCalls+=2;
conformable(in._grid, _grid); // verifies full grid
conformable(in._grid, out._grid);
conformable(in.Grid(), _grid); // verifies full grid
conformable(in.Grid(), out.Grid());
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
DhopInternal(Stencil, Lebesgue, Umu, UUUmu, in, out, dag);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag) {
void ImprovedStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
{
DhopCalls+=1;
conformable(in._grid, _cbgrid); // verifies half grid
conformable(in._grid, out._grid); // drops the cb check
conformable(in.Grid(), _cbgrid); // verifies half grid
conformable(in.Grid(), out.Grid()); // drops the cb check
assert(in.checkerboard == Even);
out.checkerboard = Odd;
assert(in.Checkerboard() == Even);
out.Checkerboard() = Odd;
DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, UUUmuOdd, in, out, dag);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField &out, int dag) {
void ImprovedStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField &out, int dag)
{
DhopCalls+=1;
conformable(in._grid, _cbgrid); // verifies half grid
conformable(in._grid, out._grid); // drops the cb check
conformable(in.Grid(), _cbgrid); // verifies half grid
conformable(in.Grid(), out.Grid()); // drops the cb check
assert(in.checkerboard == Odd);
out.checkerboard = Even;
assert(in.Checkerboard() == Odd);
out.Checkerboard() = Even;
DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, UUUmuEven, in, out, dag);
}
@ -370,11 +371,13 @@ void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionFiel
Compressor compressor;
Stencil.HaloExchange(in, compressor);
PARALLEL_FOR_LOOP
for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopDir(Stencil, Umu, UUUmu, Stencil.CommBuf(), sss, sss, in, out, dir, disp);
}
auto Umu_v = Umu.View();
auto UUUmu_v = UUUmu.View();
auto in_v = in.View();
auto out_v = out.View();
thread_for( sss, in.Grid()->oSites(),{
Kernels::DhopDirKernel(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp);
});
};
template <class Impl>
@ -400,7 +403,7 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
{
#ifdef GRID_OMP
Compressor compressor;
int len = U._grid->oSites();
int len = U.Grid()->oSites();
const int LLs = 1;
DhopTotalTime -= usecond();
@ -439,17 +442,21 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
}
// do the compute
auto U_v = U.View();
auto UUU_v = UUU.View();
auto in_v = in.View();
auto out_v = out.View();
if (dag == DaggerYes) {
for (int ss = myblock; ss < myblock+myn; ++ss) {
int sU = ss;
// Interior = 1; Exterior = 0; must implement for staggered
Kernels::DhopSiteDag(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,1,0);
Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,1,0);
}
} else {
for (int ss = myblock; ss < myblock+myn; ++ss) {
// Interior = 1; Exterior = 0;
int sU = ss;
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,1,0);
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,1,0);
}
}
} else {
@ -464,17 +471,23 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
DhopFaceTime -= usecond();
DhopComputeTime2 -= usecond();
if (dag == DaggerYes) {
int sz=st.surface_list.size();
parallel_for (int ss = 0; ss < sz; ss++) {
int sU = st.surface_list[ss];
Kernels::DhopSiteDag(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,0,1);
}
} else {
int sz=st.surface_list.size();
parallel_for (int ss = 0; ss < sz; ss++) {
int sU = st.surface_list[ss];
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,0,1);
{
auto U_v = U.View();
auto UUU_v = UUU.View();
auto in_v = in.View();
auto out_v = out.View();
if (dag == DaggerYes) {
int sz=st.surface_list.size();
thread_for(ss,sz,{
int sU = st.surface_list[ss];
Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,0,1);
});
} else {
int sz=st.surface_list.size();
thread_for(ss,sz,{
int sU = st.surface_list[ss];
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,0,1);
});
}
}
DhopComputeTime2 += usecond();
@ -500,15 +513,19 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalSerialComms(StencilImpl &st, Le
st.HaloExchange(in, compressor);
DhopCommTime += usecond();
auto U_v = U.View();
auto UUU_v = UUU.View();
auto in_v = in.View();
auto out_v = out.View();
DhopComputeTime -= usecond();
if (dag == DaggerYes) {
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
}
thread_for(sss, in.Grid()->oSites(),{
Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v);
});
} else {
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopSite(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
}
thread_for(sss, in.Grid()->oSites(),{
Kernels::DhopSite(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v);
});
}
DhopComputeTime += usecond();
DhopTotalTime += usecond();
@ -520,7 +537,7 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalSerialComms(StencilImpl &st, Le
template<class Impl>
void ImprovedStaggeredFermion<Impl>::Report(void)
{
std::vector<int> latt = GridDefaultLatt();
Coordinate latt = _grid->GlobalDimensions();
RealD volume = 1; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
RealD NP = _grid->_Nprocessors;
RealD NN = _grid->NodeCount();
@ -574,31 +591,25 @@ void ImprovedStaggeredFermion<Impl>::ZeroCounters(void)
////////////////////////////////////////////////////////
template <class Impl>
void ImprovedStaggeredFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu)
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu)
{
assert(0);
assert(0);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
void ImprovedStaggeredFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx)
{
assert(0);
assert(0);
}
FermOpStaggeredTemplateInstantiate(ImprovedStaggeredFermion);
//AdjointFermOpTemplateInstantiate(ImprovedStaggeredFermion);
//TwoIndexFermOpTemplateInstantiate(ImprovedStaggeredFermion);
}}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,453 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermioncache.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
NAMESPACE_BEGIN(Grid);
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
Vector<Coeff_t> &lower, Vector<Coeff_t> &diag, Vector<Coeff_t> &upper)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss = sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1;
spinor tmp2;
for(int s=0; s<Ls; s++){
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5m(tmp1, psi(idx_u));
spProj5p(tmp2, psi(idx_l));
coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
}
});
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
Vector<Coeff_t> &lower, Vector<Coeff_t> &diag, Vector<Coeff_t> &upper,
Vector<Coeff_t> &shift_coeffs)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
auto pm = this->pm;
int shift_s = (pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
assert(phi.Checkerboard() == psi.Checkerboard());
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss = sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1;
spinor tmp2;
spinor tmp;
for(int s=0; s<Ls; s++){
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5m(tmp1, psi(idx_u));
spProj5p(tmp2, psi(idx_l));
if(pm == 1){ spProj5p(tmp, psi(ss+shift_s)); }
else { spProj5m(tmp, psi(ss+shift_s)); }
coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 +lower[s]*tmp2 + shift_coeffs[s]*tmp);
}
});
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
Vector<Coeff_t> &lower, Vector<Coeff_t> &diag, Vector<Coeff_t> &upper)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(), {
uint64_t ss = sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1, tmp2;
for(int s=0; s<Ls; s++){
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5p(tmp1, psi(idx_u));
spProj5m(tmp2, psi(idx_l));
coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
}
});
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
Vector<Coeff_t> &lower, Vector<Coeff_t> &diag, Vector<Coeff_t> &upper,
Vector<Coeff_t> &shift_coeffs)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
auto pm = this->pm;
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss = sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1, tmp2, tmp;
tmp1=Zero();
coalescedWrite(chi[ss+Ls-1],tmp1);
for(int s=0; s<Ls; s++){
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5p(tmp1, psi(idx_u));
spProj5m(tmp2, psi(idx_l));
if(s==(Ls-1)) coalescedWrite(chi[ss+s], chi(ss+s)+ diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
else coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
if(pm == 1){ spProj5p(tmp, psi(ss+s)); }
else { spProj5m(tmp, psi(ss+s)); }
coalescedWrite(chi[ss+shift_s],chi(ss+shift_s)+shift_coeffs[s]*tmp);
}
});
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi_i, FermionField &chi_i)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
auto psi = psi_i.View();
auto chi = chi_i.View();
auto plee = & this->lee [0];
auto pdee = & this->dee [0];
auto puee = & this->uee [0];
auto pleem= & this->leem[0];
auto pueem= & this->ueem[0];
if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; }
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss = sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp;
// Apply (L^{\prime})^{-1}
coalescedWrite(chi[ss], psi(ss)); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
spProj5p(tmp, chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s) - plee[s-1]*tmp);
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pleem[s]*tmp);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp, chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pueem[s]/pdee[Ls-1])*tmp);
}
coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1));
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
spProj5m(tmp, chi(ss+s+1));
coalescedWrite(chi[ss+s], chi(ss+s) - puee[s]*tmp);
}
});
this->MooeeInvTime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi_i, FermionField &chi_i)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
auto psi = psi_i.View();
auto chi = chi_i.View();
auto pm = this->pm;
auto plee = & this->lee [0];
auto pdee = & this->dee [0];
auto puee = & this->uee [0];
auto pleem= & this->leem[0];
auto pueem= & this->ueem[0];
auto pMooeeInv_shift_lc = &MooeeInv_shift_lc[0];
auto pMooeeInv_shift_norm = &MooeeInv_shift_norm[0];
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss = sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1,tmp2,tmp2_spProj;
// Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2
coalescedWrite(chi[ss], psi(ss)); // chi[0]=psi[0]
tmp2 = pMooeeInv_shift_lc[0]*psi(ss);
for(int s=1; s<Ls; s++){
spProj5p(tmp1, chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s) - plee[s-1]*tmp1);
tmp2 = tmp2 + pMooeeInv_shift_lc[s]*psi(ss+s);
}
if(pm == 1){ spProj5p(tmp2_spProj, tmp2);}
else { spProj5m(tmp2_spProj, tmp2); }
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp1, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pleem[s]*tmp1);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pueem[s]/pdee[Ls-1])*tmp1);
}
// chi[ss+Ls-1] = (1.0/pdee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1));
spProj5m(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) + pMooeeInv_shift_norm[Ls-1]*tmp2_spProj);
// Apply U^{-1} and add shift term
for(int s=Ls-2; s>=0; s--){
coalescedWrite(chi[ss+s] , chi(ss+s) - puee[s]*tmp1);
spProj5m(tmp1, chi(ss+s));
coalescedWrite(chi[ss+s], chi(ss+s) + pMooeeInv_shift_norm[s]*tmp2_spProj);
}
});
this->MooeeInvTime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi_i, FermionField &chi_i)
{
if(this->shift != 0.0){ MooeeInvDag_shift(psi_i,chi_i); return; }
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
auto psi = psi_i.View();
auto chi = chi_i.View();
auto plee = & this->lee [0];
auto pdee = & this->dee [0];
auto puee = & this->uee [0];
auto pleem= & this->leem[0];
auto pueem= & this->ueem[0];
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss = sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp;
// Apply (U^{\prime})^{-dag}
coalescedWrite(chi[ss], psi(ss));
for(int s=1; s<Ls; s++){
spProj5m(tmp, chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s) - puee[s-1]*tmp);
}
// U_m^{-\dag}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pueem[s]*tmp);
}
// L_m^{-\dag} D^{-dag}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp, chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pleem[s]/pdee[Ls-1])*tmp);
}
coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1));
// Apply L^{-dag}
for(int s=Ls-2; s>=0; s--){
spProj5p(tmp, chi(ss+s+1));
coalescedWrite(chi[ss+s], chi(ss+s) - plee[s]*tmp);
}
});
this->MooeeInvTime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi_i, FermionField &chi_i)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
auto psi = psi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls;
auto pm = this->pm;
auto plee = & this->lee [0];
auto pdee = & this->dee [0];
auto puee = & this->uee [0];
auto pleem= & this->leem[0];
auto pueem= & this->ueem[0];
auto pMooeeInvDag_shift_lc = &MooeeInvDag_shift_lc[0];
auto pMooeeInvDag_shift_norm = &MooeeInvDag_shift_norm[0];
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss = sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1,tmp2,tmp2_spProj;
// Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2
coalescedWrite(chi[ss], psi(ss));
tmp2 = pMooeeInvDag_shift_lc[0]*psi(ss);
for(int s=1; s<Ls; s++){
spProj5m(tmp1, chi(ss+s-1));
coalescedWrite(chi[ss+s],psi(ss+s) - puee[s-1]*tmp1);
tmp2 = tmp2 + pMooeeInvDag_shift_lc[s]*psi(ss+s);
}
if(pm == 1){ spProj5p(tmp2_spProj, tmp2);}
else { spProj5m(tmp2_spProj, tmp2);}
// U_m^{-\dag}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp1, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pueem[s]*tmp1);
}
// L_m^{-\dag} D^{-dag}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pleem[s]/pdee[Ls-1])*tmp1);
}
coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1));
spProj5p(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) + pMooeeInvDag_shift_norm[Ls-1]*tmp2_spProj);
// Apply L^{-dag}
for(int s=Ls-2; s>=0; s--){
coalescedWrite(chi[ss+s], chi(ss+s) - plee[s]*tmp1);
spProj5p(tmp1, chi(ss+s));
coalescedWrite(chi[ss+s], chi(ss+s) + pMooeeInvDag_shift_norm[s]*tmp2_spProj);
}
});
this->MooeeInvTime += usecond();
}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,407 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
NAMESPACE_BEGIN(Grid);
template<class Impl>
MobiusEOFAFermion<Impl>::MobiusEOFAFermion(
GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3,
RealD _shift, int _pm, RealD _M5,
RealD _b, RealD _c, const ImplParams &p) :
AbstractEOFAFermion<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3,
_shift, _pm, _M5, _b, _c, p)
{
int Ls = this->Ls;
RealD eps = 1.0;
Approx::zolotarev_data *zdata = Approx::higham(eps, this->Ls);
assert(zdata->n == this->Ls);
std::cout << GridLogMessage << "MobiusEOFAFermion (b=" << _b <<
",c=" << _c << ") with Ls=" << Ls << std::endl;
this->SetCoefficientsTanh(zdata, _b, _c);
std::cout << GridLogMessage << "EOFA parameters: (mq1=" << _mq1 <<
",mq2=" << _mq2 << ",mq3=" << _mq3 << ",shift=" << _shift <<
",pm=" << _pm << ")" << std::endl;
Approx::zolotarev_free(zdata);
if(_shift != 0.0){
SetCoefficientsPrecondShiftOps();
} else {
Mooee_shift.resize(Ls, 0.0);
MooeeInv_shift_lc.resize(Ls, 0.0);
MooeeInv_shift_norm.resize(Ls, 0.0);
MooeeInvDag_shift_lc.resize(Ls, 0.0);
MooeeInvDag_shift_norm.resize(Ls, 0.0);
}
}
/****************************************************************
* Additional EOFA operators only called outside the inverter.
* Since speed is not essential, simple axpby-style
* implementations should be fine.
***************************************************************/
template<class Impl>
void MobiusEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din, int sign, int dag)
{
int Ls = this->Ls;
RealD alpha = this->alpha;
Din = Zero();
if((sign == 1) && (dag == 0)) { // \Omega_{+}
for(int s=0; s<Ls; ++s){
axpby_ssp(Din, 0.0, psi, 2.0*std::pow(1.0-alpha,Ls-s-1)/std::pow(1.0+alpha,Ls-s), psi, s, 0);
}
} else if((sign == -1) && (dag == 0)) { // \Omega_{-}
for(int s=0; s<Ls; ++s){
axpby_ssp(Din, 0.0, psi, 2.0*std::pow(1.0-alpha,s)/std::pow(1.0+alpha,s+1), psi, s, 0);
}
} else if((sign == 1 ) && (dag == 1)) { // \Omega_{+}^{\dagger}
for(int sp=0; sp<Ls; ++sp){
axpby_ssp(Din, 1.0, Din, 2.0*std::pow(1.0-alpha,Ls-sp-1)/std::pow(1.0+alpha,Ls-sp), psi, 0, sp);
}
} else if((sign == -1) && (dag == 1)) { // \Omega_{-}^{\dagger}
for(int sp=0; sp<Ls; ++sp){
axpby_ssp(Din, 1.0, Din, 2.0*std::pow(1.0-alpha,sp)/std::pow(1.0+alpha,sp+1), psi, 0, sp);
}
}
}
// This is the operator relating the usual Ddwf to TWQCD's EOFA Dirac operator (arXiv:1706.05843, Eqn. 6).
// It also relates the preconditioned and unpreconditioned systems described in Appendix B.2.
template<class Impl>
void MobiusEOFAFermion<Impl>::Dtilde(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
RealD b = 0.5 * ( 1.0 + this->alpha );
RealD c = 0.5 * ( 1.0 - this->alpha );
RealD mq1 = this->mq1;
for(int s=0; s<Ls; ++s){
if(s == 0) {
axpby_ssp_pminus(chi, b, psi, -c, psi, s, s+1);
axpby_ssp_pplus (chi, 1.0, chi, mq1*c, psi, s, Ls-1);
} else if(s == (Ls-1)) {
axpby_ssp_pminus(chi, b, psi, mq1*c, psi, s, 0);
axpby_ssp_pplus (chi, 1.0, chi, -c, psi, s, s-1);
} else {
axpby_ssp_pminus(chi, b, psi, -c, psi, s, s+1);
axpby_ssp_pplus (chi, 1.0, chi, -c, psi, s, s-1);
}
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
RealD m = this->mq1;
RealD c = 0.5 * this->alpha;
RealD d = 0.5;
RealD DtInv_p(0.0), DtInv_m(0.0);
RealD N = std::pow(c+d,Ls) + m*std::pow(c-d,Ls);
FermionField tmp(this->FermionGrid());
for(int s=0; s<Ls; ++s){
for(int sp=0; sp<Ls; ++sp){
DtInv_p = m * std::pow(-1.0,s-sp+1) * std::pow(c-d,Ls+s-sp) / std::pow(c+d,s-sp+1) / N;
DtInv_p += (s < sp) ? 0.0 : std::pow(-1.0,s-sp) * std::pow(c-d,s-sp) / std::pow(c+d,s-sp+1);
DtInv_m = m * std::pow(-1.0,sp-s+1) * std::pow(c-d,Ls+sp-s) / std::pow(c+d,sp-s+1) / N;
DtInv_m += (s > sp) ? 0.0 : std::pow(-1.0,sp-s) * std::pow(c-d,sp-s) / std::pow(c+d,sp-s+1);
if(sp == 0){
axpby_ssp_pplus (tmp, 0.0, tmp, DtInv_p, psi, s, sp);
axpby_ssp_pminus(tmp, 0.0, tmp, DtInv_m, psi, s, sp);
} else {
axpby_ssp_pplus (tmp, 1.0, tmp, DtInv_p, psi, s, sp);
axpby_ssp_pminus(tmp, 1.0, tmp, DtInv_m, psi, s, sp);
}
}}
}
/*****************************************************************************************************/
template<class Impl>
RealD MobiusEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
{
FermionField Din(psi.Grid());
this->Meooe5D(psi, Din);
this->DW(Din, chi, DaggerNo);
axpby(chi, 1.0, 1.0, chi, psi);
this->M5D(psi, chi);
return(norm2(chi));
}
template<class Impl>
RealD MobiusEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
{
FermionField Din(psi.Grid());
this->DW(psi, Din, DaggerYes);
this->MeooeDag5D(Din, chi);
this->M5Ddag(psi, chi);
axpby(chi, 1.0, 1.0, chi, psi);
return(norm2(chi));
}
/********************************************************************
* Performance critical fermion operators called inside the inverter
********************************************************************/
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
Vector<Coeff_t> diag(Ls,1.0);
Vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = this->mq1;
Vector<Coeff_t> lower(Ls,-1.0); lower[0] = this->mq1;
// no shift term
if(this->shift == 0.0){ this->M5D(psi, chi, chi, lower, diag, upper); }
// fused M + shift operation
else{ this->M5D_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); }
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
Vector<Coeff_t> diag(Ls,1.0);
Vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = this->mq1;
Vector<Coeff_t> lower(Ls,-1.0); lower[0] = this->mq1;
// no shift term
if(this->shift == 0.0){ this->M5Ddag(psi, chi, chi, lower, diag, upper); }
// fused M + shift operation
else{ this->M5Ddag_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); }
}
// half checkerboard operations
template<class Impl>
void MobiusEOFAFermion<Impl>::Mooee(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
// coefficients of Mooee
Vector<Coeff_t> diag = this->bee;
Vector<Coeff_t> upper(Ls);
Vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
upper[s] = -this->cee[s];
lower[s] = -this->cee[s];
}
upper[Ls-1] *= -this->mq1;
lower[0] *= -this->mq1;
// no shift term
if(this->shift == 0.0){ this->M5D(psi, psi, chi, lower, diag, upper); }
// fused M + shift operation
else { this->M5D_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); }
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeDag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
// coefficients of MooeeDag
Vector<Coeff_t> diag = this->bee;
Vector<Coeff_t> upper(Ls);
Vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
if(s==0) {
upper[s] = -this->cee[s+1];
lower[s] = this->mq1*this->cee[Ls-1];
} else if(s==(Ls-1)) {
upper[s] = this->mq1*this->cee[0];
lower[s] = -this->cee[s-1];
} else {
upper[s] = -this->cee[s+1];
lower[s] = -this->cee[s-1];
}
}
// no shift term
if(this->shift == 0.0){ this->M5Ddag(psi, psi, chi, lower, diag, upper); }
// fused M + shift operation
else{ this->M5Ddag_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); }
}
/****************************************************************************************/
// Computes coefficients for applying Cayley preconditioned shift operators
// (Mooee + \Delta) --> Mooee_shift
// (Mooee + \Delta)^{-1} --> MooeeInv_shift_lc, MooeeInv_shift_norm
// (Mooee + \Delta)^{-dag} --> MooeeInvDag_shift_lc, MooeeInvDag_shift_norm
// For the latter two cases, the operation takes the form
// [ (Mooee + \Delta)^{-1} \psi ]_{i} = Mooee_{ij} \psi_{j} +
// ( MooeeInv_shift_norm )_{i} ( \sum_{j} [ MooeeInv_shift_lc ]_{j} P_{pm} \psi_{j} )
template<class Impl>
void MobiusEOFAFermion<Impl>::SetCoefficientsPrecondShiftOps()
{
int Ls = this->Ls;
int pm = this->pm;
RealD alpha = this->alpha;
RealD k = this->k;
RealD mq1 = this->mq1;
RealD shift = this->shift;
// Initialize
Mooee_shift.resize(Ls);
MooeeInv_shift_lc.resize(Ls);
MooeeInv_shift_norm.resize(Ls);
MooeeInvDag_shift_lc.resize(Ls);
MooeeInvDag_shift_norm.resize(Ls);
// Construct Mooee_shift
int idx(0);
Coeff_t N = ( (pm == 1) ? 1.0 : -1.0 ) * (2.0*shift*k) *
( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) );
for(int s=0; s<Ls; ++s){
idx = (pm == 1) ? (s) : (Ls-1-s);
Mooee_shift[idx] = N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1);
}
// Tridiagonal solve for MooeeInvDag_shift_lc
{
Coeff_t m(0.0);
Vector<Coeff_t> d = Mooee_shift;
Vector<Coeff_t> u(Ls,0.0);
Vector<Coeff_t> y(Ls,0.0);
Vector<Coeff_t> q(Ls,0.0);
if(pm == 1){ u[0] = 1.0; }
else{ u[Ls-1] = 1.0; }
// Tridiagonal matrix algorithm + Sherman-Morrison formula
//
// We solve
// ( Mooee' + u \otimes v ) MooeeInvDag_shift_lc = Mooee_shift
// where Mooee' is the tridiagonal part of Mooee_{+}, and
// u = (1,0,...,0) and v = (0,...,0,mq1*cee[0]) are chosen
// so that the outer-product u \otimes v gives the (0,Ls-1)
// entry of Mooee_{+}.
//
// We do this as two solves: Mooee'*y = d and Mooee'*q = u,
// and then construct the solution to the original system
// MooeeInvDag_shift_lc = y - <v,y> / ( 1 + <v,q> ) q
if(pm == 1){
for(int s=1; s<Ls; ++s){
m = -this->cee[s] / this->bee[s-1];
d[s] -= m*d[s-1];
u[s] -= m*u[s-1];
}
}
y[Ls-1] = d[Ls-1] / this->bee[Ls-1];
q[Ls-1] = u[Ls-1] / this->bee[Ls-1];
for(int s=Ls-2; s>=0; --s){
if(pm == 1){
y[s] = d[s] / this->bee[s];
q[s] = u[s] / this->bee[s];
} else {
y[s] = ( d[s] + this->cee[s]*y[s+1] ) / this->bee[s];
q[s] = ( u[s] + this->cee[s]*q[s+1] ) / this->bee[s];
}
}
// Construct MooeeInvDag_shift_lc
for(int s=0; s<Ls; ++s){
if(pm == 1){
MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[0]*y[Ls-1] /
(1.0+mq1*this->cee[0]*q[Ls-1]) * q[s];
} else {
MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[Ls-1]*y[0] /
(1.0+mq1*this->cee[Ls-1]*q[0]) * q[s];
}
}
// Compute remaining coefficients
N = (pm == 1) ? (1.0 + MooeeInvDag_shift_lc[Ls-1]) : (1.0 + MooeeInvDag_shift_lc[0]);
for(int s=0; s<Ls; ++s){
// MooeeInv_shift_lc
if(pm == 1){ MooeeInv_shift_lc[s] = pow(this->bee[s],s) * pow(this->cee[s],Ls-1-s); }
else { MooeeInv_shift_lc[s] = pow(this->bee[s],Ls-1-s) * pow(this->cee[s],s); }
// MooeeInv_shift_norm
MooeeInv_shift_norm[s] = -MooeeInvDag_shift_lc[s] /
( pow(this->bee[s],Ls) + mq1*pow(this->cee[s],Ls) ) / N;
// MooeeInvDag_shift_norm
if(pm == 1){ MooeeInvDag_shift_norm[s] = -pow(this->bee[s],s) * pow(this->cee[s],(Ls-1-s)) /
( pow(this->bee[s],Ls) + mq1*pow(this->cee[s],Ls) ) / N; }
else{ MooeeInvDag_shift_norm[s] = -pow(this->bee[s],(Ls-1-s)) * pow(this->cee[s],s) /
( pow(this->bee[s],Ls) + mq1*pow(this->cee[s],Ls) ) / N; }
}
}
}
// Recompute coefficients for a different value of shift constant
template<class Impl>
void MobiusEOFAFermion<Impl>::RefreshShiftCoefficients(RealD new_shift)
{
this->shift = new_shift;
if(new_shift != 0.0){
SetCoefficientsPrecondShiftOps();
} else {
int Ls = this->Ls;
Mooee_shift.resize(Ls,0.0);
MooeeInv_shift_lc.resize(Ls,0.0);
MooeeInv_shift_norm.resize(Ls,0.0);
MooeeInvDag_shift_lc.resize(Ls,0.0);
MooeeInvDag_shift_norm.resize(Ls,0.0);
}
}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,450 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/PartialFractionFermion5D.h>
NAMESPACE_BEGIN(Grid);
template<class Impl>
void PartialFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
// this does both dag and undag but is trivial; make a common helper routing
int Ls = this->Ls;
this->DhopDir(psi,chi,dir,disp);
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(chi,-scale,chi,0.0,chi,s,s);
ag5xpby_ssp(chi, scale,chi,0.0,chi,s+1,s+1);
}
ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::Meooe_internal(const FermionField &psi, FermionField &chi,int dag)
{
int Ls = this->Ls;
if ( psi.Checkerboard() == Odd ) {
this->DhopEO(psi,chi,DaggerNo);
} else {
this->DhopOE(psi,chi,DaggerNo);
}
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(chi,-scale,chi,0.0,chi,s,s);
ag5xpby_ssp(chi, scale,chi,0.0,chi,s+1,s+1);
}
ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::Mooee_internal(const FermionField &psi, FermionField &chi,int dag)
{
// again dag and undag are trivially related
int sign = dag ? (-1) : 1;
int Ls = this->Ls;
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
RealD pp = p[nblock-1-b];
RealD qq = q[nblock-1-b];
// Do each 2x2 block aligned at s and multiplies Dw site diagonal by G5 so Hw
ag5xpby_ssp(chi,-dw_diag*scale,psi,amax*sqrt(qq)*scale,psi, s ,s+1);
ag5xpby_ssp(chi, dw_diag*scale,psi,amax*sqrt(qq)*scale,psi, s+1,s);
axpby_ssp (chi, 1.0, chi,sqrt(amax*pp)*scale*sign,psi,s+1,Ls-1);
}
{
RealD R=(1+mass)/(1-mass);
//R g5 psi[Ls-1] + p[0] H
ag5xpbg5y_ssp(chi,R*scale,psi,p[nblock]*scale*dw_diag/amax,psi,Ls-1,Ls-1);
for(int b=0;b<nblock;b++){
int s = 2*b+1;
RealD pp = p[nblock-1-b];
axpby_ssp(chi,1.0,chi,-sqrt(amax*pp)*scale*sign,psi,Ls-1,s);
}
}
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeInv_internal(const FermionField &psi, FermionField &chi,int dag)
{
int sign = dag ? (-1) : 1;
int Ls = this->Ls;
FermionField tmp(psi.Grid());
///////////////////////////////////////////////////////////////////////////////////////
//Linv
///////////////////////////////////////////////////////////////////////////////////////
int nblock=(Ls-1)/2;
axpy(chi,0.0,psi,psi); // Identity piece
for(int b=0;b<nblock;b++){
int s = 2*b;
RealD pp = p[nblock-1-b];
RealD qq = q[nblock-1-b];
RealD coeff1=sign*sqrt(amax*amax*amax*pp*qq) / ( dw_diag*dw_diag + amax*amax* qq);
RealD coeff2=sign*sqrt(amax*pp)*dw_diag / ( dw_diag*dw_diag + amax*amax* qq); // Implicit g5 here
axpby_ssp (chi,1.0,chi,coeff1,psi,Ls-1,s);
axpbg5y_ssp(chi,1.0,chi,coeff2,psi,Ls-1,s+1);
}
///////////////////////////////////////////////////////////////////////////////////////
//Dinv (note D isn't really diagonal -- just diagonal enough that we can still invert)
// Compute Seeinv (coeff of gamma5)
///////////////////////////////////////////////////////////////////////////////////////
RealD R=(1+mass)/(1-mass);
RealD Seeinv = R + p[nblock]*dw_diag/amax;
for(int b=0;b<nblock;b++){
Seeinv += p[nblock-1-b]*dw_diag/amax / ( dw_diag*dw_diag/amax/amax + q[nblock-1-b]);
}
Seeinv = 1.0/Seeinv;
for(int b=0;b<nblock;b++){
int s = 2*b;
RealD pp = p[nblock-1-b];
RealD qq = q[nblock-1-b];
RealD coeff1=dw_diag / ( dw_diag*dw_diag + amax*amax* qq); // Implicit g5 here
RealD coeff2=amax*sqrt(qq) / ( dw_diag*dw_diag + amax*amax* qq);
ag5xpby_ssp (tmp,-coeff1,chi,coeff2,chi,s,s+1);
ag5xpby_ssp (tmp, coeff1,chi,coeff2,chi,s+1,s);
}
ag5xpby_ssp (tmp, Seeinv,chi,0.0,chi,Ls-1,Ls-1);
///////////////////////////////////////////////////////////////////////////////////////
// Uinv
///////////////////////////////////////////////////////////////////////////////////////
for(int b=0;b<nblock;b++){
int s = 2*b;
RealD pp = p[nblock-1-b];
RealD qq = q[nblock-1-b];
RealD coeff1=-sign*sqrt(amax*amax*amax*pp*qq) / ( dw_diag*dw_diag + amax*amax* qq);
RealD coeff2=-sign*sqrt(amax*pp)*dw_diag / ( dw_diag*dw_diag + amax*amax* qq); // Implicit g5 here
axpby_ssp (chi,1.0/scale,tmp,coeff1/scale,tmp,s,Ls-1);
axpbg5y_ssp(chi,1.0/scale,tmp,coeff2/scale,tmp,s+1,Ls-1);
}
axpby_ssp (chi, 1.0/scale,tmp,0.0,tmp,Ls-1,Ls-1);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::M_internal(const FermionField &psi, FermionField &chi,int dag)
{
FermionField D(psi.Grid());
int Ls = this->Ls;
int sign = dag ? (-1) : 1;
// For partial frac Hw case (b5=c5=1) chroma quirkily computes
//
// Conventions for partfrac appear to be a mess.
// Tony's Nara lectures have
//
// BlockDiag( H/p_i 1 | 1 )
// ( 1 p_i H / q_i^2 | 0 )
// ---------------------------------
// ( -1 0 | R +p0 H )
//
//Chroma ( -2H 2sqrt(q_i) | 0 )
// (2 sqrt(q_i) 2H | 2 sqrt(p_i) )
// ---------------------------------
// ( 0 -2 sqrt(p_i) | 2 R gamma_5 + p0 2H
//
// Edwards/Joo/Kennedy/Wenger
//
// Here, the "beta's" selected by chroma to scale the unphysical bulk constraint fields
// incorporate the approx scale factor. This is obtained by propagating the
// scale on "H" out to the off diagonal elements as follows:
//
// BlockDiag( H/p_i 1 | 1 )
// ( 1 p_i H / q_i^2 | 0 )
// ---------------------------------
// ( -1 0 | R + p_0 H )
//
// becomes:
// BlockDiag( H/ sp_i 1 | 1 )
// ( 1 sp_i H / s^2q_i^2 | 0 )
// ---------------------------------
// ( -1 0 | R + p_0/s H )
//
//
// This is implemented in Chroma by
// p0' = p0/approxMax
// p_i' = p_i*approxMax
// q_i' = q_i*approxMax*approxMax
//
// After the equivalence transform is applied the matrix becomes
//
//Chroma ( -2H sqrt(q'_i) | 0 )
// (sqrt(q'_i) 2H | sqrt(p'_i) )
// ---------------------------------
// ( 0 -sqrt(p'_i) | 2 R gamma_5 + p'0 2H
//
// = ( -2H sqrt(q_i)amax | 0 )
// (sqrt(q_i)amax 2H | sqrt(p_i*amax) )
// ---------------------------------
// ( 0 -sqrt(p_i)*amax | 2 R gamma_5 + p0/amax 2H
//
this->DW(psi,D,DaggerNo);
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
double pp = p[nblock-1-b];
double qq = q[nblock-1-b];
// Do each 2x2 block aligned at s and
ag5xpby_ssp(chi,-1.0*scale,D,amax*sqrt(qq)*scale,psi, s ,s+1); // Multiplies Dw by G5 so Hw
ag5xpby_ssp(chi, 1.0*scale,D,amax*sqrt(qq)*scale,psi, s+1,s);
// Pick up last column
axpby_ssp (chi, 1.0, chi,sqrt(amax*pp)*scale*sign,psi,s+1,Ls-1);
}
{
double R=(1+this->mass)/(1-this->mass);
//R g5 psi[Ls] + p[0] H
ag5xpbg5y_ssp(chi,R*scale,psi,p[nblock]*scale/amax,D,Ls-1,Ls-1);
for(int b=0;b<nblock;b++){
int s = 2*b+1;
double pp = p[nblock-1-b];
axpby_ssp(chi,1.0,chi,-sqrt(amax*pp)*scale*sign,psi,Ls-1,s);
}
}
}
template<class Impl>
RealD PartialFractionFermion5D<Impl>::M (const FermionField &in, FermionField &out)
{
M_internal(in,out,DaggerNo);
return norm2(out);
}
template<class Impl>
RealD PartialFractionFermion5D<Impl>::Mdag (const FermionField &in, FermionField &out)
{
M_internal(in,out,DaggerYes);
return norm2(out);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::Meooe (const FermionField &in, FermionField &out)
{
Meooe_internal(in,out,DaggerNo);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MeooeDag (const FermionField &in, FermionField &out)
{
Meooe_internal(in,out,DaggerYes);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::Mooee (const FermionField &in, FermionField &out)
{
Mooee_internal(in,out,DaggerNo);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeDag (const FermionField &in, FermionField &out)
{
Mooee_internal(in,out,DaggerYes);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeInv (const FermionField &in, FermionField &out)
{
MooeeInv_internal(in,out,DaggerNo);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeInvDag (const FermionField &in, FermionField &out)
{
MooeeInv_internal(in,out,DaggerYes);
}
// force terms; five routines; default to Dhop on diagonal
template<class Impl>
void PartialFractionFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V.Grid());
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(D,-scale,U,0.0,U,s,s);
ag5xpby_ssp(D, scale,U,0.0,U,s+1,s+1);
}
ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
this->DhopDeriv(mat,D,V,DaggerNo);
};
template<class Impl>
void PartialFractionFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V.Grid());
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(D,-scale,U,0.0,U,s,s);
ag5xpby_ssp(D, scale,U,0.0,U,s+1,s+1);
}
ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
this->DhopDerivOE(mat,D,V,DaggerNo);
};
template<class Impl>
void PartialFractionFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V.Grid());
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(D,-scale,U,0.0,U,s,s);
ag5xpby_ssp(D, scale,U,0.0,U,s+1,s+1);
}
ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
this->DhopDerivEO(mat,D,V,DaggerNo);
};
template<class Impl>
void PartialFractionFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale){
SetCoefficientsZolotarev(1.0/scale,zdata);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata){
// check on degree matching
// std::cout<<GridLogMessage << Ls << " Ls"<<std::endl;
// std::cout<<GridLogMessage << zdata->n << " - n"<<std::endl;
// std::cout<<GridLogMessage << zdata->da << " -da "<<std::endl;
// std::cout<<GridLogMessage << zdata->db << " -db"<<std::endl;
// std::cout<<GridLogMessage << zdata->dn << " -dn"<<std::endl;
// std::cout<<GridLogMessage << zdata->dd << " -dd"<<std::endl;
int Ls = this->Ls;
assert(Ls == (2*zdata->da -1) );
// Part frac
// RealD R;
R=(1+mass)/(1-mass);
dw_diag = (4.0-this->M5);
// std::vector<RealD> p;
// std::vector<RealD> q;
p.resize(zdata->da);
q.resize(zdata->dd);
for(int n=0;n<zdata->da;n++){
p[n] = zdata -> alpha[n];
}
for(int n=0;n<zdata->dd;n++){
q[n] = -zdata -> ap[n];
}
scale= part_frac_chroma_convention ? 2.0 : 1.0; // Chroma conventions annoy me
amax=zolo_hi;
}
template<class Impl>
void PartialFractionFermion5D<Impl>::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d)
{
int Ls = this->Ls;
conformable(solution5d.Grid(),this->FermionGrid());
conformable(exported4d.Grid(),this->GaugeGrid());
ExtractSlice(exported4d, solution5d, Ls-1, Ls-1);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d)
{
int Ls = this->Ls;
conformable(imported5d.Grid(),this->FermionGrid());
conformable(input4d.Grid() ,this->GaugeGrid());
FermionField tmp(this->FermionGrid());
tmp=Zero();
InsertSlice(input4d, tmp, Ls-1, Ls-1);
tmp=Gamma(Gamma::Algebra::Gamma5)*tmp;
this->Dminus(tmp,imported5d);
}
// Constructors
template<class Impl>
PartialFractionFermion5D<Impl>::PartialFractionFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5,
const ImplParams &p) :
WilsonFermion5D<Impl>(_Umu,
FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid,M5,p),
mass(_mass)
{
int Ls = this->Ls;
assert((Ls&0x1)==1); // Odd Ls required
int nrational=Ls-1;
Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);
// NB: chroma uses a cast to "float" for the zolotarev range(!?).
// this creates a real difference in the operator which I do not like but we can replicate here
// to demonstrate compatibility
// RealD eps = (zolo_lo / zolo_hi);
// zdata = bfm_zolotarev(eps,nrational,0);
SetCoefficientsTanh(zdata,1.0);
Approx::zolotarev_free(zdata);
}
NAMESPACE_END(Grid);

View File

@ -26,6 +26,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/Grid.h>
#ifdef AVX512
@ -586,11 +588,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
VADD(UChi_00,UChi_10,UChi_00) \
VADD(UChi_01,UChi_11,UChi_01) \
VADD(UChi_02,UChi_12,UChi_02) ); \
asm ( \
VSTORE(0,%0,pUChi_00) \
VSTORE(1,%0,pUChi_01) \
VSTORE(2,%0,pUChi_02) \
: : "r" (out) : "memory" );
asm ( \
VSTORE(0,%0,pUChi_00) \
VSTORE(1,%0,pUChi_01) \
VSTORE(2,%0,pUChi_02) \
: : "r" (out) : "memory" );
// FIXME is sign right in the VSUB ?
#define nREDUCEa(out) \
@ -613,20 +615,20 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
permute##dir(Chi_1,Chi_1);\
permute##dir(Chi_2,Chi_2);
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag)
DoubledGaugeFieldView &U,
DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
{
assert(0);
};
//#define CONDITIONAL_MOVE(l,o,out) if ( l ) { out = (uint64_t) &in._odata[o] ; } else { out =(uint64_t) &buf[o]; }
//#define CONDITIONAL_MOVE(l,o,out) if ( l ) { out = (uint64_t) &in[o] ; } else { out =(uint64_t) &buf[o]; }
#define CONDITIONAL_MOVE(l,o,out) { const SiteSpinor *ptr = l? in_p : buf; out = (uint64_t) &ptr[o]; }
@ -673,22 +675,23 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
CONDITIONAL_MOVE(l3,o3,addr3); \
PF_CHI(addr3); \
\
gauge0 =(uint64_t)&UU._odata[sU]( X ); \
gauge1 =(uint64_t)&UU._odata[sU]( Y ); \
gauge2 =(uint64_t)&UU._odata[sU]( Z ); \
gauge3 =(uint64_t)&UU._odata[sU]( T );
gauge0 =(uint64_t)&UU[sU]( X ); \
gauge1 =(uint64_t)&UU[sU]( Y ); \
gauge2 =(uint64_t)&UU[sU]( Z ); \
gauge3 =(uint64_t)&UU[sU]( T );
// This is the single precision 5th direction vectorised kernel
#include <Grid/simd/Intel512single.h>
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag)
DoubledGaugeFieldView &U,
DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
{
#ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3;
uint64_t addr0,addr1,addr2,addr3;
const SiteSpinor *in_p; in_p = &in._odata[0];
const SiteSpinor *in_p; in_p = &in[0];
int o0,o1,o2,o3; // offsets
int l0,l1,l2,l3; // local
@ -719,7 +722,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
LOAD_CHI(addr0,addr1,addr2,addr3);
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
addr0 = (uint64_t) &out._odata[sF];
addr0 = (uint64_t) &out[sF];
if ( dag ) {
nREDUCE(addr0);
} else {
@ -734,14 +737,15 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
#include <Grid/simd/Intel512double.h>
template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag)
DoubledGaugeFieldView &U,
DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionFieldView &in, FermionFieldView &out, int dag)
{
#ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3;
uint64_t addr0,addr1,addr2,addr3;
const SiteSpinor *in_p; in_p = &in._odata[0];
const SiteSpinor *in_p; in_p = &in[0];
int o0,o1,o2,o3; // offsets
int l0,l1,l2,l3; // local
@ -771,7 +775,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
LOAD_CHI(addr0,addr1,addr2,addr3);
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
addr0 = (uint64_t) &out._odata[sF];
addr0 = (uint64_t) &out[sF];
if ( dag ) {
nREDUCE(addr0);
} else {
@ -818,14 +822,15 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
#include <Grid/simd/Intel512single.h>
template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag)
DoubledGaugeFieldView &U,
DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
{
#ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3;
uint64_t addr0,addr1,addr2,addr3;
const SiteSpinor *in_p; in_p = &in._odata[0];
const SiteSpinor *in_p; in_p = &in[0];
int o0,o1,o2,o3; // offsets
int l0,l1,l2,l3; // local
@ -872,7 +877,7 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
PERMUTE23;
MULT_ADD_XYZT(gauge2,gauge3);
addr0 = (uint64_t) &out._odata[sF];
addr0 = (uint64_t) &out[sF];
if ( dag ) {
nREDUCEa(addr0);
} else {
@ -886,14 +891,15 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
#include <Grid/simd/Intel512double.h>
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag)
DoubledGaugeFieldView &U,
DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
{
#ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3;
uint64_t addr0,addr1,addr2,addr3;
const SiteSpinor *in_p; in_p = &in._odata[0];
const SiteSpinor *in_p; in_p = &in[0];
int o0,o1,o2,o3; // offsets
int l0,l1,l2,l3; // local
@ -940,7 +946,7 @@ template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st,
PERMUTE23;
MULT_ADD_XYZT(gauge2,gauge3);
addr0 = (uint64_t) &out._odata[sF];
addr0 = (uint64_t) &out[sF];
if ( dag ) {
nREDUCEa(addr0);
} else {
@ -952,17 +958,5 @@ template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st,
#endif
}
#define KERNEL_INSTANTIATE(CLASS,FUNC,IMPL) \
template void CLASS<IMPL>::FUNC(StencilImpl &st, LebesgueOrder &lo, \
DoubledGaugeField &U, \
DoubledGaugeField &UUU, \
SiteSpinor *buf, int LLs, \
int sU, const FermionField &in, FermionField &out,int dag);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplD);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplF);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplD);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplF);
}}
NAMESPACE_END(Grid);

View File

@ -28,6 +28,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */
#include <Grid/Grid.h>
#pragma once
NAMESPACE_BEGIN(Grid);
#define LOAD_CHI(b) \
const SiteSpinor & ref (b[offset]); \
@ -38,7 +41,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
// To splat or not to splat depends on the implementation
#define MULT(A,UChi) \
auto & ref(U._odata[sU](A)); \
auto & ref(U[sU](A)); \
Impl::loadLinkElement(U_00,ref()(0,0)); \
Impl::loadLinkElement(U_10,ref()(1,0)); \
Impl::loadLinkElement(U_20,ref()(2,0)); \
@ -59,7 +62,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
UChi ## _2 += U_22*Chi_2;
#define MULT_ADD(U,A,UChi) \
auto & ref(U._odata[sU](A)); \
auto & ref(U[sU](A)); \
Impl::loadLinkElement(U_00,ref()(0,0)); \
Impl::loadLinkElement(U_10,ref()(1,0)); \
Impl::loadLinkElement(U_20,ref()(2,0)); \
@ -92,7 +95,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
local = SE->_is_local; \
perm = SE->_permute; \
if ( local ) { \
LOAD_CHI(in._odata); \
LOAD_CHI(in); \
if ( perm) { \
PERMUTE_DIR(Perm); \
} \
@ -120,14 +123,14 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
local = SE->_is_local; \
perm = SE->_permute; \
if ( local ) { \
LOAD_CHI(in._odata); \
LOAD_CHI(in); \
if ( perm) { \
PERMUTE_DIR(Perm); \
} \
} else if ( st.same_node[Dir] ) { \
LOAD_CHI(buf); \
} \
if (SE->_is_local || st.same_node[Dir] ) { \
if (local || st.same_node[Dir] ) { \
MULT_ADD(U,Dir,even); \
}
@ -135,22 +138,18 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
SE=st.GetEntry(ptype,Dir+skew,sF); \
offset = SE->_offset; \
local = SE->_is_local; \
perm = SE->_permute; \
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
if ((!local) && (!st.same_node[Dir]) ) { \
nmu++; \
{ LOAD_CHI(buf); } \
{ MULT_ADD(U,Dir,even); } \
}
namespace Grid {
namespace QCD {
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,DoubledGaugeField &UUU,
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag)
const FermionFieldView &in, FermionFieldView &out,int dag)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -213,16 +212,16 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
result()()(1) = even_1 + odd_1;
result()()(2) = even_2 + odd_2;
}
vstream(out._odata[sF],result);
vstream(out[sF],result);
}
}
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag)
const FermionFieldView &in, FermionFieldView &out,int dag)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -249,7 +248,7 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
Simd U_22;
SiteSpinor result;
int offset,local,perm, ptype;
int offset, ptype, local, perm;
StencilEntry *SE;
int skew;
@ -257,8 +256,8 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
for(int s=0;s<LLs;s++){
int sF=s+LLs*sU;
even_0 = zero; even_1 = zero; even_2 = zero;
odd_0 = zero; odd_1 = zero; odd_2 = zero;
even_0 = Zero(); even_1 = Zero(); even_2 = Zero();
odd_0 = Zero(); odd_1 = Zero(); odd_2 = Zero();
skew = 0;
HAND_STENCIL_LEG_INT(U,Xp,3,skew,even);
@ -289,16 +288,16 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
result()()(1) = even_1 + odd_1;
result()()(2) = even_2 + odd_2;
}
vstream(out._odata[sF],result);
vstream(out[sF],result);
}
}
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag)
const FermionFieldView &in, FermionFieldView &out,int dag)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -325,7 +324,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
Simd U_22;
SiteSpinor result;
int offset,local,perm, ptype;
int offset, ptype, local;
StencilEntry *SE;
int skew;
@ -333,8 +332,8 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
for(int s=0;s<LLs;s++){
int sF=s+LLs*sU;
even_0 = zero; even_1 = zero; even_2 = zero;
odd_0 = zero; odd_1 = zero; odd_2 = zero;
even_0 = Zero(); even_1 = Zero(); even_2 = Zero();
odd_0 = Zero(); odd_1 = Zero(); odd_2 = Zero();
int nmu=0;
skew = 0;
HAND_STENCIL_LEG_EXT(U,Xp,3,skew,even);
@ -366,34 +365,29 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
result()()(1) = even_1 + odd_1;
result()()(2) = even_2 + odd_2;
}
out._odata[sF] = out._odata[sF] + result;
out[sF] = out[sF] + result;
}
}
}
#define DHOP_SITE_HAND_INSTANTIATE(IMPL) \
template void StaggeredKernels<IMPL>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \
DoubledGaugeField &U,DoubledGaugeField &UUU, \
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \
SiteSpinor *buf, int LLs, int sU, \
const FermionField &in, FermionField &out, int dag); \
const FermionFieldView &in, FermionFieldView &out, int dag); \
\
template void StaggeredKernels<IMPL>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, \
DoubledGaugeField &U,DoubledGaugeField &UUU, \
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \
SiteSpinor *buf, int LLs, int sU, \
const FermionField &in, FermionField &out, int dag); \
const FermionFieldView &in, FermionFieldView &out, int dag); \
\
template void StaggeredKernels<IMPL>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, \
DoubledGaugeField &U,DoubledGaugeField &UUU, \
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \
SiteSpinor *buf, int LLs, int sU, \
const FermionField &in, FermionField &out, int dag); \
const FermionFieldView &in, FermionFieldView &out, int dag); \
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplD);
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplF);
DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplD);
DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplF);
#undef LOAD_CHI
NAMESPACE_END(Grid);
}
}

View File

@ -28,40 +28,38 @@ directory
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
namespace QCD {
#pragma once
int StaggeredKernelsStatic::Opt= StaggeredKernelsStatic::OptGeneric;
int StaggeredKernelsStatic::Comms = StaggeredKernelsStatic::CommsAndCompute;
NAMESPACE_BEGIN(Grid);
#define GENERIC_STENCIL_LEG(U,Dir,skew,multLink) \
SE = st.GetEntry(ptype, Dir+skew, sF); \
if (SE->_is_local ) { \
if (SE->_permute) { \
chi_p = &chi; \
permute(chi, in._odata[SE->_offset], ptype); \
permute(chi, in[SE->_offset], ptype); \
} else { \
chi_p = &in._odata[SE->_offset]; \
chi_p = &in[SE->_offset]; \
} \
} else { \
chi_p = &buf[SE->_offset]; \
} \
multLink(Uchi, U._odata[sU], *chi_p, Dir);
multLink(Uchi, U[sU], *chi_p, Dir);
#define GENERIC_STENCIL_LEG_INT(U,Dir,skew,multLink) \
SE = st.GetEntry(ptype, Dir+skew, sF); \
if (SE->_is_local ) { \
if (SE->_permute) { \
chi_p = &chi; \
permute(chi, in._odata[SE->_offset], ptype); \
permute(chi, in[SE->_offset], ptype); \
} else { \
chi_p = &in._odata[SE->_offset]; \
chi_p = &in[SE->_offset]; \
} \
} else if ( st.same_node[Dir] ) { \
chi_p = &buf[SE->_offset]; \
} \
if (SE->_is_local || st.same_node[Dir] ) { \
multLink(Uchi, U._odata[sU], *chi_p, Dir); \
multLink(Uchi, U[sU], *chi_p, Dir); \
}
#define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink) \
@ -69,7 +67,7 @@ int StaggeredKernelsStatic::Comms = StaggeredKernelsStatic::CommsAndCompute;
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
nmu++; \
chi_p = &buf[SE->_offset]; \
multLink(Uchi, U._odata[sU], *chi_p, Dir); \
multLink(Uchi, U[sU], *chi_p, Dir); \
}
template <class Impl>
@ -81,9 +79,9 @@ StaggeredKernels<Impl>::StaggeredKernels(const ImplParams &p) : Base(p){};
////////////////////////////////////////////////////////////////////////////////////
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out, int dag) {
const FermionFieldView &in, FermionFieldView &out, int dag) {
const SiteSpinor *chi_p;
SiteSpinor chi;
SiteSpinor Uchi;
@ -114,7 +112,7 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
if ( dag ) {
Uchi = - Uchi;
}
vstream(out._odata[sF], Uchi);
vstream(out[sF], Uchi);
}
};
@ -123,9 +121,9 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
///////////////////////////////////////////////////
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag) {
const FermionFieldView &in, FermionFieldView &out,int dag) {
const SiteSpinor *chi_p;
SiteSpinor chi;
SiteSpinor Uchi;
@ -136,7 +134,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &
for(int s=0;s<LLs;s++){
int sF=LLs*sU+s;
skew = 0;
Uchi=zero;
Uchi=Zero();
GENERIC_STENCIL_LEG_INT(U,Xp,skew,Impl::multLinkAdd);
GENERIC_STENCIL_LEG_INT(U,Yp,skew,Impl::multLinkAdd);
GENERIC_STENCIL_LEG_INT(U,Zp,skew,Impl::multLinkAdd);
@ -157,7 +155,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &
if ( dag ) {
Uchi = - Uchi;
}
vstream(out._odata[sF], Uchi);
vstream(out[sF], Uchi);
}
};
@ -167,11 +165,11 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &
///////////////////////////////////////////////////
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag) {
const FermionFieldView &in, FermionFieldView &out,int dag) {
const SiteSpinor *chi_p;
SiteSpinor chi;
// SiteSpinor chi;
SiteSpinor Uchi;
StencilEntry *SE;
int ptype;
@ -181,7 +179,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &
for(int s=0;s<LLs;s++){
int sF=LLs*sU+s;
skew = 0;
Uchi=zero;
Uchi=Zero();
GENERIC_STENCIL_LEG_EXT(U,Xp,skew,Impl::multLinkAdd);
GENERIC_STENCIL_LEG_EXT(U,Yp,skew,Impl::multLinkAdd);
GENERIC_STENCIL_LEG_EXT(U,Zp,skew,Impl::multLinkAdd);
@ -202,9 +200,9 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &
if ( nmu ) {
if ( dag ) {
out._odata[sF] = out._odata[sF] - Uchi;
out[sF] = out[sF] - Uchi;
} else {
out._odata[sF] = out._odata[sF] + Uchi;
out[sF] = out[sF] + Uchi;
}
}
}
@ -215,9 +213,9 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &
////////////////////////////////////////////////////////////////////////////////////
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,
const FermionFieldView &in, FermionFieldView &out,
int interior,int exterior)
{
int dag=1;
@ -225,9 +223,9 @@ void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, Dou
};
template <class Impl>
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,
const FermionFieldView &in, FermionFieldView &out,
int interior,int exterior)
{
int dag=0;
@ -235,9 +233,9 @@ void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, Double
};
template <class Impl>
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out,
int sU, const FermionFieldView &in, FermionFieldView &out,
int dag,int interior,int exterior)
{
switch(Opt) {
@ -277,8 +275,8 @@ void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, Double
};
template <class Impl>
void StaggeredKernels<Impl>::DhopDir( StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out, int dir, int disp)
void StaggeredKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int disp)
{
// Disp should be either +1,-1,+3,-3
// What about "dag" ?
@ -287,8 +285,6 @@ void StaggeredKernels<Impl>::DhopDir( StencilImpl &st, DoubledGaugeField &U, Do
assert(0);
}
FermOpStaggeredTemplateInstantiate(StaggeredKernels);
FermOpStaggeredVec5dTemplateInstantiate(StaggeredKernels);
NAMESPACE_END(Grid);
}}

View File

@ -26,23 +26,21 @@
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
//#include <Grid/Eigen/Dense>
#include <Grid/qcd/spin/Dirac.h>
namespace Grid
{
namespace QCD
{
#include <Grid/Grid.h>
#include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/action/fermion/WilsonCloverFermion.h>
NAMESPACE_BEGIN(Grid);
// *NOT* EO
template <class Impl>
RealD WilsonCloverFermion<Impl>::M(const FermionField &in, FermionField &out)
{
FermionField temp(out._grid);
FermionField temp(out.Grid());
// Wilson term
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
this->Dhop(in, out, DaggerNo);
// Clover term
@ -55,10 +53,10 @@ RealD WilsonCloverFermion<Impl>::M(const FermionField &in, FermionField &out)
template <class Impl>
RealD WilsonCloverFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
{
FermionField temp(out._grid);
FermionField temp(out.Grid());
// Wilson term
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
this->Dhop(in, out, DaggerYes);
// Clover term
@ -72,7 +70,7 @@ template <class Impl>
void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
{
WilsonFermion<Impl>::ImportGauge(_Umu);
GridBase *grid = _Umu._grid;
GridBase *grid = _Umu.Grid();
typename Impl::GaugeLinkField Bx(grid), By(grid), Bz(grid), Ex(grid), Ey(grid), Ez(grid);
// Compute the field strength terms mu>nu
@ -93,27 +91,29 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
CloverTerm += fillCloverZT(Ez) * csw_t;
CloverTerm += diag_mass;
int lvol = _Umu._grid->lSites();
int lvol = _Umu.Grid()->lSites();
int DimRep = Impl::Dimension;
Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
std::vector<int> lcoor;
typename SiteCloverType::scalar_object Qx = zero, Qxinv = zero;
Coordinate lcoor;
typename SiteCloverType::scalar_object Qx = Zero(), Qxinv = Zero();
for (int site = 0; site < lvol; site++)
{
grid->LocalIndexToLocalCoor(site, lcoor);
EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
peekLocalSite(Qx, CloverTerm, lcoor);
Qxinv = zero;
Qxinv = Zero();
//if (csw!=0){
for (int j = 0; j < Ns; j++)
for (int k = 0; k < Ns; k++)
for (int a = 0; a < DimRep; a++)
for (int b = 0; b < DimRep; b++)
EigenCloverOp(a + j * DimRep, b + k * DimRep) = Qx()(j, k)(a, b);
for (int b = 0; b < DimRep; b++){
auto zz = Qx()(j, k)(a, b);
EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex<double>(zz);
}
// if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl;
EigenInvCloverOp = EigenCloverOp.inverse();
@ -169,15 +169,15 @@ void WilsonCloverFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField
template <class Impl>
void WilsonCloverFermion<Impl>::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv)
{
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
CloverFieldType *Clover;
assert(in.checkerboard == Odd || in.checkerboard == Even);
assert(in.Checkerboard() == Odd || in.Checkerboard() == Even);
if (dag)
{
if (in._grid->_isCheckerBoarded)
if (in.Grid()->_isCheckerBoarded)
{
if (in.checkerboard == Odd)
if (in.Checkerboard() == Odd)
{
Clover = (inv) ? &CloverTermInvDagOdd : &CloverTermDagOdd;
}
@ -195,10 +195,10 @@ void WilsonCloverFermion<Impl>::MooeeInternal(const FermionField &in, FermionFie
}
else
{
if (in._grid->_isCheckerBoarded)
if (in.Grid()->_isCheckerBoarded)
{
if (in.checkerboard == Odd)
if (in.Checkerboard() == Odd)
{
// std::cout << "Calling clover term Odd" << std::endl;
Clover = (inv) ? &CloverTermInvOdd : &CloverTermOdd;
@ -209,7 +209,7 @@ void WilsonCloverFermion<Impl>::MooeeInternal(const FermionField &in, FermionFie
Clover = (inv) ? &CloverTermInvEven : &CloverTermEven;
}
out = *Clover * in;
// std::cout << GridLogMessage << "*Clover.checkerboard " << (*Clover).checkerboard << std::endl;
// std::cout << GridLogMessage << "*Clover.Checkerboard() " << (*Clover).Checkerboard() << std::endl;
}
else
{
@ -235,9 +235,4 @@ void WilsonCloverFermion<Impl>::MeeDeriv(GaugeField &mat, const FermionField &U,
assert(0); // not implemented yet
}
FermOpTemplateInstantiate(WilsonCloverFermion);
AdjointFermOpTemplateInstantiate(WilsonCloverFermion);
TwoIndexFermOpTemplateInstantiate(WilsonCloverFermion);
//GparityFermOpTemplateInstantiate(WilsonCloverFermion);
}
}
NAMESPACE_END(Grid);

View File

@ -36,13 +36,8 @@ Author: Vera Guelpers <V.M.Guelpers@soton.ac.uk>
#include <Grid/qcd/action/fermion/WilsonFermion5D.h>
#include <Grid/perfmon/PerfCount.h>
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
// S-direction is INNERMOST and takes no part in the parity.
const std::vector<int> WilsonFermion5DStatic::directions ({1,2,3,4, 1, 2, 3, 4});
const std::vector<int> WilsonFermion5DStatic::displacements({1,1,1,1,-1,-1,-1,-1});
// 5d lattice for DWF.
template<class Impl>
WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
@ -56,9 +51,9 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
_FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
_FourDimGrid (&FourDimGrid),
_FourDimRedBlackGrid(&FourDimRedBlackGrid),
Stencil (_FiveDimGrid,npoint,Even,directions,displacements),
StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd
Stencil (_FiveDimGrid,npoint,Even,directions,displacements,p),
StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements,p), // source is Even
StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements,p), // source is Odd
M5(_M5),
Umu(_FourDimGrid),
UmuEven(_FourDimRedBlackGrid),
@ -105,8 +100,8 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
for(int d=0;d<4;d++){
assert(FourDimGrid._simd_layout[d]=1);
assert(FourDimRedBlackGrid._simd_layout[d]=1);
assert(FourDimGrid._simd_layout[d]==1);
assert(FourDimRedBlackGrid._simd_layout[d]==1);
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
}
@ -141,7 +136,7 @@ void WilsonFermion5D<Impl>::Report(void)
RealD NP = _FourDimGrid->_Nprocessors;
RealD NN = _FourDimGrid->NodeCount();
RealD volume = Ls;
std::vector<int> latt = _FourDimGrid->GlobalDimensions();
Coordinate latt = _FourDimGrid->GlobalDimensions();
for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
if ( DhopCalls > 0 ) {
@ -221,7 +216,7 @@ void WilsonFermion5D<Impl>::ZeroCounters(void) {
template<class Impl>
void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
{
GaugeField HUmu(_Umu._grid);
GaugeField HUmu(_Umu.Grid());
HUmu = _Umu*(-0.5);
Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
pickCheckerboard(Even,UmuEven,Umu);
@ -235,51 +230,43 @@ void WilsonFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,in
// assert( (disp==1)||(disp==-1) );
// assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t;
Compressor compressor(DaggerNo);
Stencil.HaloExchange(in,compressor);
int skip = (disp==1) ? 0 : 1;
int dirdisp = dir+skip*4;
int gamma = dir+(1-skip)*4;
assert(dirdisp<=7);
assert(dirdisp>=0);
Compressor compressor(DaggerNo);
Stencil.HaloExchange(in,compressor);
uint64_t Nsite = Umu.Grid()->oSites();
Kernels::DhopDirKernel(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out,dirdisp,gamma);
parallel_for(int ss=0;ss<Umu._grid->oSites();ss++){
for(int s=0;s<Ls;s++){
int sU=ss;
int sF = s+Ls*sU;
Kernels::DhopDir(Stencil,Umu,Stencil.CommBuf(),sF,sU,in,out,dirdisp,gamma);
}
}
};
template<class Impl>
void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
DoubledGaugeField & U,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
{
DerivCalls++;
assert((dag==DaggerNo) ||(dag==DaggerYes));
conformable(st._grid,A._grid);
conformable(st._grid,B._grid);
conformable(st.Grid(),A.Grid());
conformable(st.Grid(),B.Grid());
Compressor compressor(dag);
FermionField Btilde(B._grid);
FermionField Atilde(B._grid);
FermionField Btilde(B.Grid());
FermionField Atilde(B.Grid());
DerivCommTime-=usecond();
st.HaloExchange(B,compressor);
DerivCommTime+=usecond();
Atilde=A;
int LLs = B._grid->_rdimensions[0];
int LLs = B.Grid()->_rdimensions[0];
DerivComputeTime-=usecond();
@ -295,21 +282,11 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
////////////////////////
DerivDhopComputeTime -= usecond();
parallel_for (int sss = 0; sss < U._grid->oSites(); sss++) {
for (int s = 0; s < Ls; s++) {
int sU = sss;
int sF = s + Ls * sU;
assert(sF < B._grid->oSites());
assert(sU < U._grid->oSites());
int Usites = U.Grid()->oSites();
Kernels::DhopDir(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma);
Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, Usites, B, Btilde, mu,gamma);
////////////////////////////
// spin trace outer product
////////////////////////////
}
}
////////////////////////////
// spin trace outer product
////////////////////////////
@ -325,12 +302,13 @@ void WilsonFermion5D<Impl>::DhopDeriv(GaugeField &mat,
const FermionField &B,
int dag)
{
conformable(A._grid,FermionGrid());
conformable(A._grid,B._grid);
conformable(A.Grid(),FermionGrid());
conformable(A.Grid(),B.Grid());
//conformable(GaugeGrid(),mat._grid);// this is not general! leaving as a comment
//conformable(GaugeGrid(),mat.Grid());// this is not general! leaving as a comment
mat.checkerboard = A.checkerboard;
mat.Checkerboard() = A.Checkerboard();
// mat.checkerboard = A.checkerboard;
DerivInternal(Stencil,Umu,mat,A,B,dag);
}
@ -341,12 +319,12 @@ void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
const FermionField &B,
int dag)
{
conformable(A._grid,FermionRedBlackGrid());
conformable(A._grid,B._grid);
conformable(A.Grid(),FermionRedBlackGrid());
conformable(A.Grid(),B.Grid());
assert(B.checkerboard==Odd);
assert(A.checkerboard==Even);
mat.checkerboard = Even;
assert(B.Checkerboard()==Odd);
assert(A.Checkerboard()==Even);
mat.Checkerboard() = Even;
DerivInternal(StencilOdd,UmuEven,mat,A,B,dag);
}
@ -358,12 +336,12 @@ void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
const FermionField &B,
int dag)
{
conformable(A._grid,FermionRedBlackGrid());
conformable(A._grid,B._grid);
conformable(A.Grid(),FermionRedBlackGrid());
conformable(A.Grid(),B.Grid());
assert(B.checkerboard==Even);
assert(A.checkerboard==Odd);
mat.checkerboard = Odd;
assert(B.Checkerboard()==Even);
assert(A.Checkerboard()==Odd);
mat.Checkerboard() = Odd;
DerivInternal(StencilEven,UmuOdd,mat,A,B,dag);
}
@ -374,11 +352,9 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
const FermionField &in, FermionField &out,int dag)
{
DhopTotalTime-=usecond();
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
DhopInternalOverlappedComms(st,lo,U,in,out,dag);
else
#endif
DhopInternalSerialComms(st,lo,U,in,out,dag);
DhopTotalTime+=usecond();
}
@ -389,131 +365,84 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
DoubledGaugeField & U,
const FermionField &in, FermionField &out,int dag)
{
#ifdef GRID_OMP
// assert((dag==DaggerNo) ||(dag==DaggerYes));
Compressor compressor(dag);
int LLs = in._grid->_rdimensions[0];
int len = U._grid->oSites();
int LLs = in.Grid()->_rdimensions[0];
int len = U.Grid()->oSites();
/////////////////////////////
// Start comms // Gather intranode and extra node differentiated??
/////////////////////////////
DhopFaceTime-=usecond();
st.HaloExchangeOptGather(in,compressor);
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
DhopFaceTime+=usecond();
double ctime=0;
double ptime=0;
DhopCommTime -=usecond();
std::vector<std::vector<CommsRequest_t> > requests;
st.CommunicateBegin(requests);
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Ugly explicit thread mapping introduced for OPA reasons.
//////////////////////////////////////////////////////////////////////////////////////////////////////
#pragma omp parallel reduction(max:ctime) reduction(max:ptime)
{
int tid = omp_get_thread_num();
int nthreads = omp_get_num_threads();
int ncomms = CartesianCommunicator::nCommThreads;
if (ncomms == -1) ncomms = 1;
assert(nthreads > ncomms);
if (tid >= ncomms) {
double start = usecond();
nthreads -= ncomms;
int ttid = tid - ncomms;
int n = U._grid->oSites();
int chunk = n / nthreads;
int rem = n % nthreads;
int myblock, myn;
if (ttid < rem) {
myblock = ttid * chunk + ttid;
myn = chunk+1;
} else {
myblock = ttid*chunk + rem;
myn = chunk;
}
/////////////////////////////
// Overlap with comms
/////////////////////////////
DhopFaceTime-=usecond();
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
DhopFaceTime+=usecond();
// do the compute
if (dag == DaggerYes) {
for (int ss = myblock; ss < myblock+myn; ++ss) {
int sU = ss;
int sF = LLs * sU;
Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0);
}
} else {
for (int ss = myblock; ss < myblock+myn; ++ss) {
int sU = ss;
int sF = LLs * sU;
Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0);
}
}
ptime = usecond() - start;
} else {
double start = usecond();
st.CommunicateThreaded();
ctime = usecond() - start;
}
/////////////////////////////
// do the compute interior
/////////////////////////////
int Opt = WilsonKernelsStatic::Opt; // Why pass this. Kernels should know
DhopComputeTime-=usecond();
if (dag == DaggerYes) {
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
} else {
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
}
DhopCommTime += ctime;
DhopComputeTime+=ptime;
DhopComputeTime+=usecond();
// First to enter, last to leave timing
st.CollateThreads();
/////////////////////////////
// Complete comms
/////////////////////////////
st.CommunicateComplete(requests);
DhopCommTime +=usecond();
/////////////////////////////
// do the compute exterior
/////////////////////////////
DhopFaceTime-=usecond();
st.CommsMerge(compressor);
DhopFaceTime+=usecond();
DhopComputeTime2-=usecond();
if (dag == DaggerYes) {
int sz=st.surface_list.size();
parallel_for (int ss = 0; ss < sz; ss++) {
int sU = st.surface_list[ss];
int sF = LLs * sU;
Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
}
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
} else {
int sz=st.surface_list.size();
parallel_for (int ss = 0; ss < sz; ss++) {
int sU = st.surface_list[ss];
int sF = LLs * sU;
Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
}
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
}
DhopComputeTime2+=usecond();
#else
assert(0);
#endif
}
template<class Impl>
void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo,
DoubledGaugeField & U,
const FermionField &in, FermionField &out,int dag)
DoubledGaugeField & U,
const FermionField &in,
FermionField &out,int dag)
{
// assert((dag==DaggerNo) ||(dag==DaggerYes));
Compressor compressor(dag);
int LLs = in._grid->_rdimensions[0];
int LLs = in.Grid()->_rdimensions[0];
DhopCommTime-=usecond();
st.HaloExchangeOpt(in,compressor);
DhopCommTime+=usecond();
DhopComputeTime-=usecond();
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
int Opt = WilsonKernelsStatic::Opt;
if (dag == DaggerYes) {
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU = ss;
int sF = LLs * sU;
Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
}
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out);
} else {
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU = ss;
int sF = LLs * sU;
Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
}
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out);
}
DhopComputeTime+=usecond();
}
@ -523,11 +452,11 @@ template<class Impl>
void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
{
DhopCalls++;
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
conformable(in._grid,out._grid); // drops the cb check
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
conformable(in.Grid(),out.Grid()); // drops the cb check
assert(in.checkerboard==Even);
out.checkerboard = Odd;
assert(in.Checkerboard()==Even);
out.Checkerboard() = Odd;
DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,in,out,dag);
}
@ -535,11 +464,11 @@ template<class Impl>
void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
{
DhopCalls++;
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
conformable(in._grid,out._grid); // drops the cb check
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
conformable(in.Grid(),out.Grid()); // drops the cb check
assert(in.checkerboard==Odd);
out.checkerboard = Even;
assert(in.Checkerboard()==Odd);
out.Checkerboard() = Even;
DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,in,out,dag);
}
@ -547,17 +476,17 @@ template<class Impl>
void WilsonFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
{
DhopCalls+=2;
conformable(in._grid,FermionGrid()); // verifies full grid
conformable(in._grid,out._grid);
conformable(in.Grid(),FermionGrid()); // verifies full grid
conformable(in.Grid(),out.Grid());
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
DhopInternal(Stencil,Lebesgue,Umu,in,out,dag);
}
template<class Impl>
void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag)
{
out.checkerboard=in.checkerboard;
out.Checkerboard()=in.Checkerboard();
Dhop(in,out,dag); // -0.5 is included
axpy(out,4.0-M5,in,out);
}
@ -569,7 +498,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
GridBase *_grid = _FourDimGrid;
GridBase *_5dgrid = _FiveDimGrid;
conformable(_5dgrid,out._grid);
conformable(_5dgrid,out.Grid());
FermionField PRsource(_5dgrid);
FermionField PLsource(_5dgrid);
@ -580,7 +509,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
FermionField bufL_4d(_grid);
FermionField bufR_4d(_grid);
unsigned int Ls = in._grid->_rdimensions[0];
unsigned int Ls = in.Grid()->_rdimensions[0];
typedef typename FermionField::vector_type vector_type;
typedef typename FermionField::scalar_type ScalComplex;
@ -596,12 +525,12 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
Gamma g5(Gamma::Algebra::Gamma5);
std::vector<int> latt_size = _grid->_fdimensions;
Coordinate latt_size = _grid->_fdimensions;
LatComplex sk(_grid); sk = zero;
LatComplex sk2(_grid); sk2= zero;
LatComplex W(_grid); W= zero;
LatComplex a(_grid); a= zero;
LatComplex sk(_grid); sk = Zero();
LatComplex sk2(_grid); sk2= Zero();
LatComplex W(_grid); W= Zero();
LatComplex a(_grid); a= Zero();
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
LatComplex cosha(_grid);
LatComplex kmu(_grid);
@ -643,9 +572,9 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
// FIXME Need a Lattice acosh
for(int idx=0;idx<_grid->lSites();idx++){
std::vector<int> lcoor(Nd);
Coordinate lcoor(Nd);
Tcomplex cc;
RealD sgn;
// RealD sgn;
_grid->LocalIndexToLocalCoor(idx,lcoor);
peekLocalSite(cc,cosha,lcoor);
assert((double)real(cc)>=1.0);
@ -678,8 +607,8 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
//calculate GR, GL
for(unsigned int ss=1;ss<=Ls;ss++)
{
bufR_4d = zero;
bufL_4d = zero;
bufR_4d = Zero();
bufL_4d = Zero();
for(unsigned int tt=1;tt<=Ls;tt++)
{
//possible sign if W<0
@ -688,7 +617,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
unsigned int f = (ss > tt) ? ss-tt : tt-ss; //f = abs(ss-tt)
//GR
buf1_4d = zero;
buf1_4d = Zero();
ExtractSlice(buf1_4d, PRsource, (tt-1), 0);
//G(s,t)
bufR_4d = bufR_4d + A * exp(a*Ls) * exp(-a*f) * signW * buf1_4d + A * exp(-a*Ls) * exp(a*f) * signW * buf1_4d;
@ -702,7 +631,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
bufR_4d = bufR_4d + Amm * exp(-a*ss) * exp(-a*tt) * signW * buf1_4d ;
//GL
buf2_4d = zero;
buf2_4d = Zero();
ExtractSlice(buf2_4d, PLsource, (tt-1), 0);
//G(s,t)
bufL_4d = bufL_4d + A * exp(a*Ls) * exp(-a*f) * signW * buf2_4d + A * exp(-a*Ls) * exp(a*f) * signW * buf2_4d;
@ -722,13 +651,13 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
//calculate propagator
for(unsigned int ss=1;ss<=Ls;ss++)
{
bufR_4d = zero;
bufL_4d = zero;
bufR_4d = Zero();
bufL_4d = Zero();
//(i*gamma_mu*sin(p_mu) - W)*(GL*P- source)
buf1_4d = zero;
buf1_4d = Zero();
ExtractSlice(buf1_4d, GL, (ss-1), 0);
buf2_4d = zero;
buf2_4d = Zero();
for(int mu=0;mu<Nd;mu++) {
LatticeCoordinate(kmu,mu);
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
@ -738,9 +667,9 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
bufL_4d = buf2_4d - W * buf1_4d;
//(i*gamma_mu*sin(p_mu) - W)*(GR*P+ source)
buf1_4d = zero;
buf1_4d = Zero();
ExtractSlice(buf1_4d, GR, (ss-1), 0);
buf2_4d = zero;
buf2_4d = Zero();
for(int mu=0;mu<Nd;mu++) {
LatticeCoordinate(kmu,mu);
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
@ -781,7 +710,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const Fe
{
// what type LatticeComplex
GridBase *_grid = _FourDimGrid;
conformable(_grid,out._grid);
conformable(_grid,out.Grid());
typedef typename FermionField::vector_type vector_type;
typedef typename FermionField::scalar_type ScalComplex;
@ -795,17 +724,17 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const Fe
Gamma::Algebra::GammaT
};
std::vector<int> latt_size = _grid->_fdimensions;
Coordinate latt_size = _grid->_fdimensions;
FermionField num (_grid); num = zero;
FermionField num (_grid); num = Zero();
LatComplex sk(_grid); sk = zero;
LatComplex sk2(_grid); sk2= zero;
LatComplex W(_grid); W= zero;
LatComplex a(_grid); a= zero;
LatComplex sk(_grid); sk = Zero();
LatComplex sk2(_grid); sk2= Zero();
LatComplex W(_grid); W= Zero();
LatComplex a(_grid); a= Zero();
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
LatComplex denom(_grid); denom= zero;
LatComplex denom(_grid); denom= Zero();
LatComplex cosha(_grid);
LatComplex kmu(_grid);
LatComplex Wea(_grid);
@ -838,9 +767,9 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const Fe
// FIXME Need a Lattice acosh
for(int idx=0;idx<_grid->lSites();idx++){
std::vector<int> lcoor(Nd);
Coordinate lcoor(Nd);
Tcomplex cc;
RealD sgn;
// RealD sgn;
_grid->LocalIndexToLocalCoor(idx,lcoor);
peekLocalSite(cc,cosha,lcoor);
assert((double)real(cc)>=1.0);
@ -868,7 +797,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const Fe
};
GridBase *_grid = _FourDimGrid;
conformable(_grid,out._grid);
conformable(_grid,out.Grid());
typedef typename FermionField::vector_type vector_type;
typedef typename FermionField::scalar_type ScalComplex;
@ -876,18 +805,18 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const Fe
typedef Lattice<iSinglet<vector_type> > LatComplex;
std::vector<int> latt_size = _grid->_fdimensions;
Coordinate latt_size = _grid->_fdimensions;
LatComplex sk(_grid); sk = zero;
LatComplex sk2(_grid); sk2= zero;
LatComplex sk(_grid); sk = Zero();
LatComplex sk2(_grid); sk2= Zero();
LatComplex w_k(_grid); w_k= zero;
LatComplex b_k(_grid); b_k= zero;
LatComplex w_k(_grid); w_k= Zero();
LatComplex b_k(_grid); b_k= Zero();
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
FermionField num (_grid); num = zero;
LatComplex denom(_grid); denom= zero;
FermionField num (_grid); num = Zero();
LatComplex denom(_grid); denom= Zero();
LatComplex kmu(_grid);
ScalComplex ci(0.0,1.0);
@ -928,7 +857,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const Fe
// Helper macro to reverse Simd vector. Fixme: slow, generic implementation.
#define REVERSE_LS(qSite, qSiteRev, Nsimd) \
{ \
std::vector<typename SitePropagator::scalar_object> qSiteVec(Nsimd); \
ExtractBuffer<typename SitePropagator::scalar_object> qSiteVec(Nsimd); \
extract(qSite, qSiteVec); \
for (int i = 0; i < Nsimd / 2; ++i) \
{ \
@ -946,31 +875,35 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const Fe
template<class vobj>
Lattice<vobj> spProj5p(const Lattice<vobj> & in)
{
GridBase *grid=in._grid;
GridBase *grid=in.Grid();
Gamma G5(Gamma::Algebra::Gamma5);
Lattice<vobj> ret(grid);
parallel_for(int ss=0;ss<grid->oSites();ss++){
ret._odata[ss] = in._odata[ss] + G5*in._odata[ss];
}
auto ret_v = ret.View();
auto in_v = in.View();
thread_for(ss,grid->oSites(),{
ret_v[ss] = in_v[ss] + G5*in_v[ss];
});
return ret;
}
template<class vobj>
Lattice<vobj> spProj5m(const Lattice<vobj> & in)
{
Gamma G5(Gamma::Algebra::Gamma5);
GridBase *grid=in._grid;
GridBase *grid=in.Grid();
Lattice<vobj> ret(grid);
parallel_for(int ss=0;ss<grid->oSites();ss++){
ret._odata[ss] = in._odata[ss] - G5*in._odata[ss];
}
auto ret_v = ret.View();
auto in_v = in.View();
thread_for(ss,grid->oSites(),{
ret_v[ss] = in_v[ss] - G5*in_v[ss];
});
return ret;
}
template <class Impl>
void WilsonFermion5D<Impl>::ContractJ5q(FermionField &q_in,ComplexField &J5q)
{
conformable(GaugeGrid(), J5q._grid);
conformable(q_in._grid, FermionGrid());
conformable(GaugeGrid(), J5q.Grid());
conformable(q_in.Grid(), FermionGrid());
// 4d field
int Ls = this->Ls;
@ -990,8 +923,8 @@ void WilsonFermion5D<Impl>::ContractJ5q(FermionField &q_in,ComplexField &J5q)
template <class Impl>
void WilsonFermion5D<Impl>::ContractJ5q(PropagatorField &q_in,ComplexField &J5q)
{
conformable(GaugeGrid(), J5q._grid);
conformable(q_in._grid, FermionGrid());
conformable(GaugeGrid(), J5q.Grid());
conformable(q_in.Grid(), FermionGrid());
// 4d field
int Ls = this->Ls;
@ -1015,20 +948,26 @@ void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
Current curr_type,
unsigned int mu)
{
conformable(q_in_1._grid, FermionGrid());
conformable(q_in_1._grid, q_in_2._grid);
conformable(_FourDimGrid, q_out._grid);
conformable(q_in_1.Grid(), FermionGrid());
conformable(q_in_1.Grid(), q_in_2.Grid());
conformable(_FourDimGrid, q_out.Grid());
PropagatorField tmp1(FermionGrid()), tmp2(FermionGrid());
unsigned int LLs = q_in_1._grid->_rdimensions[0];
q_out = zero;
unsigned int LLs = q_in_1.Grid()->_rdimensions[0];
q_out = Zero();
// Forward, need q1(x + mu, s), q2(x, Ls - 1 - s). Backward, need q1(x, s),
// q2(x + mu, Ls - 1 - s). 5D lattice so shift 4D coordinate mu by one.
tmp1 = Cshift(q_in_1, mu + 1, 1);
tmp2 = Cshift(q_in_2, mu + 1, 1);
parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU)
{
auto q_in_1_v = q_in_1.View();
auto q_in_2_v = q_in_2.View();
auto tmp1_v = tmp1.View();
auto tmp2_v = tmp2.View();
auto q_out_v = q_out.View();
auto Umu_v = Umu.View();
thread_for(sU, Umu.Grid()->oSites(),{
unsigned int sF1 = sU * LLs;
unsigned int sF2 = (sU + 1) * LLs - 1;
@ -1042,26 +981,26 @@ void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
// sites correctly.
if (Impl::LsVectorised)
{
REVERSE_LS(q_in_2._odata[sF2], qSite2, Ls / LLs);
REVERSE_LS(tmp2._odata[sF2], qmuSite2, Ls / LLs);
REVERSE_LS(q_in_2_v[sF2], qSite2, Ls / LLs);
REVERSE_LS(tmp2_v[sF2], qmuSite2, Ls / LLs);
}
else
{
qSite2 = q_in_2._odata[sF2];
qmuSite2 = tmp2._odata[sF2];
qSite2 = q_in_2_v[sF2];
qmuSite2 = tmp2_v[sF2];
}
Kernels::ContractConservedCurrentSiteFwd(tmp1._odata[sF1],
Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sF1],
qSite2,
q_out._odata[sU],
Umu, sU, mu, axial_sign);
Kernels::ContractConservedCurrentSiteBwd(q_in_1._odata[sF1],
q_out_v[sU],
Umu_v, sU, mu, axial_sign);
Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sF1],
qmuSite2,
q_out._odata[sU],
Umu, sU, mu, axial_sign);
q_out_v[sU],
Umu_v, sU, mu, axial_sign);
sF1++;
sF2--;
}
}
});
}
@ -1074,18 +1013,21 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
unsigned int tmax,
ComplexField &lattice_cmplx)
{
conformable(q_in._grid, FermionGrid());
conformable(q_in._grid, q_out._grid);
conformable(q_in.Grid(), FermionGrid());
conformable(q_in.Grid(), q_out.Grid());
PropagatorField tmp(GaugeGrid()),tmp2(GaugeGrid());
unsigned int tshift = (mu == Tp) ? 1 : 0;
unsigned int LLs = q_in._grid->_rdimensions[0];
unsigned int LLs = q_in.Grid()->_rdimensions[0];
unsigned int LLt = GridDefaultLatt()[Tp];
q_out = zero;
q_out = Zero();
LatticeInteger coords(_FourDimGrid);
LatticeCoordinate(coords, Tp);
auto q_out_v = q_out.View();
auto tmp2_v = tmp2.View();
auto coords_v= coords.View();
auto Umu_v = Umu.View();
for (unsigned int s = 0; s < LLs; ++s)
{
bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2)));
@ -1098,59 +1040,51 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
tmp = Cshift(tmp2, mu, 1); //q(x+mu,s)
tmp2 = tmp*lattice_cmplx; //q(x+mu,s)*A(x)
parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU)
{
thread_for(sU, Umu.Grid()->oSites(),{
// Compute the sequential conserved current insertion only if our simd
// object contains a timeslice we need.
vInteger t_mask = ((coords._odata[sU] >= tmin) &&
(coords._odata[sU] <= tmax));
Integer timeSlices = Reduce(t_mask);
vPredicate t_mask;
t_mask() = ((coords_v[sU] >= tmin) && (coords_v[sU] <= tmax));
Integer timeSlices = Reduce(t_mask());
if (timeSlices > 0)
{
unsigned int sF = sU * LLs + s;
Kernels::SeqConservedCurrentSiteFwd(tmp2._odata[sU],
q_out._odata[sF], Umu, sU,
mu, t_mask, switch_sgn);
Kernels::SeqConservedCurrentSiteFwd(tmp2_v[sU],
q_out_v[sF], Umu_v, sU,
mu, t_mask, switch_sgn);
}
}
});
//backward direction: Need q(x - mu, s)*A(x-mu)
ExtractSlice(tmp2, q_in, s, 0); //q(x,s)
tmp = lattice_cmplx*tmp2; //q(x,s)*A(x)
tmp2 = Cshift(tmp, mu, -1); //q(x-mu,s)*A(x-mu,s)
parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU)
thread_for(sU, Umu.Grid()->oSites(),
{
vInteger t_mask = ((coords._odata[sU] >= (tmin + tshift)) &&
(coords._odata[sU] <= (tmax + tshift)));
vPredicate t_mask;
t_mask()= ((coords_v[sU] >= (tmin + tshift)) && (coords_v[sU] <= (tmax + tshift)));
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
unsigned int t0 = 0;
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords._odata[sU] == t0 ));
Integer timeSlices = Reduce(t_mask);
if (timeSlices > 0)
{
unsigned int sF = sU * LLs + s;
Kernels::SeqConservedCurrentSiteBwd(tmp2._odata[sU],
q_out._odata[sF], Umu, sU,
mu, t_mask, axial_sign);
}
}
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
unsigned int t0 = 0;
if((tmax==LLt-1) && (tshift==1)) t_mask() = (t_mask() || (coords_v[sU] == t0 ));
Integer timeSlices = Reduce(t_mask());
if (timeSlices > 0) {
unsigned int sF = sU * LLs + s;
Kernels::SeqConservedCurrentSiteBwd(tmp2_v[sU],
q_out_v[sF], Umu_v, sU,
mu, t_mask, axial_sign);
}
});
}
}
FermOpTemplateInstantiate(WilsonFermion5D);
GparityFermOpTemplateInstantiate(WilsonFermion5D);
}}
NAMESPACE_END(Grid);

View File

@ -1,4 +1,3 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -29,16 +28,11 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/WilsonFermion.h>
namespace Grid {
namespace QCD {
const std::vector<int> WilsonFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3});
const std::vector<int> WilsonFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1});
int WilsonFermionStatic::HandOptDslash;
NAMESPACE_BEGIN(Grid);
/////////////////////////////////
// Constructor and gauge import
@ -49,18 +43,19 @@ WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
GridRedBlackCartesian &Hgrid, RealD _mass,
const ImplParams &p,
const WilsonAnisotropyCoefficients &anis)
: Kernels(p),
_grid(&Fgrid),
_cbgrid(&Hgrid),
Stencil(&Fgrid, npoint, Even, directions, displacements),
StencilEven(&Hgrid, npoint, Even, directions,displacements), // source is Even
StencilOdd(&Hgrid, npoint, Odd, directions,displacements), // source is Odd
mass(_mass),
Lebesgue(_grid),
LebesgueEvenOdd(_cbgrid),
Umu(&Fgrid),
UmuEven(&Hgrid),
UmuOdd(&Hgrid),
:
Kernels(p),
_grid(&Fgrid),
_cbgrid(&Hgrid),
Stencil(&Fgrid, npoint, Even, directions, displacements,p),
StencilEven(&Hgrid, npoint, Even, directions,displacements,p), // source is Even
StencilOdd(&Hgrid, npoint, Odd, directions,displacements,p), // source is Odd
mass(_mass),
Lebesgue(_grid),
LebesgueEvenOdd(_cbgrid),
Umu(&Fgrid),
UmuEven(&Hgrid),
UmuOdd(&Hgrid),
_tmp(&Hgrid),
anisotropyCoeff(anis)
{
@ -76,8 +71,9 @@ WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
}
template <class Impl>
void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu) {
GaugeField HUmu(_Umu._grid);
void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu)
{
GaugeField HUmu(_Umu.Grid());
//Here multiply the anisotropy coefficients
if (anisotropyCoeff.isAnisotropic)
@ -107,21 +103,21 @@ void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu) {
template <class Impl>
RealD WilsonFermion<Impl>::M(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
Dhop(in, out, DaggerNo);
return axpy_norm(out, diag_mass, in, out);
}
template <class Impl>
RealD WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
Dhop(in, out, DaggerYes);
return axpy_norm(out, diag_mass, in, out);
}
template <class Impl>
void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
if (in.Checkerboard() == Odd) {
DhopEO(in, out, DaggerNo);
} else {
DhopOE(in, out, DaggerNo);
@ -130,7 +126,7 @@ void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
template <class Impl>
void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
if (in.Checkerboard() == Odd) {
DhopEO(in, out, DaggerYes);
} else {
DhopOE(in, out, DaggerYes);
@ -139,26 +135,26 @@ void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
template <class Impl>
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
typename FermionField::scalar_type scal(diag_mass);
out = scal * in;
}
template <class Impl>
void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
Mooee(in, out);
}
template<class Impl>
void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
out = (1.0/(diag_mass))*in;
}
template<class Impl>
void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
MooeeInv(in,out);
}
template<class Impl>
@ -169,7 +165,7 @@ void WilsonFermion<Impl>::MomentumSpacePropagator(FermionField &out, const Fermi
typedef Lattice<iSinglet<vector_type> > LatComplex;
// what type LatticeComplex
conformable(_grid,out._grid);
conformable(_grid,out.Grid());
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
@ -178,13 +174,13 @@ void WilsonFermion<Impl>::MomentumSpacePropagator(FermionField &out, const Fermi
Gamma::Algebra::GammaT
};
std::vector<int> latt_size = _grid->_fdimensions;
Coordinate latt_size = _grid->_fdimensions;
FermionField num (_grid); num = zero;
LatComplex wilson(_grid); wilson= zero;
FermionField num (_grid); num = Zero();
LatComplex wilson(_grid); wilson= Zero();
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
LatComplex denom(_grid); denom= zero;
LatComplex denom(_grid); denom= Zero();
LatComplex kmu(_grid);
ScalComplex ci(0.0,1.0);
// momphase = n * 2pi / L
@ -229,9 +225,9 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
Compressor compressor(dag);
FermionField Btilde(B._grid);
FermionField Atilde(B._grid);
Atilde = A;//redundant
FermionField Btilde(B.Grid());
FermionField Atilde(B.Grid());
Atilde = A;
st.HaloExchange(B, compressor);
@ -242,12 +238,8 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
int gamma = mu;
if (!dag) gamma += Nd;
////////////////////////
// Call the single hop
////////////////////////
parallel_for (int sss = 0; sss < B._grid->oSites(); sss++) {
Kernels::DhopDir(st, U, st.CommBuf(), sss, sss, B, Btilde, mu, gamma);
}
int Ls=1;
Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, B.Grid()->oSites(), B, Btilde, mu, gamma);
//////////////////////////////////////////////////
// spin trace outer product
@ -258,70 +250,70 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
template <class Impl>
void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
conformable(U._grid, _grid);
conformable(U._grid, V._grid);
conformable(U._grid, mat._grid);
conformable(U.Grid(), _grid);
conformable(U.Grid(), V.Grid());
conformable(U.Grid(), mat.Grid());
mat.checkerboard = U.checkerboard;
mat.Checkerboard() = U.Checkerboard();
DerivInternal(Stencil, Umu, mat, U, V, dag);
}
template <class Impl>
void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
conformable(U._grid, _cbgrid);
conformable(U._grid, V._grid);
//conformable(U._grid, mat._grid); not general, leaving as a comment (Guido)
conformable(U.Grid(), _cbgrid);
conformable(U.Grid(), V.Grid());
//conformable(U.Grid(), mat.Grid()); not general, leaving as a comment (Guido)
// Motivation: look at the SchurDiff operator
assert(V.checkerboard == Even);
assert(U.checkerboard == Odd);
mat.checkerboard = Odd;
assert(V.Checkerboard() == Even);
assert(U.Checkerboard() == Odd);
mat.Checkerboard() = Odd;
DerivInternal(StencilEven, UmuOdd, mat, U, V, dag);
}
template <class Impl>
void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
conformable(U._grid, _cbgrid);
conformable(U._grid, V._grid);
//conformable(U._grid, mat._grid);
conformable(U.Grid(), _cbgrid);
conformable(U.Grid(), V.Grid());
//conformable(U.Grid(), mat.Grid());
assert(V.checkerboard == Odd);
assert(U.checkerboard == Even);
mat.checkerboard = Even;
assert(V.Checkerboard() == Odd);
assert(U.Checkerboard() == Even);
mat.Checkerboard() = Even;
DerivInternal(StencilOdd, UmuEven, mat, U, V, dag);
}
template <class Impl>
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag) {
conformable(in._grid, _grid); // verifies full grid
conformable(in._grid, out._grid);
conformable(in.Grid(), _grid); // verifies full grid
conformable(in.Grid(), out.Grid());
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
DhopInternal(Stencil, Lebesgue, Umu, in, out, dag);
}
template <class Impl>
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag) {
conformable(in._grid, _cbgrid); // verifies half grid
conformable(in._grid, out._grid); // drops the cb check
conformable(in.Grid(), _cbgrid); // verifies half grid
conformable(in.Grid(), out.Grid()); // drops the cb check
assert(in.checkerboard == Even);
out.checkerboard = Odd;
assert(in.Checkerboard() == Even);
out.Checkerboard() = Odd;
DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag);
}
template <class Impl>
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag) {
conformable(in._grid, _cbgrid); // verifies half grid
conformable(in._grid, out._grid); // drops the cb check
conformable(in.Grid(), _cbgrid); // verifies half grid
conformable(in.Grid(), out.Grid()); // drops the cb check
assert(in.checkerboard == Odd);
out.checkerboard = Even;
assert(in.Checkerboard() == Odd);
out.Checkerboard() = Even;
DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag);
}
@ -332,7 +324,8 @@ void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int di
}
template <class Impl>
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) {
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)
{
int skip = (disp == 1) ? 0 : 1;
int dirdisp = dir + skip * 4;
int gamma = dir + (1 - skip) * 4;
@ -341,16 +334,16 @@ void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int
};
template <class Impl>
void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag) {
void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag)
{
Compressor compressor(dag);
Stencil.HaloExchange(in, compressor);
int Ls=1;
int Nsite=in.oSites();
Kernels::DhopDirKernel(Stencil, Umu, Stencil.CommBuf(), Ls, Nsite, in, out, dirdisp, gamma);
};
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopDir(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out, dirdisp, gamma);
}
}
/*Change starts*/
template <class Impl>
void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
@ -367,71 +360,51 @@ void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
template <class Impl>
void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out, int dag) {
DoubledGaugeField &U,
const FermionField &in,
FermionField &out, int dag) {
assert((dag == DaggerNo) || (dag == DaggerYes));
#ifdef GRID_OMP
Compressor compressor;
int len = U._grid->oSites();
const int LLs = 1;
st.Prepare();
st.HaloGather(in,compressor);
st.CommsMergeSHM(compressor);
#pragma omp parallel
{
int tid = omp_get_thread_num();
int nthreads = omp_get_num_threads();
int ncomms = CartesianCommunicator::nCommThreads;
if (ncomms == -1) ncomms = 1;
assert(nthreads > ncomms);
if (tid >= ncomms) {
nthreads -= ncomms;
int ttid = tid - ncomms;
int n = len;
int chunk = n / nthreads;
int rem = n % nthreads;
int myblock, myn;
if (ttid < rem) {
myblock = ttid * chunk + ttid;
myn = chunk+1;
} else {
myblock = ttid*chunk + rem;
myn = chunk;
}
// do the compute
if (dag == DaggerYes) {
for (int sss = myblock; sss < myblock+myn; ++sss) {
Kernels::DhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
}
} else {
for (int sss = myblock; sss < myblock+myn; ++sss) {
Kernels::DhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
}
} //else
} else {
st.CommunicateThreaded();
}
Compressor compressor(dag);
int len = U.Grid()->oSites();
/////////////////////////////
// Start comms // Gather intranode and extra node differentiated??
/////////////////////////////
std::vector<std::vector<CommsRequest_t> > requests;
st.Prepare();
st.HaloGather(in,compressor);
st.CommunicateBegin(requests);
/////////////////////////////
// Overlap with comms
/////////////////////////////
st.CommsMergeSHM(compressor);
/////////////////////////////
// do the compute interior
/////////////////////////////
int Opt = WilsonKernelsStatic::Opt;
if (dag == DaggerYes) {
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
}
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
} else {
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
}
}
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
}
} //pragma
#else
assert(0);
#endif
/////////////////////////////
// Complete comms
/////////////////////////////
st.CommunicateComplete(requests);
st.CommsMerge(compressor);
/////////////////////////////
// do the compute exterior
/////////////////////////////
if (dag == DaggerYes) {
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
} else {
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
}
};
@ -444,14 +417,11 @@ void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo,
Compressor compressor(dag);
st.HaloExchange(in, compressor);
int Opt = WilsonKernelsStatic::Opt;
if (dag == DaggerYes) {
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
}
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
} else {
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
}
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
}
};
/*Change ends */
@ -468,28 +438,33 @@ void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
Current curr_type,
unsigned int mu)
{
Gamma g5(Gamma::Algebra::Gamma5);
conformable(_grid, q_in_1._grid);
conformable(_grid, q_in_2._grid);
conformable(_grid, q_out._grid);
PropagatorField tmp1(_grid), tmp2(_grid);
q_out = zero;
Gamma g5(Gamma::Algebra::Gamma5);
conformable(_grid, q_in_1.Grid());
conformable(_grid, q_in_2.Grid());
conformable(_grid, q_out.Grid());
PropagatorField tmp1(_grid), tmp2(_grid);
q_out = Zero();
// Forward, need q1(x + mu), q2(x). Backward, need q1(x), q2(x + mu).
// Inefficient comms method but not performance critical.
tmp1 = Cshift(q_in_1, mu, 1);
tmp2 = Cshift(q_in_2, mu, 1);
parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU)
{
Kernels::ContractConservedCurrentSiteFwd(tmp1._odata[sU],
q_in_2._odata[sU],
q_out._odata[sU],
Umu, sU, mu);
Kernels::ContractConservedCurrentSiteBwd(q_in_1._odata[sU],
tmp2._odata[sU],
q_out._odata[sU],
Umu, sU, mu);
}
// Forward, need q1(x + mu), q2(x). Backward, need q1(x), q2(x + mu).
// Inefficient comms method but not performance critical.
tmp1 = Cshift(q_in_1, mu, 1);
tmp2 = Cshift(q_in_2, mu, 1);
auto tmp1_v = tmp1.View();
auto tmp2_v = tmp2.View();
auto q_in_1_v=q_in_1.View();
auto q_in_2_v=q_in_2.View();
auto q_out_v = q_out.View();
auto Umu_v = Umu.View();
thread_for(sU, Umu.Grid()->oSites(),{
Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sU],
q_in_2_v[sU],
q_out_v[sU],
Umu_v, sU, mu);
Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sU],
tmp2_v[sU],
q_out_v[sU],
Umu_v, sU, mu);
});
}
@ -502,61 +477,61 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
unsigned int tmax,
ComplexField &lattice_cmplx)
{
conformable(_grid, q_in._grid);
conformable(_grid, q_out._grid);
PropagatorField tmpFwd(_grid), tmpBwd(_grid), tmp(_grid);
unsigned int tshift = (mu == Tp) ? 1 : 0;
unsigned int LLt = GridDefaultLatt()[Tp];
conformable(_grid, q_in.Grid());
conformable(_grid, q_out.Grid());
q_out = zero;
LatticeInteger coords(_grid);
LatticeCoordinate(coords, Tp);
// Lattice<iSinglet<Simd>> ph(_grid), coor(_grid);
Complex i(0.0,1.0);
PropagatorField tmpFwd(_grid), tmpBwd(_grid), tmp(_grid);
unsigned int tshift = (mu == Tp) ? 1 : 0;
unsigned int LLt = GridDefaultLatt()[Tp];
// Need q(x + mu) and q(x - mu).
tmp = Cshift(q_in, mu, 1);
tmpFwd = tmp*lattice_cmplx;
tmp = lattice_cmplx*q_in;
tmpBwd = Cshift(tmp, mu, -1);
q_out = Zero();
LatticeInteger coords(_grid);
LatticeCoordinate(coords, Tp);
parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU)
{
// Compute the sequential conserved current insertion only if our simd
// object contains a timeslice we need.
vInteger t_mask = ((coords._odata[sU] >= tmin) &&
(coords._odata[sU] <= tmax));
Integer timeSlices = Reduce(t_mask);
// Need q(x + mu) and q(x - mu).
tmp = Cshift(q_in, mu, 1);
tmpFwd = tmp*lattice_cmplx;
tmp = lattice_cmplx*q_in;
tmpBwd = Cshift(tmp, mu, -1);
if (timeSlices > 0)
{
Kernels::SeqConservedCurrentSiteFwd(tmpFwd._odata[sU],
q_out._odata[sU],
Umu, sU, mu, t_mask);
}
auto coords_v = coords.View();
auto tmpFwd_v = tmpFwd.View();
auto tmpBwd_v = tmpBwd.View();
auto Umu_v = Umu.View();
auto q_out_v = q_out.View();
// Repeat for backward direction.
t_mask = ((coords._odata[sU] >= (tmin + tshift)) &&
(coords._odata[sU] <= (tmax + tshift)));
thread_for(sU, Umu.Grid()->oSites(), {
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
unsigned int t0 = 0;
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords._odata[sU] == t0 ));
// Compute the sequential conserved current insertion only if our simd
// object contains a timeslice we need.
vPredicate t_mask;
t_mask() = ((coords_v[sU] >= tmin) && (coords_v[sU] <= tmax));
Integer timeSlices = Reduce(t_mask());
timeSlices = Reduce(t_mask);
if (timeSlices > 0)
{
Kernels::SeqConservedCurrentSiteBwd(tmpBwd._odata[sU],
q_out._odata[sU],
Umu, sU, mu, t_mask);
}
if (timeSlices > 0) {
Kernels::SeqConservedCurrentSiteFwd(tmpFwd_v[sU],
q_out_v[sU],
Umu_v, sU, mu, t_mask);
}
// Repeat for backward direction.
t_mask() = ((coords_v[sU] >= (tmin + tshift)) &&
(coords_v[sU] <= (tmax + tshift)));
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
unsigned int t0 = 0;
if((tmax==LLt-1) && (tshift==1)) t_mask() = (t_mask() || (coords_v[sU] == t0 ));
timeSlices = Reduce(t_mask());
if (timeSlices > 0) {
Kernels::SeqConservedCurrentSiteBwd(tmpBwd_v[sU],
q_out_v[sU],
Umu_v, sU, mu, t_mask);
}
});
}
FermOpTemplateInstantiate(WilsonFermion);
AdjointFermOpTemplateInstantiate(WilsonFermion);
TwoIndexFermOpTemplateInstantiate(WilsonFermion);
GparityFermOpTemplateInstantiate(WilsonFermion);
}
}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,716 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernelsAsmAvx512.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#if defined(AVX512)
///////////////////////////////////////////////////////////
// If we are AVX512 specialise the single precision routine
///////////////////////////////////////////////////////////
#include <simd/Intel512wilson.h>
#include <simd/Intel512single.h>
/// Switch off the 5d vectorised code optimisations
#undef DWFVEC5D
static Vector<vComplexF> signsF;
template<typename vtype>
int setupSigns(Vector<vtype>& signs ){
Vector<vtype> bother(2);
signs = bother;
vrsign(signs[0]);
visign(signs[1]);
return 1;
}
static int signInitF = setupSigns(signsF);
#define MAYBEPERM(A,perm) if (perm) { A ; }
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
#define COMPLEX_SIGNS(isigns) vComplexF *isigns = &signsF[0];
/////////////////////////////////////////////////////////////////
// XYZT vectorised, undag Kernel, single
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef MAYBEPERM
#undef MULT_2SPIN
#define MAYBEPERM(A,B)
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
/////////////////////////////////////////////////////////////////
// Ls vectorised, undag Kernel, single
/////////////////////////////////////////////////////////////////
#ifdef DWFVEC5D
#undef KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
#undef MULT_2SPIN
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LSNOPF(ptr,pf)
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// Ls vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif // VEC 5D
#undef COMPLEX_SIGNS
#undef MAYBEPERM
#undef MULT_2SPIN
///////////////////////////////////////////////////////////
// If we are AVX512 specialise the double precision routine
///////////////////////////////////////////////////////////
#include <simd/Intel512double.h>
static Vector<vComplexD> signsD;
static int signInitD = setupSigns(signsD);
#define MAYBEPERM(A,perm) if (perm) { A ; }
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
#define COMPLEX_SIGNS(isigns) vComplexD *isigns = &signsD[0];
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
/////////////////////////////////////////////////////////////////
// XYZT vectorised, undag Kernel, single
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef MAYBEPERM
#undef MULT_2SPIN
#define MAYBEPERM(A,B)
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
/////////////////////////////////////////////////////////////////
// Ls vectorised, undag Kernel, single
/////////////////////////////////////////////////////////////////
#ifdef DWFVEC5D
#undef KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
#undef MULT_2SPIN
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LSNOPF(ptr,pf)
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// Ls vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif // VEC 5D
#undef COMPLEX_SIGNS
#undef MAYBEPERM
#undef MULT_2SPIN
#undef Chi_00
#undef Chi_01
#undef Chi_02
#undef Chi_10
#undef Chi_11
#undef Chi_12
#undef Chi_20
#undef Chi_21
#undef Chi_22
#undef Chi_30
#undef Chi_31
#undef Chi_32
#undef UChi_00
#undef UChi_01
#undef UChi_02
#undef UChi_10
#undef UChi_11
#undef UChi_12
#undef UChi_20
#undef UChi_21
#undef UChi_22
#undef UChi_30
#undef UChi_31
#undef UChi_32
#undef Psi_00
#undef Psi_01
#undef Psi_02
#undef Psi_10
#undef Psi_11
#undef Psi_12
#undef Psi_20
#undef Psi_21
#undef Psi_22
#undef Psi_30
#undef Psi_31
#undef Psi_32
#undef Phi_00
#undef Phi_01
#undef Phi_02
#undef Phi_10
#undef Phi_11
#undef Phi_12
#undef Phi_20
#undef Phi_21
#undef Phi_22
#undef Phi_30
#undef Phi_31
#undef Phi_32
#endif //AVX512

View File

@ -130,16 +130,18 @@
int local,perm, ptype;
uint64_t base;
uint64_t basep;
const uint64_t plocal =(uint64_t) & in._odata[0];
const uint64_t plocal =(uint64_t) & in[0];
COMPLEX_SIGNS(isigns);
MASK_REGS;
int nmax=U._grid->oSites();
int nmax=U.oSites();
for(int site=0;site<Ns;site++) {
#ifndef EXTERIOR
int sU =lo.Reorder(ssU);
// int sU =lo.Reorder(ssU);
int sU =ssU;
int ssn=ssU+1; if(ssn>=nmax) ssn=0;
int sUn=lo.Reorder(ssn);
// int sUn=lo.Reorder(ssn);
int sUn=ssn;
LOCK_GAUGE(0);
#else
int sU =ssU;
@ -166,7 +168,7 @@
if (nmu==0) break;
// if (nmu!=0) std::cout << "EXT "<<sU<<std::endl;
#endif
base = (uint64_t) &out._odata[ss];
base = (uint64_t) &out[ss];
basep= st.GetPFInfo(nent,plocal); nent++;
RESULT(base,basep);
}

View File

@ -0,0 +1,86 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernelsAsm.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/FermionCore.h>
NAMESPACE_BEGIN(Grid);
///////////////////////////////////////////////////////////
// Default to no assembler implementation
// Will specialise to
///////////////////////////////////////////////////////////
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{
assert(0);
}
NAMESPACE_END(Grid);

View File

@ -28,7 +28,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#if defined(QPX)
@ -52,18 +52,18 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
WilsonKernels<WilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
WilsonKernels<WilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef MAYBEPERM
#undef MULT_2SPIN
@ -75,18 +75,18 @@ WilsonKernels<WilsonImplF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,Do
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSite(StencilView &st, DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// Ls vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef MAYBEPERM
#undef MULT_2SPIN
@ -104,9 +104,9 @@ WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrde
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
WilsonKernels<WilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
@ -115,9 +115,9 @@ WilsonKernels<WilsonImplD>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,Doubl
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
WilsonKernels<WilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
#undef MAYBEPERM
@ -129,9 +129,9 @@ WilsonKernels<WilsonImplD>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,Do
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSite(StencilView &st, DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
@ -139,9 +139,9 @@ WilsonKernels<DomainWallVec5dImplD>::AsmDhopSite(StencilImpl &st,LebesgueOrder &
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
#undef MAYBEPERM

View File

@ -26,6 +26,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/FermionCore.h>
#define REGISTER
@ -45,7 +48,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
Chimu_32=ref(F)(3)(2)
#define LOAD_CHIMU(DIR,F,PERM) \
{ const SiteSpinor & ref (in._odata[offset]); LOAD_CHIMU_BODY(F); }
{ const SiteSpinor & ref (in[offset]); LOAD_CHIMU_BODY(F); }
#define LOAD_CHI_BODY(F) \
Chi_00 = ref(F)(0)(0);\
@ -92,9 +95,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
g = F; \
direction = st._directions[DIR]; \
distance = st._distances[DIR]; \
sl = st._grid->_simd_layout[direction]; \
sl = st._simd_layout[direction]; \
inplace_twist = 0; \
if(SE->_around_the_world && this->Params.twists[DIR % 4]){ \
if(SE->_around_the_world && st.parameters.twists[DIR % 4]){ \
if(sl == 1){ \
g = (F+1) % 2; \
}else{ \
@ -103,7 +106,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
}
#define LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
{ const SiteSpinor &ref(in._odata[offset]); \
{ const SiteSpinor &ref(in[offset]); \
LOAD_CHI_SETUP(DIR,F); \
if(!inplace_twist){ \
LOAD_CHIMU_BODY(g); \
@ -201,10 +204,10 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define MULT_2SPIN(A,F) \
{auto & ref(U._odata[sU](A)); MULT_2SPIN_BODY; }
{auto & ref(U[sU](A)); MULT_2SPIN_BODY; }
#define MULT_2SPIN_GPARITY(A,F) \
{auto & ref(U._odata[sU](F)(A)); MULT_2SPIN_BODY; }
{auto & ref(U[sU](F)(A)); MULT_2SPIN_BODY; }
#define PERMUTE_DIR(dir) \
@ -468,8 +471,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
SE=st.GetEntry(ptype,DIR,ss); \
offset = SE->_offset; \
local = SE->_is_local; \
perm = SE->_permute; \
perm = SE->_permute; \
if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \
LOAD_CHI_IMPL(DIR,F,PERM); \
MULT_2SPIN_IMPL(DIR,F); \
@ -479,7 +481,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define HAND_RESULT(ss,F) \
{ \
SiteSpinor & ref (out._odata[ss]); \
SiteSpinor & ref (out[ss]); \
vstream(ref(F)(0)(0),result_00); \
vstream(ref(F)(0)(1),result_01); \
vstream(ref(F)(0)(2),result_02); \
@ -496,7 +498,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define HAND_RESULT_EXT(ss,F) \
if (nmu){ \
SiteSpinor & ref (out._odata[ss]); \
SiteSpinor & ref (out[ss]); \
ref(F)(0)(0)+=result_00; \
ref(F)(0)(1)+=result_01; \
ref(F)(0)(2)+=result_02; \
@ -545,18 +547,18 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
Simd U_21;
#define ZERO_RESULT \
result_00=zero; \
result_01=zero; \
result_02=zero; \
result_10=zero; \
result_11=zero; \
result_12=zero; \
result_20=zero; \
result_21=zero; \
result_22=zero; \
result_30=zero; \
result_31=zero; \
result_32=zero;
result_00=Zero(); \
result_01=Zero(); \
result_02=Zero(); \
result_10=Zero(); \
result_11=Zero(); \
result_12=Zero(); \
result_20=Zero(); \
result_21=Zero(); \
result_22=Zero(); \
result_30=Zero(); \
result_31=Zero(); \
result_32=Zero();
#define Chimu_00 Chi_00
#define Chimu_01 Chi_01
@ -571,21 +573,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define Chimu_31 UChi_11
#define Chimu_32 UChi_12
namespace Grid {
namespace QCD {
template<class Impl> void
WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
int offset,local,perm, ptype;
StencilEntry *SE;
NAMESPACE_BEGIN(Grid);
#define HAND_DOP_SITE(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
@ -598,21 +586,6 @@ WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGauge
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT(ss,F)
HAND_DOP_SITE(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
StencilEntry *SE;
int offset,local,perm, ptype;
#define HAND_DOP_SITE_DAG(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
@ -624,22 +597,6 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,Doub
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT(ss,F)
HAND_DOP_SITE_DAG(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
int offset,local,perm, ptype;
StencilEntry *SE;
#define HAND_DOP_SITE_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
ZERO_RESULT; \
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
@ -652,21 +609,6 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT(ss,F)
HAND_DOP_SITE_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
StencilEntry *SE;
int offset,local,perm, ptype;
#define HAND_DOP_SITE_DAG_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
ZERO_RESULT; \
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
@ -678,23 +620,6 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,D
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT(ss,F)
HAND_DOP_SITE_DAG_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
int offset,local,perm, ptype;
StencilEntry *SE;
int nmu=0;
#define HAND_DOP_SITE_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
ZERO_RESULT; \
@ -708,22 +633,6 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT_EXT(ss,F)
HAND_DOP_SITE_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
StencilEntry *SE;
int offset,local,perm, ptype;
int nmu=0;
#define HAND_DOP_SITE_DAG_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
ZERO_RESULT; \
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
@ -736,13 +645,10 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT_EXT(ss,F)
HAND_DOP_SITE_DAG_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
#define HAND_SPECIALISE_GPARITY(IMPL) \
template<> void \
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
template<> void \
WilsonKernels<IMPL>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
@ -756,9 +662,9 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \
\
template<> \
void WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
@ -772,9 +678,9 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \
\
template<> void \
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
@ -788,9 +694,9 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \
\
template<> \
void WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
@ -805,8 +711,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
} \
\
template<> void \
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
WilsonKernels<IMPL>::HandDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
@ -814,16 +720,16 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
\
HAND_DECLARATIONS(ignore); \
\
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
int offset,perm, ptype, g, direction, distance, sl, inplace_twist; \
StencilEntry *SE; \
int nmu=0; \
HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
nmu = 0; \
HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \
template<> \
void WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
@ -832,47 +738,11 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
HAND_DECLARATIONS(ignore); \
\
StencilEntry *SE; \
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
int offset,perm, ptype, g, direction, distance, sl, inplace_twist; \
int nmu=0; \
HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
nmu = 0; \
HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
}
HAND_SPECIALISE_GPARITY(GparityWilsonImplF);
HAND_SPECIALISE_GPARITY(GparityWilsonImplD);
HAND_SPECIALISE_GPARITY(GparityWilsonImplFH);
HAND_SPECIALISE_GPARITY(GparityWilsonImplDF);
////////////// Wilson ; uses this implementation /////////////////////
#define INSTANTIATE_THEM(A) \
template void WilsonKernels<A>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out);
INSTANTIATE_THEM(GparityWilsonImplF);
INSTANTIATE_THEM(GparityWilsonImplD);
INSTANTIATE_THEM(GparityWilsonImplFH);
INSTANTIATE_THEM(GparityWilsonImplDF);
}}
NAMESPACE_END(Grid);

View File

@ -26,12 +26,58 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/FermionCore.h>
#undef LOAD_CHIMU
#undef LOAD_CHI
#undef MULT_2SPIN
#undef PERMUTE_DIR
#undef XP_PROJ
#undef YP_PROJ
#undef ZP_PROJ
#undef TP_PROJ
#undef XM_PROJ
#undef YM_PROJ
#undef ZM_PROJ
#undef TM_PROJ
#undef XP_RECON
#undef XP_RECON_ACCUM
#undef XM_RECON
#undef XM_RECON_ACCUM
#undef YP_RECON_ACCUM
#undef YM_RECON_ACCUM
#undef ZP_RECON_ACCUM
#undef ZM_RECON_ACCUM
#undef TP_RECON_ACCUM
#undef TM_RECON_ACCUM
#undef ZERO_RESULT
#undef Chimu_00
#undef Chimu_01
#undef Chimu_02
#undef Chimu_10
#undef Chimu_11
#undef Chimu_12
#undef Chimu_20
#undef Chimu_21
#undef Chimu_22
#undef Chimu_30
#undef Chimu_31
#undef Chimu_32
#undef HAND_STENCIL_LEG
#undef HAND_STENCIL_LEG_INT
#undef HAND_STENCIL_LEG_EXT
#undef HAND_RESULT
#undef HAND_RESULT_INT
#undef HAND_RESULT_EXT
#define REGISTER
#define LOAD_CHIMU \
{const SiteSpinor & ref (in._odata[offset]); \
{const SiteSpinor & ref (in[offset]); \
Chimu_00=ref()(0)(0);\
Chimu_01=ref()(0)(1);\
Chimu_02=ref()(0)(2);\
@ -56,7 +102,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
// To splat or not to splat depends on the implementation
#define MULT_2SPIN(A)\
{auto & ref(U._odata[sU](A)); \
{auto & ref(U[sU](A)); \
Impl::loadLinkElement(U_00,ref()(0,0)); \
Impl::loadLinkElement(U_10,ref()(1,0)); \
Impl::loadLinkElement(U_20,ref()(2,0)); \
@ -355,7 +401,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define HAND_RESULT(ss) \
{ \
SiteSpinor & ref (out._odata[ss]); \
SiteSpinor & ref (out[ss]); \
vstream(ref()(0)(0),result_00); \
vstream(ref()(0)(1),result_01); \
vstream(ref()(0)(2),result_02); \
@ -372,7 +418,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define HAND_RESULT_EXT(ss) \
if (nmu){ \
SiteSpinor & ref (out._odata[ss]); \
SiteSpinor & ref (out[ss]); \
ref()(0)(0)+=result_00; \
ref()(0)(1)+=result_01; \
ref()(0)(2)+=result_02; \
@ -421,18 +467,18 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
Simd U_21;
#define ZERO_RESULT \
result_00=zero; \
result_01=zero; \
result_02=zero; \
result_10=zero; \
result_11=zero; \
result_12=zero; \
result_20=zero; \
result_21=zero; \
result_22=zero; \
result_30=zero; \
result_31=zero; \
result_32=zero;
result_00=Zero(); \
result_01=Zero(); \
result_02=Zero(); \
result_10=Zero(); \
result_11=Zero(); \
result_12=Zero(); \
result_20=Zero(); \
result_21=Zero(); \
result_22=Zero(); \
result_30=Zero(); \
result_31=Zero(); \
result_32=Zero();
#define Chimu_00 Chi_00
#define Chimu_01 Chi_01
@ -447,12 +493,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define Chimu_31 UChi_11
#define Chimu_32 UChi_12
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template<class Impl> void
WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
WilsonKernels<Impl>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
@ -475,8 +520,8 @@ WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGauge
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
void WilsonKernels<Impl>::HandDhopSiteDag(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -498,8 +543,8 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,Doub
}
template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
@ -522,8 +567,8 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -545,8 +590,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,D
}
template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
@ -554,7 +599,7 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
HAND_DECLARATIONS(ignore);
int offset,local,perm, ptype;
int offset, ptype;
StencilEntry *SE;
int nmu=0;
ZERO_RESULT;
@ -570,8 +615,8 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -579,7 +624,7 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
HAND_DECLARATIONS(ignore);
StencilEntry *SE;
int offset,local,perm, ptype;
int offset, ptype;
int nmu=0;
ZERO_RESULT;
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
@ -595,37 +640,45 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
////////////// Wilson ; uses this implementation /////////////////////
#define INSTANTIATE_THEM(A) \
template void WilsonKernels<A>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out);
INSTANTIATE_THEM(WilsonImplF);
INSTANTIATE_THEM(WilsonImplD);
INSTANTIATE_THEM(ZWilsonImplF);
INSTANTIATE_THEM(ZWilsonImplD);
INSTANTIATE_THEM(DomainWallVec5dImplF);
INSTANTIATE_THEM(DomainWallVec5dImplD);
INSTANTIATE_THEM(ZDomainWallVec5dImplF);
INSTANTIATE_THEM(ZDomainWallVec5dImplD);
INSTANTIATE_THEM(WilsonImplFH);
INSTANTIATE_THEM(WilsonImplDF);
INSTANTIATE_THEM(ZWilsonImplFH);
INSTANTIATE_THEM(ZWilsonImplDF);
INSTANTIATE_THEM(DomainWallVec5dImplFH);
INSTANTIATE_THEM(DomainWallVec5dImplDF);
INSTANTIATE_THEM(ZDomainWallVec5dImplFH);
INSTANTIATE_THEM(ZDomainWallVec5dImplDF);
INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplF);
INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplD);
}}
NAMESPACE_END(Grid);
#undef LOAD_CHIMU
#undef LOAD_CHI
#undef MULT_2SPIN
#undef PERMUTE_DIR
#undef XP_PROJ
#undef YP_PROJ
#undef ZP_PROJ
#undef TP_PROJ
#undef XM_PROJ
#undef YM_PROJ
#undef ZM_PROJ
#undef TM_PROJ
#undef XP_RECON
#undef XP_RECON_ACCUM
#undef XM_RECON
#undef XM_RECON_ACCUM
#undef YP_RECON_ACCUM
#undef YM_RECON_ACCUM
#undef ZP_RECON_ACCUM
#undef ZM_RECON_ACCUM
#undef TP_RECON_ACCUM
#undef TM_RECON_ACCUM
#undef ZERO_RESULT
#undef Chimu_00
#undef Chimu_01
#undef Chimu_02
#undef Chimu_10
#undef Chimu_11
#undef Chimu_12
#undef Chimu_20
#undef Chimu_21
#undef Chimu_22
#undef Chimu_30
#undef Chimu_31
#undef Chimu_32
#undef HAND_STENCIL_LEG
#undef HAND_STENCIL_LEG_INT
#undef HAND_STENCIL_LEG_EXT
#undef HAND_RESULT
#undef HAND_RESULT_INT
#undef HAND_RESULT_EXT

View File

@ -0,0 +1,551 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/FermionCore.h>
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////
// Generic implementation; move to different file?
////////////////////////////////////////////
accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip)
{
#ifdef __CUDA_ARCH__
static_assert(sizeof(StencilEntry)==sizeof(uint4),"Unexpected Stencil Entry Size");
uint4 * mem_pun = (uint4 *)mem; // force 128 bit loads
uint4 * chip_pun = (uint4 *)&chip;
* chip_pun = * mem_pun;
#else
chip = *mem;
#endif
return;
}
#define GENERIC_STENCIL_LEG(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if (SE->_is_local) { \
int perm= SE->_permute; \
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
spProj(chi,tmp); \
} else { \
chi = coalescedRead(buf[SE->_offset],lane); \
} \
synchronise(); \
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
Recon(result, Uchi);
#define GENERIC_STENCIL_LEG_INT(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if (SE->_is_local) { \
int perm= SE->_permute; \
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
spProj(chi,tmp); \
} else if ( st.same_node[Dir] ) { \
chi = coalescedRead(buf[SE->_offset],lane); \
} \
synchronise(); \
if (SE->_is_local || st.same_node[Dir] ) { \
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
Recon(result, Uchi); \
} \
synchronise();
#define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
auto chi = coalescedRead(buf[SE->_offset],lane); \
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
Recon(result, Uchi); \
nmu++; \
} \
synchronise();
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \
if (gamma == Dir) { \
if (SE->_is_local ) { \
int perm= SE->_permute; \
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
spProj(chi,tmp); \
} else { \
chi = coalescedRead(buf[SE->_offset],lane); \
} \
synchronise(); \
Impl::multLink(Uchi, U[sU], chi, dir, SE, st); \
Recon(result, Uchi); \
synchronise(); \
}
////////////////////////////////////////////////////////////////////
// All legs kernels ; comms then compute
////////////////////////////////////////////////////////////////////
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
typedef decltype(coalescedRead(in[0])) calcSpinor;
calcHalfSpinor chi;
// calcHalfSpinor *chi_p;
calcHalfSpinor Uchi;
calcSpinor result;
StencilEntry *SE;
int ptype;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=SIMTlane(Nsimd);
GENERIC_STENCIL_LEG(Xp,spProjXp,spReconXp);
GENERIC_STENCIL_LEG(Yp,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG(Zp,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG(Tp,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG(Xm,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG(Ym,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG(Zm,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG(Tm,spProjTm,accumReconTm);
coalescedWrite(out[sF],result,lane);
};
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
typedef decltype(coalescedRead(in[0])) calcSpinor;
calcHalfSpinor chi;
// calcHalfSpinor *chi_p;
calcHalfSpinor Uchi;
calcSpinor result;
StencilEntry *SE;
int ptype;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=SIMTlane(Nsimd);
GENERIC_STENCIL_LEG(Xm,spProjXp,spReconXp);
GENERIC_STENCIL_LEG(Ym,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG(Zm,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG(Tm,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG(Xp,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG(Yp,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG(Zp,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG(Tp,spProjTm,accumReconTm);
coalescedWrite(out[sF], result,lane);
};
////////////////////////////////////////////////////////////////////
// Interior kernels
////////////////////////////////////////////////////////////////////
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
typedef decltype(coalescedRead(in[0])) calcSpinor;
calcHalfSpinor chi;
// calcHalfSpinor *chi_p;
calcHalfSpinor Uchi;
calcSpinor result;
StencilEntry *SE;
int ptype;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=SIMTlane(Nsimd);
result=Zero();
GENERIC_STENCIL_LEG_INT(Xp,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_INT(Yp,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_INT(Zp,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_INT(Tp,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_INT(Xm,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_INT(Ym,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_INT(Zm,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_INT(Tm,spProjTm,accumReconTm);
coalescedWrite(out[sF], result,lane);
};
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
typedef decltype(coalescedRead(in[0])) calcSpinor;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=SIMTlane(Nsimd);
calcHalfSpinor chi;
// calcHalfSpinor *chi_p;
calcHalfSpinor Uchi;
calcSpinor result;
StencilEntry *SE;
int ptype;
result=Zero();
GENERIC_STENCIL_LEG_INT(Xm,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_INT(Ym,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_INT(Zm,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_INT(Tm,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_INT(Xp,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_INT(Yp,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_INT(Zp,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_INT(Tp,spProjTm,accumReconTm);
coalescedWrite(out[sF], result,lane);
};
////////////////////////////////////////////////////////////////////
// Exterior kernels
////////////////////////////////////////////////////////////////////
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
typedef decltype(coalescedRead(in[0])) calcSpinor;
// calcHalfSpinor *chi_p;
calcHalfSpinor Uchi;
calcSpinor result;
StencilEntry *SE;
int ptype;
int nmu=0;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=SIMTlane(Nsimd);
result=Zero();
GENERIC_STENCIL_LEG_EXT(Xp,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_EXT(Yp,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_EXT(Zp,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_EXT(Tp,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_EXT(Xm,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_EXT(Ym,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_EXT(Zm,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_EXT(Tm,spProjTm,accumReconTm);
if ( nmu ) {
auto out_t = coalescedRead(out[sF],lane);
out_t = out_t + result;
coalescedWrite(out[sF],out_t,lane);
}
};
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
typedef decltype(coalescedRead(in[0])) calcSpinor;
// calcHalfSpinor *chi_p;
calcHalfSpinor Uchi;
calcSpinor result;
StencilEntry *SE;
int ptype;
int nmu=0;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=SIMTlane(Nsimd);
result=Zero();
GENERIC_STENCIL_LEG_EXT(Xm,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_EXT(Ym,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_EXT(Zm,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_EXT(Tm,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_EXT(Xp,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_EXT(Yp,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_EXT(Zp,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_EXT(Tp,spProjTm,accumReconTm);
if ( nmu ) {
auto out_t = coalescedRead(out[sF],lane);
out_t = out_t + result;
coalescedWrite(out[sF],out_t,lane);
}
};
template <class Impl>
void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma)
{
typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
typedef decltype(coalescedRead(in[0])) calcSpinor;
calcHalfSpinor chi;
calcSpinor result;
calcHalfSpinor Uchi;
StencilEntry *SE;
int ptype;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=SIMTlane(Nsimd);
SE = st.GetEntry(ptype, dir, sF);
if (gamma == Xp) {
if (SE->_is_local ) {
int perm= SE->_permute;
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);
spProjXp(chi,tmp);
} else {
chi = coalescedRead(buf[SE->_offset],lane);
}
Impl::multLink(Uchi, U[sU], chi, dir, SE, st);
spReconXp(result, Uchi);
}
GENERIC_DHOPDIR_LEG(Yp,spProjYp,spReconYp);
GENERIC_DHOPDIR_LEG(Zp,spProjZp,spReconZp);
GENERIC_DHOPDIR_LEG(Tp,spProjTp,spReconTp);
GENERIC_DHOPDIR_LEG(Xm,spProjXm,spReconXm);
GENERIC_DHOPDIR_LEG(Ym,spProjYm,spReconYm);
GENERIC_DHOPDIR_LEG(Zm,spProjZm,spReconZm);
GENERIC_DHOPDIR_LEG(Tm,spProjTm,spReconTm);
coalescedWrite(out[sF], result,lane);
}
template <class Impl>
void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma)
{
assert(dirdisp<=7);
assert(dirdisp>=0);
auto U_v = U.View();
auto in_v = in.View();
auto out_v = out.View();
auto st_v = st.View();
accelerator_for(ss,Nsite,Simd::Nsimd(),{
for(int s=0;s<Ls;s++){
int sU=ss;
int sF = s+Ls*sU;
DhopDirK(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_v,dirdisp,gamma);
}
});
}
#define KERNEL_CALLNB(A) \
const uint64_t NN = Nsite*Ls; \
accelerator_forNB( ss, NN, Simd::Nsimd(), { \
int sF = ss; \
int sU = ss/Ls; \
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
});
#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
#define ASM_CALL(A) \
thread_for( ss, Nsite, { \
int sU = ss; \
int sF = ss*Ls; \
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,Ls,1,in_v,out_v); \
});
template <class Impl>
void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior,int exterior)
{
auto U_v = U.View();
auto in_v = in.View();
auto out_v = out.View();
auto st_v = st.View();
if( interior && exterior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;}
#ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); printf("."); return;}
#endif
} else if( interior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALLNB(GenericDhopSiteInt); return;}
#ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALLNB(HandDhopSiteInt); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); printf("-"); return;}
#endif
} else if( exterior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;}
#ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); printf("+"); return;}
#endif
}
assert(0 && " Kernel optimisation case not covered ");
}
template <class Impl>
void WilsonKernels<Impl>::DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior,int exterior)
{
auto U_v = U.View();
auto in_v = in.View();
auto out_v = out.View();
auto st_v = st.View();
if( interior && exterior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDag); return;}
#ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDag); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDag); return;}
#endif
} else if( interior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagInt); return;}
#ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagInt); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagInt); return;}
#endif
} else if( exterior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagExt); return;}
#ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagExt); return;}
#endif
}
assert(0 && " Kernel optimisation case not covered ");
}
/*******************************************************************************
* Conserved current utilities for Wilson fermions, for contracting propagators
* to make a conserved current sink or inserting the conserved current
* sequentially. Common to both 4D and 5D.
******************************************************************************/
// N.B. Functions below assume a -1/2 factor within U.
#define WilsonCurrentFwd(expr, mu) ((expr - Gamma::gmu[mu]*expr))
#define WilsonCurrentBwd(expr, mu) ((expr + Gamma::gmu[mu]*expr))
/*******************************************************************************
* Name: ContractConservedCurrentSiteFwd
* Operation: (1/2) * q2[x] * U(x) * (g[mu] - 1) * q1[x + mu]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in_1 shifted in +ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
bool switch_sign)
{
SitePropagator result, tmp;
Gamma g5(Gamma::Algebra::Gamma5);
Impl::multLink(tmp, U[sU], q_in_1, mu);
result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu);
if (switch_sign) {
q_out -= result;
} else {
q_out += result;
}
}
/*******************************************************************************
* Name: ContractConservedCurrentSiteBwd
* Operation: (1/2) * q2[x + mu] * U^dag(x) * (g[mu] + 1) * q1[x]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in_2 shifted in +ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
bool switch_sign)
{
SitePropagator result, tmp;
Gamma g5(Gamma::Algebra::Gamma5);
Impl::multLink(tmp, U[sU], q_in_1, mu + Nd);
result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu);
if (switch_sign) {
q_out += result;
} else {
q_out -= result;
}
}
/*******************************************************************************
* Name: SeqConservedCurrentSiteFwd
* Operation: (1/2) * U(x) * (g[mu] - 1) * q[x + mu]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in shifted in +ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
vPredicate t_mask,
bool switch_sign)
{
SitePropagator result;
Impl::multLink(result, U[sU], q_in, mu);
result = WilsonCurrentFwd(result, mu);
// Zero any unwanted timeslice entries.
result = predicatedWhere(t_mask, result, 0.*result);
if (switch_sign) {
q_out -= result;
} else {
q_out += result;
}
}
/*******************************************************************************
* Name: SeqConservedCurrentSiteFwd
* Operation: (1/2) * U^dag(x) * (g[mu] + 1) * q[x - mu]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in shifted in -ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
vPredicate t_mask,
bool switch_sign)
{
SitePropagator result;
Impl::multLink(result, U[sU], q_in, mu + Nd);
result = WilsonCurrentBwd(result, mu);
// Zero any unwanted timeslice entries.
result = predicatedWhere(t_mask, result, 0.*result);
if (switch_sign) {
q_out += result;
} else {
q_out -= result;
}
}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,97 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc
Copyright (C) 2015
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/WilsonTMFermion.h>
#pragma once
NAMESPACE_BEGIN(Grid);
/*
* BF sequence
*
void bfmbase<Float>::MooeeInv(Fermion_t psi,
Fermion_t chi,
int dag, int cb)
double m = this->mass;
double tm = this->twistedmass;
double mtil = 4.0+this->mass;
double sq = mtil*mtil + tm*tm;
double a = mtil/sq;
double b = -tm /sq;
if(dag) b=-b;
axpibg5x(chi,psi,a,b);
void bfmbase<Float>::Mooee(Fermion_t psi,
Fermion_t chi,
int dag,int cb)
double a = 4.0+this->mass;
double b = this->twistedmass;
if(dag) b=-b;
axpibg5x(chi,psi,a,b);
*/
template<class Impl>
void WilsonTMFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
RealD a = 4.0+this->mass;
RealD b = this->mu;
out.Checkerboard() = in.Checkerboard();
axpibg5x(out,in,a,b);
}
template<class Impl>
void WilsonTMFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
RealD a = 4.0+this->mass;
RealD b = -this->mu;
out.Checkerboard() = in.Checkerboard();
axpibg5x(out,in,a,b);
}
template<class Impl>
void WilsonTMFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
RealD m = this->mass;
RealD tm = this->mu;
RealD mtil = 4.0+m;
RealD sq = mtil*mtil+tm*tm;
RealD a = mtil/sq;
RealD b = -tm /sq;
axpibg5x(out,in,a,b);
}
template<class Impl>
void WilsonTMFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out) {
RealD m = this->mass;
RealD tm = this->mu;
RealD mtil = 4.0+m;
RealD sq = mtil*mtil+tm*tm;
RealD a = mtil/sq;
RealD b = tm /sq;
axpibg5x(out,in,a,b);
}
NAMESPACE_END(Grid);

Some files were not shown because too many files have changed in this diff Show More