1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-20 00:36:55 +01:00

Merge GPU support (upstream/develop) into distillation branch.

This compiles and looks right ... but may need some testing

* develop: (762 commits)
  Tensor ambiguous fix
  Fix for GCC preprocessor/pragma handling bug
  Trips up NVCC for reasons I dont understand on summit
  Fix GCC complaint
  Zero() change
  Force a couple of things to compile on NVCC
  Remove debug code
  nvcc error suppress
  Merge develop
  Reduction finished and hopefully fixes CI regression fail on single precisoin and force
  Double precision variants for summation accuracy
  Update todo list
  Freeze the seed
  Fix compiling of MSource::Gauss for single precision
  Think the reduction is now sorted and cleaned up
  Fix force term
  Printing improvement
  GPU reduction fix and also exit backtrace option
  GPU friendly
  Simplify the comms benchmark
  ...

# Conflicts:
#	Grid/communicator/SharedMemoryMPI.cc
#	Grid/qcd/action/fermion/WilsonKernelsAsm.cc
#	Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h
#	Grid/qcd/smearing/StoutSmearing.h
#	Hadrons/Modules.hpp
#	Hadrons/Utilities/Contractor.cc
#	Hadrons/modules.inc
#	tests/forces/Test_dwf_force_eofa.cc
#	tests/forces/Test_dwf_gpforce_eofa.cc
This commit is contained in:
Michael Marshall
2019-09-13 13:30:00 +01:00
796 changed files with 41536 additions and 52391 deletions

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -27,114 +27,112 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_BASE_H
#define GRID_QCD_BASE_H
namespace Grid{
namespace QCD {
*************************************************************************************/
/* END LEGAL */
#pragma once
static const int Xdir = 0;
static const int Ydir = 1;
static const int Zdir = 2;
static const int Tdir = 3;
NAMESPACE_BEGIN(Grid);
static const int Xp = 0;
static const int Yp = 1;
static const int Zp = 2;
static const int Tp = 3;
static const int Xm = 4;
static const int Ym = 5;
static const int Zm = 6;
static const int Tm = 7;
static constexpr int Xdir = 0;
static constexpr int Ydir = 1;
static constexpr int Zdir = 2;
static constexpr int Tdir = 3;
static const int Nc=3;
static const int Ns=4;
static const int Nd=4;
static const int Nhs=2; // half spinor
static const int Nds=8; // double stored gauge field
static const int Ngp=2; // gparity index range
static constexpr int Xp = 0;
static constexpr int Yp = 1;
static constexpr int Zp = 2;
static constexpr int Tp = 3;
static constexpr int Xm = 4;
static constexpr int Ym = 5;
static constexpr int Zm = 6;
static constexpr int Tm = 7;
//////////////////////////////////////////////////////////////////////////////
// QCD iMatrix types
// Index conventions: Lorentz x Spin x Colour
// note: static const int or constexpr will work for type deductions
// with the intel compiler (up to version 17)
//////////////////////////////////////////////////////////////////////////////
#define ColourIndex 2
#define SpinIndex 1
#define LorentzIndex 0
static constexpr int Nc=3;
static constexpr int Ns=4;
static constexpr int Nd=4;
static constexpr int Nhs=2; // half spinor
static constexpr int Nds=8; // double stored gauge field
static constexpr int Ngp=2; // gparity index range
// Also should make these a named enum type
static const int DaggerNo=0;
static const int DaggerYes=1;
static const int InverseNo=0;
static const int InverseYes=1;
//////////////////////////////////////////////////////////////////////////////
// QCD iMatrix types
// Index conventions: Lorentz x Spin x Colour
// note: static constexpr int or constexpr will work for type deductions
// with the intel compiler (up to version 17)
//////////////////////////////////////////////////////////////////////////////
#define ColourIndex (2)
#define SpinIndex (1)
#define LorentzIndex (0)
// Useful traits is this a spin index
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
// Also should make these a named enum type
static constexpr int DaggerNo=0;
static constexpr int DaggerYes=1;
static constexpr int InverseNo=0;
static constexpr int InverseYes=1;
const int SpinorIndex = 2;
template<typename T> struct isSpinor {
static const bool value = (SpinorIndex==T::TensorLevel);
};
template <typename T> using IfSpinor = Invoke<std::enable_if< isSpinor<T>::value,int> > ;
template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
// Useful traits is this a spin index
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
// ChrisK very keen to add extra space for Gparity doubling.
//
// Also add domain wall index, in a way where Wilson operator
// naturally distributes across the 5th dimensions.
//
// That probably makes for GridRedBlack4dCartesian grid.
const int SpinorIndex = 2;
template<typename T> struct isSpinor {
static constexpr bool value = (SpinorIndex==T::TensorLevel);
};
template <typename T> using IfSpinor = Invoke<std::enable_if< isSpinor<T>::value,int> > ;
template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
// s,sp,c,spc,lc
// ChrisK very keen to add extra space for Gparity doubling.
//
// Also add domain wall index, in a way where Wilson operator
// naturally distributes across the 5th dimensions.
//
// That probably makes for GridRedBlack4dCartesian grid.
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
// s,sp,c,spc,lc
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
template<typename vtype> using iSpinColourSpinColourMatrix = iScalar<iMatrix<iMatrix<iMatrix<iMatrix<vtype, Nc>, Ns>, Nc>, Ns> >;
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >;
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >;
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
// Spin matrix
typedef iSpinMatrix<Complex > SpinMatrix;
typedef iSpinMatrix<ComplexF > SpinMatrixF;
typedef iSpinMatrix<ComplexD > SpinMatrixD;
// Spin matrix
typedef iSpinMatrix<Complex > SpinMatrix;
typedef iSpinMatrix<ComplexF > SpinMatrixF;
typedef iSpinMatrix<ComplexD > SpinMatrixD;
typedef iSpinMatrix<vComplex > vSpinMatrix;
typedef iSpinMatrix<vComplexF> vSpinMatrixF;
typedef iSpinMatrix<vComplexD> vSpinMatrixD;
typedef iSpinMatrix<vComplex > vSpinMatrix;
typedef iSpinMatrix<vComplexF> vSpinMatrixF;
typedef iSpinMatrix<vComplexD> vSpinMatrixD;
// Colour Matrix
typedef iColourMatrix<Complex > ColourMatrix;
typedef iColourMatrix<ComplexF > ColourMatrixF;
typedef iColourMatrix<ComplexD > ColourMatrixD;
// Colour Matrix
typedef iColourMatrix<Complex > ColourMatrix;
typedef iColourMatrix<ComplexF > ColourMatrixF;
typedef iColourMatrix<ComplexD > ColourMatrixD;
typedef iColourMatrix<vComplex > vColourMatrix;
typedef iColourMatrix<vComplexF> vColourMatrixF;
typedef iColourMatrix<vComplexD> vColourMatrixD;
typedef iColourMatrix<vComplex > vColourMatrix;
typedef iColourMatrix<vComplexF> vColourMatrixF;
typedef iColourMatrix<vComplexD> vColourMatrixD;
// SpinColour matrix
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
typedef iSpinColourMatrix<ComplexF > SpinColourMatrixF;
typedef iSpinColourMatrix<ComplexD > SpinColourMatrixD;
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
typedef iSpinColourMatrix<vComplexF> vSpinColourMatrixF;
typedef iSpinColourMatrix<vComplexD> vSpinColourMatrixD;
// SpinColour matrix
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
typedef iSpinColourMatrix<ComplexF > SpinColourMatrixF;
typedef iSpinColourMatrix<ComplexD > SpinColourMatrixD;
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
typedef iSpinColourMatrix<vComplexF> vSpinColourMatrixF;
typedef iSpinColourMatrix<vComplexD> vSpinColourMatrixD;
// SpinColourSpinColour matrix
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
@ -153,383 +151,379 @@ namespace QCD {
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
// LorentzColour
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
typedef iLorentzColourMatrix<ComplexF > LorentzColourMatrixF;
typedef iLorentzColourMatrix<ComplexD > LorentzColourMatrixD;
// LorentzColour
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
typedef iLorentzColourMatrix<ComplexF > LorentzColourMatrixF;
typedef iLorentzColourMatrix<ComplexD > LorentzColourMatrixD;
typedef iLorentzColourMatrix<vComplex > vLorentzColourMatrix;
typedef iLorentzColourMatrix<vComplexF> vLorentzColourMatrixF;
typedef iLorentzColourMatrix<vComplexD> vLorentzColourMatrixD;
typedef iLorentzColourMatrix<vComplex > vLorentzColourMatrix;
typedef iLorentzColourMatrix<vComplexF> vLorentzColourMatrixF;
typedef iLorentzColourMatrix<vComplexD> vLorentzColourMatrixD;
// DoubleStored gauge field
typedef iDoubleStoredColourMatrix<Complex > DoubleStoredColourMatrix;
typedef iDoubleStoredColourMatrix<ComplexF > DoubleStoredColourMatrixF;
typedef iDoubleStoredColourMatrix<ComplexD > DoubleStoredColourMatrixD;
// DoubleStored gauge field
typedef iDoubleStoredColourMatrix<Complex > DoubleStoredColourMatrix;
typedef iDoubleStoredColourMatrix<ComplexF > DoubleStoredColourMatrixF;
typedef iDoubleStoredColourMatrix<ComplexD > DoubleStoredColourMatrixD;
typedef iDoubleStoredColourMatrix<vComplex > vDoubleStoredColourMatrix;
typedef iDoubleStoredColourMatrix<vComplexF> vDoubleStoredColourMatrixF;
typedef iDoubleStoredColourMatrix<vComplexD> vDoubleStoredColourMatrixD;
typedef iDoubleStoredColourMatrix<vComplex > vDoubleStoredColourMatrix;
typedef iDoubleStoredColourMatrix<vComplexF> vDoubleStoredColourMatrixF;
typedef iDoubleStoredColourMatrix<vComplexD> vDoubleStoredColourMatrixD;
// Spin vector
typedef iSpinVector<Complex > SpinVector;
typedef iSpinVector<ComplexF> SpinVectorF;
typedef iSpinVector<ComplexD> SpinVectorD;
// Spin vector
typedef iSpinVector<Complex > SpinVector;
typedef iSpinVector<ComplexF> SpinVectorF;
typedef iSpinVector<ComplexD> SpinVectorD;
typedef iSpinVector<vComplex > vSpinVector;
typedef iSpinVector<vComplexF> vSpinVectorF;
typedef iSpinVector<vComplexD> vSpinVectorD;
typedef iSpinVector<vComplex > vSpinVector;
typedef iSpinVector<vComplexF> vSpinVectorF;
typedef iSpinVector<vComplexD> vSpinVectorD;
// Colour vector
typedef iColourVector<Complex > ColourVector;
typedef iColourVector<ComplexF> ColourVectorF;
typedef iColourVector<ComplexD> ColourVectorD;
// Colour vector
typedef iColourVector<Complex > ColourVector;
typedef iColourVector<ComplexF> ColourVectorF;
typedef iColourVector<ComplexD> ColourVectorD;
typedef iColourVector<vComplex > vColourVector;
typedef iColourVector<vComplexF> vColourVectorF;
typedef iColourVector<vComplexD> vColourVectorD;
typedef iColourVector<vComplex > vColourVector;
typedef iColourVector<vComplexF> vColourVectorF;
typedef iColourVector<vComplexD> vColourVectorD;
// SpinColourVector
typedef iSpinColourVector<Complex > SpinColourVector;
typedef iSpinColourVector<ComplexF> SpinColourVectorF;
typedef iSpinColourVector<ComplexD> SpinColourVectorD;
// SpinColourVector
typedef iSpinColourVector<Complex > SpinColourVector;
typedef iSpinColourVector<ComplexF> SpinColourVectorF;
typedef iSpinColourVector<ComplexD> SpinColourVectorD;
typedef iSpinColourVector<vComplex > vSpinColourVector;
typedef iSpinColourVector<vComplexF> vSpinColourVectorF;
typedef iSpinColourVector<vComplexD> vSpinColourVectorD;
typedef iSpinColourVector<vComplex > vSpinColourVector;
typedef iSpinColourVector<vComplexF> vSpinColourVectorF;
typedef iSpinColourVector<vComplexD> vSpinColourVectorD;
// HalfSpin vector
typedef iHalfSpinVector<Complex > HalfSpinVector;
typedef iHalfSpinVector<ComplexF> HalfSpinVectorF;
typedef iHalfSpinVector<ComplexD> HalfSpinVectorD;
// HalfSpin vector
typedef iHalfSpinVector<Complex > HalfSpinVector;
typedef iHalfSpinVector<ComplexF> HalfSpinVectorF;
typedef iHalfSpinVector<ComplexD> HalfSpinVectorD;
typedef iHalfSpinVector<vComplex > vHalfSpinVector;
typedef iHalfSpinVector<vComplexF> vHalfSpinVectorF;
typedef iHalfSpinVector<vComplexD> vHalfSpinVectorD;
typedef iHalfSpinVector<vComplex > vHalfSpinVector;
typedef iHalfSpinVector<vComplexF> vHalfSpinVectorF;
typedef iHalfSpinVector<vComplexD> vHalfSpinVectorD;
// HalfSpinColour vector
typedef iHalfSpinColourVector<Complex > HalfSpinColourVector;
typedef iHalfSpinColourVector<ComplexF> HalfSpinColourVectorF;
typedef iHalfSpinColourVector<ComplexD> HalfSpinColourVectorD;
// HalfSpinColour vector
typedef iHalfSpinColourVector<Complex > HalfSpinColourVector;
typedef iHalfSpinColourVector<ComplexF> HalfSpinColourVectorF;
typedef iHalfSpinColourVector<ComplexD> HalfSpinColourVectorD;
typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector;
typedef iHalfSpinColourVector<vComplexF> vHalfSpinColourVectorF;
typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;
typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector;
typedef iHalfSpinColourVector<vComplexF> vHalfSpinColourVectorF;
typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;
// singlets
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<ComplexF> TComplexF; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<ComplexD> TComplexD; // FIXME This is painful. Tensor singlet complex type.
// singlets
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<ComplexF> TComplexF; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<ComplexD> TComplexD; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<vComplex > vTComplex ; // what if we don't know the tensor structure
typedef iSinglet<vComplexF> vTComplexF; // what if we don't know the tensor structure
typedef iSinglet<vComplexD> vTComplexD; // what if we don't know the tensor structure
typedef iSinglet<vComplex > vTComplex ; // what if we don't know the tensor structure
typedef iSinglet<vComplexF> vTComplexF; // what if we don't know the tensor structure
typedef iSinglet<vComplexD> vTComplexD; // what if we don't know the tensor structure
typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without?
typedef iSinglet<RealF> TRealF; // Shouldn't need these; can I make it work without?
typedef iSinglet<RealD> TRealD; // Shouldn't need these; can I make it work without?
typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without?
typedef iSinglet<RealF> TRealF; // Shouldn't need these; can I make it work without?
typedef iSinglet<RealD> TRealD; // Shouldn't need these; can I make it work without?
typedef iSinglet<vReal > vTReal;
typedef iSinglet<vRealF> vTRealF;
typedef iSinglet<vRealD> vTRealD;
typedef iSinglet<vReal > vTReal;
typedef iSinglet<vRealF> vTRealF;
typedef iSinglet<vRealD> vTRealD;
typedef iSinglet<vInteger> vTInteger;
typedef iSinglet<Integer > TInteger;
typedef iSinglet<vInteger> vTInteger;
typedef iSinglet<Integer > TInteger;
// Lattices of these
typedef Lattice<vColourMatrix> LatticeColourMatrix;
typedef Lattice<vColourMatrixF> LatticeColourMatrixF;
typedef Lattice<vColourMatrixD> LatticeColourMatrixD;
// Lattices of these
typedef Lattice<vColourMatrix> LatticeColourMatrix;
typedef Lattice<vColourMatrixF> LatticeColourMatrixF;
typedef Lattice<vColourMatrixD> LatticeColourMatrixD;
typedef Lattice<vSpinMatrix> LatticeSpinMatrix;
typedef Lattice<vSpinMatrixF> LatticeSpinMatrixF;
typedef Lattice<vSpinMatrixD> LatticeSpinMatrixD;
typedef Lattice<vSpinMatrix> LatticeSpinMatrix;
typedef Lattice<vSpinMatrixF> LatticeSpinMatrixF;
typedef Lattice<vSpinMatrixD> LatticeSpinMatrixD;
typedef Lattice<vSpinColourMatrix> LatticeSpinColourMatrix;
typedef Lattice<vSpinColourMatrixF> LatticeSpinColourMatrixF;
typedef Lattice<vSpinColourMatrixD> LatticeSpinColourMatrixD;
typedef Lattice<vSpinColourMatrix> LatticeSpinColourMatrix;
typedef Lattice<vSpinColourMatrixF> LatticeSpinColourMatrixF;
typedef Lattice<vSpinColourMatrixD> LatticeSpinColourMatrixD;
typedef Lattice<vSpinColourSpinColourMatrix> LatticeSpinColourSpinColourMatrix;
typedef Lattice<vSpinColourSpinColourMatrixF> LatticeSpinColourSpinColourMatrixF;
typedef Lattice<vSpinColourSpinColourMatrixD> LatticeSpinColourSpinColourMatrixD;
typedef Lattice<vSpinColourSpinColourMatrix> LatticeSpinColourSpinColourMatrix;
typedef Lattice<vSpinColourSpinColourMatrixF> LatticeSpinColourSpinColourMatrixF;
typedef Lattice<vSpinColourSpinColourMatrixD> LatticeSpinColourSpinColourMatrixD;
typedef Lattice<vLorentzColourMatrix> LatticeLorentzColourMatrix;
typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;
typedef Lattice<vLorentzColourMatrixD> LatticeLorentzColourMatrixD;
typedef Lattice<vLorentzColourMatrix> LatticeLorentzColourMatrix;
typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;
typedef Lattice<vLorentzColourMatrixD> LatticeLorentzColourMatrixD;
// DoubleStored gauge field
typedef Lattice<vDoubleStoredColourMatrix> LatticeDoubleStoredColourMatrix;
typedef Lattice<vDoubleStoredColourMatrixF> LatticeDoubleStoredColourMatrixF;
typedef Lattice<vDoubleStoredColourMatrixD> LatticeDoubleStoredColourMatrixD;
// DoubleStored gauge field
typedef Lattice<vDoubleStoredColourMatrix> LatticeDoubleStoredColourMatrix;
typedef Lattice<vDoubleStoredColourMatrixF> LatticeDoubleStoredColourMatrixF;
typedef Lattice<vDoubleStoredColourMatrixD> LatticeDoubleStoredColourMatrixD;
typedef Lattice<vSpinVector> LatticeSpinVector;
typedef Lattice<vSpinVectorF> LatticeSpinVectorF;
typedef Lattice<vSpinVectorD> LatticeSpinVectorD;
typedef Lattice<vSpinVector> LatticeSpinVector;
typedef Lattice<vSpinVectorF> LatticeSpinVectorF;
typedef Lattice<vSpinVectorD> LatticeSpinVectorD;
typedef Lattice<vColourVector> LatticeColourVector;
typedef Lattice<vColourVectorF> LatticeColourVectorF;
typedef Lattice<vColourVectorD> LatticeColourVectorD;
typedef Lattice<vColourVector> LatticeColourVector;
typedef Lattice<vColourVectorF> LatticeColourVectorF;
typedef Lattice<vColourVectorD> LatticeColourVectorD;
typedef Lattice<vSpinColourVector> LatticeSpinColourVector;
typedef Lattice<vSpinColourVectorF> LatticeSpinColourVectorF;
typedef Lattice<vSpinColourVectorD> LatticeSpinColourVectorD;
typedef Lattice<vSpinColourVector> LatticeSpinColourVector;
typedef Lattice<vSpinColourVectorF> LatticeSpinColourVectorF;
typedef Lattice<vSpinColourVectorD> LatticeSpinColourVectorD;
typedef Lattice<vHalfSpinVector> LatticeHalfSpinVector;
typedef Lattice<vHalfSpinVectorF> LatticeHalfSpinVectorF;
typedef Lattice<vHalfSpinVectorD> LatticeHalfSpinVectorD;
typedef Lattice<vHalfSpinVector> LatticeHalfSpinVector;
typedef Lattice<vHalfSpinVectorF> LatticeHalfSpinVectorF;
typedef Lattice<vHalfSpinVectorD> LatticeHalfSpinVectorD;
typedef Lattice<vHalfSpinColourVector> LatticeHalfSpinColourVector;
typedef Lattice<vHalfSpinColourVectorF> LatticeHalfSpinColourVectorF;
typedef Lattice<vHalfSpinColourVectorD> LatticeHalfSpinColourVectorD;
typedef Lattice<vHalfSpinColourVector> LatticeHalfSpinColourVector;
typedef Lattice<vHalfSpinColourVectorF> LatticeHalfSpinColourVectorF;
typedef Lattice<vHalfSpinColourVectorD> LatticeHalfSpinColourVectorD;
typedef Lattice<vTReal> LatticeReal;
typedef Lattice<vTRealF> LatticeRealF;
typedef Lattice<vTRealD> LatticeRealD;
typedef Lattice<vTReal> LatticeReal;
typedef Lattice<vTRealF> LatticeRealF;
typedef Lattice<vTRealD> LatticeRealD;
typedef Lattice<vTComplex> LatticeComplex;
typedef Lattice<vTComplexF> LatticeComplexF;
typedef Lattice<vTComplexD> LatticeComplexD;
typedef Lattice<vTComplex> LatticeComplex;
typedef Lattice<vTComplexF> LatticeComplexF;
typedef Lattice<vTComplexD> LatticeComplexD;
typedef Lattice<vTInteger> LatticeInteger; // Predicates for "where"
typedef Lattice<vTInteger> LatticeInteger; // Predicates for "where"
///////////////////////////////////////////
// Physical names for things
///////////////////////////////////////////
typedef LatticeHalfSpinColourVector LatticeHalfFermion;
typedef LatticeHalfSpinColourVectorF LatticeHalfFermionF;
typedef LatticeHalfSpinColourVectorF LatticeHalfFermionD;
///////////////////////////////////////////
// Physical names for things
///////////////////////////////////////////
typedef LatticeHalfSpinColourVector LatticeHalfFermion;
typedef LatticeHalfSpinColourVectorF LatticeHalfFermionF;
typedef LatticeHalfSpinColourVectorF LatticeHalfFermionD;
typedef LatticeSpinColourVector LatticeFermion;
typedef LatticeSpinColourVectorF LatticeFermionF;
typedef LatticeSpinColourVectorD LatticeFermionD;
typedef LatticeSpinColourVector LatticeFermion;
typedef LatticeSpinColourVectorF LatticeFermionF;
typedef LatticeSpinColourVectorD LatticeFermionD;
typedef LatticeSpinColourMatrix LatticePropagator;
typedef LatticeSpinColourMatrixF LatticePropagatorF;
typedef LatticeSpinColourMatrixD LatticePropagatorD;
typedef LatticeSpinColourMatrix LatticePropagator;
typedef LatticeSpinColourMatrixF LatticePropagatorF;
typedef LatticeSpinColourMatrixD LatticePropagatorD;
typedef LatticeLorentzColourMatrix LatticeGaugeField;
typedef LatticeLorentzColourMatrixF LatticeGaugeFieldF;
typedef LatticeLorentzColourMatrixD LatticeGaugeFieldD;
typedef LatticeLorentzColourMatrix LatticeGaugeField;
typedef LatticeLorentzColourMatrixF LatticeGaugeFieldF;
typedef LatticeLorentzColourMatrixD LatticeGaugeFieldD;
typedef LatticeDoubleStoredColourMatrix LatticeDoubledGaugeField;
typedef LatticeDoubleStoredColourMatrixF LatticeDoubledGaugeFieldF;
typedef LatticeDoubleStoredColourMatrixD LatticeDoubledGaugeFieldD;
typedef LatticeDoubleStoredColourMatrix LatticeDoubledGaugeField;
typedef LatticeDoubleStoredColourMatrixF LatticeDoubledGaugeFieldF;
typedef LatticeDoubleStoredColourMatrixD LatticeDoubledGaugeFieldD;
template<class GF> using LorentzScalar = Lattice<iScalar<typename GF::vector_object::element> >;
template<class GF> using LorentzScalar = Lattice<iScalar<typename GF::vector_object::element> >;
// Uhgg... typing this hurt ;)
// (my keyboard got burning hot when I typed this, must be the anti-Fermion)
typedef Lattice<vColourVector> LatticeStaggeredFermion;
typedef Lattice<vColourVectorF> LatticeStaggeredFermionF;
typedef Lattice<vColourVectorD> LatticeStaggeredFermionD;
// Uhgg... typing this hurt ;)
// (my keyboard got burning hot when I typed this, must be the anti-Fermion)
typedef Lattice<vColourVector> LatticeStaggeredFermion;
typedef Lattice<vColourVectorF> LatticeStaggeredFermionF;
typedef Lattice<vColourVectorD> LatticeStaggeredFermionD;
typedef Lattice<vColourMatrix> LatticeStaggeredPropagator;
typedef Lattice<vColourMatrixF> LatticeStaggeredPropagatorF;
typedef Lattice<vColourMatrixD> LatticeStaggeredPropagatorD;
typedef Lattice<vColourMatrix> LatticeStaggeredPropagator;
typedef Lattice<vColourMatrixF> LatticeStaggeredPropagatorF;
typedef Lattice<vColourMatrixD> LatticeStaggeredPropagatorD;
//////////////////////////////////////////////////////////////////////////////
// Peek and Poke named after physics attributes
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
// Peek and Poke named after physics attributes
//////////////////////////////////////////////////////////////////////////////
//spin
template<class vobj> auto peekSpin(const vobj &rhs,int i) -> decltype(PeekIndex<SpinIndex>(rhs,0))
{
return PeekIndex<SpinIndex>(rhs,i);
}
template<class vobj> auto peekSpin(const vobj &rhs,int i,int j) -> decltype(PeekIndex<SpinIndex>(rhs,0,0))
{
return PeekIndex<SpinIndex>(rhs,i,j);
}
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<SpinIndex>(rhs,0))
{
return PeekIndex<SpinIndex>(rhs,i);
}
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i,int j) -> decltype(PeekIndex<SpinIndex>(rhs,0,0))
{
return PeekIndex<SpinIndex>(rhs,i,j);
}
//colour
template<class vobj> auto peekColour(const vobj &rhs,int i) -> decltype(PeekIndex<ColourIndex>(rhs,0))
{
return PeekIndex<ColourIndex>(rhs,i);
}
template<class vobj> auto peekColour(const vobj &rhs,int i,int j) -> decltype(PeekIndex<ColourIndex>(rhs,0,0))
{
return PeekIndex<ColourIndex>(rhs,i,j);
}
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<ColourIndex>(rhs,0))
{
return PeekIndex<ColourIndex>(rhs,i);
}
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i,int j) -> decltype(PeekIndex<ColourIndex>(rhs,0,0))
{
return PeekIndex<ColourIndex>(rhs,i,j);
}
//lorentz
template<class vobj> auto peekLorentz(const vobj &rhs,int i) -> decltype(PeekIndex<LorentzIndex>(rhs,0))
{
return PeekIndex<LorentzIndex>(rhs,i);
}
template<class vobj> auto peekLorentz(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<LorentzIndex>(rhs,0))
{
return PeekIndex<LorentzIndex>(rhs,i);
}
//spin
template<class vobj> auto peekSpin(const vobj &rhs,int i) -> decltype(PeekIndex<SpinIndex>(rhs,0))
{
return PeekIndex<SpinIndex>(rhs,i);
}
template<class vobj> auto peekSpin(const vobj &rhs,int i,int j) -> decltype(PeekIndex<SpinIndex>(rhs,0,0))
{
return PeekIndex<SpinIndex>(rhs,i,j);
}
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<SpinIndex>(rhs,0))
{
return PeekIndex<SpinIndex>(rhs,i);
}
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i,int j) -> decltype(PeekIndex<SpinIndex>(rhs,0,0))
{
return PeekIndex<SpinIndex>(rhs,i,j);
}
//colour
template<class vobj> auto peekColour(const vobj &rhs,int i) -> decltype(PeekIndex<ColourIndex>(rhs,0))
{
return PeekIndex<ColourIndex>(rhs,i);
}
template<class vobj> auto peekColour(const vobj &rhs,int i,int j) -> decltype(PeekIndex<ColourIndex>(rhs,0,0))
{
return PeekIndex<ColourIndex>(rhs,i,j);
}
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<ColourIndex>(rhs,0))
{
return PeekIndex<ColourIndex>(rhs,i);
}
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i,int j) -> decltype(PeekIndex<ColourIndex>(rhs,0,0))
{
return PeekIndex<ColourIndex>(rhs,i,j);
}
//lorentz
template<class vobj> auto peekLorentz(const vobj &rhs,int i) -> decltype(PeekIndex<LorentzIndex>(rhs,0))
{
return PeekIndex<LorentzIndex>(rhs,i);
}
template<class vobj> auto peekLorentz(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<LorentzIndex>(rhs,0))
{
return PeekIndex<LorentzIndex>(rhs,i);
}
//////////////////////////////////////////////
// Poke lattice
//////////////////////////////////////////////
template<class vobj>
void pokeColour(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0))> & rhs,
//////////////////////////////////////////////
// Poke lattice
//////////////////////////////////////////////
template<class vobj>
void pokeColour(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<ColourIndex>(vobj(),0))> & rhs,
int i)
{
PokeIndex<ColourIndex>(lhs,rhs,i);
}
template<class vobj>
void pokeColour(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<ColourIndex>(vobj(),0,0))> & rhs,
int i,int j)
{
PokeIndex<ColourIndex>(lhs,rhs,i,j);
}
template<class vobj>
void pokeSpin(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<SpinIndex>(vobj(),0))> & rhs,
int i)
{
PokeIndex<ColourIndex>(lhs,rhs,i);
}
template<class vobj>
void pokeColour(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0,0))> & rhs,
{
PokeIndex<SpinIndex>(lhs,rhs,i);
}
template<class vobj>
void pokeSpin(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<SpinIndex>(vobj(),0,0))> & rhs,
int i,int j)
{
PokeIndex<ColourIndex>(lhs,rhs,i,j);
}
template<class vobj>
void pokeSpin(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0))> & rhs,
int i)
{
PokeIndex<SpinIndex>(lhs,rhs,i);
}
template<class vobj>
void pokeSpin(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0,0))> & rhs,
int i,int j)
{
PokeIndex<SpinIndex>(lhs,rhs,i,j);
}
template<class vobj>
void pokeLorentz(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<LorentzIndex>(lhs._odata[0],0))> & rhs,
int i)
{
PokeIndex<LorentzIndex>(lhs,rhs,i);
}
{
PokeIndex<SpinIndex>(lhs,rhs,i,j);
}
template<class vobj>
void pokeLorentz(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<LorentzIndex>(vobj(),0))> & rhs,
int i)
{
PokeIndex<LorentzIndex>(lhs,rhs,i);
}
//////////////////////////////////////////////
// Poke scalars
//////////////////////////////////////////////
template<class vobj> void pokeSpin(vobj &lhs,const decltype(peekIndex<SpinIndex>(lhs,0)) & rhs,int i)
{
pokeIndex<SpinIndex>(lhs,rhs,i);
}
template<class vobj> void pokeSpin(vobj &lhs,const decltype(peekIndex<SpinIndex>(lhs,0,0)) & rhs,int i,int j)
{
pokeIndex<SpinIndex>(lhs,rhs,i,j);
}
//////////////////////////////////////////////
// Poke scalars
//////////////////////////////////////////////
template<class vobj> void pokeSpin(vobj &lhs,const decltype(peekIndex<SpinIndex>(lhs,0)) & rhs,int i)
{
pokeIndex<SpinIndex>(lhs,rhs,i);
}
template<class vobj> void pokeSpin(vobj &lhs,const decltype(peekIndex<SpinIndex>(lhs,0,0)) & rhs,int i,int j)
{
pokeIndex<SpinIndex>(lhs,rhs,i,j);
}
template<class vobj> void pokeColour(vobj &lhs,const decltype(peekIndex<ColourIndex>(lhs,0)) & rhs,int i)
{
pokeIndex<ColourIndex>(lhs,rhs,i);
}
template<class vobj> void pokeColour(vobj &lhs,const decltype(peekIndex<ColourIndex>(lhs,0,0)) & rhs,int i,int j)
{
pokeIndex<ColourIndex>(lhs,rhs,i,j);
}
template<class vobj> void pokeColour(vobj &lhs,const decltype(peekIndex<ColourIndex>(lhs,0)) & rhs,int i)
{
pokeIndex<ColourIndex>(lhs,rhs,i);
}
template<class vobj> void pokeColour(vobj &lhs,const decltype(peekIndex<ColourIndex>(lhs,0,0)) & rhs,int i,int j)
{
pokeIndex<ColourIndex>(lhs,rhs,i,j);
}
template<class vobj> void pokeLorentz(vobj &lhs,const decltype(peekIndex<LorentzIndex>(lhs,0)) & rhs,int i)
{
pokeIndex<LorentzIndex>(lhs,rhs,i);
}
template<class vobj> void pokeLorentz(vobj &lhs,const decltype(peekIndex<LorentzIndex>(lhs,0)) & rhs,int i)
{
pokeIndex<LorentzIndex>(lhs,rhs,i);
}
//////////////////////////////////////////////
// Fermion <-> propagator assignements
//////////////////////////////////////////////
//////////////////////////////////////////////
// Fermion <-> propagator assignements
//////////////////////////////////////////////
//template <class Prop, class Ferm>
template <class Fimpl>
void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::FermionField &f, const int s, const int c)
{
for(int j = 0; j < Ns; ++j)
{
for(int j = 0; j < Ns; ++j)
{
auto pjs = peekSpin(p, j, s);
auto fj = peekSpin(f, j);
auto pjs = peekSpin(p, j, s);
auto fj = peekSpin(f, j);
for(int i = 0; i < Fimpl::Dimension; ++i)
{
pokeColour(pjs, peekColour(fj, i), i, c);
}
pokeSpin(p, pjs, j, s);
}
{
pokeColour(pjs, peekColour(fj, i), i, c);
}
pokeSpin(p, pjs, j, s);
}
}
//template <class Prop, class Ferm>
template <class Fimpl>
void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::PropagatorField &p, const int s, const int c)
{
for(int j = 0; j < Ns; ++j)
{
for(int j = 0; j < Ns; ++j)
{
auto pjs = peekSpin(p, j, s);
auto fj = peekSpin(f, j);
auto pjs = peekSpin(p, j, s);
auto fj = peekSpin(f, j);
for(int i = 0; i < Fimpl::Dimension; ++i)
{
pokeColour(fj, peekColour(pjs, i, c), i);
}
pokeSpin(f, fj, j);
}
{
pokeColour(fj, peekColour(pjs, i, c), i);
}
pokeSpin(f, fj, j);
}
}
//////////////////////////////////////////////
// transpose array and scalar
//////////////////////////////////////////////
template<int Index,class vobj> inline Lattice<vobj> transposeSpin(const Lattice<vobj> &lhs){
return transposeIndex<SpinIndex>(lhs);
}
template<int Index,class vobj> inline Lattice<vobj> transposeColour(const Lattice<vobj> &lhs){
return transposeIndex<ColourIndex>(lhs);
}
template<int Index,class vobj> inline vobj transposeSpin(const vobj &lhs){
return transposeIndex<SpinIndex>(lhs);
}
template<int Index,class vobj> inline vobj transposeColour(const vobj &lhs){
return transposeIndex<ColourIndex>(lhs);
}
//////////////////////////////////////////////
// transpose array and scalar
//////////////////////////////////////////////
template<int Index,class vobj> inline Lattice<vobj> transposeSpin(const Lattice<vobj> &lhs){
return transposeIndex<SpinIndex>(lhs);
}
template<int Index,class vobj> inline Lattice<vobj> transposeColour(const Lattice<vobj> &lhs){
return transposeIndex<ColourIndex>(lhs);
}
template<int Index,class vobj> inline vobj transposeSpin(const vobj &lhs){
return transposeIndex<SpinIndex>(lhs);
}
template<int Index,class vobj> inline vobj transposeColour(const vobj &lhs){
return transposeIndex<ColourIndex>(lhs);
}
//////////////////////////////////////////
// Trace lattice and non-lattice
//////////////////////////////////////////
template<int Index,class vobj>
inline auto traceSpin(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<SpinIndex>(lhs._odata[0]))>
{
return traceIndex<SpinIndex>(lhs);
}
template<int Index,class vobj>
inline auto traceColour(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<ColourIndex>(lhs._odata[0]))>
{
return traceIndex<ColourIndex>(lhs);
}
template<int Index,class vobj>
inline auto traceSpin(const vobj &lhs) -> Lattice<decltype(traceIndex<SpinIndex>(lhs))>
{
return traceIndex<SpinIndex>(lhs);
}
template<int Index,class vobj>
inline auto traceColour(const vobj &lhs) -> Lattice<decltype(traceIndex<ColourIndex>(lhs))>
{
return traceIndex<ColourIndex>(lhs);
}
//////////////////////////////////////////
// Trace lattice and non-lattice
//////////////////////////////////////////
template<int Index,class vobj>
inline auto traceSpin(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<SpinIndex>(vobj()))>
{
return traceIndex<SpinIndex>(lhs);
}
template<int Index,class vobj>
inline auto traceColour(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<ColourIndex>(vobj()))>
{
return traceIndex<ColourIndex>(lhs);
}
template<int Index,class vobj>
inline auto traceSpin(const vobj &lhs) -> Lattice<decltype(traceIndex<SpinIndex>(lhs))>
{
return traceIndex<SpinIndex>(lhs);
}
template<int Index,class vobj>
inline auto traceColour(const vobj &lhs) -> Lattice<decltype(traceIndex<ColourIndex>(lhs))>
{
return traceIndex<ColourIndex>(lhs);
}
//////////////////////////////////////////
// Current types
//////////////////////////////////////////
GRID_SERIALIZABLE_ENUM(Current, undef,
Vector, 0,
Axial, 1,
Tadpole, 2);
//////////////////////////////////////////
// Current types
//////////////////////////////////////////
GRID_SERIALIZABLE_ENUM(Current, undef,
Vector, 0,
Axial, 1,
Tadpole, 2);
} //namespace QCD
} // Grid
NAMESPACE_END(Grid);
#endif

View File

@ -37,14 +37,18 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
// Abstract base interface
////////////////////////////////////////////
#include <Grid/qcd/action/ActionCore.h>
NAMESPACE_CHECK(ActionCore);
////////////////////////////////////////////////////////////////////////
// Fermion actions; prevent coupling fermion.cc files to other headers
////////////////////////////////////////////////////////////////////////
#include <Grid/qcd/action/fermion/FermionCore.h>
NAMESPACE_CHECK(FermionCore);
#include <Grid/qcd/action/fermion/Fermion.h>
NAMESPACE_CHECK(Fermion);
////////////////////////////////////////
// Pseudo fermion combinations for HMC
////////////////////////////////////////
#include <Grid/qcd/action/pseudofermion/PseudoFermion.h>
NAMESPACE_CHECK(PseudoFermion);
#endif

View File

@ -27,19 +27,18 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
/* END LEGAL */
#ifndef ACTION_BASE_H
#define ACTION_BASE_H
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template <class GaugeField >
class Action
{
public:
public:
bool is_smeared = false;
// Heatbath?
virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG) = 0; // refresh pseudofermions
@ -50,7 +49,6 @@ class Action
virtual ~Action(){}
};
}
}
NAMESPACE_END(Grid);
#endif // ACTION_BASE_H

View File

@ -31,29 +31,37 @@ directory
#define QCD_ACTION_CORE
#include <Grid/qcd/action/ActionBase.h>
NAMESPACE_CHECK(ActionBase);
#include <Grid/qcd/action/ActionSet.h>
NAMESPACE_CHECK(ActionSet);
#include <Grid/qcd/action/ActionParams.h>
NAMESPACE_CHECK(ActionParams);
////////////////////////////////////////////
// Gauge Actions
////////////////////////////////////////////
#include <Grid/qcd/action/gauge/Gauge.h>
NAMESPACE_CHECK(Gauge);
////////////////////////////////////////////
// Fermion prereqs
////////////////////////////////////////////
#include <Grid/qcd/action/fermion/FermionCore.h>
NAMESPACE_CHECK(ActionFermionCore);
////////////////////////////////////////////
// Scalar Actions
////////////////////////////////////////////
#include <Grid/qcd/action/scalar/Scalar.h>
NAMESPACE_CHECK(Scalar);
////////////////////////////////////////////
// Utility functions
////////////////////////////////////////////
#include <Grid/qcd/utils/Metric.h>
NAMESPACE_CHECK(Metric);
#include <Grid/qcd/utils/CovariantLaplacian.h>
NAMESPACE_CHECK(CovariantLaplacian);

View File

@ -27,37 +27,35 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
/* END LEGAL */
#ifndef GRID_QCD_ACTION_PARAMS_H
#define GRID_QCD_ACTION_PARAMS_H
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
// These can move into a params header and be given MacroMagic serialisation
struct GparityWilsonImplParams {
bool overlapCommsCompute;
std::vector<int> twists;
GparityWilsonImplParams() : twists(Nd, 0), overlapCommsCompute(false){};
};
// These can move into a params header and be given MacroMagic serialisation
struct GparityWilsonImplParams {
Coordinate twists;
GparityWilsonImplParams() : twists(Nd, 0) {};
};
struct WilsonImplParams {
bool overlapCommsCompute;
std::vector<Real> twist_n_2pi_L;
std::vector<Complex> boundary_phases;
WilsonImplParams() : overlapCommsCompute(false) {
boundary_phases.resize(Nd, 1.0);
struct WilsonImplParams {
bool overlapCommsCompute;
AcceleratorVector<Real,Nd> twist_n_2pi_L;
AcceleratorVector<Complex,Nd> boundary_phases;
WilsonImplParams() {
boundary_phases.resize(Nd, 1.0);
twist_n_2pi_L.resize(Nd, 0.0);
};
WilsonImplParams(const std::vector<Complex> phi) : boundary_phases(phi), overlapCommsCompute(false) {
twist_n_2pi_L.resize(Nd, 0.0);
}
};
WilsonImplParams(const AcceleratorVector<Complex,Nd> phi) : boundary_phases(phi), overlapCommsCompute(false) {
twist_n_2pi_L.resize(Nd, 0.0);
}
};
struct StaggeredImplParams {
StaggeredImplParams() {};
};
struct StaggeredImplParams {
StaggeredImplParams() {};
};
struct OneFlavourRationalParams : Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(OneFlavourRationalParams,
@ -69,10 +67,10 @@ namespace QCD {
int, precision,
int, BoundsCheckFreq);
// MaxIter and tolerance, vectors??
// MaxIter and tolerance, vectors??
// constructor
OneFlavourRationalParams( RealD _lo = 0.0,
// constructor
OneFlavourRationalParams( RealD _lo = 0.0,
RealD _hi = 1.0,
int _maxit = 1000,
RealD tol = 1.0e-8,
@ -88,11 +86,6 @@ namespace QCD {
BoundsCheckFreq(_BoundsCheckFreq){};
};
}
}
NAMESPACE_END(Grid);
#endif

View File

@ -26,14 +26,11 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
/* END LEGAL */
#ifndef ACTION_SET_H
#define ACTION_SET_H
namespace Grid {
// Should drop this namespace here
namespace QCD {
NAMESPACE_BEGIN(Grid);
//////////////////////////////////
// Indexing of tuple types
@ -62,7 +59,7 @@ struct Index<T, std::tuple<U, Types...>> {
template <class Field, class Repr = NoHirep >
struct ActionLevel {
public:
public:
unsigned int multiplier;
// Fundamental repr actions separated because of the smearing
@ -77,7 +74,7 @@ struct ActionLevel {
std::vector<ActPtr>& actions;
explicit ActionLevel(unsigned int mul = 1) :
actions(std::get<0>(actions_hirep)), multiplier(mul) {
actions(std::get<0>(actions_hirep)), multiplier(mul) {
// initialize the hirep vectors to zero.
// apply(this->resize, actions_hirep, 0); //need a working resize
assert(mul >= 1);
@ -87,7 +84,7 @@ struct ActionLevel {
void push_back(Action<GenField>* ptr) {
// insert only in the correct vector
std::get< Index < GenField, action_hirep_types>::value >(actions_hirep).push_back(ptr);
};
}
template <class ActPtr>
static void resize(ActPtr ap, unsigned int n) {
@ -110,7 +107,6 @@ struct ActionLevel {
template <class GaugeField, class R>
using ActionSet = std::vector<ActionLevel<GaugeField, R> >;
} // QCD
} // Grid
NAMESPACE_END(Grid);
#endif // ACTION_SET_H

View File

@ -26,75 +26,75 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
/* END LEGAL */
#ifndef GRID_QCD_ABSTRACT_EOFA_FERMION_H
#define GRID_QCD_ABSTRACT_EOFA_FERMION_H
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
// DJM: Abstract base class for EOFA fermion types.
// Defines layout of additional EOFA-specific parameters and operators.
// Use to construct EOFA pseudofermion actions that are agnostic to
// Shamir / Mobius / etc., and ensure that no one can construct EOFA
// pseudofermion action with non-EOFA fermion type.
template<class Impl>
class AbstractEOFAFermion : public CayleyFermion5D<Impl> {
public:
INHERIT_IMPL_TYPES(Impl);
// DJM: Abstract base class for EOFA fermion types.
// Defines layout of additional EOFA-specific parameters and operators.
// Use to construct EOFA pseudofermion actions that are agnostic to
// Shamir / Mobius / etc., and ensure that no one can construct EOFA
// pseudofermion action with non-EOFA fermion type.
template<class Impl>
class AbstractEOFAFermion : public CayleyFermion5D<Impl> {
public:
INHERIT_IMPL_TYPES(Impl);
public:
// Fermion operator: D(mq1) + shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm}
RealD mq1;
RealD mq2;
RealD mq3;
RealD shift;
int pm;
public:
// Fermion operator: D(mq1) + shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm}
RealD mq1;
RealD mq2;
RealD mq3;
RealD shift;
int pm;
RealD alpha; // Mobius scale
RealD k; // EOFA normalization constant
RealD alpha; // Mobius scale
RealD k; // EOFA normalization constant
virtual void Instantiatable(void) = 0;
virtual void Instantiatable(void) = 0;
// EOFA-specific operations
// Force user to implement in derived classes
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag) = 0;
virtual void Dtilde (const FermionField& in, FermionField& out) = 0;
virtual void DtildeInv(const FermionField& in, FermionField& out) = 0;
// EOFA-specific operations
// Force user to implement in derived classes
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag) = 0;
virtual void Dtilde (const FermionField& in, FermionField& out) = 0;
virtual void DtildeInv(const FermionField& in, FermionField& out) = 0;
// Implement derivatives in base class:
// for EOFA both DWF and Mobius just need d(Dw)/dU
virtual void MDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
this->DhopDeriv(mat, U, V, dag);
};
virtual void MoeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
this->DhopDerivOE(mat, U, V, dag);
};
virtual void MeoDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
this->DhopDerivEO(mat, U, V, dag);
};
// Recompute 5D coefficients for different value of shift constant
// (needed for heatbath loop over poles)
virtual void RefreshShiftCoefficients(RealD new_shift) = 0;
// Constructors
AbstractEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int _pm,
RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams())
: CayleyFermion5D<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid,
_mq1, _M5, p), mq1(_mq1), mq2(_mq2), mq3(_mq3), shift(_shift), pm(_pm)
{
int Ls = this->Ls;
this->alpha = _b + _c;
this->k = this->alpha * (_mq3-_mq2) * std::pow(this->alpha+1.0,2*Ls) /
( std::pow(this->alpha+1.0,Ls) + _mq2*std::pow(this->alpha-1.0,Ls) ) /
( std::pow(this->alpha+1.0,Ls) + _mq3*std::pow(this->alpha-1.0,Ls) );
};
// Implement derivatives in base class:
// for EOFA both DWF and Mobius just need d(Dw)/dU
virtual void MDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
this->DhopDeriv(mat, U, V, dag);
};
}}
virtual void MoeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
this->DhopDerivOE(mat, U, V, dag);
};
virtual void MeoDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
this->DhopDerivEO(mat, U, V, dag);
};
// Recompute 5D coefficients for different value of shift constant
// (needed for heatbath loop over poles)
virtual void RefreshShiftCoefficients(RealD new_shift) = 0;
// Constructors
AbstractEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int _pm,
RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams())
: CayleyFermion5D<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid,
_mq1, _M5, p), mq1(_mq1), mq2(_mq2), mq3(_mq3), shift(_shift), pm(_pm)
{
int Ls = this->Ls;
this->alpha = _b + _c;
this->k = this->alpha * (_mq3-_mq2) * std::pow(this->alpha+1.0,2*Ls) /
( std::pow(this->alpha+1.0,Ls) + _mq2*std::pow(this->alpha-1.0,Ls) ) /
( std::pow(this->alpha+1.0,Ls) + _mq3*std::pow(this->alpha-1.0,Ls) );
};
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,203 +24,146 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_CAYLEY_FERMION_H
#define GRID_QCD_CAYLEY_FERMION_H
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/WilsonFermion5D.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class CayleyFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<typename T> struct switcheroo {
static inline int iscomplex() { return 0; }
// override multiply
virtual RealD M (const FermionField &in, FermionField &out);
virtual RealD Mdag (const FermionField &in, FermionField &out);
template<class vec>
static inline vec mult(vec a, vec b) {
return real_mult(a,b);
}
};
template<> struct switcheroo<ComplexD> {
static inline int iscomplex() { return 1; }
// half checkerboard operations
virtual void Meooe (const FermionField &in, FermionField &out);
virtual void MeooeDag (const FermionField &in, FermionField &out);
virtual void Mooee (const FermionField &in, FermionField &out);
virtual void MooeeDag (const FermionField &in, FermionField &out);
virtual void MooeeInv (const FermionField &in, FermionField &out);
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
virtual void Meo5D (const FermionField &psi, FermionField &chi);
template<class vec>
static inline vec mult(vec a, vec b) {
return a*b;
}
};
template<> struct switcheroo<ComplexF> {
static inline int iscomplex() { return 1; }
template<class vec>
static inline vec mult(vec a, vec b) {
return a*b;
}
};
virtual void M5D (const FermionField &psi, FermionField &chi);
virtual void M5Ddag(const FermionField &psi, FermionField &chi);
///////////////////////////////////////////////////////////////
// Physical surface field utilities
///////////////////////////////////////////////////////////////
virtual void Dminus(const FermionField &psi, FermionField &chi);
virtual void DminusDag(const FermionField &psi, FermionField &chi);
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
virtual void ExportPhysicalFermionSource(const FermionField &solution5d, FermionField &exported4d);
virtual void ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d);
virtual void ImportUnphysicalFermion(const FermionField &solution5d, FermionField &exported4d);
template<class Impl>
class CayleyFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
///////////////////////////////////////////////////////////////
// Support for MADWF tricks
///////////////////////////////////////////////////////////////
RealD Mass(void) { return mass; };
void SetMass(RealD _mass) {
mass=_mass;
SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs
} ;
void P(const FermionField &psi, FermionField &chi);
void Pdag(const FermionField &psi, FermionField &chi);
/////////////////////////////////////////////////////
// Instantiate different versions depending on Impl
/////////////////////////////////////////////////////
void M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
Vector<Coeff_t> &lower,
Vector<Coeff_t> &diag,
Vector<Coeff_t> &upper);
// override multiply
virtual RealD M (const FermionField &in, FermionField &out);
virtual RealD Mdag (const FermionField &in, FermionField &out);
void M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
Vector<Coeff_t> &lower,
Vector<Coeff_t> &diag,
Vector<Coeff_t> &upper);
// half checkerboard operations
virtual void Meooe (const FermionField &in, FermionField &out);
virtual void MeooeDag (const FermionField &in, FermionField &out);
virtual void Mooee (const FermionField &in, FermionField &out);
virtual void MooeeDag (const FermionField &in, FermionField &out);
virtual void MooeeInv (const FermionField &in, FermionField &out);
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
virtual void Meo5D (const FermionField &psi, FermionField &chi);
virtual void Instantiatable(void)=0;
virtual void M5D (const FermionField &psi, FermionField &chi);
virtual void M5Ddag(const FermionField &psi, FermionField &chi);
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
///////////////////////////////////////////////////////////////
// Physical surface field utilities
///////////////////////////////////////////////////////////////
virtual void Dminus(const FermionField &psi, FermionField &chi);
virtual void DminusDag(const FermionField &psi, FermionField &chi);
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
virtual void ExportPhysicalFermionSource(const FermionField &solution5d, FermionField &exported4d);
virtual void ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d);
virtual void ImportUnphysicalFermion(const FermionField &solution5d, FermionField &exported4d);
// Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
///////////////////////////////////////////////////////////////
// Support for MADWF tricks
///////////////////////////////////////////////////////////////
RealD Mass(void) { return mass; };
void SetMass(RealD _mass) {
mass=_mass;
SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs
} ;
void P(const FermionField &psi, FermionField &chi);
void Pdag(const FermionField &psi, FermionField &chi);
void Meooe5D (const FermionField &in, FermionField &out);
void MeooeDag5D (const FermionField &in, FermionField &out);
/////////////////////////////////////////////////////
// Instantiate different versions depending on Impl
/////////////////////////////////////////////////////
void M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper);
// protected:
RealD mass;
void M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper);
// Save arguments to SetCoefficientsInternal
Vector<Coeff_t> _gamma;
RealD _zolo_hi;
RealD _b;
RealD _c;
void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv);
void MooeeInternalCompute(int dag, int inv, Vector<iSinglet<Simd> > & Matp, Vector<iSinglet<Simd> > & Matm);
// Cayley form Moebius (tanh and zolotarev)
Vector<Coeff_t> omega;
Vector<Coeff_t> bs; // S dependent coeffs
Vector<Coeff_t> cs;
Vector<Coeff_t> as;
// For preconditioning Cayley form
Vector<Coeff_t> bee;
Vector<Coeff_t> cee;
Vector<Coeff_t> aee;
Vector<Coeff_t> beo;
Vector<Coeff_t> ceo;
Vector<Coeff_t> aeo;
// LDU factorisation of the eeoo matrix
Vector<Coeff_t> lee;
Vector<Coeff_t> leem;
Vector<Coeff_t> uee;
Vector<Coeff_t> ueem;
Vector<Coeff_t> dee;
void MooeeInternalAsm(const FermionField &in, FermionField &out,
int LLs, int site,
Vector<iSinglet<Simd> > &Matp,
Vector<iSinglet<Simd> > &Matm);
void MooeeInternalZAsm(const FermionField &in, FermionField &out,
int LLs, int site,
Vector<iSinglet<Simd> > &Matp,
Vector<iSinglet<Simd> > &Matm);
// Matrices of 5d ee inverse params
Vector<iSinglet<Simd> > MatpInv;
Vector<iSinglet<Simd> > MatmInv;
Vector<iSinglet<Simd> > MatpInvDag;
Vector<iSinglet<Simd> > MatmInvDag;
// Constructors
CayleyFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
virtual void Instantiatable(void)=0;
void CayleyReport(void);
void CayleyZeroCounters(void);
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
double M5Dflops;
double M5Dcalls;
double M5Dtime;
// Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
double MooeeInvFlops;
double MooeeInvCalls;
double MooeeInvTime;
void Meooe5D (const FermionField &in, FermionField &out);
void MeooeDag5D (const FermionField &in, FermionField &out);
protected:
virtual void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
virtual void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
virtual void SetCoefficientsInternal(RealD zolo_hi,Vector<Coeff_t> & gamma,RealD b,RealD c);
};
// protected:
RealD mass;
NAMESPACE_END(Grid);
// Save arguments to SetCoefficientsInternal
std::vector<Coeff_t> _gamma;
RealD _zolo_hi;
RealD _b;
RealD _c;
// Cayley form Moebius (tanh and zolotarev)
std::vector<Coeff_t> omega;
std::vector<Coeff_t> bs; // S dependent coeffs
std::vector<Coeff_t> cs;
std::vector<Coeff_t> as;
// For preconditioning Cayley form
std::vector<Coeff_t> bee;
std::vector<Coeff_t> cee;
std::vector<Coeff_t> aee;
std::vector<Coeff_t> beo;
std::vector<Coeff_t> ceo;
std::vector<Coeff_t> aeo;
// LDU factorisation of the eeoo matrix
std::vector<Coeff_t> lee;
std::vector<Coeff_t> leem;
std::vector<Coeff_t> uee;
std::vector<Coeff_t> ueem;
std::vector<Coeff_t> dee;
// Matrices of 5d ee inverse params
Vector<iSinglet<Simd> > MatpInv;
Vector<iSinglet<Simd> > MatmInv;
Vector<iSinglet<Simd> > MatpInvDag;
Vector<iSinglet<Simd> > MatmInvDag;
// Constructors
CayleyFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
void CayleyReport(void);
void CayleyZeroCounters(void);
double M5Dflops;
double M5Dcalls;
double M5Dtime;
double MooeeInvFlops;
double MooeeInvCalls;
double MooeeInvTime;
protected:
virtual void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
virtual void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
virtual void SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c);
};
}
}
#define INSTANTIATE_DPERP(A)\
template void CayleyFermion5D< A >::M5D(const FermionField &psi,const FermionField &phi,FermionField &chi,\
std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const FermionField &phi,FermionField &chi,\
std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
template void CayleyFermion5D< A >::MooeeInv (const FermionField &psi, FermionField &chi); \
template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi);
#undef CAYLEY_DPERP_DENSE
#define CAYLEY_DPERP_CACHE
#undef CAYLEY_DPERP_LINALG
#define CAYLEY_DPERP_VEC
#endif

View File

@ -1,249 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
namespace Grid {
namespace QCD {
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus fowards
// Pplus backwards..
template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
int Ls =this->Ls;
GridBase *grid=psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard=psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
M5Dcalls++;
M5Dtime-=usecond();
parallel_for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
for(int s=0;s<Ls;s++){
auto tmp = psi._odata[0];
if ( s==0 ) {
spProj5m(tmp,psi._odata[ss+s+1]);
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
spProj5p(tmp,psi._odata[ss+Ls-1]);
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
} else if ( s==(Ls-1)) {
spProj5m(tmp,psi._odata[ss+0]);
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
spProj5p(tmp,psi._odata[ss+s-1]);
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
} else {
spProj5m(tmp,psi._odata[ss+s+1]);
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
spProj5p(tmp,psi._odata[ss+s-1]);
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
}
}
}
M5Dtime+=usecond();
}
template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
int Ls =this->Ls;
GridBase *grid=psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard=psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
M5Dcalls++;
M5Dtime-=usecond();
parallel_for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
auto tmp = psi._odata[0];
for(int s=0;s<Ls;s++){
if ( s==0 ) {
spProj5p(tmp,psi._odata[ss+s+1]);
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
spProj5m(tmp,psi._odata[ss+Ls-1]);
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
} else if ( s==(Ls-1)) {
spProj5p(tmp,psi._odata[ss+0]);
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
spProj5m(tmp,psi._odata[ss+s-1]);
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
} else {
spProj5p(tmp,psi._odata[ss+s+1]);
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
spProj5m(tmp,psi._odata[ss+s-1]);
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
}
}
}
M5Dtime+=usecond();
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
{
GridBase *grid=psi._grid;
int Ls=this->Ls;
chi.checkerboard=psi.checkerboard;
MooeeInvCalls++;
MooeeInvTime-=usecond();
parallel_for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
auto tmp = psi._odata[0];
// flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops
// Apply (L^{\prime})^{-1}
chi[ss]=psi[ss]; // chi[0]=psi[0]
for(int s=1;s<Ls;s++){
spProj5p(tmp,chi[ss+s-1]);
chi[ss+s] = psi[ss+s]-lee[s-1]*tmp;
}
// L_m^{-1}
for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp,chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - leem[s]*tmp;
}
// U_m^{-1} D^{-1}
for (int s=0;s<Ls-1;s++){
// Chi[s] + 1/d chi[s]
spProj5p(tmp,chi[ss+Ls-1]);
chi[ss+s] = (1.0/dee[s])*chi[ss+s]-(ueem[s]/dee[Ls-1])*tmp;
}
chi[ss+Ls-1]= (1.0/dee[Ls-1])*chi[ss+Ls-1];
// Apply U^{-1}
for (int s=Ls-2;s>=0;s--){
spProj5m(tmp,chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - uee[s]*tmp;
}
}
MooeeInvTime+=usecond();
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
{
GridBase *grid=psi._grid;
int Ls=this->Ls;
assert(psi.checkerboard == psi.checkerboard);
chi.checkerboard=psi.checkerboard;
std::vector<Coeff_t> ueec(Ls);
std::vector<Coeff_t> deec(Ls);
std::vector<Coeff_t> leec(Ls);
std::vector<Coeff_t> ueemc(Ls);
std::vector<Coeff_t> leemc(Ls);
for(int s=0;s<ueec.size();s++){
ueec[s] = conjugate(uee[s]);
deec[s] = conjugate(dee[s]);
leec[s] = conjugate(lee[s]);
ueemc[s]= conjugate(ueem[s]);
leemc[s]= conjugate(leem[s]);
}
MooeeInvCalls++;
MooeeInvTime-=usecond();
parallel_for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
auto tmp = psi._odata[0];
// Apply (U^{\prime})^{-dagger}
chi[ss]=psi[ss];
for (int s=1;s<Ls;s++){
spProj5m(tmp,chi[ss+s-1]);
chi[ss+s] = psi[ss+s]-ueec[s-1]*tmp;
}
// U_m^{-\dagger}
for (int s=0;s<Ls-1;s++){
spProj5p(tmp,chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - ueemc[s]*tmp;
}
// L_m^{-\dagger} D^{-dagger}
for (int s=0;s<Ls-1;s++){
spProj5m(tmp,chi[ss+Ls-1]);
chi[ss+s] = (1.0/deec[s])*chi[ss+s]-(leemc[s]/deec[Ls-1])*tmp;
}
chi[ss+Ls-1]= (1.0/deec[Ls-1])*chi[ss+Ls-1];
// Apply L^{-dagger}
for (int s=Ls-2;s>=0;s--){
spProj5p(tmp,chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - leec[s]*tmp;
}
}
MooeeInvTime+=usecond();
}
#ifdef CAYLEY_DPERP_CACHE
INSTANTIATE_DPERP(WilsonImplF);
INSTANTIATE_DPERP(WilsonImplD);
INSTANTIATE_DPERP(GparityWilsonImplF);
INSTANTIATE_DPERP(GparityWilsonImplD);
INSTANTIATE_DPERP(ZWilsonImplF);
INSTANTIATE_DPERP(ZWilsonImplD);
INSTANTIATE_DPERP(WilsonImplFH);
INSTANTIATE_DPERP(WilsonImplDF);
INSTANTIATE_DPERP(GparityWilsonImplFH);
INSTANTIATE_DPERP(GparityWilsonImplDF);
INSTANTIATE_DPERP(ZWilsonImplFH);
INSTANTIATE_DPERP(ZWilsonImplDF);
#endif
}}

View File

@ -1,828 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
namespace Grid {
namespace QCD {
/*
* Dense matrix versions of routines
*/
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
{
this->MooeeInternal(psi,chi,DaggerYes,InverseYes);
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
{
this->MooeeInternal(psi,chi,DaggerNo,InverseYes);
}
template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
GridBase *grid=psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
const int nsimd= Simd::Nsimd();
Vector<iSinglet<Simd> > u(LLs);
Vector<iSinglet<Simd> > l(LLs);
Vector<iSinglet<Simd> > d(LLs);
assert(Ls/LLs==nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard=psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type * u_p = (scalar_type *)&u[0];
scalar_type * l_p = (scalar_type *)&l[0];
scalar_type * d_p = (scalar_type *)&d[0];
for(int o=0;o<LLs;o++){ // outer
for(int i=0;i<nsimd;i++){ //inner
int s = o+i*LLs;
int ss = o*nsimd+i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
M5Dcalls++;
M5Dtime-=usecond();
assert(Nc==3);
parallel_for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0;v<LLs;v++){
int vp=(v+1)%LLs;
int vm=(v+LLs-1)%LLs;
spProj5m(hp,psi[ss+vp]);
spProj5p(hm,psi[ss+vm]);
if ( vp<=v ) rotate(hp,hp,1);
if ( vm>=v ) rotate(hm,hm,nsimd-1);
hp=0.5*hp;
hm=0.5*hm;
spRecon5m(fp,hp);
spRecon5p(fm,hm);
chi[ss+v] = d[v]*phi[ss+v];
chi[ss+v] = chi[ss+v] +u[v]*fp;
chi[ss+v] = chi[ss+v] +l[v]*fm;
}
#else
for(int v=0;v<LLs;v++){
vprefetch(psi[ss+v+LLs]);
int vp= (v==LLs-1) ? 0 : v+1;
int vm= (v==0 ) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(2)(0);
Simd hp_01 = psi[ss+vp]()(2)(1);
Simd hp_02 = psi[ss+vp]()(2)(2);
Simd hp_10 = psi[ss+vp]()(3)(0);
Simd hp_11 = psi[ss+vp]()(3)(1);
Simd hp_12 = psi[ss+vp]()(3)(2);
Simd hm_00 = psi[ss+vm]()(0)(0);
Simd hm_01 = psi[ss+vm]()(0)(1);
Simd hm_02 = psi[ss+vm]()(0)(2);
Simd hm_10 = psi[ss+vm]()(1)(0);
Simd hm_11 = psi[ss+vm]()(1)(1);
Simd hm_12 = psi[ss+vm]()(1)(2);
if ( vp<=v ) {
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if ( vm>=v ) {
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
// Can force these to real arithmetic and save 2x.
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_12);
vstream(chi[ss+v]()(0)(0),p_00);
vstream(chi[ss+v]()(0)(1),p_01);
vstream(chi[ss+v]()(0)(2),p_02);
vstream(chi[ss+v]()(1)(0),p_10);
vstream(chi[ss+v]()(1)(1),p_11);
vstream(chi[ss+v]()(1)(2),p_12);
vstream(chi[ss+v]()(2)(0),p_20);
vstream(chi[ss+v]()(2)(1),p_21);
vstream(chi[ss+v]()(2)(2),p_22);
vstream(chi[ss+v]()(3)(0),p_30);
vstream(chi[ss+v]()(3)(1),p_31);
vstream(chi[ss+v]()(3)(2),p_32);
}
#endif
}
M5Dtime+=usecond();
}
template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
GridBase *grid=psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
int nsimd= Simd::Nsimd();
Vector<iSinglet<Simd> > u(LLs);
Vector<iSinglet<Simd> > l(LLs);
Vector<iSinglet<Simd> > d(LLs);
assert(Ls/LLs==nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard=psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type * u_p = (scalar_type *)&u[0];
scalar_type * l_p = (scalar_type *)&l[0];
scalar_type * d_p = (scalar_type *)&d[0];
for(int o=0;o<LLs;o++){ // outer
for(int i=0;i<nsimd;i++){ //inner
int s = o+i*LLs;
int ss = o*nsimd+i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
M5Dcalls++;
M5Dtime-=usecond();
parallel_for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0;v<LLs;v++){
int vp=(v+1)%LLs;
int vm=(v+LLs-1)%LLs;
spProj5p(hp,psi[ss+vp]);
spProj5m(hm,psi[ss+vm]);
if ( vp<=v ) rotate(hp,hp,1);
if ( vm>=v ) rotate(hm,hm,nsimd-1);
hp=hp*0.5;
hm=hm*0.5;
spRecon5p(fp,hp);
spRecon5m(fm,hm);
chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
chi[ss+v] = chi[ss+v] +l[v]*fm;
}
#else
for(int v=0;v<LLs;v++){
vprefetch(psi[ss+v+LLs]);
int vp= (v==LLs-1) ? 0 : v+1;
int vm= (v==0 ) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(0)(0);
Simd hp_01 = psi[ss+vp]()(0)(1);
Simd hp_02 = psi[ss+vp]()(0)(2);
Simd hp_10 = psi[ss+vp]()(1)(0);
Simd hp_11 = psi[ss+vp]()(1)(1);
Simd hp_12 = psi[ss+vp]()(1)(2);
Simd hm_00 = psi[ss+vm]()(2)(0);
Simd hm_01 = psi[ss+vm]()(2)(1);
Simd hm_02 = psi[ss+vm]()(2)(2);
Simd hm_10 = psi[ss+vm]()(3)(0);
Simd hm_11 = psi[ss+vm]()(3)(1);
Simd hm_12 = psi[ss+vm]()(3)(2);
if ( vp<=v ) {
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if ( vm>=v ) {
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_12);
vstream(chi[ss+v]()(0)(0),p_00);
vstream(chi[ss+v]()(0)(1),p_01);
vstream(chi[ss+v]()(0)(2),p_02);
vstream(chi[ss+v]()(1)(0),p_10);
vstream(chi[ss+v]()(1)(1),p_11);
vstream(chi[ss+v]()(1)(2),p_12);
vstream(chi[ss+v]()(2)(0),p_20);
vstream(chi[ss+v]()(2)(1),p_21);
vstream(chi[ss+v]()(2)(2),p_22);
vstream(chi[ss+v]()(3)(0),p_30);
vstream(chi[ss+v]()(3)(1),p_31);
vstream(chi[ss+v]()(3)(2),p_32);
}
#endif
}
M5Dtime+=usecond();
}
#ifdef AVX512
#include <simd/Intel512common.h>
#include <simd/Intel512avx.h>
#include <simd/Intel512single.h>
#endif
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi, FermionField &chi,
int LLs, int site,
Vector<iSinglet<Simd> > &Matp,
Vector<iSinglet<Simd> > &Matm)
{
#ifndef AVX512
{
SiteHalfSpinor BcastP;
SiteHalfSpinor BcastM;
SiteHalfSpinor SiteChiP;
SiteHalfSpinor SiteChiM;
// Ls*Ls * 2 * 12 * vol flops
for(int s1=0;s1<LLs;s1++){
for(int s2=0;s2<LLs;s2++){
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
int s=s2+l*LLs;
int lex=s2+LLs*site;
if ( s2==0 && l==0) {
SiteChiP=zero;
SiteChiM=zero;
}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vbroadcast(BcastP()(sp )(co),psi[lex]()(sp)(co),l);
}}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vbroadcast(BcastM()(sp )(co),psi[lex]()(sp+2)(co),l);
}}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
SiteChiP()(sp)(co)=real_madd(Matp[LLs*s+s1]()()(),BcastP()(sp)(co),SiteChiP()(sp)(co)); // 1100 us.
SiteChiM()(sp)(co)=real_madd(Matm[LLs*s+s1]()()(),BcastM()(sp)(co),SiteChiM()(sp)(co)); // each found by commenting out
}}
}}
{
int lex = s1+LLs*site;
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
}}
}
}
}
#else
{
// pointers
// MASK_REGS;
#define Chi_00 %%zmm1
#define Chi_01 %%zmm2
#define Chi_02 %%zmm3
#define Chi_10 %%zmm4
#define Chi_11 %%zmm5
#define Chi_12 %%zmm6
#define Chi_20 %%zmm7
#define Chi_21 %%zmm8
#define Chi_22 %%zmm9
#define Chi_30 %%zmm10
#define Chi_31 %%zmm11
#define Chi_32 %%zmm12
#define BCAST0 %%zmm13
#define BCAST1 %%zmm14
#define BCAST2 %%zmm15
#define BCAST3 %%zmm16
#define BCAST4 %%zmm17
#define BCAST5 %%zmm18
#define BCAST6 %%zmm19
#define BCAST7 %%zmm20
#define BCAST8 %%zmm21
#define BCAST9 %%zmm22
#define BCAST10 %%zmm23
#define BCAST11 %%zmm24
int incr=LLs*LLs*sizeof(iSinglet<Simd>);
for(int s1=0;s1<LLs;s1++){
for(int s2=0;s2<LLs;s2++){
int lex=s2+LLs*site;
uint64_t a0 = (uint64_t)&Matp[LLs*s2+s1]; // should be cacheable
uint64_t a1 = (uint64_t)&Matm[LLs*s2+s1];
uint64_t a2 = (uint64_t)&psi[lex];
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
if ( (s2+l)==0 ) {
asm (
VPREFETCH1(0,%2) VPREFETCH1(0,%1)
VPREFETCH1(12,%2) VPREFETCH1(13,%2)
VPREFETCH1(14,%2) VPREFETCH1(15,%2)
VBCASTCDUP(0,%2,BCAST0)
VBCASTCDUP(1,%2,BCAST1)
VBCASTCDUP(2,%2,BCAST2)
VBCASTCDUP(3,%2,BCAST3)
VBCASTCDUP(4,%2,BCAST4) VMULMEM (0,%0,BCAST0,Chi_00)
VBCASTCDUP(5,%2,BCAST5) VMULMEM (0,%0,BCAST1,Chi_01)
VBCASTCDUP(6,%2,BCAST6) VMULMEM (0,%0,BCAST2,Chi_02)
VBCASTCDUP(7,%2,BCAST7) VMULMEM (0,%0,BCAST3,Chi_10)
VBCASTCDUP(8,%2,BCAST8) VMULMEM (0,%0,BCAST4,Chi_11)
VBCASTCDUP(9,%2,BCAST9) VMULMEM (0,%0,BCAST5,Chi_12)
VBCASTCDUP(10,%2,BCAST10) VMULMEM (0,%1,BCAST6,Chi_20)
VBCASTCDUP(11,%2,BCAST11) VMULMEM (0,%1,BCAST7,Chi_21)
VMULMEM (0,%1,BCAST8,Chi_22)
VMULMEM (0,%1,BCAST9,Chi_30)
VMULMEM (0,%1,BCAST10,Chi_31)
VMULMEM (0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
} else {
asm (
VBCASTCDUP(0,%2,BCAST0) VMADDMEM (0,%0,BCAST0,Chi_00)
VBCASTCDUP(1,%2,BCAST1) VMADDMEM (0,%0,BCAST1,Chi_01)
VBCASTCDUP(2,%2,BCAST2) VMADDMEM (0,%0,BCAST2,Chi_02)
VBCASTCDUP(3,%2,BCAST3) VMADDMEM (0,%0,BCAST3,Chi_10)
VBCASTCDUP(4,%2,BCAST4) VMADDMEM (0,%0,BCAST4,Chi_11)
VBCASTCDUP(5,%2,BCAST5) VMADDMEM (0,%0,BCAST5,Chi_12)
VBCASTCDUP(6,%2,BCAST6) VMADDMEM (0,%1,BCAST6,Chi_20)
VBCASTCDUP(7,%2,BCAST7) VMADDMEM (0,%1,BCAST7,Chi_21)
VBCASTCDUP(8,%2,BCAST8) VMADDMEM (0,%1,BCAST8,Chi_22)
VBCASTCDUP(9,%2,BCAST9) VMADDMEM (0,%1,BCAST9,Chi_30)
VBCASTCDUP(10,%2,BCAST10) VMADDMEM (0,%1,BCAST10,Chi_31)
VBCASTCDUP(11,%2,BCAST11) VMADDMEM (0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
}
a0 = a0+incr;
a1 = a1+incr;
a2 = a2+sizeof(typename Simd::scalar_type);
}}
{
int lexa = s1+LLs*site;
asm (
VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01) VSTORE(2 ,%0,Chi_02)
VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11) VSTORE(5 ,%0,Chi_12)
VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21) VSTORE(8 ,%0,Chi_22)
VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31) VSTORE(11,%0,Chi_32)
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
}
}
}
#undef Chi_00
#undef Chi_01
#undef Chi_02
#undef Chi_10
#undef Chi_11
#undef Chi_12
#undef Chi_20
#undef Chi_21
#undef Chi_22
#undef Chi_30
#undef Chi_31
#undef Chi_32
#undef BCAST0
#undef BCAST1
#undef BCAST2
#undef BCAST3
#undef BCAST4
#undef BCAST5
#undef BCAST6
#undef BCAST7
#undef BCAST8
#undef BCAST9
#undef BCAST10
#undef BCAST11
#endif
};
// Z-mobius version
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi, FermionField &chi,
int LLs, int site, Vector<iSinglet<Simd> > &Matp, Vector<iSinglet<Simd> > &Matm)
{
#ifndef AVX512
{
SiteHalfSpinor BcastP;
SiteHalfSpinor BcastM;
SiteHalfSpinor SiteChiP;
SiteHalfSpinor SiteChiM;
// Ls*Ls * 2 * 12 * vol flops
for(int s1=0;s1<LLs;s1++){
for(int s2=0;s2<LLs;s2++){
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
int s=s2+l*LLs;
int lex=s2+LLs*site;
if ( s2==0 && l==0) {
SiteChiP=zero;
SiteChiM=zero;
}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vbroadcast(BcastP()(sp )(co),psi[lex]()(sp)(co),l);
}}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vbroadcast(BcastM()(sp )(co),psi[lex]()(sp+2)(co),l);
}}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
SiteChiP()(sp)(co)=SiteChiP()(sp)(co)+ Matp[LLs*s+s1]()()()*BcastP()(sp)(co);
SiteChiM()(sp)(co)=SiteChiM()(sp)(co)+ Matm[LLs*s+s1]()()()*BcastM()(sp)(co);
}}
}}
{
int lex = s1+LLs*site;
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
}}
}
}
}
#else
{
// pointers
// MASK_REGS;
#define Chi_00 %zmm0
#define Chi_01 %zmm1
#define Chi_02 %zmm2
#define Chi_10 %zmm3
#define Chi_11 %zmm4
#define Chi_12 %zmm5
#define Chi_20 %zmm6
#define Chi_21 %zmm7
#define Chi_22 %zmm8
#define Chi_30 %zmm9
#define Chi_31 %zmm10
#define Chi_32 %zmm11
#define pChi_00 %%zmm0
#define pChi_01 %%zmm1
#define pChi_02 %%zmm2
#define pChi_10 %%zmm3
#define pChi_11 %%zmm4
#define pChi_12 %%zmm5
#define pChi_20 %%zmm6
#define pChi_21 %%zmm7
#define pChi_22 %%zmm8
#define pChi_30 %%zmm9
#define pChi_31 %%zmm10
#define pChi_32 %%zmm11
#define BCAST_00 %zmm12
#define SHUF_00 %zmm13
#define BCAST_01 %zmm14
#define SHUF_01 %zmm15
#define BCAST_02 %zmm16
#define SHUF_02 %zmm17
#define BCAST_10 %zmm18
#define SHUF_10 %zmm19
#define BCAST_11 %zmm20
#define SHUF_11 %zmm21
#define BCAST_12 %zmm22
#define SHUF_12 %zmm23
#define Mp %zmm24
#define Mps %zmm25
#define Mm %zmm26
#define Mms %zmm27
#define N 8
int incr=LLs*LLs*sizeof(iSinglet<Simd>);
for(int s1=0;s1<LLs;s1++){
for(int s2=0;s2<LLs;s2++){
int lex=s2+LLs*site;
uint64_t a0 = (uint64_t)&Matp[LLs*s2+s1]; // should be cacheable
uint64_t a1 = (uint64_t)&Matm[LLs*s2+s1];
uint64_t a2 = (uint64_t)&psi[lex];
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
if ( (s2+l)==0 ) {
LOAD64(%r8,a0);
LOAD64(%r9,a1);
LOAD64(%r10,a2);
asm (
VLOAD(0,%r8,Mp)// i r
VLOAD(0,%r9,Mm)
VSHUF(Mp,Mps) // r i
VSHUF(Mm,Mms)
VPREFETCH1(12,%r10) VPREFETCH1(13,%r10)
VPREFETCH1(14,%r10) VPREFETCH1(15,%r10)
VMULIDUP(0*N,%r10,Mps,Chi_00)
VMULIDUP(1*N,%r10,Mps,Chi_01)
VMULIDUP(2*N,%r10,Mps,Chi_02)
VMULIDUP(3*N,%r10,Mps,Chi_10)
VMULIDUP(4*N,%r10,Mps,Chi_11)
VMULIDUP(5*N,%r10,Mps,Chi_12)
VMULIDUP(6*N ,%r10,Mms,Chi_20)
VMULIDUP(7*N ,%r10,Mms,Chi_21)
VMULIDUP(8*N ,%r10,Mms,Chi_22)
VMULIDUP(9*N ,%r10,Mms,Chi_30)
VMULIDUP(10*N,%r10,Mms,Chi_31)
VMULIDUP(11*N,%r10,Mms,Chi_32)
VMADDSUBRDUP(0*N,%r10,Mp,Chi_00)
VMADDSUBRDUP(1*N,%r10,Mp,Chi_01)
VMADDSUBRDUP(2*N,%r10,Mp,Chi_02)
VMADDSUBRDUP(3*N,%r10,Mp,Chi_10)
VMADDSUBRDUP(4*N,%r10,Mp,Chi_11)
VMADDSUBRDUP(5*N,%r10,Mp,Chi_12)
VMADDSUBRDUP(6*N ,%r10,Mm,Chi_20)
VMADDSUBRDUP(7*N ,%r10,Mm,Chi_21)
VMADDSUBRDUP(8*N ,%r10,Mm,Chi_22)
VMADDSUBRDUP(9*N ,%r10,Mm,Chi_30)
VMADDSUBRDUP(10*N,%r10,Mm,Chi_31)
VMADDSUBRDUP(11*N,%r10,Mm,Chi_32)
);
} else {
LOAD64(%r8,a0);
LOAD64(%r9,a1);
LOAD64(%r10,a2);
asm (
VLOAD(0,%r8,Mp)
VSHUF(Mp,Mps)
VLOAD(0,%r9,Mm)
VSHUF(Mm,Mms)
VMADDSUBIDUP(0*N,%r10,Mps,Chi_00) // Mri * Pii +- Cir
VMADDSUBIDUP(1*N,%r10,Mps,Chi_01)
VMADDSUBIDUP(2*N,%r10,Mps,Chi_02)
VMADDSUBIDUP(3*N,%r10,Mps,Chi_10)
VMADDSUBIDUP(4*N,%r10,Mps,Chi_11)
VMADDSUBIDUP(5*N,%r10,Mps,Chi_12)
VMADDSUBIDUP(6 *N,%r10,Mms,Chi_20)
VMADDSUBIDUP(7 *N,%r10,Mms,Chi_21)
VMADDSUBIDUP(8 *N,%r10,Mms,Chi_22)
VMADDSUBIDUP(9 *N,%r10,Mms,Chi_30)
VMADDSUBIDUP(10*N,%r10,Mms,Chi_31)
VMADDSUBIDUP(11*N,%r10,Mms,Chi_32)
VMADDSUBRDUP(0*N,%r10,Mp,Chi_00) // Cir = Mir * Prr +- ( Mri * Pii +- Cir)
VMADDSUBRDUP(1*N,%r10,Mp,Chi_01) // Ci = MiPr + Ci + MrPi ; Cr = MrPr - ( MiPi - Cr)
VMADDSUBRDUP(2*N,%r10,Mp,Chi_02)
VMADDSUBRDUP(3*N,%r10,Mp,Chi_10)
VMADDSUBRDUP(4*N,%r10,Mp,Chi_11)
VMADDSUBRDUP(5*N,%r10,Mp,Chi_12)
VMADDSUBRDUP(6 *N,%r10,Mm,Chi_20)
VMADDSUBRDUP(7 *N,%r10,Mm,Chi_21)
VMADDSUBRDUP(8 *N,%r10,Mm,Chi_22)
VMADDSUBRDUP(9 *N,%r10,Mm,Chi_30)
VMADDSUBRDUP(10*N,%r10,Mm,Chi_31)
VMADDSUBRDUP(11*N,%r10,Mm,Chi_32)
);
}
a0 = a0+incr;
a1 = a1+incr;
a2 = a2+sizeof(typename Simd::scalar_type);
}}
{
int lexa = s1+LLs*site;
/*
SiteSpinor tmp;
asm (
VSTORE(0,%0,pChi_00) VSTORE(1 ,%0,pChi_01) VSTORE(2 ,%0,pChi_02)
VSTORE(3,%0,pChi_10) VSTORE(4 ,%0,pChi_11) VSTORE(5 ,%0,pChi_12)
VSTORE(6,%0,pChi_20) VSTORE(7 ,%0,pChi_21) VSTORE(8 ,%0,pChi_22)
VSTORE(9,%0,pChi_30) VSTORE(10,%0,pChi_31) VSTORE(11,%0,pChi_32)
: : "r" ((uint64_t)&tmp) : "memory" );
*/
asm (
VSTORE(0,%0,pChi_00) VSTORE(1 ,%0,pChi_01) VSTORE(2 ,%0,pChi_02)
VSTORE(3,%0,pChi_10) VSTORE(4 ,%0,pChi_11) VSTORE(5 ,%0,pChi_12)
VSTORE(6,%0,pChi_20) VSTORE(7 ,%0,pChi_21) VSTORE(8 ,%0,pChi_22)
VSTORE(9,%0,pChi_30) VSTORE(10,%0,pChi_31) VSTORE(11,%0,pChi_32)
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
// if ( 1 || (site==0) ) {
// std::cout<<site << " s1 "<<s1<<"\n\t"<<tmp << "\n't" << chi[lexa] <<"\n\t"<<tmp-chi[lexa]<<std::endl;
// }
}
}
}
#undef Chi_00
#undef Chi_01
#undef Chi_02
#undef Chi_10
#undef Chi_11
#undef Chi_12
#undef Chi_20
#undef Chi_21
#undef Chi_22
#undef Chi_30
#undef Chi_31
#undef Chi_32
#undef BCAST0
#undef BCAST1
#undef BCAST2
#undef BCAST3
#undef BCAST4
#undef BCAST5
#undef BCAST6
#undef BCAST7
#undef BCAST8
#undef BCAST9
#undef BCAST10
#undef BCAST11
#endif
};
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
{
int Ls=this->Ls;
int LLs = psi._grid->_rdimensions[0];
int vol = psi._grid->oSites()/LLs;
chi.checkerboard=psi.checkerboard;
Vector<iSinglet<Simd> > Matp;
Vector<iSinglet<Simd> > Matm;
Vector<iSinglet<Simd> > *_Matp;
Vector<iSinglet<Simd> > *_Matm;
// MooeeInternalCompute(dag,inv,Matp,Matm);
if ( inv && dag ) {
_Matp = &MatpInvDag;
_Matm = &MatmInvDag;
}
if ( inv && (!dag) ) {
_Matp = &MatpInv;
_Matm = &MatmInv;
}
if ( !inv ) {
MooeeInternalCompute(dag,inv,Matp,Matm);
_Matp = &Matp;
_Matm = &Matm;
}
assert(_Matp->size()==Ls*LLs);
MooeeInvCalls++;
MooeeInvTime-=usecond();
if ( switcheroo<Coeff_t>::iscomplex() ) {
parallel_for(auto site=0;site<vol;site++){
MooeeInternalZAsm(psi,chi,LLs,site,*_Matp,*_Matm);
}
} else {
parallel_for(auto site=0;site<vol;site++){
MooeeInternalAsm(psi,chi,LLs,site,*_Matp,*_Matm);
}
}
MooeeInvTime+=usecond();
}
INSTANTIATE_DPERP(DomainWallVec5dImplD);
INSTANTIATE_DPERP(DomainWallVec5dImplF);
INSTANTIATE_DPERP(ZDomainWallVec5dImplD);
INSTANTIATE_DPERP(ZDomainWallVec5dImplF);
INSTANTIATE_DPERP(DomainWallVec5dImplDF);
INSTANTIATE_DPERP(DomainWallVec5dImplFH);
INSTANTIATE_DPERP(ZDomainWallVec5dImplDF);
INSTANTIATE_DPERP(ZDomainWallVec5dImplFH);
template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<DomainWallVec5dImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<DomainWallVec5dImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
}}

View File

@ -1,323 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/ContinuedFractionFermion5D.h>
namespace Grid {
namespace QCD {
template<class Impl>
void ContinuedFractionFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale)
{
SetCoefficientsZolotarev(1.0/scale,zdata);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata)
{
// How to check Ls matches??
// std::cout<<GridLogMessage << Ls << " Ls"<<std::endl;
// std::cout<<GridLogMessage << zdata->n << " - n"<<std::endl;
// std::cout<<GridLogMessage << zdata->da << " -da "<<std::endl;
// std::cout<<GridLogMessage << zdata->db << " -db"<<std::endl;
// std::cout<<GridLogMessage << zdata->dn << " -dn"<<std::endl;
// std::cout<<GridLogMessage << zdata->dd << " -dd"<<std::endl;
int Ls = this->Ls;
assert(zdata->db==Ls);// Beta has Ls coeffs
R=(1+this->mass)/(1-this->mass);
Beta.resize(Ls);
cc.resize(Ls);
cc_d.resize(Ls);
sqrt_cc.resize(Ls);
for(int i=0; i < Ls ; i++){
Beta[i] = zdata -> beta[i];
cc[i] = 1.0/Beta[i];
cc_d[i]=sqrt(cc[i]);
}
cc_d[Ls-1]=1.0;
for(int i=0; i < Ls-1 ; i++){
sqrt_cc[i]= sqrt(cc[i]*cc[i+1]);
}
sqrt_cc[Ls-2]=sqrt(cc[Ls-2]);
ZoloHiInv =1.0/zolo_hi;
dw_diag = (4.0-this->M5)*ZoloHiInv;
See.resize(Ls);
Aee.resize(Ls);
int sign=1;
for(int s=0;s<Ls;s++){
Aee[s] = sign * Beta[s] * dw_diag;
sign = - sign;
}
Aee[Ls-1] += R;
See[0] = Aee[0];
for(int s=1;s<Ls;s++){
See[s] = Aee[s] - 1.0/See[s-1];
}
for(int s=0;s<Ls;s++){
std::cout<<GridLogMessage <<"s = "<<s<<" Beta "<<Beta[s]<<" Aee "<<Aee[s] <<" See "<<See[s] <<std::endl;
}
}
template<class Impl>
RealD ContinuedFractionFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;
FermionField D(psi._grid);
this->DW(psi,D,DaggerNo);
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==0 ) {
ag5xpby_ssp(chi,cc[0]*Beta[0]*sign*ZoloHiInv,D,sqrt_cc[0],psi,s,s+1); // Multiplies Dw by G5 so Hw
} else if ( s==(Ls-1) ){
RealD R=(1.0+mass)/(1.0-mass);
ag5xpby_ssp(chi,Beta[s]*ZoloHiInv,D,sqrt_cc[s-1],psi,s,s-1);
ag5xpby_ssp(chi,R,psi,1.0,chi,s,s);
} else {
ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*ZoloHiInv,D,sqrt_cc[s],psi,s,s+1);
axpby_ssp(chi,1.0,chi,sqrt_cc[s-1],psi,s,s-1);
}
sign=-sign;
}
return norm2(chi);
}
template<class Impl>
RealD ContinuedFractionFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
{
// This matrix is already hermitian. (g5 Dw) = Dw dag g5 = (g5 Dw)dag
// The rest of matrix is symmetric.
// Can ignore "dag"
return M(psi,chi);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
int Ls = this->Ls;
this->DhopDir(psi,chi,dir,disp); // Dslash on diagonal. g5 Dslash is hermitian
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(chi,Beta[s]*ZoloHiInv,chi,0.0,chi,s,s);
} else {
ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*ZoloHiInv,chi,0.0,chi,s,s);
}
sign=-sign;
}
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;
// Apply 4d dslash
if ( psi.checkerboard == Odd ) {
this->DhopEO(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
} else {
this->DhopOE(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
}
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(chi,Beta[s]*ZoloHiInv,chi,0.0,chi,s,s);
} else {
ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*ZoloHiInv,chi,0.0,chi,s,s);
}
sign=-sign;
}
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
{
this->Meooe(psi,chi);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==0 ) {
ag5xpby_ssp(chi,cc[0]*Beta[0]*sign*dw_diag,psi,sqrt_cc[0],psi,s,s+1); // Multiplies Dw by G5 so Hw
} else if ( s==(Ls-1) ){
// Drop the CC here.
double R=(1+mass)/(1-mass);
ag5xpby_ssp(chi,Beta[s]*dw_diag,psi,sqrt_cc[s-1],psi,s,s-1);
ag5xpby_ssp(chi,R,psi,1.0,chi,s,s);
} else {
ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*dw_diag,psi,sqrt_cc[s],psi,s,s+1);
axpby_ssp(chi,1.0,chi,sqrt_cc[s-1],psi,s,s-1);
}
sign=-sign;
}
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
{
this->Mooee(psi,chi);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;
// Apply Linv
axpby_ssp(chi,1.0/cc_d[0],psi,0.0,psi,0,0);
for(int s=1;s<Ls;s++){
axpbg5y_ssp(chi,1.0/cc_d[s],psi,-1.0/See[s-1],chi,s,s-1);
}
// Apply Dinv
for(int s=0;s<Ls;s++){
ag5xpby_ssp(chi,1.0/See[s],chi,0.0,chi,s,s); //only appearance of See[0]
}
// Apply Uinv = (Linv)^T
axpby_ssp(chi,1.0/cc_d[Ls-1],chi,0.0,chi,Ls-1,Ls-1);
for(int s=Ls-2;s>=0;s--){
axpbg5y_ssp(chi,1.0/cc_d[s],chi,-1.0*cc_d[s+1]/See[s]/cc_d[s],chi,s,s+1);
}
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
{
this->MooeeInv(psi,chi);
}
// force terms; five routines; default to Dhop on diagonal
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V._grid);
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(D,Beta[s]*ZoloHiInv,U,0.0,U,s,s);
} else {
ag5xpby_ssp(D,cc[s]*Beta[s]*sign*ZoloHiInv,U,0.0,U,s,s);
}
sign=-sign;
}
this->DhopDeriv(mat,D,V,DaggerNo);
};
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V._grid);
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(D,Beta[s]*ZoloHiInv,U,0.0,U,s,s);
} else {
ag5xpby_ssp(D,cc[s]*Beta[s]*sign*ZoloHiInv,U,0.0,U,s,s);
}
sign=-sign;
}
this->DhopDerivOE(mat,D,V,DaggerNo);
};
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V._grid);
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(D,Beta[s]*ZoloHiInv,U,0.0,U,s,s);
} else {
ag5xpby_ssp(D,cc[s]*Beta[s]*sign*ZoloHiInv,U,0.0,U,s,s);
}
sign=-sign;
}
this->DhopDerivEO(mat,D,V,DaggerNo);
};
// Constructors
template<class Impl>
ContinuedFractionFermion5D<Impl>::ContinuedFractionFermion5D(
GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5,const ImplParams &p) :
WilsonFermion5D<Impl>(_Umu,
FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid,M5,p),
mass(_mass)
{
int Ls = this->Ls;
assert((Ls&0x1)==1); // Odd Ls required
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d)
{
int Ls = this->Ls;
conformable(solution5d._grid,this->FermionGrid());
conformable(exported4d._grid,this->GaugeGrid());
ExtractSlice(exported4d, solution5d, Ls-1, Ls-1);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d)
{
int Ls = this->Ls;
conformable(imported5d._grid,this->FermionGrid());
conformable(input4d._grid ,this->GaugeGrid());
FermionField tmp(this->FermionGrid());
tmp=zero;
InsertSlice(input4d, tmp, Ls-1, Ls-1);
tmp=Gamma(Gamma::Algebra::Gamma5)*tmp;
this->Dminus(tmp,imported5d);
}
FermOpTemplateInstantiate(ContinuedFractionFermion5D);
}
}

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,46 +24,44 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_CONTINUED_FRACTION_H
#define GRID_QCD_CONTINUED_FRACTION_H
#include <Grid/qcd/action/fermion/WilsonFermion5D.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class ContinuedFractionFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class ContinuedFractionFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
// override multiply
virtual RealD M (const FermionField &in, FermionField &out);
virtual RealD Mdag (const FermionField &in, FermionField &out);
// override multiply
virtual RealD M (const FermionField &in, FermionField &out);
virtual RealD Mdag (const FermionField &in, FermionField &out);
// half checkerboard operaions
virtual void Meooe (const FermionField &in, FermionField &out);
virtual void MeooeDag (const FermionField &in, FermionField &out);
virtual void Mooee (const FermionField &in, FermionField &out);
virtual void MooeeDag (const FermionField &in, FermionField &out);
virtual void MooeeInv (const FermionField &in, FermionField &out);
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
// half checkerboard operaions
virtual void Meooe (const FermionField &in, FermionField &out);
virtual void MeooeDag (const FermionField &in, FermionField &out);
virtual void Mooee (const FermionField &in, FermionField &out);
virtual void MooeeDag (const FermionField &in, FermionField &out);
virtual void MooeeInv (const FermionField &in, FermionField &out);
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// virtual void Instantiatable(void)=0;
virtual void Instantiatable(void) =0;
// virtual void Instantiatable(void)=0;
virtual void Instantiatable(void) =0;
// Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
// Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
///////////////////////////////////////////////////////////////
// Physical surface field utilities
@ -73,35 +71,34 @@ namespace Grid {
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d);
// Constructors
ContinuedFractionFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5,const ImplParams &p= ImplParams());
// Constructors
ContinuedFractionFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5,const ImplParams &p= ImplParams());
protected:
protected:
void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale);
void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata);;
void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale);
void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata);;
// Cont frac
RealD dw_diag;
RealD mass;
RealD R;
RealD ZoloHiInv;
std::vector<double> Beta;
std::vector<double> cc;;
std::vector<double> cc_d;;
std::vector<double> sqrt_cc;
std::vector<double> See;
std::vector<double> Aee;
// Cont frac
RealD dw_diag;
RealD mass;
RealD R;
RealD ZoloHiInv;
Vector<double> Beta;
Vector<double> cc;;
Vector<double> cc_d;;
Vector<double> sqrt_cc;
Vector<double> See;
Vector<double> Aee;
};
};
}
}
NAMESPACE_END(Grid);
#endif

View File

@ -1,438 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
namespace Grid {
namespace QCD {
template<class Impl>
DomainWallEOFAFermion<Impl>::DomainWallEOFAFermion(
GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3,
RealD _shift, int _pm, RealD _M5, const ImplParams &p) :
AbstractEOFAFermion<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3,
_shift, _pm, _M5, 1.0, 0.0, p)
{
RealD eps = 1.0;
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);
assert(zdata->n == this->Ls);
std::cout << GridLogMessage << "DomainWallEOFAFermion with Ls=" << this->Ls << std::endl;
this->SetCoefficientsTanh(zdata, 1.0, 0.0);
Approx::zolotarev_free(zdata);
}
/***************************************************************
* Additional EOFA operators only called outside the inverter.
* Since speed is not essential, simple axpby-style
* implementations should be fine.
***************************************************************/
template<class Impl>
void DomainWallEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din, int sign, int dag)
{
int Ls = this->Ls;
Din = zero;
if((sign == 1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, Ls-1, 0); }
else if((sign == -1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); }
else if((sign == 1 ) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, Ls-1); }
else if((sign == -1) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); }
}
// This is just the identity for DWF
template<class Impl>
void DomainWallEOFAFermion<Impl>::Dtilde(const FermionField& psi, FermionField& chi){ chi = psi; }
// This is just the identity for DWF
template<class Impl>
void DomainWallEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& chi){ chi = psi; }
/*****************************************************************************************************/
template<class Impl>
RealD DomainWallEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
FermionField Din(psi._grid);
this->Meooe5D(psi, Din);
this->DW(Din, chi, DaggerNo);
axpby(chi, 1.0, 1.0, chi, psi);
this->M5D(psi, chi);
return(norm2(chi));
}
template<class Impl>
RealD DomainWallEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
FermionField Din(psi._grid);
this->DW(psi, Din, DaggerYes);
this->MeooeDag5D(Din, chi);
this->M5Ddag(psi, chi);
axpby(chi, 1.0, 1.0, chi, psi);
return(norm2(chi));
}
/********************************************************************
* Performance critical fermion operators called inside the inverter
********************************************************************/
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
int pm = this->pm;
RealD shift = this->shift;
RealD mq1 = this->mq1;
RealD mq2 = this->mq2;
RealD mq3 = this->mq3;
// coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} )
Coeff_t shiftp(0.0), shiftm(0.0);
if(shift != 0.0){
if(pm == 1){ shiftp = shift*(mq3-mq2); }
else{ shiftm = -shift*(mq3-mq2); }
}
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftm;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = mq1 + shiftp;
#if(0)
std::cout << GridLogMessage << "DomainWallEOFAFermion::M5D(FF&,FF&):" << std::endl;
for(int i=0; i<diag.size(); ++i){
std::cout << GridLogMessage << "diag[" << i << "] =" << diag[i] << std::endl;
}
for(int i=0; i<upper.size(); ++i){
std::cout << GridLogMessage << "upper[" << i << "] =" << upper[i] << std::endl;
}
for(int i=0; i<lower.size(); ++i){
std::cout << GridLogMessage << "lower[" << i << "] =" << lower[i] << std::endl;
}
#endif
this->M5D(psi, chi, chi, lower, diag, upper);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
int pm = this->pm;
RealD shift = this->shift;
RealD mq1 = this->mq1;
RealD mq2 = this->mq2;
RealD mq3 = this->mq3;
// coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} )
Coeff_t shiftp(0.0), shiftm(0.0);
if(shift != 0.0){
if(pm == 1){ shiftp = shift*(mq3-mq2); }
else{ shiftm = -shift*(mq3-mq2); }
}
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftp;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = mq1 + shiftm;
#if(0)
std::cout << GridLogMessage << "DomainWallEOFAFermion::M5Ddag(FF&,FF&):" << std::endl;
for(int i=0; i<diag.size(); ++i){
std::cout << GridLogMessage << "diag[" << i << "] =" << diag[i] << std::endl;
}
for(int i=0; i<upper.size(); ++i){
std::cout << GridLogMessage << "upper[" << i << "] =" << upper[i] << std::endl;
}
for(int i=0; i<lower.size(); ++i){
std::cout << GridLogMessage << "lower[" << i << "] =" << lower[i] << std::endl;
}
#endif
this->M5Ddag(psi, chi, chi, lower, diag, upper);
}
// half checkerboard operations
template<class Impl>
void DomainWallEOFAFermion<Impl>::Mooee(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
std::vector<Coeff_t> diag = this->bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
upper[s] = -this->cee[s];
lower[s] = -this->cee[s];
}
upper[Ls-1] = this->dm;
lower[0] = this->dp;
this->M5D(psi, psi, chi, lower, diag, upper);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeDag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
std::vector<Coeff_t> diag = this->bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
upper[s] = -this->cee[s];
lower[s] = -this->cee[s];
}
upper[Ls-1] = this->dp;
lower[0] = this->dm;
this->M5Ddag(psi, psi, chi, lower, diag, upper);
}
/****************************************************************************************/
//Zolo
template<class Impl>
void DomainWallEOFAFermion<Impl>::SetCoefficientsInternal(RealD zolo_hi, std::vector<Coeff_t>& gamma, RealD b, RealD c)
{
int Ls = this->Ls;
int pm = this->pm;
RealD mq1 = this->mq1;
RealD mq2 = this->mq2;
RealD mq3 = this->mq3;
RealD shift = this->shift;
////////////////////////////////////////////////////////
// Constants for the preconditioned matrix Cayley form
////////////////////////////////////////////////////////
this->bs.resize(Ls);
this->cs.resize(Ls);
this->aee.resize(Ls);
this->aeo.resize(Ls);
this->bee.resize(Ls);
this->beo.resize(Ls);
this->cee.resize(Ls);
this->ceo.resize(Ls);
for(int i=0; i<Ls; ++i){
this->bee[i] = 4.0 - this->M5 + 1.0;
this->cee[i] = 1.0;
}
for(int i=0; i<Ls; ++i){
this->aee[i] = this->cee[i];
this->bs[i] = this->beo[i] = 1.0;
this->cs[i] = this->ceo[i] = 0.0;
}
//////////////////////////////////////////
// EOFA shift terms
//////////////////////////////////////////
if(pm == 1){
this->dp = mq1*this->cee[0] + shift*(mq3-mq2);
this->dm = mq1*this->cee[Ls-1];
} else if(this->pm == -1) {
this->dp = mq1*this->cee[0];
this->dm = mq1*this->cee[Ls-1] - shift*(mq3-mq2);
} else {
this->dp = mq1*this->cee[0];
this->dm = mq1*this->cee[Ls-1];
}
//////////////////////////////////////////
// LDU decomposition of eeoo
//////////////////////////////////////////
this->dee.resize(Ls+1);
this->lee.resize(Ls);
this->leem.resize(Ls);
this->uee.resize(Ls);
this->ueem.resize(Ls);
for(int i=0; i<Ls; ++i){
if(i < Ls-1){
this->lee[i] = -this->cee[i+1]/this->bee[i]; // sub-diag entry on the ith column
this->leem[i] = this->dm/this->bee[i];
for(int j=0; j<i; j++){ this->leem[i] *= this->aee[j]/this->bee[j]; }
this->dee[i] = this->bee[i];
this->uee[i] = -this->aee[i]/this->bee[i]; // up-diag entry on the ith row
this->ueem[i] = this->dp / this->bee[0];
for(int j=1; j<=i; j++){ this->ueem[i] *= this->cee[j]/this->bee[j]; }
} else {
this->lee[i] = 0.0;
this->leem[i] = 0.0;
this->uee[i] = 0.0;
this->ueem[i] = 0.0;
}
}
{
Coeff_t delta_d = 1.0 / this->bee[0];
for(int j=1; j<Ls-1; j++){ delta_d *= this->cee[j] / this->bee[j]; }
this->dee[Ls-1] = this->bee[Ls-1] + this->cee[0] * this->dm * delta_d;
this->dee[Ls] = this->bee[Ls-1] + this->cee[Ls-1] * this->dp * delta_d;
}
int inv = 1;
this->MooeeInternalCompute(0, inv, this->MatpInv, this->MatmInv);
this->MooeeInternalCompute(1, inv, this->MatpInvDag, this->MatmInvDag);
}
// Recompute Cayley-form coefficients for different shift
template<class Impl>
void DomainWallEOFAFermion<Impl>::RefreshShiftCoefficients(RealD new_shift)
{
this->shift = new_shift;
Approx::zolotarev_data *zdata = Approx::higham(1.0, this->Ls);
this->SetCoefficientsTanh(zdata, 1.0, 0.0);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternalCompute(int dag, int inv,
Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
int Ls = this->Ls;
GridBase* grid = this->FermionRedBlackGrid();
int LLs = grid->_rdimensions[0];
if(LLs == Ls){ return; } // Not vectorised in 5th direction
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
for(int s=0; s<Ls; s++){
Pplus(s,s) = this->bee[s];
Pminus(s,s) = this->bee[s];
}
for(int s=0; s<Ls-1; s++){
Pminus(s,s+1) = -this->cee[s];
}
for(int s=0; s<Ls-1; s++){
Pplus(s+1,s) = -this->cee[s+1];
}
Pplus (0,Ls-1) = this->dp;
Pminus(Ls-1,0) = this->dm;
Eigen::MatrixXcd PplusMat ;
Eigen::MatrixXcd PminusMat;
#if(0)
std::cout << GridLogMessage << "Pplus:" << std::endl;
for(int s=0; s<Ls; ++s){
for(int ss=0; ss<Ls; ++ss){
std::cout << Pplus(s,ss) << "\t";
}
std::cout << std::endl;
}
std::cout << GridLogMessage << "Pminus:" << std::endl;
for(int s=0; s<Ls; ++s){
for(int ss=0; ss<Ls; ++ss){
std::cout << Pminus(s,ss) << "\t";
}
std::cout << std::endl;
}
#endif
if(inv) {
PplusMat = Pplus.inverse();
PminusMat = Pminus.inverse();
} else {
PplusMat = Pplus;
PminusMat = Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
typedef typename SiteHalfSpinor::scalar_type scalar_type;
const int Nsimd = Simd::Nsimd();
Matp.resize(Ls*LLs);
Matm.resize(Ls*LLs);
for(int s2=0; s2<Ls; s2++){
for(int s1=0; s1<LLs; s1++){
int istride = LLs;
int ostride = 1;
Simd Vp;
Simd Vm;
scalar_type *sp = (scalar_type*) &Vp;
scalar_type *sm = (scalar_type*) &Vm;
for(int l=0; l<Nsimd; l++){
if(switcheroo<Coeff_t>::iscomplex()) {
sp[l] = PplusMat (l*istride+s1*ostride,s2);
sm[l] = PminusMat(l*istride+s1*ostride,s2);
} else {
// if real
scalar_type tmp;
tmp = PplusMat (l*istride+s1*ostride,s2);
sp[l] = scalar_type(tmp.real(),tmp.real());
tmp = PminusMat(l*istride+s1*ostride,s2);
sm[l] = scalar_type(tmp.real(),tmp.real());
}
}
Matp[LLs*s2+s1] = Vp;
Matm[LLs*s2+s1] = Vm;
}}
}
FermOpTemplateInstantiate(DomainWallEOFAFermion);
GparityFermOpTemplateInstantiate(DomainWallEOFAFermion);
}}

View File

@ -26,90 +26,65 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_DOMAIN_WALL_EOFA_FERMION_H
#define GRID_QCD_DOMAIN_WALL_EOFA_FERMION_H
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/AbstractEOFAFermion.h>
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template<class Impl>
class DomainWallEOFAFermion : public AbstractEOFAFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
template<class Impl>
class DomainWallEOFAFermion : public AbstractEOFAFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
// Modified (0,Ls-1) and (Ls-1,0) elements of Mooee
// for red-black preconditioned Shamir EOFA
Coeff_t dm;
Coeff_t dp;
public:
// Modified (0,Ls-1) and (Ls-1,0) elements of Mooee
// for red-black preconditioned Shamir EOFA
Coeff_t dm;
Coeff_t dp;
virtual void Instantiatable(void) {};
virtual void Instantiatable(void) {};
// EOFA-specific operations
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag);
virtual void Dtilde (const FermionField& in, FermionField& out);
virtual void DtildeInv (const FermionField& in, FermionField& out);
// EOFA-specific operations
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag);
virtual void Dtilde (const FermionField& in, FermionField& out);
virtual void DtildeInv (const FermionField& in, FermionField& out);
// override multiply
virtual RealD M (const FermionField& in, FermionField& out);
virtual RealD Mdag (const FermionField& in, FermionField& out);
// override multiply
virtual RealD M (const FermionField& in, FermionField& out);
virtual RealD Mdag (const FermionField& in, FermionField& out);
// half checkerboard operations
virtual void Mooee (const FermionField& in, FermionField& out);
virtual void MooeeDag (const FermionField& in, FermionField& out);
virtual void MooeeInv (const FermionField& in, FermionField& out);
virtual void MooeeInvDag(const FermionField& in, FermionField& out);
// half checkerboard operations
virtual void Mooee (const FermionField& in, FermionField& out);
virtual void MooeeDag (const FermionField& in, FermionField& out);
virtual void MooeeInv (const FermionField& in, FermionField& out);
virtual void MooeeInvDag(const FermionField& in, FermionField& out);
virtual void M5D (const FermionField& psi, FermionField& chi);
virtual void M5Ddag (const FermionField& psi, FermionField& chi);
virtual void M5D (const FermionField& psi, FermionField& chi);
virtual void M5Ddag (const FermionField& psi, FermionField& chi);
/////////////////////////////////////////////////////
// Instantiate different versions depending on Impl
/////////////////////////////////////////////////////
void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
/////////////////////////////////////////////////////
// Instantiate different versions depending on Impl
/////////////////////////////////////////////////////
void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,
Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper);
void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,
Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper);
void MooeeInternal(const FermionField& in, FermionField& out, int dag, int inv);
virtual void RefreshShiftCoefficients(RealD new_shift);
void MooeeInternalCompute(int dag, int inv, Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
// Constructors
DomainWallEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm,
RealD _M5, const ImplParams& p=ImplParams());
void MooeeInternalAsm(const FermionField& in, FermionField& out, int LLs, int site,
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
protected:
void SetCoefficientsInternal(RealD zolo_hi, Vector<Coeff_t>& gamma, RealD b, RealD c);
};
void MooeeInternalZAsm(const FermionField& in, FermionField& out, int LLs, int site,
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
NAMESPACE_END(Grid);
virtual void RefreshShiftCoefficients(RealD new_shift);
// Constructors
DomainWallEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm,
RealD _M5, const ImplParams& p=ImplParams());
protected:
void SetCoefficientsInternal(RealD zolo_hi, std::vector<Coeff_t>& gamma, RealD b, RealD c);
};
}}
#define INSTANTIATE_DPERP_DWF_EOFA(A)\
template void DomainWallEOFAFermion<A>::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, \
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
template void DomainWallEOFAFermion<A>::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, \
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
template void DomainWallEOFAFermion<A>::MooeeInv(const FermionField& psi, FermionField& chi); \
template void DomainWallEOFAFermion<A>::MooeeInvDag(const FermionField& psi, FermionField& chi);
#undef DOMAIN_WALL_EOFA_DPERP_DENSE
#define DOMAIN_WALL_EOFA_DPERP_CACHE
#undef DOMAIN_WALL_EOFA_DPERP_LINALG
#define DOMAIN_WALL_EOFA_DPERP_VEC
#endif

View File

@ -1,248 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
namespace Grid {
namespace QCD {
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus fowards
// Pplus backwards..
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
int Ls = this->Ls;
GridBase* grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
for(int s=0; s<Ls; s++){
auto tmp = psi._odata[0];
if(s==0) {
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5m(tmp, psi._odata[ss+0]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
}
}
this->M5Dtime += usecond();
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
int Ls = this->Ls;
GridBase* grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard=psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
auto tmp = psi._odata[0];
for(int s=0; s<Ls; s++){
if(s==0) {
spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5p(tmp, psi._odata[ss+0]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
}
}
this->M5Dtime += usecond();
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
GridBase* grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard;
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
auto tmp1 = psi._odata[0];
auto tmp2 = psi._odata[0];
// flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops
// Apply (L^{\prime})^{-1}
chi[ss] = psi[ss]; // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
spProj5p(tmp1, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp1;
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp1, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp1;
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp1, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls])*tmp1;
}
spProj5m(tmp2, chi[ss+Ls-1]);
chi[ss+Ls-1] = (1.0/this->dee[Ls])*tmp1 + (1.0/this->dee[Ls-1])*tmp2;
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
spProj5m(tmp1, chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1;
}
}
this->MooeeInvTime += usecond();
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
GridBase* grid = psi._grid;
int Ls = this->Ls;
assert(psi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
std::vector<Coeff_t> ueec(Ls);
std::vector<Coeff_t> deec(Ls+1);
std::vector<Coeff_t> leec(Ls);
std::vector<Coeff_t> ueemc(Ls);
std::vector<Coeff_t> leemc(Ls);
for(int s=0; s<ueec.size(); s++){
ueec[s] = conjugate(this->uee[s]);
deec[s] = conjugate(this->dee[s]);
leec[s] = conjugate(this->lee[s]);
ueemc[s] = conjugate(this->ueem[s]);
leemc[s] = conjugate(this->leem[s]);
}
deec[Ls] = conjugate(this->dee[Ls]);
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
auto tmp1 = psi._odata[0];
auto tmp2 = psi._odata[0];
// Apply (U^{\prime})^{-dagger}
chi[ss] = psi[ss];
for(int s=1; s<Ls; s++){
spProj5m(tmp1, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - ueec[s-1]*tmp1;
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp1, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - ueemc[s]*tmp1;
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp1, chi[ss+Ls-1]);
chi[ss+s] = (1.0/deec[s])*chi[ss+s] - (leemc[s]/deec[Ls-1])*tmp1;
}
spProj5p(tmp2, chi[ss+Ls-1]);
chi[ss+Ls-1] = (1.0/deec[Ls-1])*tmp1 + (1.0/deec[Ls])*tmp2;
// Apply L^{-dagger}
for(int s=Ls-2; s>=0; s--){
spProj5p(tmp1, chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - leec[s]*tmp1;
}
}
this->MooeeInvTime += usecond();
}
#ifdef DOMAIN_WALL_EOFA_DPERP_CACHE
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
#endif
}}

View File

@ -1,159 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
namespace Grid {
namespace QCD {
/*
* Dense matrix versions of routines
*/
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{
int Ls = this->Ls;
int LLs = psi._grid->_rdimensions[0];
int vol = psi._grid->oSites()/LLs;
chi.checkerboard = psi.checkerboard;
assert(Ls==LLs);
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
for(int s=0;s<Ls;s++){
Pplus(s,s) = this->bee[s];
Pminus(s,s) = this->bee[s];
}
for(int s=0; s<Ls-1; s++){
Pminus(s,s+1) = -this->cee[s];
}
for(int s=0; s<Ls-1; s++){
Pplus(s+1,s) = -this->cee[s+1];
}
Pplus (0,Ls-1) = this->dp;
Pminus(Ls-1,0) = this->dm;
Eigen::MatrixXd PplusMat ;
Eigen::MatrixXd PminusMat;
if(inv) {
PplusMat = Pplus.inverse();
PminusMat = Pminus.inverse();
} else {
PplusMat = Pplus;
PminusMat = Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
// For the non-vectorised s-direction this is simple
for(auto site=0; site<vol; site++){
SiteSpinor SiteChi;
SiteHalfSpinor SitePplus;
SiteHalfSpinor SitePminus;
for(int s1=0; s1<Ls; s1++){
SiteChi = zero;
for(int s2=0; s2<Ls; s2++){
int lex2 = s2 + Ls*site;
if(PplusMat(s1,s2) != 0.0){
spProj5p(SitePplus,psi[lex2]);
accumRecon5p(SiteChi, PplusMat(s1,s2)*SitePplus);
}
if(PminusMat(s1,s2) != 0.0){
spProj5m(SitePminus, psi[lex2]);
accumRecon5m(SiteChi, PminusMat(s1,s2)*SitePminus);
}
}
chi[s1+Ls*site] = SiteChi*0.5;
}
}
}
#ifdef DOMAIN_WALL_EOFA_DPERP_DENSE
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
template void DomainWallEOFAFermion<GparityWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<GparityWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
template void DomainWallEOFAFermion<GparityWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<GparityWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
#endif
}}

View File

@ -1,168 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
namespace Grid {
namespace QCD {
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus fowards
// Pplus backwards
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard = psi.checkerboard;
int Ls = this->Ls;
FermionField tmp(psi._grid);
// Apply (L^{\prime})^{-1}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls], chi, s, Ls-1);
}
axpby_ssp_pminus(tmp, czero, chi, one/this->dee[Ls-1], chi, Ls-1, Ls-1);
axpby_ssp_pplus(chi, one, tmp, one/this->dee[Ls], chi, Ls-1, Ls-1);
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls]
}
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard = psi.checkerboard;
int Ls = this->Ls;
FermionField tmp(psi._grid);
// Apply (U^{\prime})^{-dagger}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
}
axpby_ssp_pminus(tmp, czero, chi, one/conjugate(this->dee[Ls-1]), chi, Ls-1, Ls-1);
axpby_ssp_pplus(chi, one, tmp, one/conjugate(this->dee[Ls]), chi, Ls-1, Ls-1);
// Apply L^{-dagger}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls]
}
}
#ifdef DOMAIN_WALL_EOFA_DPERP_LINALG
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
#endif
}}

View File

@ -1,605 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
namespace Grid {
namespace QCD {
/*
* Dense matrix versions of routines
*/
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
GridBase* grid = psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
const int nsimd = Simd::Nsimd();
Vector<iSinglet<Simd> > u(LLs);
Vector<iSinglet<Simd> > l(LLs);
Vector<iSinglet<Simd> > d(LLs);
assert(Ls/LLs == nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
scalar_type* l_p = (scalar_type*) &l[0];
scalar_type* d_p = (scalar_type*) &d[0];
for(int o=0;o<LLs;o++){ // outer
for(int i=0;i<nsimd;i++){ //inner
int s = o + i*LLs;
int ss = o*nsimd + i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
this->M5Dcalls++;
this->M5Dtime -= usecond();
assert(Nc == 3);
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0; v<LLs; v++){
int vp = (v+1)%LLs;
int vm = (v+LLs-1)%LLs;
spProj5m(hp, psi[ss+vp]);
spProj5p(hm, psi[ss+vm]);
if (vp <= v){ rotate(hp, hp, 1); }
if (vm >= v){ rotate(hm, hm, nsimd-1); }
hp = 0.5*hp;
hm = 0.5*hm;
spRecon5m(fp, hp);
spRecon5p(fm, hm);
chi[ss+v] = d[v]*phi[ss+v];
chi[ss+v] = chi[ss+v] + u[v]*fp;
chi[ss+v] = chi[ss+v] + l[v]*fm;
}
#else
for(int v=0; v<LLs; v++){
vprefetch(psi[ss+v+LLs]);
int vp = (v==LLs-1) ? 0 : v+1;
int vm = (v==0) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(2)(0);
Simd hp_01 = psi[ss+vp]()(2)(1);
Simd hp_02 = psi[ss+vp]()(2)(2);
Simd hp_10 = psi[ss+vp]()(3)(0);
Simd hp_11 = psi[ss+vp]()(3)(1);
Simd hp_12 = psi[ss+vp]()(3)(2);
Simd hm_00 = psi[ss+vm]()(0)(0);
Simd hm_01 = psi[ss+vm]()(0)(1);
Simd hm_02 = psi[ss+vm]()(0)(2);
Simd hm_10 = psi[ss+vm]()(1)(0);
Simd hm_11 = psi[ss+vm]()(1)(1);
Simd hm_12 = psi[ss+vm]()(1)(2);
if(vp <= v){
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if(vm >= v){
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
// Can force these to real arithmetic and save 2x.
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
vstream(chi[ss+v]()(0)(0), p_00);
vstream(chi[ss+v]()(0)(1), p_01);
vstream(chi[ss+v]()(0)(2), p_02);
vstream(chi[ss+v]()(1)(0), p_10);
vstream(chi[ss+v]()(1)(1), p_11);
vstream(chi[ss+v]()(1)(2), p_12);
vstream(chi[ss+v]()(2)(0), p_20);
vstream(chi[ss+v]()(2)(1), p_21);
vstream(chi[ss+v]()(2)(2), p_22);
vstream(chi[ss+v]()(3)(0), p_30);
vstream(chi[ss+v]()(3)(1), p_31);
vstream(chi[ss+v]()(3)(2), p_32);
}
#endif
}
this->M5Dtime += usecond();
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
GridBase* grid = psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
int nsimd = Simd::Nsimd();
Vector<iSinglet<Simd> > u(LLs);
Vector<iSinglet<Simd> > l(LLs);
Vector<iSinglet<Simd> > d(LLs);
assert(Ls/LLs == nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
scalar_type* l_p = (scalar_type*) &l[0];
scalar_type* d_p = (scalar_type*) &d[0];
for(int o=0; o<LLs; o++){ // outer
for(int i=0; i<nsimd; i++){ //inner
int s = o + i*LLs;
int ss = o*nsimd + i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0; v<LLs; v++){
int vp = (v+1)%LLs;
int vm = (v+LLs-1)%LLs;
spProj5p(hp, psi[ss+vp]);
spProj5m(hm, psi[ss+vm]);
if(vp <= v){ rotate(hp, hp, 1); }
if(vm >= v){ rotate(hm, hm, nsimd-1); }
hp = hp*0.5;
hm = hm*0.5;
spRecon5p(fp, hp);
spRecon5m(fm, hm);
chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
chi[ss+v] = chi[ss+v] +l[v]*fm;
}
#else
for(int v=0; v<LLs; v++){
vprefetch(psi[ss+v+LLs]);
int vp = (v == LLs-1) ? 0 : v+1;
int vm = (v == 0 ) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(0)(0);
Simd hp_01 = psi[ss+vp]()(0)(1);
Simd hp_02 = psi[ss+vp]()(0)(2);
Simd hp_10 = psi[ss+vp]()(1)(0);
Simd hp_11 = psi[ss+vp]()(1)(1);
Simd hp_12 = psi[ss+vp]()(1)(2);
Simd hm_00 = psi[ss+vm]()(2)(0);
Simd hm_01 = psi[ss+vm]()(2)(1);
Simd hm_02 = psi[ss+vm]()(2)(2);
Simd hm_10 = psi[ss+vm]()(3)(0);
Simd hm_11 = psi[ss+vm]()(3)(1);
Simd hm_12 = psi[ss+vm]()(3)(2);
if (vp <= v){
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if(vm >= v){
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
vstream(chi[ss+v]()(0)(0), p_00);
vstream(chi[ss+v]()(0)(1), p_01);
vstream(chi[ss+v]()(0)(2), p_02);
vstream(chi[ss+v]()(1)(0), p_10);
vstream(chi[ss+v]()(1)(1), p_11);
vstream(chi[ss+v]()(1)(2), p_12);
vstream(chi[ss+v]()(2)(0), p_20);
vstream(chi[ss+v]()(2)(1), p_21);
vstream(chi[ss+v]()(2)(2), p_22);
vstream(chi[ss+v]()(3)(0), p_30);
vstream(chi[ss+v]()(3)(1), p_31);
vstream(chi[ss+v]()(3)(2), p_32);
}
#endif
}
this->M5Dtime += usecond();
}
#ifdef AVX512
#include<simd/Intel512common.h>
#include<simd/Intel512avx.h>
#include<simd/Intel512single.h>
#endif
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
#ifndef AVX512
{
SiteHalfSpinor BcastP;
SiteHalfSpinor BcastM;
SiteHalfSpinor SiteChiP;
SiteHalfSpinor SiteChiM;
// Ls*Ls * 2 * 12 * vol flops
for(int s1=0; s1<LLs; s1++){
for(int s2=0; s2<LLs; s2++){
for(int l=0; l < Simd::Nsimd(); l++){ // simd lane
int s = s2 + l*LLs;
int lex = s2 + LLs*site;
if( s2==0 && l==0 ){
SiteChiP=zero;
SiteChiM=zero;
}
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
vbroadcast(BcastP()(sp)(co), psi[lex]()(sp)(co), l);
}}
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
vbroadcast(BcastM()(sp)(co), psi[lex]()(sp+2)(co), l);
}}
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
SiteChiP()(sp)(co) = real_madd(Matp[LLs*s+s1]()()(), BcastP()(sp)(co), SiteChiP()(sp)(co)); // 1100 us.
SiteChiM()(sp)(co) = real_madd(Matm[LLs*s+s1]()()(), BcastM()(sp)(co), SiteChiM()(sp)(co)); // each found by commenting out
}}
}}
{
int lex = s1 + LLs*site;
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
}}
}
}
}
#else
{
// pointers
// MASK_REGS;
#define Chi_00 %%zmm1
#define Chi_01 %%zmm2
#define Chi_02 %%zmm3
#define Chi_10 %%zmm4
#define Chi_11 %%zmm5
#define Chi_12 %%zmm6
#define Chi_20 %%zmm7
#define Chi_21 %%zmm8
#define Chi_22 %%zmm9
#define Chi_30 %%zmm10
#define Chi_31 %%zmm11
#define Chi_32 %%zmm12
#define BCAST0 %%zmm13
#define BCAST1 %%zmm14
#define BCAST2 %%zmm15
#define BCAST3 %%zmm16
#define BCAST4 %%zmm17
#define BCAST5 %%zmm18
#define BCAST6 %%zmm19
#define BCAST7 %%zmm20
#define BCAST8 %%zmm21
#define BCAST9 %%zmm22
#define BCAST10 %%zmm23
#define BCAST11 %%zmm24
int incr = LLs*LLs*sizeof(iSinglet<Simd>);
for(int s1=0; s1<LLs; s1++){
for(int s2=0; s2<LLs; s2++){
int lex = s2 + LLs*site;
uint64_t a0 = (uint64_t) &Matp[LLs*s2+s1]; // should be cacheable
uint64_t a1 = (uint64_t) &Matm[LLs*s2+s1];
uint64_t a2 = (uint64_t) &psi[lex];
for(int l=0; l<Simd::Nsimd(); l++){ // simd lane
if((s2+l)==0) {
asm(
VPREFETCH1(0,%2) VPREFETCH1(0,%1)
VPREFETCH1(12,%2) VPREFETCH1(13,%2)
VPREFETCH1(14,%2) VPREFETCH1(15,%2)
VBCASTCDUP(0,%2,BCAST0)
VBCASTCDUP(1,%2,BCAST1)
VBCASTCDUP(2,%2,BCAST2)
VBCASTCDUP(3,%2,BCAST3)
VBCASTCDUP(4,%2,BCAST4) VMULMEM(0,%0,BCAST0,Chi_00)
VBCASTCDUP(5,%2,BCAST5) VMULMEM(0,%0,BCAST1,Chi_01)
VBCASTCDUP(6,%2,BCAST6) VMULMEM(0,%0,BCAST2,Chi_02)
VBCASTCDUP(7,%2,BCAST7) VMULMEM(0,%0,BCAST3,Chi_10)
VBCASTCDUP(8,%2,BCAST8) VMULMEM(0,%0,BCAST4,Chi_11)
VBCASTCDUP(9,%2,BCAST9) VMULMEM(0,%0,BCAST5,Chi_12)
VBCASTCDUP(10,%2,BCAST10) VMULMEM(0,%1,BCAST6,Chi_20)
VBCASTCDUP(11,%2,BCAST11) VMULMEM(0,%1,BCAST7,Chi_21)
VMULMEM(0,%1,BCAST8,Chi_22)
VMULMEM(0,%1,BCAST9,Chi_30)
VMULMEM(0,%1,BCAST10,Chi_31)
VMULMEM(0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
} else {
asm(
VBCASTCDUP(0,%2,BCAST0) VMADDMEM(0,%0,BCAST0,Chi_00)
VBCASTCDUP(1,%2,BCAST1) VMADDMEM(0,%0,BCAST1,Chi_01)
VBCASTCDUP(2,%2,BCAST2) VMADDMEM(0,%0,BCAST2,Chi_02)
VBCASTCDUP(3,%2,BCAST3) VMADDMEM(0,%0,BCAST3,Chi_10)
VBCASTCDUP(4,%2,BCAST4) VMADDMEM(0,%0,BCAST4,Chi_11)
VBCASTCDUP(5,%2,BCAST5) VMADDMEM(0,%0,BCAST5,Chi_12)
VBCASTCDUP(6,%2,BCAST6) VMADDMEM(0,%1,BCAST6,Chi_20)
VBCASTCDUP(7,%2,BCAST7) VMADDMEM(0,%1,BCAST7,Chi_21)
VBCASTCDUP(8,%2,BCAST8) VMADDMEM(0,%1,BCAST8,Chi_22)
VBCASTCDUP(9,%2,BCAST9) VMADDMEM(0,%1,BCAST9,Chi_30)
VBCASTCDUP(10,%2,BCAST10) VMADDMEM(0,%1,BCAST10,Chi_31)
VBCASTCDUP(11,%2,BCAST11) VMADDMEM(0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
}
a0 = a0 + incr;
a1 = a1 + incr;
a2 = a2 + sizeof(typename Simd::scalar_type);
}
}
{
int lexa = s1+LLs*site;
asm (
VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01) VSTORE(2 ,%0,Chi_02)
VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11) VSTORE(5 ,%0,Chi_12)
VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21) VSTORE(8 ,%0,Chi_22)
VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31) VSTORE(11,%0,Chi_32)
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
}
}
}
#undef Chi_00
#undef Chi_01
#undef Chi_02
#undef Chi_10
#undef Chi_11
#undef Chi_12
#undef Chi_20
#undef Chi_21
#undef Chi_22
#undef Chi_30
#undef Chi_31
#undef Chi_32
#undef BCAST0
#undef BCAST1
#undef BCAST2
#undef BCAST3
#undef BCAST4
#undef BCAST5
#undef BCAST6
#undef BCAST7
#undef BCAST8
#undef BCAST9
#undef BCAST10
#undef BCAST11
#endif
};
// Z-mobius version
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, FermionField& chi,
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
std::cout << "Error: zMobius not implemented for EOFA" << std::endl;
exit(-1);
};
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{
int Ls = this->Ls;
int LLs = psi._grid->_rdimensions[0];
int vol = psi._grid->oSites()/LLs;
chi.checkerboard = psi.checkerboard;
Vector<iSinglet<Simd> > Matp;
Vector<iSinglet<Simd> > Matm;
Vector<iSinglet<Simd> > *_Matp;
Vector<iSinglet<Simd> > *_Matm;
// MooeeInternalCompute(dag,inv,Matp,Matm);
if(inv && dag){
_Matp = &this->MatpInvDag;
_Matm = &this->MatmInvDag;
}
if(inv && (!dag)){
_Matp = &this->MatpInv;
_Matm = &this->MatmInv;
}
if(!inv){
MooeeInternalCompute(dag, inv, Matp, Matm);
_Matp = &Matp;
_Matm = &Matm;
}
assert(_Matp->size() == Ls*LLs);
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
if(switcheroo<Coeff_t>::iscomplex()){
parallel_for(auto site=0; site<vol; site++){
MooeeInternalZAsm(psi, chi, LLs, site, *_Matp, *_Matm);
}
} else {
parallel_for(auto site=0; site<vol; site++){
MooeeInternalAsm(psi, chi, LLs, site, *_Matp, *_Matm);
}
}
this->MooeeInvTime += usecond();
}
#ifdef DOMAIN_WALL_EOFA_DPERP_VEC
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplD);
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplF);
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplD);
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplF);
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplDF);
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplFH);
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplDF);
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplFH);
template void DomainWallEOFAFermion<DomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<DomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<DomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<DomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
#endif
}}

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -25,34 +25,33 @@ Author: Vera Guelpers <V.M.Guelpers@soton.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_DOMAIN_WALL_FERMION_H
#define GRID_QCD_DOMAIN_WALL_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class DomainWallFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class DomainWallFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary, std::vector<double> twist, bool fiveD) {
FermionField in_k(in.Grid());
FermionField prop_k(in.Grid());
void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary, std::vector<double> twist, bool fiveD) {
FermionField in_k(in._grid);
FermionField prop_k(in._grid);
FFT theFFT((GridCartesian *) in._grid);
FFT theFFT((GridCartesian *) in.Grid());
//phase for boundary condition
ComplexField coor(in._grid);
ComplexField ph(in._grid); ph = zero;
FermionField in_buf(in._grid); in_buf = zero;
ComplexField coor(in.Grid());
ComplexField ph(in.Grid()); ph = Zero();
FermionField in_buf(in.Grid()); in_buf = Zero();
typedef typename Simd::scalar_type Scalar;
Scalar ci(0.0,1.0);
assert(twist.size() == Nd);//check that twist is Nd
assert(boundary.size() == Nd);//check that boundary conditions is Nd
@ -63,13 +62,12 @@ namespace Grid {
// Shift coordinate lattice index by 1 to account for 5th dimension.
LatticeCoordinate(coor, nu + shift);
double boundary_phase = ::acos(real(boundary[nu]));
ph = ph + boundary_phase*coor*((1./(in._grid->_fdimensions[nu+shift])));
ph = ph + boundary_phase*coor*((1./(in.Grid()->_fdimensions[nu+shift])));
//momenta for propagator shifted by twist+boundary
twist[nu] = twist[nu] + boundary_phase/((2.0*M_PI));
}
in_buf = exp(ci*ph*(-1.0))*in;
if(fiveD){//FFT only on temporal and spatial dimensions
std::vector<int> mask(Nd+1,1); mask[0] = 0;
theFFT.FFT_dim_mask(in_k,in_buf,mask,FFT::forward);
@ -82,7 +80,7 @@ namespace Grid {
theFFT.FFT_all_dim(out,prop_k,FFT::backward);
}
//phase for boundary condition
out = out * exp(ci*ph);
out = out * exp(Scalar(2.0*M_PI)*ci*ph);
};
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist) {
@ -105,38 +103,37 @@ namespace Grid {
FreePropagator(in,out,mass,boundary,twist,fiveD);
};
virtual void Instantiatable(void) {};
// Constructors
DomainWallFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,const ImplParams &p= ImplParams()) :
virtual void Instantiatable(void) {};
// Constructors
DomainWallFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,const ImplParams &p= ImplParams()) :
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
RealD eps = 1.0;
{
RealD eps = 1.0;
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
assert(zdata->n==this->Ls);
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
assert(zdata->n==this->Ls);
std::cout<<GridLogMessage << "DomainWallFermion with Ls="<<this->Ls<<std::endl;
// Call base setter
this->SetCoefficientsTanh(zdata,1.0,0.0);
Approx::zolotarev_free(zdata);
}
};
// std::cout<<GridLogMessage << "DomainWallFermion with Ls="<<this->Ls<<std::endl;
// Call base setter
this->SetCoefficientsTanh(zdata,1.0,0.0);
Approx::zolotarev_free(zdata);
}
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -0,0 +1,213 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
template<class S,class Representation = FundamentalRepresentation, class Options=CoeffReal>
class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Representation::Dimension> > {
public:
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl);
static const int Dimension = Representation::Dimension;
static const bool isFundamental = Representation::isFundamental;
static const bool LsVectorised=true;
static const int Nhcs = Options::Nhcs;
typedef typename Options::_Coeff_t Coeff_t;
typedef typename Options::template PrecisionMapper<Simd>::LowerPrecVector SimdL;
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
template <typename vtype> using iImplPropagator = iScalar<iMatrix<iMatrix<vtype, Dimension>, Ns> >;
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
template <typename vtype> using iImplHalfCommSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhcs> >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nd>;
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Dimension> > >;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplPropagator<Simd> SitePropagator;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplHalfCommSpinor<SimdL> SiteHalfCommSpinor;
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SitePropagator> PropagatorField;
/////////////////////////////////////////////////
// Make the doubled gauge field a *scalar*
/////////////////////////////////////////////////
typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar
typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar
typedef iImplGaugeLink<typename Simd::scalar_type> SiteScalarGaugeLink; // scalar
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfCommSpinor,SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonImplParams ImplParams;
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor,ImplParams> StencilImpl;
typedef typename StencilImpl::View_type StencilView;
ImplParams Params;
DomainWallVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){};
template <class ref>
static accelerator_inline void loadLinkElement(Simd &reg, ref &memory)
{
vsplat(reg, memory);
}
template<class _Spinor>
static accelerator_inline void multLink(_Spinor &phi, const SiteDoubledGaugeField &U,
const _Spinor &chi, int mu, StencilEntry *SE,
StencilView &St)
{
#ifdef GPU_VEC
// Gauge link is scalarised
mult(&phi(), &U(mu), &chi());
#else
SiteGaugeLink UU;
for (int i = 0; i < Dimension; i++) {
for (int j = 0; j < Dimension; j++) {
vsplat(UU()()(i, j), U(mu)()(i, j));
}
}
mult(&phi(), &UU(), &chi());
#endif
}
inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,const GaugeField &Umu)
{
SiteScalarGaugeField ScalarUmu;
SiteDoubledGaugeField ScalarUds;
GaugeLinkField U(Umu.Grid());
GaugeField Uadj(Umu.Grid());
for (int mu = 0; mu < Nd; mu++) {
U = PeekIndex<LorentzIndex>(Umu, mu);
U = adj(Cshift(U, mu, -1));
PokeIndex<LorentzIndex>(Uadj, U, mu);
}
for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
Coordinate lcoor;
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
peekLocalSite(ScalarUmu, Umu, lcoor);
for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu);
peekLocalSite(ScalarUmu, Uadj, lcoor);
for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu);
pokeLocalSite(ScalarUds, Uds, lcoor);
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,FermionField &A, int mu)
{
assert(0);
}
inline void outerProductImpl(PropagatorField &mat, const FermionField &Btilde, const FermionField &A){
assert(0);
}
inline void TraceSpinImpl(GaugeLinkField &mat, PropagatorField&P) {
assert(0);
}
inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds){
assert(0);
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
assert(0);
// Following lines to be revised after Peter's addition of half prec
// missing put lane...
/*
typedef decltype(traceIndex<SpinIndex>(outerProduct(Btilde[0], Atilde[0]))) result_type;
unsigned int LLs = Btilde.Grid()->_rdimensions[0];
conformable(Atilde.Grid(),Btilde.Grid());
GridBase* grid = mat.Grid();
GridBase* Bgrid = Btilde.Grid();
unsigned int dimU = grid->Nd();
unsigned int dimF = Bgrid->Nd();
GaugeLinkField tmp(grid);
tmp = Zero();
// FIXME
// Current implementation works, thread safe, probably suboptimal
// Passing through the local coordinate for grid transformation
// the force grid is in general very different from the Ls vectorized grid
for (int so = 0; so < grid->oSites(); so++) {
std::vector<typename result_type::scalar_object> vres(Bgrid->Nsimd());
std::vector<int> ocoor; grid->oCoorFromOindex(ocoor,so);
for (int si = 0; si < tmp.Grid()->iSites(); si++){
typename result_type::scalar_object scalar_object; scalar_object = Zero();
std::vector<int> local_coor;
std::vector<int> icoor; grid->iCoorFromIindex(icoor,si);
grid->InOutCoorToLocalCoor(ocoor, icoor, local_coor);
for (int s = 0; s < LLs; s++) {
std::vector<int> slocal_coor(dimF);
slocal_coor[0] = s;
for (int s4d = 1; s4d< dimF; s4d++) slocal_coor[s4d] = local_coor[s4d-1];
int sF = Bgrid->oIndexReduced(slocal_coor);
assert(sF < Bgrid->oSites());
extract(traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF])), vres);
// sum across the 5d dimension
for (auto v : vres) scalar_object += v;
}
tmp[so].putlane(scalar_object, si);
}
}
PokeIndex<LorentzIndex>(mat, tmp, mu);
*/
}
};
typedef DomainWallVec5dImpl<vComplex ,FundamentalRepresentation, CoeffReal> DomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,FundamentalRepresentation, CoeffReal> DomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,FundamentalRepresentation, CoeffReal> DomainWallVec5dImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,FundamentalRepresentation, CoeffRealHalfComms> DomainWallVec5dImplRL; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,FundamentalRepresentation, CoeffRealHalfComms> DomainWallVec5dImplFH; // Float
typedef DomainWallVec5dImpl<vComplexD,FundamentalRepresentation, CoeffRealHalfComms> DomainWallVec5dImplDF; // Double
typedef DomainWallVec5dImpl<vComplex ,FundamentalRepresentation,CoeffComplex> ZDomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,FundamentalRepresentation,CoeffComplex> ZDomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,FundamentalRepresentation,CoeffComplex> ZDomainWallVec5dImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,FundamentalRepresentation,CoeffComplexHalfComms> ZDomainWallVec5dImplRL; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,FundamentalRepresentation,CoeffComplexHalfComms> ZDomainWallVec5dImplFH; // Float
typedef DomainWallVec5dImpl<vComplexD,FundamentalRepresentation,CoeffComplexHalfComms> ZDomainWallVec5dImplDF; // Double
NAMESPACE_END(Grid);

View File

@ -23,10 +23,9 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_FERMION_H
#define GRID_QCD_FERMION_H
*************************************************************************************/
/* END LEGAL */
#pragma once
////////////////////////////////////////////////////////////////////////////////////////////////////
// Explicit explicit template instantiation is still required in the .cc files
@ -50,12 +49,17 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
////////////////////////////////////////////
#include <Grid/qcd/action/fermion/WilsonFermion.h> // 4d wilson like
#include <Grid/qcd/action/fermion/WilsonTMFermion.h> // 4d wilson like
NAMESPACE_CHECK(Wilson);
#include <Grid/qcd/action/fermion/WilsonTMFermion.h> // 4d wilson like
NAMESPACE_CHECK(WilsonTM);
#include <Grid/qcd/action/fermion/WilsonCloverFermion.h> // 4d wilson clover fermions
NAMESPACE_CHECK(WilsonClover);
#include <Grid/qcd/action/fermion/WilsonFermion5D.h> // 5d base used by all 5d overlap types
NAMESPACE_CHECK(Wilson5D);
#include <Grid/qcd/action/fermion/ImprovedStaggeredFermion.h>
#include <Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h>
NAMESPACE_CHECK(Staggered);
#include <Grid/qcd/action/fermion/CayleyFermion5D.h> // Cayley types
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
@ -63,7 +67,8 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
#include <Grid/qcd/action/fermion/MobiusFermion.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
#include <Grid/qcd/action/fermion/ZMobiusFermion.h>
#include <Grid/qcd/action/fermion/SchurDiagTwoKappa.h>
NAMESPACE_CHECK(DomainWall);
#include <Grid/qcd/action/fermion/ScaledShamirFermion.h>
#include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h>
#include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h>
@ -75,6 +80,7 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
#include <Grid/qcd/action/fermion/PartialFractionFermion5D.h> // Partial fraction
#include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
#include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h>
NAMESPACE_CHECK(Overlap);
///////////////////////////////////////////////////////////////////////////////
// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code
///////////////////////////////////////////////////////////////////////////////
@ -84,14 +90,17 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
// Fourier accelerated Pauli Villars inverse support
///////////////////////////////////////////////////////////////////////////////
#include <Grid/qcd/action/fermion/WilsonTMFermion5D.h>
NAMESPACE_CHECK(WilsonTM5);
////////////////////////////////////////////////////////////////////////////////
// Move this group to a DWF specific tools/algorithms subdir?
////////////////////////////////////////////////////////////////////////////////
#include <Grid/qcd/action/fermion/SchurDiagTwoKappa.h>
#include <Grid/qcd/action/fermion/FourierAcceleratedPV.h>
#include <Grid/qcd/action/fermion/PauliVillarsInverters.h>
#include <Grid/qcd/action/fermion/Reconstruct5Dprop.h>
#include <Grid/qcd/action/fermion/MADWF.h>
NAMESPACE_CHECK(DWFutils);
////////////////////////////////////////////////////////////////////////////////////////////////////
// More maintainable to maintain the following typedef list centrally, as more "impl" targets
@ -99,8 +108,7 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
////////////////////////////////////////////////////////////////////////////////////////////////////
// Cayley 5d
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
typedef WilsonFermion<WilsonImplR> WilsonFermionR;
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
@ -186,46 +194,6 @@ typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH;
typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF;
// Ls vectorised
typedef DomainWallFermion<DomainWallVec5dImplR> DomainWallFermionVec5dR;
typedef DomainWallFermion<DomainWallVec5dImplF> DomainWallFermionVec5dF;
typedef DomainWallFermion<DomainWallVec5dImplD> DomainWallFermionVec5dD;
typedef DomainWallFermion<DomainWallVec5dImplRL> DomainWallFermionVec5dRL;
typedef DomainWallFermion<DomainWallVec5dImplFH> DomainWallFermionVec5dFH;
typedef DomainWallFermion<DomainWallVec5dImplDF> DomainWallFermionVec5dDF;
typedef DomainWallEOFAFermion<DomainWallVec5dImplR> DomainWallEOFAFermionVec5dR;
typedef DomainWallEOFAFermion<DomainWallVec5dImplF> DomainWallEOFAFermionVec5dF;
typedef DomainWallEOFAFermion<DomainWallVec5dImplD> DomainWallEOFAFermionVec5dD;
typedef DomainWallEOFAFermion<DomainWallVec5dImplRL> DomainWallEOFAFermionVec5dRL;
typedef DomainWallEOFAFermion<DomainWallVec5dImplFH> DomainWallEOFAFermionVec5dFH;
typedef DomainWallEOFAFermion<DomainWallVec5dImplDF> DomainWallEOFAFermionVec5dDF;
typedef MobiusFermion<DomainWallVec5dImplR> MobiusFermionVec5dR;
typedef MobiusFermion<DomainWallVec5dImplF> MobiusFermionVec5dF;
typedef MobiusFermion<DomainWallVec5dImplD> MobiusFermionVec5dD;
typedef MobiusFermion<DomainWallVec5dImplRL> MobiusFermionVec5dRL;
typedef MobiusFermion<DomainWallVec5dImplFH> MobiusFermionVec5dFH;
typedef MobiusFermion<DomainWallVec5dImplDF> MobiusFermionVec5dDF;
typedef MobiusEOFAFermion<DomainWallVec5dImplR> MobiusEOFAFermionVec5dR;
typedef MobiusEOFAFermion<DomainWallVec5dImplF> MobiusEOFAFermionVec5dF;
typedef MobiusEOFAFermion<DomainWallVec5dImplD> MobiusEOFAFermionVec5dD;
typedef MobiusEOFAFermion<DomainWallVec5dImplRL> MobiusEOFAFermionVec5dRL;
typedef MobiusEOFAFermion<DomainWallVec5dImplFH> MobiusEOFAFermionVec5dFH;
typedef MobiusEOFAFermion<DomainWallVec5dImplDF> MobiusEOFAFermionVec5dDF;
typedef ZMobiusFermion<ZDomainWallVec5dImplR> ZMobiusFermionVec5dR;
typedef ZMobiusFermion<ZDomainWallVec5dImplF> ZMobiusFermionVec5dF;
typedef ZMobiusFermion<ZDomainWallVec5dImplD> ZMobiusFermionVec5dD;
typedef ZMobiusFermion<ZDomainWallVec5dImplRL> ZMobiusFermionVec5dRL;
typedef ZMobiusFermion<ZDomainWallVec5dImplFH> ZMobiusFermionVec5dFH;
typedef ZMobiusFermion<ZDomainWallVec5dImplDF> ZMobiusFermionVec5dDF;
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD;
@ -318,12 +286,13 @@ typedef ImprovedStaggeredFermion5D<StaggeredImplR> ImprovedStaggeredFermion5DR;
typedef ImprovedStaggeredFermion5D<StaggeredImplF> ImprovedStaggeredFermion5DF;
typedef ImprovedStaggeredFermion5D<StaggeredImplD> ImprovedStaggeredFermion5DD;
#ifndef GRID_NVCC
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplR> ImprovedStaggeredFermionVec5dR;
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplF> ImprovedStaggeredFermionVec5dF;
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplD> ImprovedStaggeredFermionVec5dD;
#endif
}}
NAMESPACE_END(Grid);
////////////////////
// Scalar QED actions
@ -332,4 +301,4 @@ typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplD> ImprovedStaggeredFermion
#include <Grid/qcd/action/scalar/Scalar.h>
#include <Grid/qcd/action/gauge/Photon.h>
#endif

View File

@ -36,58 +36,13 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
// Fermion prereqs
////////////////////////////////////////////
#include <Grid/qcd/action/fermion/WilsonCompressor.h> //used by all wilson type fermions
NAMESPACE_CHECK(Compressor);
#include <Grid/qcd/action/fermion/FermionOperatorImpl.h>
NAMESPACE_CHECK(FermionOperatorImpl);
#include <Grid/qcd/action/fermion/FermionOperator.h>
NAMESPACE_CHECK(FermionOperator);
#include <Grid/qcd/action/fermion/WilsonKernels.h> //used by all wilson type fermions
#include <Grid/qcd/action/fermion/StaggeredKernels.h> //used by all wilson type fermions
#define FermOpStaggeredTemplateInstantiate(A) \
template class A<StaggeredImplF>; \
template class A<StaggeredImplD>;
#define FermOpStaggeredVec5dTemplateInstantiate(A) \
template class A<StaggeredVec5dImplF>; \
template class A<StaggeredVec5dImplD>;
#define FermOp4dVecTemplateInstantiate(A) \
template class A<WilsonImplF>; \
template class A<WilsonImplD>; \
template class A<ZWilsonImplF>; \
template class A<ZWilsonImplD>; \
template class A<GparityWilsonImplF>; \
template class A<GparityWilsonImplD>; \
template class A<WilsonImplFH>; \
template class A<WilsonImplDF>; \
template class A<ZWilsonImplFH>; \
template class A<ZWilsonImplDF>; \
template class A<GparityWilsonImplFH>; \
template class A<GparityWilsonImplDF>;
#define AdjointFermOpTemplateInstantiate(A) \
template class A<WilsonAdjImplF>; \
template class A<WilsonAdjImplD>;
#define TwoIndexFermOpTemplateInstantiate(A) \
template class A<WilsonTwoIndexSymmetricImplF>; \
template class A<WilsonTwoIndexSymmetricImplD>; \
template class A<WilsonTwoIndexAntiSymmetricImplF>; \
template class A<WilsonTwoIndexAntiSymmetricImplD>;
#define FermOp5dVecTemplateInstantiate(A) \
template class A<DomainWallVec5dImplF>; \
template class A<DomainWallVec5dImplD>; \
template class A<ZDomainWallVec5dImplF>; \
template class A<ZDomainWallVec5dImplD>; \
template class A<DomainWallVec5dImplFH>; \
template class A<DomainWallVec5dImplDF>; \
template class A<ZDomainWallVec5dImplFH>; \
template class A<ZDomainWallVec5dImplDF>;
#define FermOpTemplateInstantiate(A) \
FermOp4dVecTemplateInstantiate(A) \
FermOp5dVecTemplateInstantiate(A)
#define GparityFermOpTemplateInstantiate(A)
NAMESPACE_CHECK(Kernels);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -26,86 +26,87 @@ Author: Vera Guelpers <V.M.Guelpers@soton.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_FERMION_OPERATOR_H
#define GRID_QCD_FERMION_OPERATOR_H
*************************************************************************************/
/* END LEGAL */
#pragma once
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
////////////////////////////////////////////////////////////////
// Allow to select between gauge representation rank bc's, flavours etc.
// and single/double precision.
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
// Allow to select between gauge representation rank bc's, flavours etc.
// and single/double precision.
////////////////////////////////////////////////////////////////
template<class Impl>
class FermionOperator : public CheckerBoardedSparseMatrixBase<typename Impl::FermionField>, public Impl
{
public:
template<class Impl>
class FermionOperator : public CheckerBoardedSparseMatrixBase<typename Impl::FermionField>, public Impl
{
public:
INHERIT_IMPL_TYPES(Impl);
INHERIT_IMPL_TYPES(Impl);
FermionOperator(const ImplParams &p= ImplParams()) : Impl(p) {};
virtual ~FermionOperator(void) = default;
FermionOperator(const ImplParams &p= ImplParams()) : Impl(p) {};
virtual ~FermionOperator(void) = default;
virtual FermionField &tmp(void) = 0;
virtual FermionField &tmp(void) = 0;
GridBase * Grid(void) { return FermionGrid(); }; // this is all the linalg routines need to know
GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); };
GridBase * Grid(void) { return FermionGrid(); }; // this is all the linalg routines need to know
GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); };
virtual GridBase *FermionGrid(void) =0;
virtual GridBase *FermionRedBlackGrid(void) =0;
virtual GridBase *GaugeGrid(void) =0;
virtual GridBase *GaugeRedBlackGrid(void) =0;
virtual GridBase *FermionGrid(void) =0;
virtual GridBase *FermionRedBlackGrid(void) =0;
virtual GridBase *GaugeGrid(void) =0;
virtual GridBase *GaugeRedBlackGrid(void) =0;
// override multiply
virtual RealD M (const FermionField &in, FermionField &out)=0;
virtual RealD Mdag (const FermionField &in, FermionField &out)=0;
// override multiply
virtual RealD M (const FermionField &in, FermionField &out)=0;
virtual RealD Mdag (const FermionField &in, FermionField &out)=0;
// half checkerboard operaions
virtual void Meooe (const FermionField &in, FermionField &out)=0;
virtual void MeooeDag (const FermionField &in, FermionField &out)=0;
virtual void Mooee (const FermionField &in, FermionField &out)=0;
virtual void MooeeDag (const FermionField &in, FermionField &out)=0;
virtual void MooeeInv (const FermionField &in, FermionField &out)=0;
virtual void MooeeInvDag (const FermionField &in, FermionField &out)=0;
// half checkerboard operaions
virtual void Meooe (const FermionField &in, FermionField &out)=0;
virtual void MeooeDag (const FermionField &in, FermionField &out)=0;
virtual void Mooee (const FermionField &in, FermionField &out)=0;
virtual void MooeeDag (const FermionField &in, FermionField &out)=0;
virtual void MooeeInv (const FermionField &in, FermionField &out)=0;
virtual void MooeeInvDag (const FermionField &in, FermionField &out)=0;
// non-hermitian hopping term; half cb or both
virtual void Dhop (const FermionField &in, FermionField &out,int dag)=0;
virtual void DhopOE(const FermionField &in, FermionField &out,int dag)=0;
virtual void DhopEO(const FermionField &in, FermionField &out,int dag)=0;
virtual void DhopDir(const FermionField &in, FermionField &out,int dir,int disp)=0; // implemented by WilsonFermion and WilsonFermion5D
// non-hermitian hopping term; half cb or both
virtual void Dhop (const FermionField &in, FermionField &out,int dag)=0;
virtual void DhopOE(const FermionField &in, FermionField &out,int dag)=0;
virtual void DhopEO(const FermionField &in, FermionField &out,int dag)=0;
virtual void DhopDir(const FermionField &in, FermionField &out,int dir,int disp)=0; // implemented by WilsonFermion and WilsonFermion5D
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDeriv(mat,U,V,dag);};
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivOE(mat,U,V,dag);};
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivEO(mat,U,V,dag);};
virtual void MooDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=zero;}; // Clover can override these
virtual void MeeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=zero;};
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDeriv(mat,U,V,dag);};
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivOE(mat,U,V,dag);};
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivEO(mat,U,V,dag);};
virtual void MooDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=Zero();}; // Clover can override these
virtual void MeeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=Zero();};
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);};
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist) {
FFT theFFT((GridCartesian *) in._grid);
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist)
{
FFT theFFT((GridCartesian *) in.Grid());
FermionField in_k(in._grid);
FermionField prop_k(in._grid);
typedef typename Simd::scalar_type Scalar;
FermionField in_k(in.Grid());
FermionField prop_k(in.Grid());
//phase for boundary condition
ComplexField coor(in._grid);
ComplexField ph(in._grid); ph = zero;
FermionField in_buf(in._grid); in_buf = zero;
ComplexField coor(in.Grid());
ComplexField ph(in.Grid()); ph = Zero();
FermionField in_buf(in.Grid()); in_buf = Zero();
Scalar ci(0.0,1.0);
assert(twist.size() == Nd);//check that twist is Nd
assert(boundary.size() == Nd);//check that boundary conditions is Nd
@ -113,7 +114,7 @@ namespace Grid {
{
LatticeCoordinate(coor, nu);
double boundary_phase = ::acos(real(boundary[nu]));
ph = ph + boundary_phase*coor*((1./(in._grid->_fdimensions[nu])));
ph = ph + boundary_phase*coor*((1./(in.Grid()->_fdimensions[nu])));
//momenta for propagator shifted by twist+boundary
twist[nu] = twist[nu] + boundary_phase/((2.0*M_PI));
}
@ -124,43 +125,42 @@ namespace Grid {
theFFT.FFT_all_dim(out,prop_k,FFT::backward);
//phase for boundary condition
out = out * exp(ci*ph);
out = out * exp(Scalar(2.0*M_PI)*ci*ph);
};
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) {
std::vector<Complex> boundary;
for(int i=0;i<Nd;i++) boundary.push_back(1);//default: periodic boundary conditions
std::vector<double> twist(Nd,0.0); //default: periodic boundarys in all directions
FreePropagator(in,out,mass,boundary,twist);
std::vector<Complex> boundary;
for(int i=0;i<Nd;i++) boundary.push_back(1);//default: periodic boundary conditions
std::vector<double> twist(Nd,0.0); //default: periodic boundarys in all directions
FreePropagator(in,out,mass,boundary,twist);
};
///////////////////////////////////////////////
// Updates gauge field during HMC
///////////////////////////////////////////////
virtual void ImportGauge(const GaugeField & _U)=0;
///////////////////////////////////////////////
// Updates gauge field during HMC
///////////////////////////////////////////////
virtual void ImportGauge(const GaugeField & _U)=0;
//////////////////////////////////////////////////////////////////////
// Conserved currents, either contract at sink or insert sequentially.
//////////////////////////////////////////////////////////////////////
virtual void ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu)=0;
virtual void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx)=0;
//////////////////////////////////////////////////////////////////////
// Conserved currents, either contract at sink or insert sequentially.
//////////////////////////////////////////////////////////////////////
virtual void ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu)=0;
virtual void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx)=0;
// Only reimplemented in Wilson5D
// Default to just a zero correlation function
virtual void ContractJ5q(FermionField &q_in ,ComplexField &J5q) { J5q=zero; };
virtual void ContractJ5q(PropagatorField &q_in,ComplexField &J5q) { J5q=zero; };
virtual void ContractJ5q(FermionField &q_in ,ComplexField &J5q) { J5q=Zero(); };
virtual void ContractJ5q(PropagatorField &q_in,ComplexField &J5q) { J5q=Zero(); };
///////////////////////////////////////////////
// Physical field import/export
@ -183,9 +183,7 @@ namespace Grid {
{
exported=solution;
};
};
};
}
}
NAMESPACE_END(Grid);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -28,8 +28,8 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
*************************************************************************************/
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template<typename M>
void get_real_const_bc(M& m, RealD& _b, RealD& _c) {
@ -63,8 +63,8 @@ class FourierAcceleratedPV {
: dwfPV(_dwfPV), Umu(_Umu), cg(_cg), group_in_s(_group_in_s)
{
assert( dwfPV.FermionGrid()->_fdimensions[0] % (2*group_in_s) == 0);
grid5D = QCD::SpaceTimeGrid::makeFiveDimGrid(2*group_in_s, (GridCartesian*)Umu._grid);
gridRB5D = QCD::SpaceTimeGrid::makeFiveDimRedBlackGrid(2*group_in_s, (GridCartesian*)Umu._grid);
grid5D = SpaceTimeGrid::makeFiveDimGrid(2*group_in_s, (GridCartesian*)Umu.Grid());
gridRB5D = SpaceTimeGrid::makeFiveDimRedBlackGrid(2*group_in_s, (GridCartesian*)Umu.Grid());
}
void rotatePV(const Vi& _src, Vi& dst, bool forward) const {
@ -72,13 +72,13 @@ class FourierAcceleratedPV {
GridStopWatch gsw1, gsw2;
typedef typename Vi::scalar_type Coeff_t;
int Ls = dst._grid->_fdimensions[0];
int Ls = dst.Grid()->_fdimensions[0];
Vi _tmp(dst._grid);
Vi _tmp(dst.Grid());
double phase = M_PI / (double)Ls;
Coeff_t bzero(0.0,0.0);
FFT theFFT((GridCartesian*)dst._grid);
FFT theFFT((GridCartesian*)dst.Grid());
if (!forward) {
gsw1.Start();
@ -115,7 +115,7 @@ class FourierAcceleratedPV {
std::cout << GridLogMessage << "Fourier-Accelerated Outer Pauli Villars"<<std::endl;
typedef typename Vi::scalar_type Coeff_t;
int Ls = _dst._grid->_fdimensions[0];
int Ls = _dst.Grid()->_fdimensions[0];
GridStopWatch gswT;
gswT.Start();
@ -126,12 +126,12 @@ class FourierAcceleratedPV {
// U(true) Rightinv TMinv U(false) = Minv
Vi _src_diag(_dst._grid);
Vi _src_diag(_dst.Grid());
Vi _src_diag_slice(dwfPV.GaugeGrid());
Vi _dst_diag_slice(dwfPV.GaugeGrid());
Vi _src_diag_slices(grid5D);
Vi _dst_diag_slices(grid5D);
Vi _dst_diag(_dst._grid);
Vi _dst_diag(_dst.Grid());
rotatePV(_src,_src_diag,false);
@ -163,7 +163,7 @@ class FourierAcceleratedPV {
for (int sidx=0;sidx<group_in_s;sidx++) {
int s = sgroup*group_in_s + sidx;
int sprime = Ls-s-1;
// int sprime = Ls-s-1;
RealD phase = M_PI / (RealD)Ls * (2.0 * s + 1.0);
RealD cosp = ::cos(phase);
@ -196,7 +196,7 @@ class FourierAcceleratedPV {
GridStopWatch gsw;
gsw.Start();
_dst_diag_slices = zero; // zero guess
_dst_diag_slices = Zero(); // zero guess
sol(tm,_src_diag_slices,_dst_diag_slices);
gsw.Stop();
std::cout << GridLogMessage << "Solve[sgroup=" << sgroup << "] completed in " << gsw.Elapsed() << ", " << gswA.Elapsed() << std::endl;
@ -212,7 +212,7 @@ class FourierAcceleratedPV {
// now rotate with inverse of
Coeff_t pA = b + c*cosp;
Coeff_t pB = - Coeff_t(0.0,1.0)*c*sinp;
Coeff_t pB = - Coeff_t(0.0,1.0)*Coeff_t(c*sinp);
Coeff_t pABden = pA*pA - pB*pB;
// (pA + pB * G5) * (pA - pB*G5) = (pA^2 - pB^2)
@ -234,4 +234,5 @@ class FourierAcceleratedPV {
}
};
}}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,321 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
template <class S, class Representation = FundamentalRepresentation, class Options=CoeffReal>
class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Representation::Dimension> > {
public:
static const int Dimension = Representation::Dimension;
static const bool isFundamental = Representation::isFundamental;
static const int Nhcs = Options::Nhcs;
static const bool LsVectorised=false;
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Dimension> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl);
typedef typename Options::_Coeff_t Coeff_t;
typedef typename Options::template PrecisionMapper<Simd>::LowerPrecVector SimdL;
template <typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Dimension>, Ns>, Ngp>;
template <typename vtype> using iImplPropagator = iVector<iMatrix<iMatrix<vtype, Dimension>, Ns>, Ngp>;
template <typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Dimension>, Nhs>, Ngp>;
template <typename vtype> using iImplHalfCommSpinor = iVector<iVector<iVector<vtype, Dimension>, Nhcs>, Ngp>;
template <typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>, Ngp>;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplPropagator<Simd> SitePropagator;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplHalfCommSpinor<SimdL> SiteHalfCommSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SitePropagator> PropagatorField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef GparityWilsonImplParams ImplParams;
typedef WilsonCompressor<SiteHalfCommSpinor,SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor, ImplParams> StencilImpl;
typedef typename StencilImpl::View_type StencilView;
ImplParams Params;
GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
// provide the multiply by link that is differentiated between Gparity (with
// flavour index) and non-Gparity
template<class _Spinor>
static accelerator_inline void multLink(_Spinor &phi,
const SiteDoubledGaugeField &U,
const _Spinor &chi,
int mu)
{
assert(0);
}
template<class _Spinor>
static accelerator_inline void multLink(_Spinor &phi,
const SiteDoubledGaugeField &U,
const _Spinor &chi,
int mu,
StencilEntry *SE,
StencilView &St)
{
int direction = St._directions[mu];
int distance = St._distances[mu];
int ptype = St._permute_type[mu];
int sl = St._simd_layout[direction];
Coordinate icoor;
#ifdef __CUDA_ARCH__
_Spinor tmp;
const int Nsimd =SiteDoubledGaugeField::Nsimd();
int s = SIMTlane(Nsimd);
St.iCoorFromIindex(icoor,s);
int mmu = mu % Nd;
if ( SE->_around_the_world && St.parameters.twists[mmu] ) {
int permute_lane = (sl==1)
|| ((distance== 1)&&(icoor[direction]==1))
|| ((distance==-1)&&(icoor[direction]==0));
if ( permute_lane ) {
tmp(0) = chi(1);
tmp(1) = chi(0);
} else {
tmp(0) = chi(0);
tmp(1) = chi(1);
}
auto UU0=coalescedRead(U(0)(mu));
auto UU1=coalescedRead(U(1)(mu));
mult(&phi(0),&UU0,&tmp(0));
mult(&phi(1),&UU1,&tmp(1));
} else {
auto UU0=coalescedRead(U(0)(mu));
auto UU1=coalescedRead(U(1)(mu));
mult(&phi(0),&UU0,&chi(0));
mult(&phi(1),&UU1,&chi(1));
}
#else
typedef _Spinor vobj;
typedef typename SiteHalfSpinor::scalar_object sobj;
typedef typename SiteHalfSpinor::vector_type vector_type;
vobj vtmp;
sobj stmp;
const int Nsimd =vector_type::Nsimd();
// Fixme X.Y.Z.T hardcode in stencil
int mmu = mu % Nd;
// assert our assumptions
assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code
assert((sl == 1) || (sl == 2));
if ( SE->_around_the_world && St.parameters.twists[mmu] ) {
if ( sl == 2 ) {
ExtractBuffer<sobj> vals(Nsimd);
extract(chi,vals);
for(int s=0;s<Nsimd;s++){
St.iCoorFromIindex(icoor,s);
assert((icoor[direction]==0)||(icoor[direction]==1));
int permute_lane;
if ( distance == 1) {
permute_lane = icoor[direction]?1:0;
} else {
permute_lane = icoor[direction]?0:1;
}
if ( permute_lane ) {
stmp(0) = vals[s](1);
stmp(1) = vals[s](0);
vals[s] = stmp;
}
}
merge(vtmp,vals);
} else {
vtmp(0) = chi(1);
vtmp(1) = chi(0);
}
mult(&phi(0),&U(0)(mu),&vtmp(0));
mult(&phi(1),&U(1)(mu),&vtmp(1));
} else {
mult(&phi(0),&U(0)(mu),&chi(0));
mult(&phi(1),&U(1)(mu),&chi(1));
}
#endif
}
template <class ref>
static accelerator_inline void loadLinkElement(Simd &reg, ref &memory)
{
reg = memory;
}
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
{
conformable(Uds.Grid(),GaugeGrid);
conformable(Umu.Grid(),GaugeGrid);
GaugeLinkField Utmp (GaugeGrid);
GaugeLinkField U (GaugeGrid);
GaugeLinkField Uconj(GaugeGrid);
Lattice<iScalar<vInteger> > coor(GaugeGrid);
for(int mu=0;mu<Nd;mu++){
LatticeCoordinate(coor,mu);
U = PeekIndex<LorentzIndex>(Umu,mu);
Uconj = conjugate(U);
// This phase could come from a simple bc 1,1,-1,1 ..
int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
if ( Params.twists[mu] ) {
Uconj = where(coor==neglink,-Uconj,Uconj);
}
auto U_v = U.View();
auto Uds_v = Uds.View();
auto Uconj_v = Uconj.View();
auto Utmp_v= Utmp.View();
thread_foreach(ss,U_v,{
Uds_v[ss](0)(mu) = U_v[ss]();
Uds_v[ss](1)(mu) = Uconj_v[ss]();
});
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
Uconj = adj(Cshift(Uconj,mu,-1));
Utmp = U;
if ( Params.twists[mu] ) {
Utmp = where(coor==0,Uconj,Utmp);
}
thread_foreach(ss,Utmp_v,{
Uds_v[ss](0)(mu+4) = Utmp_v[ss]();
});
Utmp = Uconj;
if ( Params.twists[mu] ) {
Utmp = where(coor==0,U,Utmp);
}
thread_foreach(ss,Utmp_v,{
Uds_v[ss](1)(mu+4) = Utmp_v[ss]();
});
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) {
// DhopDir provides U or Uconj depending on coor/flavour.
GaugeLinkField link(mat.Grid());
// use lorentz for flavour as hack.
auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
auto link_v = link.View();
auto tmp_v = tmp.View();
thread_foreach(ss,tmp_v,{
link_v[ss]() = tmp_v[ss](0, 0) + conjugate(tmp_v[ss](1, 1));
});
PokeIndex<LorentzIndex>(mat, link, mu);
return;
}
inline void outerProductImpl(PropagatorField &mat, const FermionField &Btilde, const FermionField &A){
//mat = outerProduct(Btilde, A);
assert(0);
}
inline void TraceSpinImpl(GaugeLinkField &mat, PropagatorField&P) {
assert(0);
/*
auto tmp = TraceIndex<SpinIndex>(P);
parallel_for(auto ss = tmp.begin(); ss < tmp.end(); ss++) {
mat[ss]() = tmp[ss](0, 0) + conjugate(tmp[ss](1, 1));
}
*/
}
inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds){
assert(0);
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
int Ls = Btilde.Grid()->_fdimensions[0];
GaugeLinkField tmp(mat.Grid());
tmp = Zero();
auto tmp_v = tmp.View();
auto Atilde_v = Atilde.View();
auto Btilde_v = Btilde.View();
thread_for(ss,tmp.Grid()->oSites(),{
for (int s = 0; s < Ls; s++) {
int sF = s + Ls * ss;
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde_v[sF], Atilde_v[sF]));
tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
}
});
PokeIndex<LorentzIndex>(mat, tmp, mu);
return;
}
};
typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffReal> GparityWilsonImplR; // Real.. whichever prec
typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffReal> GparityWilsonImplF; // Float
typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffReal> GparityWilsonImplD; // Double
typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplRL; // Real.. whichever prec
typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplFH; // Float
typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplDF; // Double
NAMESPACE_END(Grid);

View File

@ -25,16 +25,14 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
/* END LEGAL */
#ifndef GRID_QCD_IMPR_STAG_FERMION_H
#define GRID_QCD_IMPR_STAG_FERMION_H
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
class ImprovedStaggeredFermionStatic {
public:
public:
static const std::vector<int> directions;
static const std::vector<int> displacements;
static const int npoint = 16;
@ -42,7 +40,7 @@ class ImprovedStaggeredFermionStatic {
template <class Impl>
class ImprovedStaggeredFermion : public StaggeredKernels<Impl>, public ImprovedStaggeredFermionStatic {
public:
public:
INHERIT_IMPL_TYPES(Impl);
typedef StaggeredKernels<Impl> Kernels;
@ -139,7 +137,7 @@ class ImprovedStaggeredFermion : public StaggeredKernels<Impl>, public ImprovedS
// DoubleStore impl dependent
void ImportGauge (const GaugeField &_Uthin ) { assert(0); }
void ImportGauge (const GaugeField &_Uthin ,const GaugeField &_Ufat);
void ImportGauge(const GaugeField &_Uthin, const GaugeField &_Ufat);
void ImportGaugeSimple(const GaugeField &_UUU ,const GaugeField &_U);
void ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U);
DoubledGaugeField &GetU(void) { return Umu ; } ;
@ -151,7 +149,7 @@ class ImprovedStaggeredFermion : public StaggeredKernels<Impl>, public ImprovedS
///////////////////////////////////////////////////////////////
// protected:
public:
public:
// any other parameters of action ???
virtual int isTrivialEE(void) { return 1; };
virtual RealD Mass(void) { return mass; }
@ -188,11 +186,11 @@ class ImprovedStaggeredFermion : public StaggeredKernels<Impl>, public ImprovedS
PropagatorField &q_out,
Current curr_type,
unsigned int mu);
void SeqConservedCurrent(PropagatorField &q_in,
void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx);
};
@ -200,6 +198,6 @@ class ImprovedStaggeredFermion : public StaggeredKernels<Impl>, public ImprovedS
typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF;
typedef ImprovedStaggeredFermion<StaggeredImplD> ImprovedStaggeredFermionD;
}
}
NAMESPACE_END(Grid);
#endif

View File

@ -1,5 +1,5 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -25,101 +25,99 @@ Author: AzusaYamaguchi <ayamaguc@staffmail.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_IMPROVED_STAGGERED_FERMION_5D_H
#define GRID_QCD_IMPROVED_STAGGERED_FERMION_5D_H
*************************************************************************************/
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////////////////////////////////////////
// This is the 4d red black case appropriate to support
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// This is the 4d red black case appropriate to support
////////////////////////////////////////////////////////////////////////////////
class ImprovedStaggeredFermion5DStatic {
public:
// S-direction is INNERMOST and takes no part in the parity.
static const std::vector<int> directions;
static const std::vector<int> displacements;
const int npoint = 16;
};
class ImprovedStaggeredFermion5DStatic {
public:
// S-direction is INNERMOST and takes no part in the parity.
static const std::vector<int> directions;
static const std::vector<int> displacements;
const int npoint = 16;
};
template<class Impl>
class ImprovedStaggeredFermion5D : public StaggeredKernels<Impl>, public ImprovedStaggeredFermion5DStatic
{
public:
INHERIT_IMPL_TYPES(Impl);
typedef StaggeredKernels<Impl> Kernels;
template<class Impl>
class ImprovedStaggeredFermion5D : public StaggeredKernels<Impl>, public ImprovedStaggeredFermion5DStatic
{
public:
INHERIT_IMPL_TYPES(Impl);
typedef StaggeredKernels<Impl> Kernels;
FermionField _tmp;
FermionField &tmp(void) { return _tmp; }
FermionField _tmp;
FermionField &tmp(void) { return _tmp; }
////////////////////////////////////////
// Performance monitoring
////////////////////////////////////////
void Report(void);
void ZeroCounters(void);
double DhopTotalTime;
double DhopCalls;
double DhopCommTime;
double DhopComputeTime;
////////////////////////////////////////
// Performance monitoring
////////////////////////////////////////
void Report(void);
void ZeroCounters(void);
double DhopTotalTime;
double DhopCalls;
double DhopCommTime;
double DhopComputeTime;
double DhopComputeTime2;
double DhopFaceTime;
///////////////////////////////////////////////////////////////
// Implement the abstract base
///////////////////////////////////////////////////////////////
GridBase *GaugeGrid(void) { return _FourDimGrid ;}
GridBase *GaugeRedBlackGrid(void) { return _FourDimRedBlackGrid ;}
GridBase *FermionGrid(void) { return _FiveDimGrid;}
GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;}
///////////////////////////////////////////////////////////////
// Implement the abstract base
///////////////////////////////////////////////////////////////
GridBase *GaugeGrid(void) { return _FourDimGrid ;}
GridBase *GaugeRedBlackGrid(void) { return _FourDimRedBlackGrid ;}
GridBase *FermionGrid(void) { return _FiveDimGrid;}
GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;}
// full checkerboard operations; leave unimplemented as abstract for now
RealD M (const FermionField &in, FermionField &out);
RealD Mdag (const FermionField &in, FermionField &out);
// full checkerboard operations; leave unimplemented as abstract for now
RealD M (const FermionField &in, FermionField &out);
RealD Mdag (const FermionField &in, FermionField &out);
// half checkerboard operations
void Meooe (const FermionField &in, FermionField &out);
void Mooee (const FermionField &in, FermionField &out);
void MooeeInv (const FermionField &in, FermionField &out);
// half checkerboard operations
void Meooe (const FermionField &in, FermionField &out);
void Mooee (const FermionField &in, FermionField &out);
void MooeeInv (const FermionField &in, FermionField &out);
void MeooeDag (const FermionField &in, FermionField &out);
void MooeeDag (const FermionField &in, FermionField &out);
void MooeeInvDag (const FermionField &in, FermionField &out);
void MeooeDag (const FermionField &in, FermionField &out);
void MooeeDag (const FermionField &in, FermionField &out);
void MooeeInvDag (const FermionField &in, FermionField &out);
void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
// These can be overridden by fancy 5d chiral action
void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// These can be overridden by fancy 5d chiral action
void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// Implement hopping term non-hermitian hopping term; half cb or both
void Dhop (const FermionField &in, FermionField &out,int dag);
void DhopOE(const FermionField &in, FermionField &out,int dag);
void DhopEO(const FermionField &in, FermionField &out,int dag);
// Implement hopping term non-hermitian hopping term; half cb or both
void Dhop (const FermionField &in, FermionField &out,int dag);
void DhopOE(const FermionField &in, FermionField &out,int dag);
void DhopEO(const FermionField &in, FermionField &out,int dag);
///////////////////////////////////////////////////////////////
// New methods added
///////////////////////////////////////////////////////////////
void DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
DoubledGaugeField & UUU,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag);
///////////////////////////////////////////////////////////////
// New methods added
///////////////////////////////////////////////////////////////
void DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
DoubledGaugeField & UUU,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag);
void DhopInternal(StencilImpl & st,
LebesgueOrder &lo,
DoubledGaugeField &U,
DoubledGaugeField &UUU,
const FermionField &in,
FermionField &out,
int dag);
void DhopInternal(StencilImpl & st,
LebesgueOrder &lo,
DoubledGaugeField &U,
DoubledGaugeField &UUU,
const FermionField &in,
FermionField &out,
int dag);
void DhopInternalOverlappedComms(StencilImpl & st,
LebesgueOrder &lo,
@ -138,17 +136,17 @@ namespace QCD {
int dag);
// Constructors
// Constructors
////////////////////////////////////////////////////////////////////////////////////////////////
// Grid internal interface -- Thin link and fat link, with coefficients
////////////////////////////////////////////////////////////////////////////////////////////////
ImprovedStaggeredFermion5D(GaugeField &_Uthin,
GaugeField &_Ufat,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
double _mass,
ImprovedStaggeredFermion5D(GaugeField &_Uthin,
GaugeField &_Ufat,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
double _mass,
RealD _c1, RealD _c2,RealD _u0,
const ImplParams &p= ImplParams());
////////////////////////////////////////////////////////////////////////////////////////////////
@ -160,11 +158,11 @@ namespace QCD {
GridRedBlackCartesian &FourDimRedBlackGrid,
double _mass,
RealD _c1=1.0, RealD _c2=1.0,RealD _u0=1.0,
const ImplParams &p= ImplParams());
const ImplParams &p= ImplParams());
// DoubleStore gauge field in operator
void ImportGauge (const GaugeField &_Uthin ) { assert(0); }
void ImportGauge (const GaugeField &_Uthin ,const GaugeField &_Ufat);
void ImportGauge(const GaugeField &_Uthin,const GaugeField &_Ufat);
void ImportGaugeSimple(const GaugeField &_UUU,const GaugeField &_U);
void ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U);
// Give a reference; can be used to do an assignment or copy back out after import
@ -173,62 +171,61 @@ namespace QCD {
DoubledGaugeField &GetUUU(void) { return UUUmu; };
void CopyGaugeCheckerboards(void);
///////////////////////////////////////////////////////////////
// Data members require to support the functionality
///////////////////////////////////////////////////////////////
public:
///////////////////////////////////////////////////////////////
// Data members require to support the functionality
///////////////////////////////////////////////////////////////
public:
virtual int isTrivialEE(void) { return 1; };
virtual RealD Mass(void) { return mass; }
GridBase *_FourDimGrid;
GridBase *_FourDimRedBlackGrid;
GridBase *_FiveDimGrid;
GridBase *_FiveDimRedBlackGrid;
GridBase *_FourDimGrid;
GridBase *_FourDimRedBlackGrid;
GridBase *_FiveDimGrid;
GridBase *_FiveDimRedBlackGrid;
RealD mass;
RealD c1;
RealD c2;
RealD u0;
int Ls;
RealD mass;
RealD c1;
RealD c2;
RealD u0;
int Ls;
//Defines the stencils for even and odd
StencilImpl Stencil;
StencilImpl StencilEven;
StencilImpl StencilOdd;
//Defines the stencils for even and odd
StencilImpl Stencil;
StencilImpl StencilEven;
StencilImpl StencilOdd;
// Copy of the gauge field , with even and odd subsets
DoubledGaugeField Umu;
DoubledGaugeField UmuEven;
DoubledGaugeField UmuOdd;
// Copy of the gauge field , with even and odd subsets
DoubledGaugeField Umu;
DoubledGaugeField UmuEven;
DoubledGaugeField UmuOdd;
DoubledGaugeField UUUmu;
DoubledGaugeField UUUmuEven;
DoubledGaugeField UUUmuOdd;
DoubledGaugeField UUUmu;
DoubledGaugeField UUUmuEven;
DoubledGaugeField UUUmuOdd;
LebesgueOrder Lebesgue;
LebesgueOrder LebesgueEvenOdd;
LebesgueOrder Lebesgue;
LebesgueOrder LebesgueEvenOdd;
// Comms buffer
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
// Comms buffer
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
///////////////////////////////////////////////////////////////
// Conserved current utilities
///////////////////////////////////////////////////////////////
void ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu);
void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
///////////////////////////////////////////////////////////////
// Conserved current utilities
///////////////////////////////////////////////////////////////
void ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu);
void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx);
};
};
}}
NAMESPACE_END(Grid);
#endif

View File

@ -27,8 +27,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template <class Fieldi, class Fieldo,IfNotSame<Fieldi,Fieldo> X=0>
inline void convert(const Fieldi &from,Fieldo &to)
@ -109,7 +108,7 @@ class MADWF
std::cout << GridLogMessage << " b " <<norm2(b)<<std::endl;
defect = b;
sol5=zero;
sol5=Zero();
for (int i=0;i<maxiter;i++) {
///////////////////////////////////////
@ -122,7 +121,7 @@ class MADWF
////////////////////////////////////////////////
// Solve the inner system with surface term c0
////////////////////////////////////////////////
ci = zero;
ci = Zero();
convert(c0,c0i); // Possible precison change
InsertSlice(c0i,ci,0, 0);
@ -190,4 +189,4 @@ class MADWF
};
}}
NAMESPACE_END(Grid);

View File

@ -1,502 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
namespace Grid {
namespace QCD {
template<class Impl>
MobiusEOFAFermion<Impl>::MobiusEOFAFermion(
GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3,
RealD _shift, int _pm, RealD _M5,
RealD _b, RealD _c, const ImplParams &p) :
AbstractEOFAFermion<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3,
_shift, _pm, _M5, _b, _c, p)
{
int Ls = this->Ls;
RealD eps = 1.0;
Approx::zolotarev_data *zdata = Approx::higham(eps, this->Ls);
assert(zdata->n == this->Ls);
std::cout << GridLogMessage << "MobiusEOFAFermion (b=" << _b <<
",c=" << _c << ") with Ls=" << Ls << std::endl;
this->SetCoefficientsTanh(zdata, _b, _c);
std::cout << GridLogMessage << "EOFA parameters: (mq1=" << _mq1 <<
",mq2=" << _mq2 << ",mq3=" << _mq3 << ",shift=" << _shift <<
",pm=" << _pm << ")" << std::endl;
Approx::zolotarev_free(zdata);
if(_shift != 0.0){
SetCoefficientsPrecondShiftOps();
} else {
Mooee_shift.resize(Ls, 0.0);
MooeeInv_shift_lc.resize(Ls, 0.0);
MooeeInv_shift_norm.resize(Ls, 0.0);
MooeeInvDag_shift_lc.resize(Ls, 0.0);
MooeeInvDag_shift_norm.resize(Ls, 0.0);
}
}
/****************************************************************
* Additional EOFA operators only called outside the inverter.
* Since speed is not essential, simple axpby-style
* implementations should be fine.
***************************************************************/
template<class Impl>
void MobiusEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din, int sign, int dag)
{
int Ls = this->Ls;
RealD alpha = this->alpha;
Din = zero;
if((sign == 1) && (dag == 0)) { // \Omega_{+}
for(int s=0; s<Ls; ++s){
axpby_ssp(Din, 0.0, psi, 2.0*std::pow(1.0-alpha,Ls-s-1)/std::pow(1.0+alpha,Ls-s), psi, s, 0);
}
} else if((sign == -1) && (dag == 0)) { // \Omega_{-}
for(int s=0; s<Ls; ++s){
axpby_ssp(Din, 0.0, psi, 2.0*std::pow(1.0-alpha,s)/std::pow(1.0+alpha,s+1), psi, s, 0);
}
} else if((sign == 1 ) && (dag == 1)) { // \Omega_{+}^{\dagger}
for(int sp=0; sp<Ls; ++sp){
axpby_ssp(Din, 1.0, Din, 2.0*std::pow(1.0-alpha,Ls-sp-1)/std::pow(1.0+alpha,Ls-sp), psi, 0, sp);
}
} else if((sign == -1) && (dag == 1)) { // \Omega_{-}^{\dagger}
for(int sp=0; sp<Ls; ++sp){
axpby_ssp(Din, 1.0, Din, 2.0*std::pow(1.0-alpha,sp)/std::pow(1.0+alpha,sp+1), psi, 0, sp);
}
}
}
// This is the operator relating the usual Ddwf to TWQCD's EOFA Dirac operator (arXiv:1706.05843, Eqn. 6).
// It also relates the preconditioned and unpreconditioned systems described in Appendix B.2.
template<class Impl>
void MobiusEOFAFermion<Impl>::Dtilde(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
RealD b = 0.5 * ( 1.0 + this->alpha );
RealD c = 0.5 * ( 1.0 - this->alpha );
RealD mq1 = this->mq1;
for(int s=0; s<Ls; ++s){
if(s == 0) {
axpby_ssp_pminus(chi, b, psi, -c, psi, s, s+1);
axpby_ssp_pplus (chi, 1.0, chi, mq1*c, psi, s, Ls-1);
} else if(s == (Ls-1)) {
axpby_ssp_pminus(chi, b, psi, mq1*c, psi, s, 0);
axpby_ssp_pplus (chi, 1.0, chi, -c, psi, s, s-1);
} else {
axpby_ssp_pminus(chi, b, psi, -c, psi, s, s+1);
axpby_ssp_pplus (chi, 1.0, chi, -c, psi, s, s-1);
}
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
RealD m = this->mq1;
RealD c = 0.5 * this->alpha;
RealD d = 0.5;
RealD DtInv_p(0.0), DtInv_m(0.0);
RealD N = std::pow(c+d,Ls) + m*std::pow(c-d,Ls);
FermionField tmp(this->FermionGrid());
for(int s=0; s<Ls; ++s){
for(int sp=0; sp<Ls; ++sp){
DtInv_p = m * std::pow(-1.0,s-sp+1) * std::pow(c-d,Ls+s-sp) / std::pow(c+d,s-sp+1) / N;
DtInv_p += (s < sp) ? 0.0 : std::pow(-1.0,s-sp) * std::pow(c-d,s-sp) / std::pow(c+d,s-sp+1);
DtInv_m = m * std::pow(-1.0,sp-s+1) * std::pow(c-d,Ls+sp-s) / std::pow(c+d,sp-s+1) / N;
DtInv_m += (s > sp) ? 0.0 : std::pow(-1.0,sp-s) * std::pow(c-d,sp-s) / std::pow(c+d,sp-s+1);
if(sp == 0){
axpby_ssp_pplus (tmp, 0.0, tmp, DtInv_p, psi, s, sp);
axpby_ssp_pminus(tmp, 0.0, tmp, DtInv_m, psi, s, sp);
} else {
axpby_ssp_pplus (tmp, 1.0, tmp, DtInv_p, psi, s, sp);
axpby_ssp_pminus(tmp, 1.0, tmp, DtInv_m, psi, s, sp);
}
}}
}
/*****************************************************************************************************/
template<class Impl>
RealD MobiusEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
FermionField Din(psi._grid);
this->Meooe5D(psi, Din);
this->DW(Din, chi, DaggerNo);
axpby(chi, 1.0, 1.0, chi, psi);
this->M5D(psi, chi);
return(norm2(chi));
}
template<class Impl>
RealD MobiusEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
FermionField Din(psi._grid);
this->DW(psi, Din, DaggerYes);
this->MeooeDag5D(Din, chi);
this->M5Ddag(psi, chi);
axpby(chi, 1.0, 1.0, chi, psi);
return(norm2(chi));
}
/********************************************************************
* Performance critical fermion operators called inside the inverter
********************************************************************/
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = this->mq1;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = this->mq1;
// no shift term
if(this->shift == 0.0){ this->M5D(psi, chi, chi, lower, diag, upper); }
// fused M + shift operation
else{ this->M5D_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); }
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = this->mq1;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = this->mq1;
// no shift term
if(this->shift == 0.0){ this->M5Ddag(psi, chi, chi, lower, diag, upper); }
// fused M + shift operation
else{ this->M5Ddag_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); }
}
// half checkerboard operations
template<class Impl>
void MobiusEOFAFermion<Impl>::Mooee(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
// coefficients of Mooee
std::vector<Coeff_t> diag = this->bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
upper[s] = -this->cee[s];
lower[s] = -this->cee[s];
}
upper[Ls-1] *= -this->mq1;
lower[0] *= -this->mq1;
// no shift term
if(this->shift == 0.0){ this->M5D(psi, psi, chi, lower, diag, upper); }
// fused M + shift operation
else { this->M5D_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); }
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeDag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
// coefficients of MooeeDag
std::vector<Coeff_t> diag = this->bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
if(s==0) {
upper[s] = -this->cee[s+1];
lower[s] = this->mq1*this->cee[Ls-1];
} else if(s==(Ls-1)) {
upper[s] = this->mq1*this->cee[0];
lower[s] = -this->cee[s-1];
} else {
upper[s] = -this->cee[s+1];
lower[s] = -this->cee[s-1];
}
}
// no shift term
if(this->shift == 0.0){ this->M5Ddag(psi, psi, chi, lower, diag, upper); }
// fused M + shift operation
else{ this->M5Ddag_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); }
}
/****************************************************************************************/
// Computes coefficients for applying Cayley preconditioned shift operators
// (Mooee + \Delta) --> Mooee_shift
// (Mooee + \Delta)^{-1} --> MooeeInv_shift_lc, MooeeInv_shift_norm
// (Mooee + \Delta)^{-dag} --> MooeeInvDag_shift_lc, MooeeInvDag_shift_norm
// For the latter two cases, the operation takes the form
// [ (Mooee + \Delta)^{-1} \psi ]_{i} = Mooee_{ij} \psi_{j} +
// ( MooeeInv_shift_norm )_{i} ( \sum_{j} [ MooeeInv_shift_lc ]_{j} P_{pm} \psi_{j} )
template<class Impl>
void MobiusEOFAFermion<Impl>::SetCoefficientsPrecondShiftOps()
{
int Ls = this->Ls;
int pm = this->pm;
RealD alpha = this->alpha;
RealD k = this->k;
RealD mq1 = this->mq1;
RealD shift = this->shift;
// Initialize
Mooee_shift.resize(Ls);
MooeeInv_shift_lc.resize(Ls);
MooeeInv_shift_norm.resize(Ls);
MooeeInvDag_shift_lc.resize(Ls);
MooeeInvDag_shift_norm.resize(Ls);
// Construct Mooee_shift
int idx(0);
Coeff_t N = ( (pm == 1) ? 1.0 : -1.0 ) * (2.0*shift*k) *
( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) );
for(int s=0; s<Ls; ++s){
idx = (pm == 1) ? (s) : (Ls-1-s);
Mooee_shift[idx] = N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1);
}
// Tridiagonal solve for MooeeInvDag_shift_lc
{
Coeff_t m(0.0);
std::vector<Coeff_t> d = Mooee_shift;
std::vector<Coeff_t> u(Ls,0.0);
std::vector<Coeff_t> y(Ls,0.0);
std::vector<Coeff_t> q(Ls,0.0);
if(pm == 1){ u[0] = 1.0; }
else{ u[Ls-1] = 1.0; }
// Tridiagonal matrix algorithm + Sherman-Morrison formula
//
// We solve
// ( Mooee' + u \otimes v ) MooeeInvDag_shift_lc = Mooee_shift
// where Mooee' is the tridiagonal part of Mooee_{+}, and
// u = (1,0,...,0) and v = (0,...,0,mq1*cee[0]) are chosen
// so that the outer-product u \otimes v gives the (0,Ls-1)
// entry of Mooee_{+}.
//
// We do this as two solves: Mooee'*y = d and Mooee'*q = u,
// and then construct the solution to the original system
// MooeeInvDag_shift_lc = y - <v,y> / ( 1 + <v,q> ) q
if(pm == 1){
for(int s=1; s<Ls; ++s){
m = -this->cee[s] / this->bee[s-1];
d[s] -= m*d[s-1];
u[s] -= m*u[s-1];
}
}
y[Ls-1] = d[Ls-1] / this->bee[Ls-1];
q[Ls-1] = u[Ls-1] / this->bee[Ls-1];
for(int s=Ls-2; s>=0; --s){
if(pm == 1){
y[s] = d[s] / this->bee[s];
q[s] = u[s] / this->bee[s];
} else {
y[s] = ( d[s] + this->cee[s]*y[s+1] ) / this->bee[s];
q[s] = ( u[s] + this->cee[s]*q[s+1] ) / this->bee[s];
}
}
// Construct MooeeInvDag_shift_lc
for(int s=0; s<Ls; ++s){
if(pm == 1){
MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[0]*y[Ls-1] /
(1.0+mq1*this->cee[0]*q[Ls-1]) * q[s];
} else {
MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[Ls-1]*y[0] /
(1.0+mq1*this->cee[Ls-1]*q[0]) * q[s];
}
}
// Compute remaining coefficients
N = (pm == 1) ? (1.0 + MooeeInvDag_shift_lc[Ls-1]) : (1.0 + MooeeInvDag_shift_lc[0]);
for(int s=0; s<Ls; ++s){
// MooeeInv_shift_lc
if(pm == 1){ MooeeInv_shift_lc[s] = std::pow(this->bee[s],s) * std::pow(this->cee[s],Ls-1-s); }
else{ MooeeInv_shift_lc[s] = std::pow(this->bee[s],Ls-1-s) * std::pow(this->cee[s],s); }
// MooeeInv_shift_norm
MooeeInv_shift_norm[s] = -MooeeInvDag_shift_lc[s] /
( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N;
// MooeeInvDag_shift_norm
if(pm == 1){ MooeeInvDag_shift_norm[s] = -std::pow(this->bee[s],s) * std::pow(this->cee[s],Ls-1-s) /
( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N; }
else{ MooeeInvDag_shift_norm[s] = -std::pow(this->bee[s],Ls-1-s) * std::pow(this->cee[s],s) /
( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N; }
}
}
}
// Recompute coefficients for a different value of shift constant
template<class Impl>
void MobiusEOFAFermion<Impl>::RefreshShiftCoefficients(RealD new_shift)
{
this->shift = new_shift;
if(new_shift != 0.0){
SetCoefficientsPrecondShiftOps();
} else {
int Ls = this->Ls;
Mooee_shift.resize(Ls,0.0);
MooeeInv_shift_lc.resize(Ls,0.0);
MooeeInv_shift_norm.resize(Ls,0.0);
MooeeInvDag_shift_lc.resize(Ls,0.0);
MooeeInvDag_shift_norm.resize(Ls,0.0);
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternalCompute(int dag, int inv,
Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
int Ls = this->Ls;
GridBase* grid = this->FermionRedBlackGrid();
int LLs = grid->_rdimensions[0];
if(LLs == Ls){ return; } // Not vectorised in 5th direction
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
for(int s=0; s<Ls; s++){
Pplus(s,s) = this->bee[s];
Pminus(s,s) = this->bee[s];
}
for(int s=0; s<Ls-1; s++){
Pminus(s,s+1) = -this->cee[s];
Pplus(s+1,s) = -this->cee[s+1];
}
Pplus (0,Ls-1) = this->mq1*this->cee[0];
Pminus(Ls-1,0) = this->mq1*this->cee[Ls-1];
if(this->shift != 0.0){
RealD c = 0.5 * this->alpha;
RealD d = 0.5;
RealD N = this->shift * this->k * ( std::pow(c+d,Ls) + this->mq1*std::pow(c-d,Ls) );
if(this->pm == 1) {
for(int s=0; s<Ls; ++s){
Pplus(s,Ls-1) += N * std::pow(-1.0,s) * std::pow(c-d,s) / std::pow(c+d,Ls+s+1);
}
} else {
for(int s=0; s<Ls; ++s){
Pminus(s,0) += N * std::pow(-1.0,s+1) * std::pow(c-d,Ls-1-s) / std::pow(c+d,2*Ls-s);
}
}
}
Eigen::MatrixXcd PplusMat ;
Eigen::MatrixXcd PminusMat;
if(inv) {
PplusMat = Pplus.inverse();
PminusMat = Pminus.inverse();
} else {
PplusMat = Pplus;
PminusMat = Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
typedef typename SiteHalfSpinor::scalar_type scalar_type;
const int Nsimd = Simd::Nsimd();
Matp.resize(Ls*LLs);
Matm.resize(Ls*LLs);
for(int s2=0; s2<Ls; s2++){
for(int s1=0; s1<LLs; s1++){
int istride = LLs;
int ostride = 1;
Simd Vp;
Simd Vm;
scalar_type *sp = (scalar_type*) &Vp;
scalar_type *sm = (scalar_type*) &Vm;
for(int l=0; l<Nsimd; l++){
if(switcheroo<Coeff_t>::iscomplex()) {
sp[l] = PplusMat (l*istride+s1*ostride,s2);
sm[l] = PminusMat(l*istride+s1*ostride,s2);
} else {
// if real
scalar_type tmp;
tmp = PplusMat (l*istride+s1*ostride,s2);
sp[l] = scalar_type(tmp.real(),tmp.real());
tmp = PminusMat(l*istride+s1*ostride,s2);
sm[l] = scalar_type(tmp.real(),tmp.real());
}
}
Matp[LLs*s2+s1] = Vp;
Matm[LLs*s2+s1] = Vm;
}}
}
FermOpTemplateInstantiate(MobiusEOFAFermion);
GparityFermOpTemplateInstantiate(MobiusEOFAFermion);
}}

View File

@ -26,108 +26,79 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
/* END LEGAL */
#ifndef GRID_QCD_MOBIUS_EOFA_FERMION_H
#define GRID_QCD_MOBIUS_EOFA_FERMION_H
#include <Grid/qcd/action/fermion/AbstractEOFAFermion.h>
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template<class Impl>
class MobiusEOFAFermion : public AbstractEOFAFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
template<class Impl>
class MobiusEOFAFermion : public AbstractEOFAFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
// Shift operator coefficients for red-black preconditioned Mobius EOFA
std::vector<Coeff_t> Mooee_shift;
std::vector<Coeff_t> MooeeInv_shift_lc;
std::vector<Coeff_t> MooeeInv_shift_norm;
std::vector<Coeff_t> MooeeInvDag_shift_lc;
std::vector<Coeff_t> MooeeInvDag_shift_norm;
public:
// Shift operator coefficients for red-black preconditioned Mobius EOFA
Vector<Coeff_t> Mooee_shift;
Vector<Coeff_t> MooeeInv_shift_lc;
Vector<Coeff_t> MooeeInv_shift_norm;
Vector<Coeff_t> MooeeInvDag_shift_lc;
Vector<Coeff_t> MooeeInvDag_shift_norm;
virtual void Instantiatable(void) {};
virtual void Instantiatable(void) {};
// EOFA-specific operations
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag);
virtual void Dtilde (const FermionField& in, FermionField& out);
virtual void DtildeInv (const FermionField& in, FermionField& out);
// EOFA-specific operations
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag);
virtual void Dtilde (const FermionField& in, FermionField& out);
virtual void DtildeInv (const FermionField& in, FermionField& out);
// override multiply
virtual RealD M (const FermionField& in, FermionField& out);
virtual RealD Mdag (const FermionField& in, FermionField& out);
// override multiply
virtual RealD M (const FermionField& in, FermionField& out);
virtual RealD Mdag (const FermionField& in, FermionField& out);
// half checkerboard operations
virtual void Mooee (const FermionField& in, FermionField& out);
virtual void MooeeDag (const FermionField& in, FermionField& out);
virtual void MooeeInv (const FermionField& in, FermionField& out);
virtual void MooeeInv_shift (const FermionField& in, FermionField& out);
virtual void MooeeInvDag (const FermionField& in, FermionField& out);
virtual void MooeeInvDag_shift(const FermionField& in, FermionField& out);
// half checkerboard operations
virtual void Mooee (const FermionField& in, FermionField& out);
virtual void MooeeDag (const FermionField& in, FermionField& out);
virtual void MooeeInv (const FermionField& in, FermionField& out);
virtual void MooeeInv_shift (const FermionField& in, FermionField& out);
virtual void MooeeInvDag (const FermionField& in, FermionField& out);
virtual void MooeeInvDag_shift(const FermionField& in, FermionField& out);
virtual void M5D (const FermionField& psi, FermionField& chi);
virtual void M5Ddag (const FermionField& psi, FermionField& chi);
virtual void M5D (const FermionField& psi, FermionField& chi);
virtual void M5Ddag (const FermionField& psi, FermionField& chi);
/////////////////////////////////////////////////////
// Instantiate different versions depending on Impl
/////////////////////////////////////////////////////
void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
/////////////////////////////////////////////////////
// Instantiate different versions depending on Impl
/////////////////////////////////////////////////////
void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,
Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper);
void M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs);
void M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi,
Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper,
Vector<Coeff_t>& shift_coeffs);
void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,
Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper);
void M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs);
void M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi,
Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper,
Vector<Coeff_t>& shift_coeffs);
void MooeeInternal(const FermionField& in, FermionField& out, int dag, int inv);
virtual void RefreshShiftCoefficients(RealD new_shift);
void MooeeInternalCompute(int dag, int inv, Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
// Constructors
MobiusEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm,
RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams());
void MooeeInternalAsm(const FermionField& in, FermionField& out, int LLs, int site,
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
protected:
void SetCoefficientsPrecondShiftOps(void);
};
void MooeeInternalZAsm(const FermionField& in, FermionField& out, int LLs, int site,
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
virtual void RefreshShiftCoefficients(RealD new_shift);
// Constructors
MobiusEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm,
RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams());
protected:
void SetCoefficientsPrecondShiftOps(void);
};
}}
#define INSTANTIATE_DPERP_MOBIUS_EOFA(A)\
template void MobiusEOFAFermion<A>::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, \
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
template void MobiusEOFAFermion<A>::M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper, std::vector<Coeff_t>& shift_coeffs); \
template void MobiusEOFAFermion<A>::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, \
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
template void MobiusEOFAFermion<A>::M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper, std::vector<Coeff_t>& shift_coeffs); \
template void MobiusEOFAFermion<A>::MooeeInv(const FermionField& psi, FermionField& chi); \
template void MobiusEOFAFermion<A>::MooeeInv_shift(const FermionField& psi, FermionField& chi); \
template void MobiusEOFAFermion<A>::MooeeInvDag(const FermionField& psi, FermionField& chi); \
template void MobiusEOFAFermion<A>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi);
#undef MOBIUS_EOFA_DPERP_DENSE
#define MOBIUS_EOFA_DPERP_CACHE
#undef MOBIUS_EOFA_DPERP_LINALG
#define MOBIUS_EOFA_DPERP_VEC
NAMESPACE_END(Grid);
#endif

View File

@ -1,429 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermioncache.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
namespace Grid {
namespace QCD {
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
{
int Ls = this->Ls;
GridBase *grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
for(int s=0; s<Ls; s++){
auto tmp = psi._odata[0];
if(s==0){
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5m(tmp, psi._odata[ss+0]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
}
}
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
std::vector<Coeff_t> &shift_coeffs)
{
int Ls = this->Ls;
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
GridBase *grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
for(int s=0; s<Ls; s++){
auto tmp = psi._odata[0];
if(s==0){
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5m(tmp, psi._odata[ss+0]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+shift_s]); }
else{ spProj5m(tmp, psi._odata[ss+shift_s]); }
chi[ss+s] = chi[ss+s] + shift_coeffs[s]*tmp;
}
}
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
{
int Ls = this->Ls;
GridBase *grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp = psi._odata[0];
for(int s=0; s<Ls; s++){
if(s==0) {
spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5p(tmp, psi._odata[ss+0]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
}
}
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
std::vector<Coeff_t> &shift_coeffs)
{
int Ls = this->Ls;
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
GridBase *grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
chi[ss+Ls-1] = zero;
auto tmp = psi._odata[0];
for(int s=0; s<Ls; s++){
if(s==0) {
spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5p(tmp, psi._odata[ss+0]);
chi[ss+s] = chi[ss+s] + diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+s]); }
else{ spProj5m(tmp, psi._odata[ss+s]); }
chi[ss+shift_s] = chi[ss+shift_s] + shift_coeffs[s]*tmp;
}
}
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
{
if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
GridBase *grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard;
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp = psi._odata[0];
// Apply (L^{\prime})^{-1}
chi[ss] = psi[ss]; // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
spProj5p(tmp, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp;
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp;
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp;
}
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
spProj5m(tmp, chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - this->uee[s]*tmp;
}
}
this->MooeeInvTime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi, FermionField &chi)
{
GridBase *grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard;
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp1 = psi._odata[0];
auto tmp2 = psi._odata[0];
auto tmp2_spProj = psi._odata[0];
// Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2
chi[ss] = psi[ss]; // chi[0]=psi[0]
tmp2 = MooeeInv_shift_lc[0]*psi[ss];
for(int s=1; s<Ls; s++){
spProj5p(tmp1, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp1;
tmp2 = tmp2 + MooeeInv_shift_lc[s]*psi[ss+s];
}
if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
else{ spProj5m(tmp2_spProj, tmp2); }
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp1, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp1;
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp1, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp1;
}
// chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
spProj5m(tmp1, chi[ss+Ls-1]);
chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
// Apply U^{-1} and add shift term
for(int s=Ls-2; s>=0; s--){
chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1;
spProj5m(tmp1, chi[ss+s]);
chi[ss+s] = chi[ss+s] + MooeeInv_shift_norm[s]*tmp2_spProj;
}
}
this->MooeeInvTime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi, FermionField &chi)
{
if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
GridBase *grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard;
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp = psi._odata[0];
// Apply (U^{\prime})^{-dag}
chi[ss] = psi[ss];
for(int s=1; s<Ls; s++){
spProj5m(tmp, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp;
}
// U_m^{-\dag}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp;
}
// L_m^{-\dag} D^{-dag}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp;
}
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
// Apply L^{-dag}
for(int s=Ls-2; s>=0; s--){
spProj5p(tmp, chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - this->lee[s]*tmp;
}
}
this->MooeeInvTime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi, FermionField &chi)
{
GridBase *grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard;
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp1 = psi._odata[0];
auto tmp2 = psi._odata[0];
auto tmp2_spProj = psi._odata[0];
// Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2
chi[ss] = psi[ss];
tmp2 = MooeeInvDag_shift_lc[0]*psi[ss];
for(int s=1; s<Ls; s++){
spProj5m(tmp1, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp1;
tmp2 = tmp2 + MooeeInvDag_shift_lc[s]*psi[ss+s];
}
if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
else{ spProj5m(tmp2_spProj, tmp2); }
// U_m^{-\dag}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp1, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp1;
}
// L_m^{-\dag} D^{-dag}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp1, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp1;
}
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
spProj5p(tmp1, chi[ss+Ls-1]);
chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInvDag_shift_norm[Ls-1]*tmp2_spProj;
// Apply L^{-dag}
for(int s=Ls-2; s>=0; s--){
chi[ss+s] = chi[ss+s] - this->lee[s]*tmp1;
spProj5p(tmp1, chi[ss+s]);
chi[ss+s] = chi[ss+s] + MooeeInvDag_shift_norm[s]*tmp2_spProj;
}
}
this->MooeeInvTime += usecond();
}
#ifdef MOBIUS_EOFA_DPERP_CACHE
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
#endif
}}

View File

@ -1,184 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermiondense.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
namespace Grid {
namespace QCD {
/*
* Dense matrix versions of routines
*/
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{
int Ls = this->Ls;
int LLs = psi._grid->_rdimensions[0];
int vol = psi._grid->oSites()/LLs;
int pm = this->pm;
RealD shift = this->shift;
RealD alpha = this->alpha;
RealD k = this->k;
RealD mq1 = this->mq1;
chi.checkerboard = psi.checkerboard;
assert(Ls==LLs);
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
for(int s=0;s<Ls;s++){
Pplus(s,s) = this->bee[s];
Pminus(s,s) = this->bee[s];
}
for(int s=0; s<Ls-1; s++){
Pminus(s,s+1) = -this->cee[s];
}
for(int s=0; s<Ls-1; s++){
Pplus(s+1,s) = -this->cee[s+1];
}
Pplus (0,Ls-1) = mq1*this->cee[0];
Pminus(Ls-1,0) = mq1*this->cee[Ls-1];
if(shift != 0.0){
Coeff_t N = 2.0 * ( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) );
for(int s=0; s<Ls; ++s){
if(pm == 1){ Pplus(s,Ls-1) += shift * k * N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1); }
else{ Pminus(Ls-1-s,Ls-1) -= shift * k * N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1); }
}
}
Eigen::MatrixXd PplusMat ;
Eigen::MatrixXd PminusMat;
if(inv){
PplusMat = Pplus.inverse();
PminusMat = Pminus.inverse();
} else {
PplusMat = Pplus;
PminusMat = Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
// For the non-vectorised s-direction this is simple
for(auto site=0; site<vol; site++){
SiteSpinor SiteChi;
SiteHalfSpinor SitePplus;
SiteHalfSpinor SitePminus;
for(int s1=0; s1<Ls; s1++){
SiteChi = zero;
for(int s2=0; s2<Ls; s2++){
int lex2 = s2 + Ls*site;
if(PplusMat(s1,s2) != 0.0){
spProj5p(SitePplus,psi[lex2]);
accumRecon5p(SiteChi, PplusMat(s1,s2)*SitePplus);
}
if(PminusMat(s1,s2) != 0.0){
spProj5m(SitePminus, psi[lex2]);
accumRecon5m(SiteChi, PminusMat(s1,s2)*SitePminus);
}
}
chi[s1+Ls*site] = SiteChi*0.5;
}
}
}
#ifdef MOBIUS_EOFA_DPERP_DENSE
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
template void MobiusEOFAFermion<GparityWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<GparityWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
template void MobiusEOFAFermion<GparityWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<GparityWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
#endif
}}

View File

@ -1,290 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionssp.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
namespace Grid {
namespace QCD {
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus fowards
// Pplus backwards
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
}
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); }
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); }
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
}
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); }
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); }
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard = psi.checkerboard;
int Ls = this->Ls;
// Apply (L^{\prime})^{-1}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1);
}
axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1);
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls]
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard = psi.checkerboard;
int Ls = this->Ls;
FermionField tmp(psi._grid);
// Apply (L^{\prime})^{-1}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
axpby_ssp(tmp, czero, tmp, this->MooeeInv_shift_lc[0], psi, 0, 0);
for(int s=1; s<Ls; s++){
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
axpby_ssp(tmp, one, tmp, this->MooeeInv_shift_lc[s], psi, 0, s);
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1);
}
axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1);
// Apply U^{-1} and add shift term
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); }
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls]
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); }
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard = psi.checkerboard;
int Ls = this->Ls;
// Apply (U^{\prime})^{-dagger}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
}
axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1);
// Apply L^{-dagger}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls]
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard = psi.checkerboard;
int Ls = this->Ls;
FermionField tmp(psi._grid);
// Apply (U^{\prime})^{-dagger} and accumulate (MooeeInvDag_shift_lc)_{j} \psi_{j} in tmp[0]
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
axpby_ssp(tmp, czero, tmp, this->MooeeInvDag_shift_lc[0], psi, 0, 0);
for(int s=1; s<Ls; s++){
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
axpby_ssp(tmp, one, tmp, this->MooeeInvDag_shift_lc[s], psi, 0, s);
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
}
axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1);
// Apply L^{-dagger} and add shift
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); }
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls]
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); }
}
}
#ifdef MOBIUS_EOFA_DPERP_LINALG
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
#endif
}}

View File

@ -1,983 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionvec.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
namespace Grid {
namespace QCD {
/*
* Dense matrix versions of routines
*/
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
GridBase* grid = psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
const int nsimd = Simd::Nsimd();
Vector<iSinglet<Simd>> u(LLs);
Vector<iSinglet<Simd>> l(LLs);
Vector<iSinglet<Simd>> d(LLs);
assert(Ls/LLs == nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
scalar_type* l_p = (scalar_type*) &l[0];
scalar_type* d_p = (scalar_type*) &d[0];
for(int o=0; o<LLs; o++){ // outer
for(int i=0; i<nsimd; i++){ //inner
int s = o + i*LLs;
int ss = o*nsimd + i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
this->M5Dcalls++;
this->M5Dtime -= usecond();
assert(Nc == 3);
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0; v<LLs; v++){
int vp = (v+1)%LLs;
int vm = (v+LLs-1)%LLs;
spProj5m(hp, psi[ss+vp]);
spProj5p(hm, psi[ss+vm]);
if (vp <= v){ rotate(hp, hp, 1); }
if (vm >= v){ rotate(hm, hm, nsimd-1); }
hp = 0.5*hp;
hm = 0.5*hm;
spRecon5m(fp, hp);
spRecon5p(fm, hm);
chi[ss+v] = d[v]*phi[ss+v];
chi[ss+v] = chi[ss+v] + u[v]*fp;
chi[ss+v] = chi[ss+v] + l[v]*fm;
}
#else
for(int v=0; v<LLs; v++){
vprefetch(psi[ss+v+LLs]);
int vp = (v == LLs-1) ? 0 : v+1;
int vm = (v == 0) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(2)(0);
Simd hp_01 = psi[ss+vp]()(2)(1);
Simd hp_02 = psi[ss+vp]()(2)(2);
Simd hp_10 = psi[ss+vp]()(3)(0);
Simd hp_11 = psi[ss+vp]()(3)(1);
Simd hp_12 = psi[ss+vp]()(3)(2);
Simd hm_00 = psi[ss+vm]()(0)(0);
Simd hm_01 = psi[ss+vm]()(0)(1);
Simd hm_02 = psi[ss+vm]()(0)(2);
Simd hm_10 = psi[ss+vm]()(1)(0);
Simd hm_11 = psi[ss+vm]()(1)(1);
Simd hm_12 = psi[ss+vm]()(1)(2);
if(vp <= v){
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if(vm >= v){
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
// Can force these to real arithmetic and save 2x.
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
vstream(chi[ss+v]()(0)(0), p_00);
vstream(chi[ss+v]()(0)(1), p_01);
vstream(chi[ss+v]()(0)(2), p_02);
vstream(chi[ss+v]()(1)(0), p_10);
vstream(chi[ss+v]()(1)(1), p_11);
vstream(chi[ss+v]()(1)(2), p_12);
vstream(chi[ss+v]()(2)(0), p_20);
vstream(chi[ss+v]()(2)(1), p_21);
vstream(chi[ss+v]()(2)(2), p_22);
vstream(chi[ss+v]()(3)(0), p_30);
vstream(chi[ss+v]()(3)(1), p_31);
vstream(chi[ss+v]()(3)(2), p_32);
}
#endif
}
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs)
{
#if 0
this->M5D(psi, phi, chi, lower, diag, upper);
// FIXME: possible gain from vectorizing shift operation as well?
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); }
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); }
}
#else
GridBase* grid = psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
const int nsimd = Simd::Nsimd();
Vector<iSinglet<Simd>> u(LLs);
Vector<iSinglet<Simd>> l(LLs);
Vector<iSinglet<Simd>> d(LLs);
Vector<iSinglet<Simd>> s(LLs);
assert(Ls/LLs == nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
scalar_type* l_p = (scalar_type*) &l[0];
scalar_type* d_p = (scalar_type*) &d[0];
scalar_type* s_p = (scalar_type*) &s[0];
for(int o=0; o<LLs; o++){ // outer
for(int i=0; i<nsimd; i++){ //inner
int s = o + i*LLs;
int ss = o*nsimd + i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
s_p[ss] = shift_coeffs[s];
}}
this->M5Dcalls++;
this->M5Dtime -= usecond();
assert(Nc == 3);
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
int vs = (this->pm == 1) ? LLs-1 : 0;
Simd hs_00 = (this->pm == 1) ? psi[ss+vs]()(2)(0) : psi[ss+vs]()(0)(0);
Simd hs_01 = (this->pm == 1) ? psi[ss+vs]()(2)(1) : psi[ss+vs]()(0)(1);
Simd hs_02 = (this->pm == 1) ? psi[ss+vs]()(2)(2) : psi[ss+vs]()(0)(2);
Simd hs_10 = (this->pm == 1) ? psi[ss+vs]()(3)(0) : psi[ss+vs]()(1)(0);
Simd hs_11 = (this->pm == 1) ? psi[ss+vs]()(3)(1) : psi[ss+vs]()(1)(1);
Simd hs_12 = (this->pm == 1) ? psi[ss+vs]()(3)(2) : psi[ss+vs]()(1)(2);
for(int v=0; v<LLs; v++){
vprefetch(psi[ss+v+LLs]);
int vp = (v == LLs-1) ? 0 : v+1;
int vm = (v == 0) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(2)(0);
Simd hp_01 = psi[ss+vp]()(2)(1);
Simd hp_02 = psi[ss+vp]()(2)(2);
Simd hp_10 = psi[ss+vp]()(3)(0);
Simd hp_11 = psi[ss+vp]()(3)(1);
Simd hp_12 = psi[ss+vp]()(3)(2);
Simd hm_00 = psi[ss+vm]()(0)(0);
Simd hm_01 = psi[ss+vm]()(0)(1);
Simd hm_02 = psi[ss+vm]()(0)(2);
Simd hm_10 = psi[ss+vm]()(1)(0);
Simd hm_11 = psi[ss+vm]()(1)(1);
Simd hm_12 = psi[ss+vm]()(1)(2);
if(vp <= v){
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if(this->pm == 1 && vs <= v){
hs_00.v = Optimization::Rotate::tRotate<2>(hs_00.v);
hs_01.v = Optimization::Rotate::tRotate<2>(hs_01.v);
hs_02.v = Optimization::Rotate::tRotate<2>(hs_02.v);
hs_10.v = Optimization::Rotate::tRotate<2>(hs_10.v);
hs_11.v = Optimization::Rotate::tRotate<2>(hs_11.v);
hs_12.v = Optimization::Rotate::tRotate<2>(hs_12.v);
}
if(vm >= v){
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
if(this->pm == -1 && vs >= v){
hs_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_00.v);
hs_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_01.v);
hs_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_02.v);
hs_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_10.v);
hs_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_11.v);
hs_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_12.v);
}
// Can force these to real arithmetic and save 2x.
Simd p_00 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00);
Simd p_01 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01);
Simd p_02 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02);
Simd p_10 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10);
Simd p_11 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11);
Simd p_12 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12);
Simd p_20 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
Simd p_21 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
Simd p_22 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
Simd p_30 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
Simd p_31 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
Simd p_32 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
vstream(chi[ss+v]()(0)(0), p_00);
vstream(chi[ss+v]()(0)(1), p_01);
vstream(chi[ss+v]()(0)(2), p_02);
vstream(chi[ss+v]()(1)(0), p_10);
vstream(chi[ss+v]()(1)(1), p_11);
vstream(chi[ss+v]()(1)(2), p_12);
vstream(chi[ss+v]()(2)(0), p_20);
vstream(chi[ss+v]()(2)(1), p_21);
vstream(chi[ss+v]()(2)(2), p_22);
vstream(chi[ss+v]()(3)(0), p_30);
vstream(chi[ss+v]()(3)(1), p_31);
vstream(chi[ss+v]()(3)(2), p_32);
}
}
this->M5Dtime += usecond();
#endif
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
GridBase* grid = psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
int nsimd = Simd::Nsimd();
Vector<iSinglet<Simd>> u(LLs);
Vector<iSinglet<Simd>> l(LLs);
Vector<iSinglet<Simd>> d(LLs);
assert(Ls/LLs == nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
scalar_type* l_p = (scalar_type*) &l[0];
scalar_type* d_p = (scalar_type*) &d[0];
for(int o=0; o<LLs; o++){ // outer
for(int i=0; i<nsimd; i++){ //inner
int s = o + i*LLs;
int ss = o*nsimd + i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0; v<LLs; v++){
int vp = (v+1)%LLs;
int vm = (v+LLs-1)%LLs;
spProj5p(hp, psi[ss+vp]);
spProj5m(hm, psi[ss+vm]);
if(vp <= v){ rotate(hp, hp, 1); }
if(vm >= v){ rotate(hm, hm, nsimd-1); }
hp = hp*0.5;
hm = hm*0.5;
spRecon5p(fp, hp);
spRecon5m(fm, hm);
chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
chi[ss+v] = chi[ss+v] +l[v]*fm;
}
#else
for(int v=0; v<LLs; v++){
vprefetch(psi[ss+v+LLs]);
int vp = (v == LLs-1) ? 0 : v+1;
int vm = (v == 0 ) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(0)(0);
Simd hp_01 = psi[ss+vp]()(0)(1);
Simd hp_02 = psi[ss+vp]()(0)(2);
Simd hp_10 = psi[ss+vp]()(1)(0);
Simd hp_11 = psi[ss+vp]()(1)(1);
Simd hp_12 = psi[ss+vp]()(1)(2);
Simd hm_00 = psi[ss+vm]()(2)(0);
Simd hm_01 = psi[ss+vm]()(2)(1);
Simd hm_02 = psi[ss+vm]()(2)(2);
Simd hm_10 = psi[ss+vm]()(3)(0);
Simd hm_11 = psi[ss+vm]()(3)(1);
Simd hm_12 = psi[ss+vm]()(3)(2);
if (vp <= v){
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if(vm >= v){
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
vstream(chi[ss+v]()(0)(0), p_00);
vstream(chi[ss+v]()(0)(1), p_01);
vstream(chi[ss+v]()(0)(2), p_02);
vstream(chi[ss+v]()(1)(0), p_10);
vstream(chi[ss+v]()(1)(1), p_11);
vstream(chi[ss+v]()(1)(2), p_12);
vstream(chi[ss+v]()(2)(0), p_20);
vstream(chi[ss+v]()(2)(1), p_21);
vstream(chi[ss+v]()(2)(2), p_22);
vstream(chi[ss+v]()(3)(0), p_30);
vstream(chi[ss+v]()(3)(1), p_31);
vstream(chi[ss+v]()(3)(2), p_32);
}
#endif
}
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs)
{
#if 0
this->M5Ddag(psi, phi, chi, lower, diag, upper);
// FIXME: possible gain from vectorizing shift operation as well?
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); }
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); }
}
#else
GridBase* grid = psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
int nsimd = Simd::Nsimd();
Vector<iSinglet<Simd>> u(LLs);
Vector<iSinglet<Simd>> l(LLs);
Vector<iSinglet<Simd>> d(LLs);
Vector<iSinglet<Simd>> s(LLs);
assert(Ls/LLs == nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
scalar_type* l_p = (scalar_type*) &l[0];
scalar_type* d_p = (scalar_type*) &d[0];
scalar_type* s_p = (scalar_type*) &s[0];
for(int o=0; o<LLs; o++){ // outer
for(int i=0; i<nsimd; i++){ //inner
int s = o + i*LLs;
int ss = o*nsimd + i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
s_p[ss] = shift_coeffs[s];
}}
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
int vs = (this->pm == 1) ? LLs-1 : 0;
Simd hs_00 = (this->pm == 1) ? psi[ss+vs]()(0)(0) : psi[ss+vs]()(2)(0);
Simd hs_01 = (this->pm == 1) ? psi[ss+vs]()(0)(1) : psi[ss+vs]()(2)(1);
Simd hs_02 = (this->pm == 1) ? psi[ss+vs]()(0)(2) : psi[ss+vs]()(2)(2);
Simd hs_10 = (this->pm == 1) ? psi[ss+vs]()(1)(0) : psi[ss+vs]()(3)(0);
Simd hs_11 = (this->pm == 1) ? psi[ss+vs]()(1)(1) : psi[ss+vs]()(3)(1);
Simd hs_12 = (this->pm == 1) ? psi[ss+vs]()(1)(2) : psi[ss+vs]()(3)(2);
for(int v=0; v<LLs; v++){
vprefetch(psi[ss+v+LLs]);
int vp = (v == LLs-1) ? 0 : v+1;
int vm = (v == 0 ) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(0)(0);
Simd hp_01 = psi[ss+vp]()(0)(1);
Simd hp_02 = psi[ss+vp]()(0)(2);
Simd hp_10 = psi[ss+vp]()(1)(0);
Simd hp_11 = psi[ss+vp]()(1)(1);
Simd hp_12 = psi[ss+vp]()(1)(2);
Simd hm_00 = psi[ss+vm]()(2)(0);
Simd hm_01 = psi[ss+vm]()(2)(1);
Simd hm_02 = psi[ss+vm]()(2)(2);
Simd hm_10 = psi[ss+vm]()(3)(0);
Simd hm_11 = psi[ss+vm]()(3)(1);
Simd hm_12 = psi[ss+vm]()(3)(2);
if (vp <= v){
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if(this->pm == 1 && vs <= v){
hs_00.v = Optimization::Rotate::tRotate<2>(hs_00.v);
hs_01.v = Optimization::Rotate::tRotate<2>(hs_01.v);
hs_02.v = Optimization::Rotate::tRotate<2>(hs_02.v);
hs_10.v = Optimization::Rotate::tRotate<2>(hs_10.v);
hs_11.v = Optimization::Rotate::tRotate<2>(hs_11.v);
hs_12.v = Optimization::Rotate::tRotate<2>(hs_12.v);
}
if(vm >= v){
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
if(this->pm == -1 && vs >= v){
hs_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_00.v);
hs_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_01.v);
hs_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_02.v);
hs_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_10.v);
hs_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_11.v);
hs_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_12.v);
}
Simd p_00 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
Simd p_01 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
Simd p_02 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
Simd p_10 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
Simd p_11 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
Simd p_12 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
Simd p_20 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00);
Simd p_21 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01);
Simd p_22 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02);
Simd p_30 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10);
Simd p_31 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11);
Simd p_32 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12);
vstream(chi[ss+v]()(0)(0), p_00);
vstream(chi[ss+v]()(0)(1), p_01);
vstream(chi[ss+v]()(0)(2), p_02);
vstream(chi[ss+v]()(1)(0), p_10);
vstream(chi[ss+v]()(1)(1), p_11);
vstream(chi[ss+v]()(1)(2), p_12);
vstream(chi[ss+v]()(2)(0), p_20);
vstream(chi[ss+v]()(2)(1), p_21);
vstream(chi[ss+v]()(2)(2), p_22);
vstream(chi[ss+v]()(3)(0), p_30);
vstream(chi[ss+v]()(3)(1), p_31);
vstream(chi[ss+v]()(3)(2), p_32);
}
}
this->M5Dtime += usecond();
#endif
}
#ifdef AVX512
#include<simd/Intel512common.h>
#include<simd/Intel512avx.h>
#include<simd/Intel512single.h>
#endif
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
#ifndef AVX512
{
SiteHalfSpinor BcastP;
SiteHalfSpinor BcastM;
SiteHalfSpinor SiteChiP;
SiteHalfSpinor SiteChiM;
// Ls*Ls * 2 * 12 * vol flops
for(int s1=0; s1<LLs; s1++){
for(int s2=0; s2<LLs; s2++){
for(int l=0; l < Simd::Nsimd(); l++){ // simd lane
int s = s2 + l*LLs;
int lex = s2 + LLs*site;
if( s2==0 && l==0 ){
SiteChiP=zero;
SiteChiM=zero;
}
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
vbroadcast(BcastP()(sp)(co), psi[lex]()(sp)(co), l);
}}
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
vbroadcast(BcastM()(sp)(co), psi[lex]()(sp+2)(co), l);
}}
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
SiteChiP()(sp)(co) = real_madd(Matp[LLs*s+s1]()()(), BcastP()(sp)(co), SiteChiP()(sp)(co)); // 1100 us.
SiteChiM()(sp)(co) = real_madd(Matm[LLs*s+s1]()()(), BcastM()(sp)(co), SiteChiM()(sp)(co)); // each found by commenting out
}}
}}
{
int lex = s1 + LLs*site;
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
}}
}
}
}
#else
{
// pointers
// MASK_REGS;
#define Chi_00 %%zmm1
#define Chi_01 %%zmm2
#define Chi_02 %%zmm3
#define Chi_10 %%zmm4
#define Chi_11 %%zmm5
#define Chi_12 %%zmm6
#define Chi_20 %%zmm7
#define Chi_21 %%zmm8
#define Chi_22 %%zmm9
#define Chi_30 %%zmm10
#define Chi_31 %%zmm11
#define Chi_32 %%zmm12
#define BCAST0 %%zmm13
#define BCAST1 %%zmm14
#define BCAST2 %%zmm15
#define BCAST3 %%zmm16
#define BCAST4 %%zmm17
#define BCAST5 %%zmm18
#define BCAST6 %%zmm19
#define BCAST7 %%zmm20
#define BCAST8 %%zmm21
#define BCAST9 %%zmm22
#define BCAST10 %%zmm23
#define BCAST11 %%zmm24
int incr = LLs*LLs*sizeof(iSinglet<Simd>);
for(int s1=0; s1<LLs; s1++){
for(int s2=0; s2<LLs; s2++){
int lex = s2 + LLs*site;
uint64_t a0 = (uint64_t) &Matp[LLs*s2+s1]; // should be cacheable
uint64_t a1 = (uint64_t) &Matm[LLs*s2+s1];
uint64_t a2 = (uint64_t) &psi[lex];
for(int l=0; l<Simd::Nsimd(); l++){ // simd lane
if((s2+l)==0) {
asm(
VPREFETCH1(0,%2) VPREFETCH1(0,%1)
VPREFETCH1(12,%2) VPREFETCH1(13,%2)
VPREFETCH1(14,%2) VPREFETCH1(15,%2)
VBCASTCDUP(0,%2,BCAST0)
VBCASTCDUP(1,%2,BCAST1)
VBCASTCDUP(2,%2,BCAST2)
VBCASTCDUP(3,%2,BCAST3)
VBCASTCDUP(4,%2,BCAST4) VMULMEM(0,%0,BCAST0,Chi_00)
VBCASTCDUP(5,%2,BCAST5) VMULMEM(0,%0,BCAST1,Chi_01)
VBCASTCDUP(6,%2,BCAST6) VMULMEM(0,%0,BCAST2,Chi_02)
VBCASTCDUP(7,%2,BCAST7) VMULMEM(0,%0,BCAST3,Chi_10)
VBCASTCDUP(8,%2,BCAST8) VMULMEM(0,%0,BCAST4,Chi_11)
VBCASTCDUP(9,%2,BCAST9) VMULMEM(0,%0,BCAST5,Chi_12)
VBCASTCDUP(10,%2,BCAST10) VMULMEM(0,%1,BCAST6,Chi_20)
VBCASTCDUP(11,%2,BCAST11) VMULMEM(0,%1,BCAST7,Chi_21)
VMULMEM(0,%1,BCAST8,Chi_22)
VMULMEM(0,%1,BCAST9,Chi_30)
VMULMEM(0,%1,BCAST10,Chi_31)
VMULMEM(0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
} else {
asm(
VBCASTCDUP(0,%2,BCAST0) VMADDMEM(0,%0,BCAST0,Chi_00)
VBCASTCDUP(1,%2,BCAST1) VMADDMEM(0,%0,BCAST1,Chi_01)
VBCASTCDUP(2,%2,BCAST2) VMADDMEM(0,%0,BCAST2,Chi_02)
VBCASTCDUP(3,%2,BCAST3) VMADDMEM(0,%0,BCAST3,Chi_10)
VBCASTCDUP(4,%2,BCAST4) VMADDMEM(0,%0,BCAST4,Chi_11)
VBCASTCDUP(5,%2,BCAST5) VMADDMEM(0,%0,BCAST5,Chi_12)
VBCASTCDUP(6,%2,BCAST6) VMADDMEM(0,%1,BCAST6,Chi_20)
VBCASTCDUP(7,%2,BCAST7) VMADDMEM(0,%1,BCAST7,Chi_21)
VBCASTCDUP(8,%2,BCAST8) VMADDMEM(0,%1,BCAST8,Chi_22)
VBCASTCDUP(9,%2,BCAST9) VMADDMEM(0,%1,BCAST9,Chi_30)
VBCASTCDUP(10,%2,BCAST10) VMADDMEM(0,%1,BCAST10,Chi_31)
VBCASTCDUP(11,%2,BCAST11) VMADDMEM(0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
}
a0 = a0 + incr;
a1 = a1 + incr;
a2 = a2 + sizeof(typename Simd::scalar_type);
}
}
{
int lexa = s1+LLs*site;
asm (
VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01) VSTORE(2 ,%0,Chi_02)
VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11) VSTORE(5 ,%0,Chi_12)
VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21) VSTORE(8 ,%0,Chi_22)
VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31) VSTORE(11,%0,Chi_32)
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
}
}
}
#undef Chi_00
#undef Chi_01
#undef Chi_02
#undef Chi_10
#undef Chi_11
#undef Chi_12
#undef Chi_20
#undef Chi_21
#undef Chi_22
#undef Chi_30
#undef Chi_31
#undef Chi_32
#undef BCAST0
#undef BCAST1
#undef BCAST2
#undef BCAST3
#undef BCAST4
#undef BCAST5
#undef BCAST6
#undef BCAST7
#undef BCAST8
#undef BCAST9
#undef BCAST10
#undef BCAST11
#endif
};
// Z-mobius version
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, FermionField& chi,
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
std::cout << "Error: zMobius not implemented for EOFA" << std::endl;
exit(-1);
};
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{
int Ls = this->Ls;
int LLs = psi._grid->_rdimensions[0];
int vol = psi._grid->oSites()/LLs;
chi.checkerboard = psi.checkerboard;
Vector<iSinglet<Simd>> Matp;
Vector<iSinglet<Simd>> Matm;
Vector<iSinglet<Simd>>* _Matp;
Vector<iSinglet<Simd>>* _Matm;
// MooeeInternalCompute(dag,inv,Matp,Matm);
if(inv && dag){
_Matp = &this->MatpInvDag;
_Matm = &this->MatmInvDag;
}
if(inv && (!dag)){
_Matp = &this->MatpInv;
_Matm = &this->MatmInv;
}
if(!inv){
MooeeInternalCompute(dag, inv, Matp, Matm);
_Matp = &Matp;
_Matm = &Matm;
}
assert(_Matp->size() == Ls*LLs);
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
if(switcheroo<Coeff_t>::iscomplex()){
parallel_for(auto site=0; site<vol; site++){
MooeeInternalZAsm(psi, chi, LLs, site, *_Matp, *_Matm);
}
} else {
parallel_for(auto site=0; site<vol; site++){
MooeeInternalAsm(psi, chi, LLs, site, *_Matp, *_Matm);
}
}
this->MooeeInvTime += usecond();
}
#ifdef MOBIUS_EOFA_DPERP_VEC
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplFH);
template void MobiusEOFAFermion<DomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<DomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<DomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<DomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
#endif
}}

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,57 +24,54 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_MOBIUS_FERMION_H
#define GRID_QCD_MOBIUS_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class MobiusFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class MobiusFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
MobiusFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD b, RealD c,const ImplParams &p= ImplParams()) :
virtual void Instantiatable(void) {};
// Constructors
MobiusFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD b, RealD c,const ImplParams &p= ImplParams()) :
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
RealD eps = 1.0;
{
RealD eps = 1.0;
std::cout<<GridLogMessage << "MobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" Tanh approx"<<std::endl;
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
assert(zdata->n==this->Ls);
std::cout<<GridLogMessage << "MobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" Tanh approx"<<std::endl;
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
assert(zdata->n==this->Ls);
// Call base setter
this->SetCoefficientsTanh(zdata,b,c);
// Call base setter
this->SetCoefficientsTanh(zdata,b,c);
Approx::zolotarev_free(zdata);
Approx::zolotarev_free(zdata);
}
};
}
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,58 +24,55 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H
#define GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class MobiusZolotarevFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class MobiusZolotarevFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
MobiusZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD b, RealD c,
RealD lo, RealD hi,const ImplParams &p= ImplParams()) :
virtual void Instantiatable(void) {};
// Constructors
MobiusZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD b, RealD c,
RealD lo, RealD hi,const ImplParams &p= ImplParams()) :
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
RealD eps = lo/hi;
{
RealD eps = lo/hi;
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,this->Ls,0);
assert(zdata->n==this->Ls);
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,this->Ls,0);
assert(zdata->n==this->Ls);
std::cout<<GridLogMessage << "MobiusZolotarevFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" Zolotarev range ["<<lo<<","<<hi<<"]"<<std::endl;
std::cout<<GridLogMessage << "MobiusZolotarevFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" Zolotarev range ["<<lo<<","<<hi<<"]"<<std::endl;
// Call base setter
this->SetCoefficientsZolotarev(hi,zdata,b,c);
// Call base setter
this->SetCoefficientsZolotarev(hi,zdata,b,c);
Approx::zolotarev_free(zdata);
}
};
Approx::zolotarev_free(zdata);
}
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,46 +24,44 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
#define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class OverlapWilsonCayleyTanhFermion : public MobiusFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class OverlapWilsonCayleyTanhFermion : public MobiusFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) {
this->MomentumSpacePropagatorHw(out,in,_m,twist);
};
};
// Constructors
OverlapWilsonCayleyTanhFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD scale,const ImplParams &p= ImplParams()) :
// Constructors
OverlapWilsonCayleyTanhFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD scale,const ImplParams &p= ImplParams()) :
// b+c=scale, b-c = 0 <=> b =c = scale/2
MobiusFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,0.5*scale,0.5*scale,p)
{
}
};
// b+c=scale, b-c = 0 <=> b =c = scale/2
MobiusFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,0.5*scale,0.5*scale,p)
{
}
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,45 +24,42 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H
#define OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class OverlapWilsonCayleyZolotarevFermion : public MobiusZolotarevFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class OverlapWilsonCayleyZolotarevFermion : public MobiusZolotarevFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
// Constructors
// Constructors
OverlapWilsonCayleyZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo, RealD hi,const ImplParams &p= ImplParams()) :
// b+c=1.0, b-c = 0 <=> b =c = 1/2
MobiusZolotarevFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,0.5,0.5,lo,hi,p)
OverlapWilsonCayleyZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo, RealD hi,const ImplParams &p= ImplParams()) :
// b+c=1.0, b-c = 0 <=> b =c = 1/2
MobiusZolotarevFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,0.5,0.5,lo,hi,p)
{}
{}
};
};
}
}
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,48 +24,47 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H
#define OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class OverlapWilsonContFracTanhFermion : public ContinuedFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class OverlapWilsonContFracTanhFermion : public ContinuedFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonContFracTanhFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD scale,const ImplParams &p= ImplParams()) :
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonContFracTanhFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD scale,const ImplParams &p= ImplParams()) :
// b+c=scale, b-c = 0 <=> b =c = scale/2
ContinuedFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((this->Ls&0x1)==1); // Odd Ls required
int nrational=this->Ls-1;// Even rational order
Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham
this->SetCoefficientsTanh(zdata,scale);
Approx::zolotarev_free(zdata);
}
};
// b+c=scale, b-c = 0 <=> b =c = scale/2
ContinuedFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((this->Ls&0x1)==1); // Odd Ls required
int nrational=this->Ls-1;// Even rational order
Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham
this->SetCoefficientsTanh(zdata,scale);
Approx::zolotarev_free(zdata);
}
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,51 +24,49 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H
#define OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class OverlapWilsonContFracZolotarevFermion : public ContinuedFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
template<class Impl>
class OverlapWilsonContFracZolotarevFermion : public ContinuedFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonContFracZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo,RealD hi,const ImplParams &p= ImplParams()):
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonContFracZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo,RealD hi,const ImplParams &p= ImplParams()):
// b+c=scale, b-c = 0 <=> b =c = scale/2
ContinuedFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((this->Ls&0x1)==1); // Odd Ls required
// b+c=scale, b-c = 0 <=> b =c = scale/2
ContinuedFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((this->Ls&0x1)==1); // Odd Ls required
int nrational=this->Ls;// Odd rational order
RealD eps = lo/hi;
int nrational=this->Ls;// Odd rational order
RealD eps = lo/hi;
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0);
this->SetCoefficientsZolotarev(hi,zdata);
Approx::zolotarev_free(zdata);
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0);
this->SetCoefficientsZolotarev(hi,zdata);
Approx::zolotarev_free(zdata);
}
};
}
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,48 +24,46 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H
#define OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class OverlapWilsonPartialFractionTanhFermion : public PartialFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class OverlapWilsonPartialFractionTanhFermion : public PartialFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonPartialFractionTanhFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD scale,const ImplParams &p= ImplParams()) :
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonPartialFractionTanhFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD scale,const ImplParams &p= ImplParams()) :
// b+c=scale, b-c = 0 <=> b =c = scale/2
PartialFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((this->Ls&0x1)==1); // Odd Ls required
int nrational=this->Ls-1;// Even rational order
Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham
this->SetCoefficientsTanh(zdata,scale);
Approx::zolotarev_free(zdata);
}
};
// b+c=scale, b-c = 0 <=> b =c = scale/2
PartialFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((this->Ls&0x1)==1); // Odd Ls required
int nrational=this->Ls-1;// Even rational order
Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham
this->SetCoefficientsTanh(zdata,scale);
Approx::zolotarev_free(zdata);
}
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,51 +24,50 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H
#define OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class OverlapWilsonPartialFractionZolotarevFermion : public PartialFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
template<class Impl>
class OverlapWilsonPartialFractionZolotarevFermion : public PartialFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonPartialFractionZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo,RealD hi,const ImplParams &p= ImplParams()):
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonPartialFractionZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo,RealD hi,const ImplParams &p= ImplParams()):
// b+c=scale, b-c = 0 <=> b =c = scale/2
PartialFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((this->Ls&0x1)==1); // Odd Ls required
// b+c=scale, b-c = 0 <=> b =c = scale/2
PartialFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((this->Ls&0x1)==1); // Odd Ls required
int nrational=this->Ls;// Odd rational order
RealD eps = lo/hi;
int nrational=this->Ls;// Odd rational order
RealD eps = lo/hi;
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0);
this->SetCoefficientsZolotarev(hi,zdata);
Approx::zolotarev_free(zdata);
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0);
this->SetCoefficientsZolotarev(hi,zdata);
Approx::zolotarev_free(zdata);
}
};
}
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,459 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/PartialFractionFermion5D.h>
namespace Grid {
namespace QCD {
template<class Impl>
void PartialFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
// this does both dag and undag but is trivial; make a common helper routing
int sign = 1;
int Ls = this->Ls;
this->DhopDir(psi,chi,dir,disp);
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(chi,-scale,chi,0.0,chi,s,s);
ag5xpby_ssp(chi, scale,chi,0.0,chi,s+1,s+1);
}
ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::Meooe_internal(const FermionField &psi, FermionField &chi,int dag)
{
int Ls = this->Ls;
int sign = dag ? (-1) : 1;
if ( psi.checkerboard == Odd ) {
this->DhopEO(psi,chi,DaggerNo);
} else {
this->DhopOE(psi,chi,DaggerNo);
}
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(chi,-scale,chi,0.0,chi,s,s);
ag5xpby_ssp(chi, scale,chi,0.0,chi,s+1,s+1);
}
ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::Mooee_internal(const FermionField &psi, FermionField &chi,int dag)
{
// again dag and undag are trivially related
int sign = dag ? (-1) : 1;
int Ls = this->Ls;
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
RealD pp = p[nblock-1-b];
RealD qq = q[nblock-1-b];
// Do each 2x2 block aligned at s and multiplies Dw site diagonal by G5 so Hw
ag5xpby_ssp(chi,-dw_diag*scale,psi,amax*sqrt(qq)*scale,psi, s ,s+1);
ag5xpby_ssp(chi, dw_diag*scale,psi,amax*sqrt(qq)*scale,psi, s+1,s);
axpby_ssp (chi, 1.0, chi,sqrt(amax*pp)*scale*sign,psi,s+1,Ls-1);
}
{
RealD R=(1+mass)/(1-mass);
//R g5 psi[Ls-1] + p[0] H
ag5xpbg5y_ssp(chi,R*scale,psi,p[nblock]*scale*dw_diag/amax,psi,Ls-1,Ls-1);
for(int b=0;b<nblock;b++){
int s = 2*b+1;
RealD pp = p[nblock-1-b];
axpby_ssp(chi,1.0,chi,-sqrt(amax*pp)*scale*sign,psi,Ls-1,s);
}
}
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeInv_internal(const FermionField &psi, FermionField &chi,int dag)
{
int sign = dag ? (-1) : 1;
int Ls = this->Ls;
FermionField tmp(psi._grid);
///////////////////////////////////////////////////////////////////////////////////////
//Linv
///////////////////////////////////////////////////////////////////////////////////////
int nblock=(Ls-1)/2;
axpy(chi,0.0,psi,psi); // Identity piece
for(int b=0;b<nblock;b++){
int s = 2*b;
RealD pp = p[nblock-1-b];
RealD qq = q[nblock-1-b];
RealD coeff1=sign*sqrt(amax*amax*amax*pp*qq) / ( dw_diag*dw_diag + amax*amax* qq);
RealD coeff2=sign*sqrt(amax*pp)*dw_diag / ( dw_diag*dw_diag + amax*amax* qq); // Implicit g5 here
axpby_ssp (chi,1.0,chi,coeff1,psi,Ls-1,s);
axpbg5y_ssp(chi,1.0,chi,coeff2,psi,Ls-1,s+1);
}
///////////////////////////////////////////////////////////////////////////////////////
//Dinv (note D isn't really diagonal -- just diagonal enough that we can still invert)
// Compute Seeinv (coeff of gamma5)
///////////////////////////////////////////////////////////////////////////////////////
RealD R=(1+mass)/(1-mass);
RealD Seeinv = R + p[nblock]*dw_diag/amax;
for(int b=0;b<nblock;b++){
Seeinv += p[nblock-1-b]*dw_diag/amax / ( dw_diag*dw_diag/amax/amax + q[nblock-1-b]);
}
Seeinv = 1.0/Seeinv;
for(int b=0;b<nblock;b++){
int s = 2*b;
RealD pp = p[nblock-1-b];
RealD qq = q[nblock-1-b];
RealD coeff1=dw_diag / ( dw_diag*dw_diag + amax*amax* qq); // Implicit g5 here
RealD coeff2=amax*sqrt(qq) / ( dw_diag*dw_diag + amax*amax* qq);
ag5xpby_ssp (tmp,-coeff1,chi,coeff2,chi,s,s+1);
ag5xpby_ssp (tmp, coeff1,chi,coeff2,chi,s+1,s);
}
ag5xpby_ssp (tmp, Seeinv,chi,0.0,chi,Ls-1,Ls-1);
///////////////////////////////////////////////////////////////////////////////////////
// Uinv
///////////////////////////////////////////////////////////////////////////////////////
for(int b=0;b<nblock;b++){
int s = 2*b;
RealD pp = p[nblock-1-b];
RealD qq = q[nblock-1-b];
RealD coeff1=-sign*sqrt(amax*amax*amax*pp*qq) / ( dw_diag*dw_diag + amax*amax* qq);
RealD coeff2=-sign*sqrt(amax*pp)*dw_diag / ( dw_diag*dw_diag + amax*amax* qq); // Implicit g5 here
axpby_ssp (chi,1.0/scale,tmp,coeff1/scale,tmp,s,Ls-1);
axpbg5y_ssp(chi,1.0/scale,tmp,coeff2/scale,tmp,s+1,Ls-1);
}
axpby_ssp (chi, 1.0/scale,tmp,0.0,tmp,Ls-1,Ls-1);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::M_internal(const FermionField &psi, FermionField &chi,int dag)
{
FermionField D(psi._grid);
int Ls = this->Ls;
int sign = dag ? (-1) : 1;
// For partial frac Hw case (b5=c5=1) chroma quirkily computes
//
// Conventions for partfrac appear to be a mess.
// Tony's Nara lectures have
//
// BlockDiag( H/p_i 1 | 1 )
// ( 1 p_i H / q_i^2 | 0 )
// ---------------------------------
// ( -1 0 | R +p0 H )
//
//Chroma ( -2H 2sqrt(q_i) | 0 )
// (2 sqrt(q_i) 2H | 2 sqrt(p_i) )
// ---------------------------------
// ( 0 -2 sqrt(p_i) | 2 R gamma_5 + p0 2H
//
// Edwards/Joo/Kennedy/Wenger
//
// Here, the "beta's" selected by chroma to scale the unphysical bulk constraint fields
// incorporate the approx scale factor. This is obtained by propagating the
// scale on "H" out to the off diagonal elements as follows:
//
// BlockDiag( H/p_i 1 | 1 )
// ( 1 p_i H / q_i^2 | 0 )
// ---------------------------------
// ( -1 0 | R + p_0 H )
//
// becomes:
// BlockDiag( H/ sp_i 1 | 1 )
// ( 1 sp_i H / s^2q_i^2 | 0 )
// ---------------------------------
// ( -1 0 | R + p_0/s H )
//
//
// This is implemented in Chroma by
// p0' = p0/approxMax
// p_i' = p_i*approxMax
// q_i' = q_i*approxMax*approxMax
//
// After the equivalence transform is applied the matrix becomes
//
//Chroma ( -2H sqrt(q'_i) | 0 )
// (sqrt(q'_i) 2H | sqrt(p'_i) )
// ---------------------------------
// ( 0 -sqrt(p'_i) | 2 R gamma_5 + p'0 2H
//
// = ( -2H sqrt(q_i)amax | 0 )
// (sqrt(q_i)amax 2H | sqrt(p_i*amax) )
// ---------------------------------
// ( 0 -sqrt(p_i)*amax | 2 R gamma_5 + p0/amax 2H
//
this->DW(psi,D,DaggerNo);
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
double pp = p[nblock-1-b];
double qq = q[nblock-1-b];
// Do each 2x2 block aligned at s and
ag5xpby_ssp(chi,-1.0*scale,D,amax*sqrt(qq)*scale,psi, s ,s+1); // Multiplies Dw by G5 so Hw
ag5xpby_ssp(chi, 1.0*scale,D,amax*sqrt(qq)*scale,psi, s+1,s);
// Pick up last column
axpby_ssp (chi, 1.0, chi,sqrt(amax*pp)*scale*sign,psi,s+1,Ls-1);
}
{
double R=(1+this->mass)/(1-this->mass);
//R g5 psi[Ls] + p[0] H
ag5xpbg5y_ssp(chi,R*scale,psi,p[nblock]*scale/amax,D,Ls-1,Ls-1);
for(int b=0;b<nblock;b++){
int s = 2*b+1;
double pp = p[nblock-1-b];
axpby_ssp(chi,1.0,chi,-sqrt(amax*pp)*scale*sign,psi,Ls-1,s);
}
}
}
template<class Impl>
RealD PartialFractionFermion5D<Impl>::M (const FermionField &in, FermionField &out)
{
M_internal(in,out,DaggerNo);
return norm2(out);
}
template<class Impl>
RealD PartialFractionFermion5D<Impl>::Mdag (const FermionField &in, FermionField &out)
{
M_internal(in,out,DaggerYes);
return norm2(out);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::Meooe (const FermionField &in, FermionField &out)
{
Meooe_internal(in,out,DaggerNo);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MeooeDag (const FermionField &in, FermionField &out)
{
Meooe_internal(in,out,DaggerYes);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::Mooee (const FermionField &in, FermionField &out)
{
Mooee_internal(in,out,DaggerNo);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeDag (const FermionField &in, FermionField &out)
{
Mooee_internal(in,out,DaggerYes);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeInv (const FermionField &in, FermionField &out)
{
MooeeInv_internal(in,out,DaggerNo);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeInvDag (const FermionField &in, FermionField &out)
{
MooeeInv_internal(in,out,DaggerYes);
}
// force terms; five routines; default to Dhop on diagonal
template<class Impl>
void PartialFractionFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V._grid);
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(D,-scale,U,0.0,U,s,s);
ag5xpby_ssp(D, scale,U,0.0,U,s+1,s+1);
}
ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
this->DhopDeriv(mat,D,V,DaggerNo);
};
template<class Impl>
void PartialFractionFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V._grid);
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(D,-scale,U,0.0,U,s,s);
ag5xpby_ssp(D, scale,U,0.0,U,s+1,s+1);
}
ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
this->DhopDerivOE(mat,D,V,DaggerNo);
};
template<class Impl>
void PartialFractionFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V._grid);
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(D,-scale,U,0.0,U,s,s);
ag5xpby_ssp(D, scale,U,0.0,U,s+1,s+1);
}
ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
this->DhopDerivEO(mat,D,V,DaggerNo);
};
template<class Impl>
void PartialFractionFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale){
SetCoefficientsZolotarev(1.0/scale,zdata);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata){
// check on degree matching
// std::cout<<GridLogMessage << Ls << " Ls"<<std::endl;
// std::cout<<GridLogMessage << zdata->n << " - n"<<std::endl;
// std::cout<<GridLogMessage << zdata->da << " -da "<<std::endl;
// std::cout<<GridLogMessage << zdata->db << " -db"<<std::endl;
// std::cout<<GridLogMessage << zdata->dn << " -dn"<<std::endl;
// std::cout<<GridLogMessage << zdata->dd << " -dd"<<std::endl;
int Ls = this->Ls;
assert(Ls == (2*zdata->da -1) );
// Part frac
// RealD R;
R=(1+mass)/(1-mass);
dw_diag = (4.0-this->M5);
// std::vector<RealD> p;
// std::vector<RealD> q;
p.resize(zdata->da);
q.resize(zdata->dd);
for(int n=0;n<zdata->da;n++){
p[n] = zdata -> alpha[n];
}
for(int n=0;n<zdata->dd;n++){
q[n] = -zdata -> ap[n];
}
scale= part_frac_chroma_convention ? 2.0 : 1.0; // Chroma conventions annoy me
amax=zolo_hi;
}
template<class Impl>
void PartialFractionFermion5D<Impl>::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d)
{
int Ls = this->Ls;
conformable(solution5d._grid,this->FermionGrid());
conformable(exported4d._grid,this->GaugeGrid());
ExtractSlice(exported4d, solution5d, Ls-1, Ls-1);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d)
{
int Ls = this->Ls;
conformable(imported5d._grid,this->FermionGrid());
conformable(input4d._grid ,this->GaugeGrid());
FermionField tmp(this->FermionGrid());
tmp=zero;
InsertSlice(input4d, tmp, Ls-1, Ls-1);
tmp=Gamma(Gamma::Algebra::Gamma5)*tmp;
this->Dminus(tmp,imported5d);
}
// Constructors
template<class Impl>
PartialFractionFermion5D<Impl>::PartialFractionFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5,
const ImplParams &p) :
WilsonFermion5D<Impl>(_Umu,
FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid,M5,p),
mass(_mass)
{
int Ls = this->Ls;
assert((Ls&0x1)==1); // Odd Ls required
int nrational=Ls-1;
Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);
// NB: chroma uses a cast to "float" for the zolotarev range(!?).
// this creates a real difference in the operator which I do not like but we can replicate here
// to demonstrate compatibility
// RealD eps = (zolo_lo / zolo_hi);
// zdata = bfm_zolotarev(eps,nrational,0);
SetCoefficientsTanh(zdata,1.0);
Approx::zolotarev_free(zdata);
}
FermOpTemplateInstantiate(PartialFractionFermion5D);
}
}

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,51 +24,49 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_PARTIAL_FRACTION_H
#define GRID_QCD_PARTIAL_FRACTION_H
#include <Grid/qcd/action/fermion/WilsonFermion5D.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class PartialFractionFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
template<class Impl>
class PartialFractionFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
const int part_frac_chroma_convention=1;
const int part_frac_chroma_convention=1;
void Meooe_internal(const FermionField &in, FermionField &out,int dag);
void Mooee_internal(const FermionField &in, FermionField &out,int dag);
void MooeeInv_internal(const FermionField &in, FermionField &out,int dag);
void M_internal(const FermionField &in, FermionField &out,int dag);
void Meooe_internal(const FermionField &in, FermionField &out,int dag);
void Mooee_internal(const FermionField &in, FermionField &out,int dag);
void MooeeInv_internal(const FermionField &in, FermionField &out,int dag);
void M_internal(const FermionField &in, FermionField &out,int dag);
// override multiply
virtual RealD M (const FermionField &in, FermionField &out);
virtual RealD Mdag (const FermionField &in, FermionField &out);
// override multiply
virtual RealD M (const FermionField &in, FermionField &out);
virtual RealD Mdag (const FermionField &in, FermionField &out);
// half checkerboard operaions
virtual void Meooe (const FermionField &in, FermionField &out);
virtual void MeooeDag (const FermionField &in, FermionField &out);
virtual void Mooee (const FermionField &in, FermionField &out);
virtual void MooeeDag (const FermionField &in, FermionField &out);
virtual void MooeeInv (const FermionField &in, FermionField &out);
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
// half checkerboard operaions
virtual void Meooe (const FermionField &in, FermionField &out);
virtual void MeooeDag (const FermionField &in, FermionField &out);
virtual void Mooee (const FermionField &in, FermionField &out);
virtual void MooeeDag (const FermionField &in, FermionField &out);
virtual void MooeeInv (const FermionField &in, FermionField &out);
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void Instantiatable(void) =0; // ensure no make-eee
virtual void Instantiatable(void) =0; // ensure no make-eee
// Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
// Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
///////////////////////////////////////////////////////////////
// Physical surface field utilities
@ -76,32 +74,30 @@ namespace Grid {
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d);
// Constructors
PartialFractionFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5,const ImplParams &p= ImplParams());
// Constructors
PartialFractionFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5,const ImplParams &p= ImplParams());
protected:
protected:
virtual void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale);
virtual void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata);
virtual void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale);
virtual void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata);
// Part frac
RealD mass;
RealD dw_diag;
RealD R;
RealD amax;
RealD scale;
std::vector<double> p;
std::vector<double> q;
// Part frac
RealD mass;
RealD dw_diag;
RealD R;
RealD amax;
RealD scale;
Vector<double> p;
Vector<double> q;
};
};
}
}
NAMESPACE_END(Grid);
#endif

View File

@ -27,8 +27,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template<class Field>
class PauliVillarsSolverUnprec
@ -90,6 +89,4 @@ class PauliVillarsSolverFourierAccel
};
};
}
}
NAMESPACE_END(Grid);

View File

@ -27,8 +27,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template<class Field,class PVinverter> class Reconstruct5DfromPhysical {
private:
@ -131,5 +130,5 @@ template<class Field,class PVinverter> class Reconstruct5DfromPhysical {
}
};
}
}
NAMESPACE_END(Grid);

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,46 +24,43 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_SCALED_SHAMIR_FERMION_H
#define GRID_QCD_SCALED_SHAMIR_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class ScaledShamirFermion : public MobiusFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
template<class Impl>
class ScaledShamirFermion : public MobiusFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
// Constructors
ScaledShamirFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
// RealD scale):
RealD scale,const ImplParams &p= ImplParams()) :
// Constructors
ScaledShamirFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
// RealD scale):
RealD scale,const ImplParams &p= ImplParams()) :
// b+c=scale, b-c = 1 <=> 2b = scale+1; 2c = scale-1
MobiusFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0),p)
// FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0))
{
}
};
// b+c=scale, b-c = 1 <=> 2b = scale+1; 2c = scale-1
MobiusFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0),p)
// FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0))
{
}
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,40 +24,40 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef _SCHUR_DIAG_TWO_KAPPA_H
#define _SCHUR_DIAG_TWO_KAPPA_H
*************************************************************************************/
/* END LEGAL */
#pragma once
namespace Grid {
NAMESPACE_BEGIN(Grid);
// This is specific to (Z)mobius fermions
template<class Matrix, class Field>
class KappaSimilarityTransform {
public:
INHERIT_IMPL_TYPES(Matrix);
std::vector<Coeff_t> kappa, kappaDag, kappaInv, kappaInvDag;
// This is specific to (Z)mobius fermions
template<class Matrix, class Field>
class KappaSimilarityTransform {
public:
INHERIT_IMPL_TYPES(Matrix);
Vector<Coeff_t> kappa, kappaDag, kappaInv, kappaInvDag;
KappaSimilarityTransform (Matrix &zmob) {
for (int i=0;i<(int)zmob.bs.size();i++) {
Coeff_t k = 1.0 / ( 2.0 * (zmob.bs[i] *(4 - zmob.M5) + 1.0) );
kappa.push_back( k );
kappaDag.push_back( conj(k) );
kappaInv.push_back( 1.0 / k );
kappaInvDag.push_back( 1.0 / conj(k) );
}
KappaSimilarityTransform (Matrix &zmob) {
for (int i=0;i<(int)zmob.bs.size();i++) {
Coeff_t k = 1.0 / ( 2.0 * (zmob.bs[i] *(4 - zmob.M5) + 1.0) );
kappa.push_back( k );
kappaDag.push_back( conj(k) );
kappaInv.push_back( 1.0 / k );
kappaInvDag.push_back( 1.0 / conj(k) );
}
}
template<typename vobj>
void sscale(const Lattice<vobj>& in, Lattice<vobj>& out, Coeff_t* s) {
GridBase *grid=out._grid;
out.checkerboard = in.checkerboard;
void sscale(const Lattice<vobj>& in, Lattice<vobj>& out, Coeff_t* s) {
GridBase *grid=out.Grid();
out.Checkerboard() = in.Checkerboard();
assert(grid->_simd_layout[0] == 1); // should be fine for ZMobius for now
int Ls = grid->_rdimensions[0];
parallel_for(int ss=0;ss<grid->oSites();ss++){
vobj tmp = s[ss % Ls]*in._odata[ss];
vstream(out._odata[ss],tmp);
}
thread_for(ss, grid->oSites(),
{
vobj tmp = s[ss % Ls]*in[ss];
vstream(out[ss],tmp);
});
}
RealD sscale_norm(const Field& in, Field& out, Coeff_t* s) {
@ -70,33 +70,33 @@ namespace Grid {
virtual RealD MInv (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInv[0]);}
virtual RealD MInvDag (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInvDag[0]);}
};
};
template<class Matrix,class Field>
class SchurDiagTwoKappaOperator : public SchurOperatorBase<Field> {
public:
KappaSimilarityTransform<Matrix, Field> _S;
SchurDiagTwoOperator<Matrix, Field> _Mat;
template<class Matrix,class Field>
class SchurDiagTwoKappaOperator : public SchurOperatorBase<Field> {
public:
KappaSimilarityTransform<Matrix, Field> _S;
SchurDiagTwoOperator<Matrix, Field> _Mat;
SchurDiagTwoKappaOperator (Matrix &Mat): _S(Mat), _Mat(Mat) {};
SchurDiagTwoKappaOperator (Matrix &Mat): _S(Mat), _Mat(Mat) {};
virtual RealD Mpc (const Field &in, Field &out) {
Field tmp(in._grid);
virtual RealD Mpc (const Field &in, Field &out) {
Field tmp(in.Grid());
_S.MInv(in,out);
_Mat.Mpc(out,tmp);
return _S.M(tmp,out);
_S.MInv(in,out);
_Mat.Mpc(out,tmp);
return _S.M(tmp,out);
}
virtual RealD MpcDag (const Field &in, Field &out){
Field tmp(in._grid);
}
virtual RealD MpcDag (const Field &in, Field &out){
Field tmp(in.Grid());
_S.MDag(in,out);
_Mat.MpcDag(out,tmp);
return _S.MInvDag(tmp,out);
}
};
_S.MDag(in,out);
_Mat.MpcDag(out,tmp);
return _S.MInvDag(tmp,out);
}
};
NAMESPACE_END(Grid);
}
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,46 +24,43 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H
#define GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class ShamirZolotarevFermion : public MobiusZolotarevFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
template<class Impl>
class ShamirZolotarevFermion : public MobiusZolotarevFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
// Constructors
// Constructors
ShamirZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo, RealD hi,const ImplParams &p= ImplParams()) :
ShamirZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo, RealD hi,const ImplParams &p= ImplParams()) :
// b+c = 1; b-c = 1 => b=1, c=0
MobiusZolotarevFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,1.0,0.0,lo,hi,p)
// b+c = 1; b-c = 1 => b=1, c=0
MobiusZolotarevFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,1.0,0.0,lo,hi,p)
{}
{}
};
};
}
}
NAMESPACE_END(Grid);
#endif

View File

@ -0,0 +1,175 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
template <class S, class Representation = FundamentalRepresentation >
class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > >
{
public:
typedef RealD _Coeff_t ;
static const int Dimension = Representation::Dimension;
static const bool isFundamental = Representation::isFundamental;
static const bool LsVectorised=false;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
//Necessary?
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
typedef _Coeff_t Coeff_t;
INHERIT_GIMPL_TYPES(Gimpl);
template <typename vtype> using iImplSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >;
template <typename vtype> using iImplHalfSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
template <typename vtype> using iImplPropagator = iScalar<iScalar<iMatrix<vtype, Dimension> > >;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef iImplPropagator<Simd> SitePropagator;
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef Lattice<SitePropagator> PropagatorField;
typedef StaggeredImplParams ImplParams;
typedef SimpleCompressor<SiteSpinor> Compressor;
typedef CartesianStencil<SiteSpinor, SiteSpinor, ImplParams> StencilImpl;
typedef typename StencilImpl::View_type StencilView;
ImplParams Params;
StaggeredImpl(const ImplParams &p = ImplParams()) : Params(p){};
static accelerator_inline void multLink(SiteSpinor &phi,
const SiteDoubledGaugeField &U,
const SiteSpinor &chi,
int mu)
{
mult(&phi(), &U(mu), &chi());
}
static accelerator_inline void multLinkAdd(SiteSpinor &phi,
const SiteDoubledGaugeField &U,
const SiteSpinor &chi,
int mu)
{
mac(&phi(), &U(mu), &chi());
}
template <class ref>
static accelerator_inline void loadLinkElement(Simd &reg, ref &memory)
{
reg = memory;
}
inline void InsertGaugeField(DoubledGaugeField &U_ds,
const GaugeLinkField &U,int mu)
{
PokeIndex<LorentzIndex>(U_ds, U, mu);
}
inline void DoubleStore(GridBase *GaugeGrid,
DoubledGaugeField &UUUds, // for Naik term
DoubledGaugeField &Uds,
const GaugeField &Uthin,
const GaugeField &Ufat) {
conformable(Uds.Grid(), GaugeGrid);
conformable(Uthin.Grid(), GaugeGrid);
conformable(Ufat.Grid(), GaugeGrid);
GaugeLinkField U(GaugeGrid);
GaugeLinkField UU(GaugeGrid);
GaugeLinkField UUU(GaugeGrid);
GaugeLinkField Udag(GaugeGrid);
GaugeLinkField UUUdag(GaugeGrid);
for (int mu = 0; mu < Nd; mu++) {
// Staggered Phase.
Lattice<iScalar<vInteger> > coor(GaugeGrid);
Lattice<iScalar<vInteger> > x(GaugeGrid); LatticeCoordinate(x,0);
Lattice<iScalar<vInteger> > y(GaugeGrid); LatticeCoordinate(y,1);
Lattice<iScalar<vInteger> > z(GaugeGrid); LatticeCoordinate(z,2);
Lattice<iScalar<vInteger> > t(GaugeGrid); LatticeCoordinate(t,3);
Lattice<iScalar<vInteger> > lin_z(GaugeGrid); lin_z=x+y;
Lattice<iScalar<vInteger> > lin_t(GaugeGrid); lin_t=x+y+z;
ComplexField phases(GaugeGrid); phases=1.0;
if ( mu == 1 ) phases = where( mod(x ,2)==(Integer)0, phases,-phases);
if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases);
if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases);
// 1 hop based on fat links
U = PeekIndex<LorentzIndex>(Ufat, mu);
Udag = adj( Cshift(U, mu, -1));
U = U *phases;
Udag = Udag *phases;
InsertGaugeField(Uds,U,mu);
InsertGaugeField(Uds,Udag,mu+4);
// PokeIndex<LorentzIndex>(Uds, U, mu);
// PokeIndex<LorentzIndex>(Uds, Udag, mu + 4);
// 3 hop based on thin links. Crazy huh ?
U = PeekIndex<LorentzIndex>(Uthin, mu);
UU = Gimpl::CovShiftForward(U,mu,U);
UUU= Gimpl::CovShiftForward(U,mu,UU);
UUUdag = adj( Cshift(UUU, mu, -3));
UUU = UUU *phases;
UUUdag = UUUdag *phases;
InsertGaugeField(UUUds,UUU,mu);
InsertGaugeField(UUUds,UUUdag,mu+4);
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
GaugeLinkField link(mat.Grid());
link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
PokeIndex<LorentzIndex>(mat,link,mu);
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
assert (0);
// Must never hit
}
};
typedef StaggeredImpl<vComplex, FundamentalRepresentation > StaggeredImplR; // Real.. whichever prec
typedef StaggeredImpl<vComplexF, FundamentalRepresentation > StaggeredImplF; // Float
typedef StaggeredImpl<vComplexD, FundamentalRepresentation > StaggeredImplD; // Double
NAMESPACE_END(Grid);

View File

@ -26,11 +26,9 @@ See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_STAGGERED_KERNELS_H
#define GRID_QCD_STAGGERED_KERNELS_H
#pragma once
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid)
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Helper routines that implement Staggered stencil for a single site.
@ -51,72 +49,69 @@ template<class Impl> class StaggeredKernels : public FermionOperator<Impl> , pub
public:
void DhopDir(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out, int dir,int disp);
void DhopDirKernel(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp);
///////////////////////////////////////////////////////////////////////////////////////
// Generic Nc kernels
///////////////////////////////////////////////////////////////////////////////////////
void DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag);
const FermionFieldView &in, FermionFieldView &out,int dag);
void DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag);
const FermionFieldView &in, FermionFieldView &out,int dag);
void DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag);
const FermionFieldView &in, FermionFieldView &out,int dag);
///////////////////////////////////////////////////////////////////////////////////////
// Nc=3 specific kernels
///////////////////////////////////////////////////////////////////////////////////////
void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,DoubledGaugeField &UUU,
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag);
const FermionFieldView &in, FermionFieldView &out,int dag);
void DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,DoubledGaugeField &UUU,
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag);
const FermionFieldView &in, FermionFieldView &out,int dag);
void DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,DoubledGaugeField &UUU,
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag);
const FermionFieldView &in, FermionFieldView &out,int dag);
///////////////////////////////////////////////////////////////////////////////////////
// Asm Nc=3 specific kernels
///////////////////////////////////////////////////////////////////////////////////////
void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,DoubledGaugeField &UUU,
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag);
const FermionFieldView &in, FermionFieldView &out,int dag);
///////////////////////////////////////////////////////////////////////////////////////////////////
// Generic interface; fan out to right routine
///////////////////////////////////////////////////////////////////////////////////////////////////
void DhopSite(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out, int interior=1,int exterior=1);
const FermionFieldView &in, FermionFieldView &out, int interior=1,int exterior=1);
void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out, int interior=1,int exterior=1);
const FermionFieldView &in, FermionFieldView &out, int interior=1,int exterior=1);
void DhopSite(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU,
const FermionField &in, FermionField &out, int dag, int interior,int exterior);
const FermionFieldView &in, FermionFieldView &out, int dag, int interior,int exterior);
public:
StaggeredKernels(const ImplParams &p = ImplParams());
};
}}
#endif
NAMESPACE_END(Grid);

View File

@ -0,0 +1,203 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
template <class S, class Representation = FundamentalRepresentation >
class StaggeredVec5dImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
public:
static const int Dimension = Representation::Dimension;
static const bool isFundamental = Representation::isFundamental;
static const bool LsVectorised=true;
typedef RealD Coeff_t ;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
//Necessary?
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
INHERIT_GIMPL_TYPES(Gimpl);
template <typename vtype> using iImplSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >;
template <typename vtype> using iImplHalfSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nd>;
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Dimension> > >;
template <typename vtype> using iImplPropagator = iScalar<iScalar<iMatrix<vtype, Dimension> > >;
// Make the doubled gauge field a *scalar*
typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar
typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar
typedef iImplGaugeLink<typename Simd::scalar_type> SiteScalarGaugeLink; // scalar
typedef iImplPropagator<Simd> SitePropagator;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef Lattice<SitePropagator> PropagatorField;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef Lattice<SiteSpinor> FermionField;
typedef StaggeredImplParams ImplParams;
typedef SimpleCompressor<SiteSpinor> Compressor;
typedef CartesianStencil<SiteSpinor, SiteSpinor, ImplParams> StencilImpl;
typedef typename StencilImpl::View_type StencilView;
ImplParams Params;
StaggeredVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){};
template <class ref>
static accelerator_inline void loadLinkElement(Simd &reg, ref &memory)
{
vsplat(reg, memory);
}
static accelerator_inline void multLink(SiteHalfSpinor &phi,
const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi,
int mu)
{
SiteGaugeLink UU;
for (int i = 0; i < Dimension; i++) {
for (int j = 0; j < Dimension; j++) {
vsplat(UU()()(i, j), U(mu)()(i, j));
}
}
mult(&phi(), &UU(), &chi());
}
static accelerator_inline void multLinkAdd(SiteHalfSpinor &phi,
const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi,
int mu)
{
SiteGaugeLink UU;
for (int i = 0; i < Dimension; i++) {
for (int j = 0; j < Dimension; j++) {
vsplat(UU()()(i, j), U(mu)()(i, j));
}
}
mac(&phi(), &UU(), &chi());
}
inline void InsertGaugeField(DoubledGaugeField &U_ds,const GaugeLinkField &U,int mu)
{
GridBase *GaugeGrid = U_ds.Grid();
thread_for(lidx, GaugeGrid->lSites(),{
SiteScalarGaugeLink ScalarU;
SiteDoubledGaugeField ScalarUds;
Coordinate lcoor;
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
peekLocalSite(ScalarUds, U_ds, lcoor);
peekLocalSite(ScalarU, U, lcoor);
ScalarUds(mu) = ScalarU();
});
}
inline void DoubleStore(GridBase *GaugeGrid,
DoubledGaugeField &UUUds, // for Naik term
DoubledGaugeField &Uds,
const GaugeField &Uthin,
const GaugeField &Ufat)
{
GridBase * InputGrid = Uthin.Grid();
conformable(InputGrid,Ufat.Grid());
GaugeLinkField U(InputGrid);
GaugeLinkField UU(InputGrid);
GaugeLinkField UUU(InputGrid);
GaugeLinkField Udag(InputGrid);
GaugeLinkField UUUdag(InputGrid);
for (int mu = 0; mu < Nd; mu++) {
// Staggered Phase.
Lattice<iScalar<vInteger> > coor(InputGrid);
Lattice<iScalar<vInteger> > x(InputGrid); LatticeCoordinate(x,0);
Lattice<iScalar<vInteger> > y(InputGrid); LatticeCoordinate(y,1);
Lattice<iScalar<vInteger> > z(InputGrid); LatticeCoordinate(z,2);
Lattice<iScalar<vInteger> > t(InputGrid); LatticeCoordinate(t,3);
Lattice<iScalar<vInteger> > lin_z(InputGrid); lin_z=x+y;
Lattice<iScalar<vInteger> > lin_t(InputGrid); lin_t=x+y+z;
ComplexField phases(InputGrid); phases=1.0;
if ( mu == 1 ) phases = where( mod(x ,2)==(Integer)0, phases,-phases);
if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases);
if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases);
// 1 hop based on fat links
U = PeekIndex<LorentzIndex>(Ufat, mu);
Udag = adj( Cshift(U, mu, -1));
U = U *phases;
Udag = Udag *phases;
InsertGaugeField(Uds,U,mu);
InsertGaugeField(Uds,Udag,mu+4);
// 3 hop based on thin links. Crazy huh ?
U = PeekIndex<LorentzIndex>(Uthin, mu);
UU = Gimpl::CovShiftForward(U,mu,U);
UUU= Gimpl::CovShiftForward(U,mu,UU);
UUUdag = adj( Cshift(UUU, mu, -3));
UUU = UUU *phases;
UUUdag = UUUdag *phases;
InsertGaugeField(UUUds,UUU,mu);
InsertGaugeField(UUUds,UUUdag,mu+4);
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
assert(0);
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
assert (0);
}
};
typedef StaggeredVec5dImpl<vComplex, FundamentalRepresentation > StaggeredVec5dImplR; // Real.. whichever prec
typedef StaggeredVec5dImpl<vComplexF, FundamentalRepresentation > StaggeredVec5dImplF; // Float
typedef StaggeredVec5dImpl<vComplexD, FundamentalRepresentation > StaggeredVec5dImplD; // Double
NAMESPACE_END(Grid);

View File

@ -27,15 +27,11 @@
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_WILSON_CLOVER_FERMION_H
#define GRID_QCD_WILSON_CLOVER_FERMION_H
#pragma once
#include <Grid/Grid.h>
namespace Grid
{
namespace QCD
{
NAMESPACE_BEGIN(Grid);
///////////////////////////////////////////////////////////////////
// Wilson Clover
@ -131,22 +127,22 @@ public:
// Derivative parts unpreconditioned pseudofermions
void MDeriv(GaugeField &force, const FermionField &X, const FermionField &Y, int dag)
{
conformable(X._grid, Y._grid);
conformable(X._grid, force._grid);
GaugeLinkField force_mu(force._grid), lambda(force._grid);
GaugeField clover_force(force._grid);
PropagatorField Lambda(force._grid);
conformable(X.Grid(), Y.Grid());
conformable(X.Grid(), force.Grid());
GaugeLinkField force_mu(force.Grid()), lambda(force.Grid());
GaugeField clover_force(force.Grid());
PropagatorField Lambda(force.Grid());
// Guido: Here we are hitting some performance issues:
// need to extract the components of the DoubledGaugeField
// for each call
// Possible solution
// Create a vector object to store them? (cons: wasting space)
std::vector<GaugeLinkField> U(Nd, this->Umu._grid);
std::vector<GaugeLinkField> U(Nd, this->Umu.Grid());
Impl::extractLinkField(U, this->Umu);
force = zero;
force = Zero();
// Derivative of the Wilson hopping term
this->DhopDeriv(force, X, Y, dag);
@ -179,10 +175,10 @@ public:
*/
int count = 0;
clover_force = zero;
clover_force = Zero();
for (int mu = 0; mu < 4; mu++)
{
force_mu = zero;
force_mu = Zero();
for (int nu = 0; nu < 4; nu++)
{
if (mu == nu)
@ -212,8 +208,8 @@ public:
// Computing C_{\mu \nu}(x) as in Eq.(B.39) in Zbigniew Sroczynski's PhD thesis
GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu)
{
conformable(lambda._grid, U[0]._grid);
GaugeLinkField out(lambda._grid), tmp(lambda._grid);
conformable(lambda.Grid(), U[0].Grid());
GaugeLinkField out(lambda.Grid()), tmp(lambda.Grid());
// insertion in upper staple
// please check redundancy of shift operations
@ -266,102 +262,113 @@ private:
// using the DeGrand-Rossi basis for the gamma matrices
CloverFieldType fillCloverYZ(const GaugeLinkField &F)
{
CloverFieldType T(F._grid);
T = zero;
PARALLEL_FOR_LOOP
for (int i = 0; i < CloverTerm._grid->oSites(); i++)
CloverFieldType T(F.Grid());
T = Zero();
auto T_v = T.View();
auto F_v = F.View();
thread_for(i, CloverTerm.Grid()->oSites(),
{
T._odata[i]()(0, 1) = timesMinusI(F._odata[i]()());
T._odata[i]()(1, 0) = timesMinusI(F._odata[i]()());
T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()());
T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()());
}
T_v[i]()(0, 1) = timesMinusI(F_v[i]()());
T_v[i]()(1, 0) = timesMinusI(F_v[i]()());
T_v[i]()(2, 3) = timesMinusI(F_v[i]()());
T_v[i]()(3, 2) = timesMinusI(F_v[i]()());
});
return T;
}
CloverFieldType fillCloverXZ(const GaugeLinkField &F)
{
CloverFieldType T(F._grid);
T = zero;
PARALLEL_FOR_LOOP
for (int i = 0; i < CloverTerm._grid->oSites(); i++)
CloverFieldType T(F.Grid());
T = Zero();
auto T_v = T.View();
auto F_v = F.View();
thread_for(i, CloverTerm.Grid()->oSites(),
{
T._odata[i]()(0, 1) = -F._odata[i]()();
T._odata[i]()(1, 0) = F._odata[i]()();
T._odata[i]()(2, 3) = -F._odata[i]()();
T._odata[i]()(3, 2) = F._odata[i]()();
}
T_v[i]()(0, 1) = -F_v[i]()();
T_v[i]()(1, 0) = F_v[i]()();
T_v[i]()(2, 3) = -F_v[i]()();
T_v[i]()(3, 2) = F_v[i]()();
});
return T;
}
CloverFieldType fillCloverXY(const GaugeLinkField &F)
{
CloverFieldType T(F._grid);
T = zero;
PARALLEL_FOR_LOOP
for (int i = 0; i < CloverTerm._grid->oSites(); i++)
{
CloverFieldType T(F.Grid());
T = Zero();
T._odata[i]()(0, 0) = timesMinusI(F._odata[i]()());
T._odata[i]()(1, 1) = timesI(F._odata[i]()());
T._odata[i]()(2, 2) = timesMinusI(F._odata[i]()());
T._odata[i]()(3, 3) = timesI(F._odata[i]()());
}
auto T_v = T.View();
auto F_v = F.View();
thread_for(i, CloverTerm.Grid()->oSites(),
{
T_v[i]()(0, 0) = timesMinusI(F_v[i]()());
T_v[i]()(1, 1) = timesI(F_v[i]()());
T_v[i]()(2, 2) = timesMinusI(F_v[i]()());
T_v[i]()(3, 3) = timesI(F_v[i]()());
});
return T;
}
CloverFieldType fillCloverXT(const GaugeLinkField &F)
{
CloverFieldType T(F._grid);
T = zero;
PARALLEL_FOR_LOOP
for (int i = 0; i < CloverTerm._grid->oSites(); i++)
CloverFieldType T(F.Grid());
T = Zero();
auto T_v = T.View();
auto F_v = F.View();
thread_for(i, CloverTerm.Grid()->oSites(),
{
T._odata[i]()(0, 1) = timesI(F._odata[i]()());
T._odata[i]()(1, 0) = timesI(F._odata[i]()());
T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()());
T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()());
}
T_v[i]()(0, 1) = timesI(F_v[i]()());
T_v[i]()(1, 0) = timesI(F_v[i]()());
T_v[i]()(2, 3) = timesMinusI(F_v[i]()());
T_v[i]()(3, 2) = timesMinusI(F_v[i]()());
});
return T;
}
CloverFieldType fillCloverYT(const GaugeLinkField &F)
{
CloverFieldType T(F._grid);
T = zero;
PARALLEL_FOR_LOOP
for (int i = 0; i < CloverTerm._grid->oSites(); i++)
CloverFieldType T(F.Grid());
T = Zero();
auto T_v = T.View();
auto F_v = F.View();
thread_for(i, CloverTerm.Grid()->oSites(),
{
T._odata[i]()(0, 1) = -(F._odata[i]()());
T._odata[i]()(1, 0) = (F._odata[i]()());
T._odata[i]()(2, 3) = (F._odata[i]()());
T._odata[i]()(3, 2) = -(F._odata[i]()());
}
T_v[i]()(0, 1) = -(F_v[i]()());
T_v[i]()(1, 0) = (F_v[i]()());
T_v[i]()(2, 3) = (F_v[i]()());
T_v[i]()(3, 2) = -(F_v[i]()());
});
return T;
}
CloverFieldType fillCloverZT(const GaugeLinkField &F)
{
CloverFieldType T(F._grid);
T = zero;
PARALLEL_FOR_LOOP
for (int i = 0; i < CloverTerm._grid->oSites(); i++)
CloverFieldType T(F.Grid());
T = Zero();
auto T_v = T.View();
auto F_v = F.View();
thread_for(i, CloverTerm.Grid()->oSites(),
{
T._odata[i]()(0, 0) = timesI(F._odata[i]()());
T._odata[i]()(1, 1) = timesMinusI(F._odata[i]()());
T._odata[i]()(2, 2) = timesMinusI(F._odata[i]()());
T._odata[i]()(3, 3) = timesI(F._odata[i]()());
}
T_v[i]()(0, 0) = timesI(F_v[i]()());
T_v[i]()(1, 1) = timesMinusI(F_v[i]()());
T_v[i]()(2, 2) = timesMinusI(F_v[i]()());
T_v[i]()(3, 3) = timesI(F_v[i]()());
});
return T;
}
};
}
}
NAMESPACE_END(Grid);
#endif // GRID_QCD_WILSON_CLOVER_FERMION_H

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -25,13 +25,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_WILSON_COMPRESSOR_H
#define GRID_QCD_WILSON_COMPRESSOR_H
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////////////////////////////////////////
// optimised versions supporting half precision too
@ -43,9 +42,9 @@ class WilsonCompressorTemplate;
template<class _HCspinor,class _Hspinor,class _Spinor, class projector>
class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
typename std::enable_if<std::is_same<_HCspinor,_Hspinor>::value>::type >
typename std::enable_if<std::is_same<_HCspinor,_Hspinor>::value>::type >
{
public:
public:
int mu,dag;
@ -62,15 +61,16 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
typedef typename SiteHalfSpinor::vector_type vComplexHigh;
constexpr static int Nw=sizeof(SiteHalfSpinor)/sizeof(vComplexHigh);
inline int CommDatumSize(void) {
accelerator_inline int CommDatumSize(void) {
return sizeof(SiteHalfCommSpinor);
}
/*****************************************************/
/* Compress includes precision change if mpi data is not same */
/*****************************************************/
inline void Compress(SiteHalfSpinor * __restrict__ buf,Integer o,const SiteSpinor &in) {
SiteHalfSpinor tmp;
template<class _SiteHalfSpinor, class _SiteSpinor>
accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) {
_SiteHalfSpinor tmp;
projector::Proj(tmp,in,mu,dag);
vstream(buf[o],tmp);
}
@ -78,10 +78,10 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
/*****************************************************/
/* Exchange includes precision change if mpi data is not same */
/*****************************************************/
inline void Exchange(SiteHalfSpinor * __restrict__ mp,
const SiteHalfSpinor * __restrict__ vp0,
const SiteHalfSpinor * __restrict__ vp1,
Integer type,Integer o){
accelerator_inline void Exchange(SiteHalfSpinor *mp,
const SiteHalfSpinor * __restrict__ vp0,
const SiteHalfSpinor * __restrict__ vp1,
Integer type,Integer o){
SiteHalfSpinor tmp1;
SiteHalfSpinor tmp2;
exchange(tmp1,tmp2,vp0[o],vp1[o],type);
@ -92,19 +92,21 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
/*****************************************************/
/* Have a decompression step if mpi data is not same */
/*****************************************************/
inline void Decompress(SiteHalfSpinor * __restrict__ out,
SiteHalfSpinor * __restrict__ in, Integer o) {
accelerator_inline void Decompress(SiteHalfSpinor * __restrict__ out,
SiteHalfSpinor * __restrict__ in, Integer o) {
assert(0);
}
/*****************************************************/
/* Compress Exchange */
/*****************************************************/
inline void CompressExchange(SiteHalfSpinor * __restrict__ out0,
SiteHalfSpinor * __restrict__ out1,
const SiteSpinor * __restrict__ in,
Integer j,Integer k, Integer m,Integer type){
SiteHalfSpinor temp1, temp2,temp3,temp4;
accelerator_inline void CompressExchange(SiteHalfSpinor * __restrict__ out0,
SiteHalfSpinor * __restrict__ out1,
const SiteSpinor * __restrict__ in,
Integer j,Integer k, Integer m,Integer type)
{
SiteHalfSpinor temp1, temp2;
SiteHalfSpinor temp3, temp4;
projector::Proj(temp1,in[k],mu,dag);
projector::Proj(temp2,in[m],mu,dag);
exchange(temp3,temp4,temp1,temp2,type);
@ -115,15 +117,15 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
/*****************************************************/
/* Pass the info to the stencil */
/*****************************************************/
inline bool DecompressionStep(void) { return false; }
accelerator_inline bool DecompressionStep(void) { return false; }
};
template<class _HCspinor,class _Hspinor,class _Spinor, class projector>
class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type >
typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type >
{
public:
public:
int mu,dag;
@ -140,15 +142,16 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
typedef typename SiteHalfSpinor::vector_type vComplexHigh;
constexpr static int Nw=sizeof(SiteHalfSpinor)/sizeof(vComplexHigh);
inline int CommDatumSize(void) {
accelerator_inline int CommDatumSize(void) {
return sizeof(SiteHalfCommSpinor);
}
/*****************************************************/
/* Compress includes precision change if mpi data is not same */
/*****************************************************/
inline void Compress(SiteHalfSpinor *buf,Integer o,const SiteSpinor &in) {
SiteHalfSpinor hsp;
template<class _SiteHalfSpinor, class _SiteSpinor>
accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) {
_SiteHalfSpinor hsp;
SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf;
projector::Proj(hsp,in,mu,dag);
precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw);
@ -157,7 +160,7 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
/*****************************************************/
/* Exchange includes precision change if mpi data is not same */
/*****************************************************/
inline void Exchange(SiteHalfSpinor *mp,
accelerator_inline void Exchange(SiteHalfSpinor *mp,
SiteHalfSpinor *vp0,
SiteHalfSpinor *vp1,
Integer type,Integer o){
@ -172,8 +175,7 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
/*****************************************************/
/* Have a decompression step if mpi data is not same */
/*****************************************************/
inline void Decompress(SiteHalfSpinor *out,
SiteHalfSpinor *in, Integer o){
accelerator_inline void Decompress(SiteHalfSpinor *out, SiteHalfSpinor *in, Integer o){
SiteHalfCommSpinor *hin=(SiteHalfCommSpinor *)in;
precisionChange((vComplexHigh *)&out[o],(vComplexLow *)&hin[o],Nw);
}
@ -181,7 +183,7 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
/*****************************************************/
/* Compress Exchange */
/*****************************************************/
inline void CompressExchange(SiteHalfSpinor *out0,
accelerator_inline void CompressExchange(SiteHalfSpinor *out0,
SiteHalfSpinor *out1,
const SiteSpinor *in,
Integer j,Integer k, Integer m,Integer type){
@ -198,19 +200,19 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
/*****************************************************/
/* Pass the info to the stencil */
/*****************************************************/
inline bool DecompressionStep(void) { return true; }
accelerator_inline bool DecompressionStep(void) { return true; }
};
#define DECLARE_PROJ(Projector,Compressor,spProj) \
class Projector { \
public: \
template<class hsp,class fsp> \
static void Proj(hsp &result,const fsp &in,int mu,int dag){ \
spProj(result,in); \
} \
template<class hsp,class fsp> \
static accelerator void Proj(hsp &result,const fsp &in,int mu,int dag){ \
spProj(result,in); \
} \
}; \
template<typename HCS,typename HS,typename S> using Compressor = WilsonCompressorTemplate<HCS,HS,S,Projector>;
template<typename HCS,typename HS,typename S> using Compressor = WilsonCompressorTemplate<HCS,HS,S,Projector>;
DECLARE_PROJ(WilsonXpProjector,WilsonXpCompressor,spProjXp);
DECLARE_PROJ(WilsonYpProjector,WilsonYpCompressor,spProjYp);
@ -222,9 +224,9 @@ DECLARE_PROJ(WilsonZmProjector,WilsonZmCompressor,spProjZm);
DECLARE_PROJ(WilsonTmProjector,WilsonTmCompressor,spProjTm);
class WilsonProjector {
public:
public:
template<class hsp,class fsp>
static void Proj(hsp &result,const fsp &in,int mu,int dag){
static accelerator void Proj(hsp &result,const fsp &in,int mu,int dag){
int mudag=dag? mu : (mu+Nd)%(2*Nd);
switch(mudag) {
case Xp: spProjXp(result,in); break;
@ -243,9 +245,14 @@ template<typename HCS,typename HS,typename S> using WilsonCompressor = WilsonCom
// Fast comms buffer manipulation which should inline right through (avoid direction
// dependent logic that prevents inlining
template<class vobj,class cobj>
class WilsonStencil : public CartesianStencil<vobj,cobj> {
template<class vobj,class cobj,class Parameters>
class WilsonStencil : public CartesianStencil<vobj,cobj,Parameters> {
public:
typedef CartesianStencil<vobj,cobj,Parameters> Base;
typedef typename Base::View_type View_type;
typedef typename Base::StencilVector StencilVector;
double timer0;
double timer1;
double timer2;
@ -274,16 +281,40 @@ public:
if ( timer4 ) std::cout << GridLogMessage << " timer4 " <<timer4 <<std::endl;
}
std::vector<int> surface_list;
WilsonStencil(GridBase *grid,
int npoints,
int checkerboard,
const std::vector<int> &directions,
const std::vector<int> &distances)
: CartesianStencil<vobj,cobj> (grid,npoints,checkerboard,directions,distances)
const std::vector<int> &distances,Parameters p)
: CartesianStencil<vobj,cobj,Parameters> (grid,npoints,checkerboard,directions,distances,p)
{
ZeroCountersi();
surface_list.resize(0);
this->same_node.resize(npoints);
};
void BuildSurfaceList(int Ls,int vol4){
// find same node for SHM
// Here we know the distance is 1 for WilsonStencil
for(int point=0;point<this->_npoints;point++){
this->same_node[point] = this->SameNode(point);
}
for(int site = 0 ;site< vol4;site++){
int local = 1;
for(int point=0;point<this->_npoints;point++){
if( (!this->GetNodeLocal(site*Ls,point)) && (!this->same_node[point]) ){
local = 0;
}
}
if(local == 0) {
surface_list.push_back(site);
}
}
}
template < class compressor>
void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress)
@ -292,8 +323,6 @@ public:
this->HaloExchangeOptGather(source,compress);
double t1=usecond();
// Asynchronous MPI calls multidirectional, Isend etc...
// this->CommunicateBegin(reqs);
// this->CommunicateComplete(reqs);
// Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways.
this->Communicate();
double t2=usecond(); timer1 += t2-t1;
@ -327,7 +356,7 @@ public:
this->_grid->StencilBarrier();
this->mpi3synctime_g+=usecond();
assert(source._grid==this->_grid);
assert(source.Grid()==this->_grid);
this->halogtime-=usecond();
this->u_comm_offset=0;
@ -365,9 +394,10 @@ public:
this->face_table_computed=1;
assert(this->u_comm_offset==this->_unified_buffer_size);
this->halogtime+=usecond();
accelerator_barrier();
}
};
};
}} // namespace close
NAMESPACE_END(Grid);
#endif

View File

@ -27,16 +27,13 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_WILSON_FERMION_H
#define GRID_QCD_WILSON_FERMION_H
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
class WilsonFermionStatic {
public:
public:
static int HandOptDslash; // these are a temporary hack
static int MortonOrder;
static const std::vector<int> directions;
@ -60,8 +57,9 @@ class WilsonFermionStatic {
};
template <class Impl>
class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
public:
class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic
{
public:
INHERIT_IMPL_TYPES(Impl);
typedef WilsonKernels<Impl> Kernels;
@ -138,10 +136,10 @@ class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
// Constructor
WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
GridRedBlackCartesian &Hgrid, RealD _mass,
GridRedBlackCartesian &Hgrid, RealD _mass,
const ImplParams &p = ImplParams(),
const WilsonAnisotropyCoefficients &anis = WilsonAnisotropyCoefficients() );
// DoubleStore impl dependent
void ImportGauge(const GaugeField &_Umu);
@ -150,7 +148,7 @@ class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
///////////////////////////////////////////////////////////////
// protected:
public:
public:
virtual RealD Mass(void) { return mass; }
virtual int isTrivialEE(void) { return 1; };
RealD mass;
@ -171,7 +169,7 @@ class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
LebesgueOrder Lebesgue;
LebesgueOrder LebesgueEvenOdd;
WilsonAnisotropyCoefficients anisotropyCoeff;
///////////////////////////////////////////////////////////////
@ -182,11 +180,11 @@ class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
PropagatorField &q_out,
Current curr_type,
unsigned int mu);
void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx);
};
@ -194,7 +192,6 @@ class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
NAMESPACE_END(Grid);
}
}
#endif

View File

@ -1,5 +1,5 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -26,216 +26,215 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_WILSON_FERMION_5D_H
#define GRID_QCD_WILSON_FERMION_5D_H
#include <Grid/perfmon/Stat.h>
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////////////////////////////////////////
// This is the 4d red black case appropriate to support
//
// parity = (x+y+z+t)|2;
// generalised five dim fermions like mobius, zolotarev etc..
//
// i.e. even even contains fifth dim hopping term.
//
// [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ]
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// This is the 4d red black case appropriate to support
//
// parity = (x+y+z+t)|2;
// generalised five dim fermions like mobius, zolotarev etc..
//
// i.e. even even contains fifth dim hopping term.
//
// [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ]
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// This is the 4d red black case appropriate to support
//
// parity = (x+y+z+t)|2;
// generalised five dim fermions like mobius, zolotarev etc..
//
// i.e. even even contains fifth dim hopping term.
//
// [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ]
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// This is the 4d red black case appropriate to support
//
// parity = (x+y+z+t)|2;
// generalised five dim fermions like mobius, zolotarev etc..
//
// i.e. even even contains fifth dim hopping term.
//
// [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ]
////////////////////////////////////////////////////////////////////////////////
class WilsonFermion5DStatic {
public:
// S-direction is INNERMOST and takes no part in the parity.
static const std::vector<int> directions;
static const std::vector<int> displacements;
const int npoint = 8;
};
class WilsonFermion5DStatic {
public:
// S-direction is INNERMOST and takes no part in the parity.
static const std::vector<int> directions;
static const std::vector<int> displacements;
static constexpr int npoint = 8;
};
template<class Impl>
class WilsonFermion5D : public WilsonKernels<Impl>, public WilsonFermion5DStatic
{
public:
INHERIT_IMPL_TYPES(Impl);
typedef WilsonKernels<Impl> Kernels;
PmuStat stat;
template<class Impl>
class WilsonFermion5D : public WilsonKernels<Impl>, public WilsonFermion5DStatic
{
public:
INHERIT_IMPL_TYPES(Impl);
typedef WilsonKernels<Impl> Kernels;
PmuStat stat;
FermionField _tmp;
FermionField &tmp(void) { return _tmp; }
FermionField _tmp;
FermionField &tmp(void) { return _tmp; }
void Report(void);
void ZeroCounters(void);
double DhopCalls;
double DhopCommTime;
double DhopComputeTime;
double DhopComputeTime2;
double DhopFaceTime;
double DhopTotalTime;
void Report(void);
void ZeroCounters(void);
double DhopCalls;
double DhopCommTime;
double DhopComputeTime;
double DhopComputeTime2;
double DhopFaceTime;
double DhopTotalTime;
double DerivCalls;
double DerivCommTime;
double DerivComputeTime;
double DerivDhopComputeTime;
double DerivCalls;
double DerivCommTime;
double DerivComputeTime;
double DerivDhopComputeTime;
///////////////////////////////////////////////////////////////
// Implement the abstract base
///////////////////////////////////////////////////////////////
GridBase *GaugeGrid(void) { return _FourDimGrid ;}
GridBase *GaugeRedBlackGrid(void) { return _FourDimRedBlackGrid ;}
GridBase *FermionGrid(void) { return _FiveDimGrid;}
GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;}
///////////////////////////////////////////////////////////////
// Implement the abstract base
///////////////////////////////////////////////////////////////
GridBase *GaugeGrid(void) { return _FourDimGrid ;}
GridBase *GaugeRedBlackGrid(void) { return _FourDimRedBlackGrid ;}
GridBase *FermionGrid(void) { return _FiveDimGrid;}
GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;}
// full checkerboard operations; leave unimplemented as abstract for now
virtual RealD M (const FermionField &in, FermionField &out){assert(0); return 0.0;};
virtual RealD Mdag (const FermionField &in, FermionField &out){assert(0); return 0.0;};
// full checkerboard operations; leave unimplemented as abstract for now
virtual RealD M (const FermionField &in, FermionField &out){assert(0); return 0.0;};
virtual RealD Mdag (const FermionField &in, FermionField &out){assert(0); return 0.0;};
// half checkerboard operations; leave unimplemented as abstract for now
virtual void Meooe (const FermionField &in, FermionField &out){assert(0);};
virtual void Mooee (const FermionField &in, FermionField &out){assert(0);};
virtual void MooeeInv (const FermionField &in, FermionField &out){assert(0);};
// half checkerboard operations; leave unimplemented as abstract for now
virtual void Meooe (const FermionField &in, FermionField &out){assert(0);};
virtual void Mooee (const FermionField &in, FermionField &out){assert(0);};
virtual void MooeeInv (const FermionField &in, FermionField &out){assert(0);};
virtual void MeooeDag (const FermionField &in, FermionField &out){assert(0);};
virtual void MooeeDag (const FermionField &in, FermionField &out){assert(0);};
virtual void MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
virtual void MeooeDag (const FermionField &in, FermionField &out){assert(0);};
virtual void MooeeDag (const FermionField &in, FermionField &out){assert(0);};
virtual void MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
// These can be overridden by fancy 5d chiral action
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// These can be overridden by fancy 5d chiral action
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
// Implement hopping term non-hermitian hopping term; half cb or both
// Implement s-diagonal DW
void DW (const FermionField &in, FermionField &out,int dag);
void Dhop (const FermionField &in, FermionField &out,int dag);
void DhopOE(const FermionField &in, FermionField &out,int dag);
void DhopEO(const FermionField &in, FermionField &out,int dag);
// Implement hopping term non-hermitian hopping term; half cb or both
// Implement s-diagonal DW
void DW (const FermionField &in, FermionField &out,int dag);
void Dhop (const FermionField &in, FermionField &out,int dag);
void DhopOE(const FermionField &in, FermionField &out,int dag);
void DhopEO(const FermionField &in, FermionField &out,int dag);
// add a DhopComm
// -- suboptimal interface will presently trigger multiple comms.
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
// add a DhopComm
// -- suboptimal interface will presently trigger multiple comms.
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
///////////////////////////////////////////////////////////////
// New methods added
///////////////////////////////////////////////////////////////
void DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag);
///////////////////////////////////////////////////////////////
// New methods added
///////////////////////////////////////////////////////////////
void DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag);
void DhopInternal(StencilImpl & st,
LebesgueOrder &lo,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out,
int dag);
void DhopInternal(StencilImpl & st,
LebesgueOrder &lo,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out,
int dag);
void DhopInternalOverlappedComms(StencilImpl & st,
LebesgueOrder &lo,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out,
int dag);
void DhopInternalOverlappedComms(StencilImpl & st,
LebesgueOrder &lo,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out,
int dag);
void DhopInternalSerialComms(StencilImpl & st,
LebesgueOrder &lo,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out,
int dag);
void DhopInternalSerialComms(StencilImpl & st,
LebesgueOrder &lo,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out,
int dag);
// Constructors
WilsonFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
double _M5,const ImplParams &p= ImplParams());
// Constructors
WilsonFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
double _M5,const ImplParams &p= ImplParams());
// Constructors
/*
WilsonFermion5D(int simd,
GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
double _M5,const ImplParams &p= ImplParams());
*/
// Constructors
/*
WilsonFermion5D(int simd,
GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
double _M5,const ImplParams &p= ImplParams());
*/
// DoubleStore
void ImportGauge(const GaugeField &_Umu);
// DoubleStore
void ImportGauge(const GaugeField &_Umu);
///////////////////////////////////////////////////////////////
// Data members require to support the functionality
///////////////////////////////////////////////////////////////
public:
///////////////////////////////////////////////////////////////
// Data members require to support the functionality
///////////////////////////////////////////////////////////////
public:
// Add these to the support from Wilson
GridBase *_FourDimGrid;
GridBase *_FourDimRedBlackGrid;
GridBase *_FiveDimGrid;
GridBase *_FiveDimRedBlackGrid;
// Add these to the support from Wilson
GridBase *_FourDimGrid;
GridBase *_FourDimRedBlackGrid;
GridBase *_FiveDimGrid;
GridBase *_FiveDimRedBlackGrid;
double M5;
int Ls;
double M5;
int Ls;
//Defines the stencils for even and odd
StencilImpl Stencil;
StencilImpl StencilEven;
StencilImpl StencilOdd;
//Defines the stencils for even and odd
StencilImpl Stencil;
StencilImpl StencilEven;
StencilImpl StencilOdd;
// Copy of the gauge field , with even and odd subsets
DoubledGaugeField Umu;
DoubledGaugeField UmuEven;
DoubledGaugeField UmuOdd;
// Copy of the gauge field , with even and odd subsets
DoubledGaugeField Umu;
DoubledGaugeField UmuEven;
DoubledGaugeField UmuOdd;
LebesgueOrder Lebesgue;
LebesgueOrder LebesgueEvenOdd;
LebesgueOrder Lebesgue;
LebesgueOrder LebesgueEvenOdd;
// Comms buffer
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
// Comms buffer
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
///////////////////////////////////////////////////////////////
// Conserved current utilities
///////////////////////////////////////////////////////////////
void ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu);
void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx);
///////////////////////////////////////////////////////////////
// Conserved current utilities
///////////////////////////////////////////////////////////////
void ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu);
void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx);
void ContractJ5q(PropagatorField &q_in,ComplexField &J5q);
void ContractJ5q(FermionField &q_in,ComplexField &J5q);
void ContractJ5q(PropagatorField &q_in,ComplexField &J5q);
void ContractJ5q(FermionField &q_in,ComplexField &J5q);
};
};
}}
NAMESPACE_END(Grid);
#endif

View File

@ -0,0 +1,226 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////////////////////////
// Single flavour four spinors with colour index
/////////////////////////////////////////////////////////////////////////////
template <class S, class Representation = FundamentalRepresentation,class Options = CoeffReal >
class WilsonImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
public:
static const int Dimension = Representation::Dimension;
static const bool isFundamental = Representation::isFundamental;
static const bool LsVectorised=false;
static const int Nhcs = Options::Nhcs;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl);
//Necessary?
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
typedef typename Options::_Coeff_t Coeff_t;
typedef typename Options::template PrecisionMapper<Simd>::LowerPrecVector SimdL;
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
template <typename vtype> using iImplPropagator = iScalar<iMatrix<iMatrix<vtype, Dimension>, Ns> >;
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
template <typename vtype> using iImplHalfCommSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhcs> >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplPropagator<Simd> SitePropagator;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplHalfCommSpinor<SimdL> SiteHalfCommSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SitePropagator> PropagatorField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfCommSpinor,SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonImplParams ImplParams;
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor,ImplParams> StencilImpl;
typedef typename StencilImpl::View_type StencilView;
ImplParams Params;
WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){
assert(Params.boundary_phases.size() == Nd);
};
template<class _Spinor>
static accelerator_inline void multLink(_Spinor &phi,
const SiteDoubledGaugeField &U,
const _Spinor &chi,
int mu)
{
auto UU = coalescedRead(U(mu));
mult(&phi(), &UU, &chi());
}
template<class _Spinor>
static accelerator_inline void multLink(_Spinor &phi,
const SiteDoubledGaugeField &U,
const _Spinor &chi,
int mu,
StencilEntry *SE,
StencilView &St)
{
multLink(phi,U,chi,mu);
}
template <class ref>
static accelerator_inline void loadLinkElement(Simd &reg, ref &memory)
{
reg = memory;
}
inline void DoubleStore(GridBase *GaugeGrid,
DoubledGaugeField &Uds,
const GaugeField &Umu)
{
typedef typename Simd::scalar_type scalar_type;
conformable(Uds.Grid(), GaugeGrid);
conformable(Umu.Grid(), GaugeGrid);
GaugeLinkField U(GaugeGrid);
GaugeLinkField tmp(GaugeGrid);
Lattice<iScalar<vInteger> > coor(GaugeGrid);
////////////////////////////////////////////////////
// apply any boundary phase or twists
////////////////////////////////////////////////////
for (int mu = 0; mu < Nd; mu++) {
////////// boundary phase /////////////
auto pha = Params.boundary_phases[mu];
scalar_type phase( real(pha),imag(pha) );
int L = GaugeGrid->GlobalDimensions()[mu];
int Lmu = L - 1;
LatticeCoordinate(coor, mu);
U = PeekIndex<LorentzIndex>(Umu, mu);
// apply any twists
RealD theta = Params.twist_n_2pi_L[mu] * 2*M_PI / L;
if ( theta != 0.0) {
scalar_type twphase(::cos(theta),::sin(theta));
U = twphase*U;
std::cout << GridLogMessage << " Twist ["<<mu<<"] "<< Params.twist_n_2pi_L[mu]<< " phase"<<phase <<std::endl;
}
tmp = where(coor == Lmu, phase * U, U);
PokeIndex<LorentzIndex>(Uds, tmp, mu);
U = adj(Cshift(U, mu, -1));
U = where(coor == 0, conjugate(phase) * U, U);
PokeIndex<LorentzIndex>(Uds, U, mu + 4);
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
GaugeLinkField link(mat.Grid());
link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
PokeIndex<LorentzIndex>(mat,link,mu);
}
inline void outerProductImpl(PropagatorField &mat, const FermionField &B, const FermionField &A){
mat = outerProduct(B,A);
}
inline void TraceSpinImpl(GaugeLinkField &mat, PropagatorField&P) {
mat = TraceIndex<SpinIndex>(P);
}
inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds){
for (int mu = 0; mu < Nd; mu++)
mat[mu] = PeekIndex<LorentzIndex>(Uds, mu);
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
int Ls=Btilde.Grid()->_fdimensions[0];
GaugeLinkField tmp(mat.Grid());
tmp = Zero();
auto tmp_v = tmp.View();
auto Btilde_v = Btilde.View();
auto Atilde_v = Atilde.View();
thread_for(sss,tmp.Grid()->oSites(),{
int sU=sss;
for(int s=0;s<Ls;s++){
int sF = s+Ls*sU;
tmp_v[sU] = tmp_v[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde_v[sF],Atilde_v[sF])); // ordering here
}
});
PokeIndex<LorentzIndex>(mat,tmp,mu);
}
};
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffReal > WilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRL; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDF; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRL; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDF; // Double
typedef WilsonImpl<vComplex, AdjointRepresentation, CoeffReal > WilsonAdjImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF; // Float
typedef WilsonImpl<vComplexD, AdjointRepresentation, CoeffReal > WilsonAdjImplD; // Double
typedef WilsonImpl<vComplex, TwoIndexSymmetricRepresentation, CoeffReal > WilsonTwoIndexSymmetricImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, TwoIndexSymmetricRepresentation, CoeffReal > WilsonTwoIndexSymmetricImplF; // Float
typedef WilsonImpl<vComplexD, TwoIndexSymmetricRepresentation, CoeffReal > WilsonTwoIndexSymmetricImplD; // Double
typedef WilsonImpl<vComplex, TwoIndexAntiSymmetricRepresentation, CoeffReal > WilsonTwoIndexAntiSymmetricImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, TwoIndexAntiSymmetricRepresentation, CoeffReal > WilsonTwoIndexAntiSymmetricImplF; // Float
typedef WilsonImpl<vComplexD, TwoIndexAntiSymmetricRepresentation, CoeffReal > WilsonTwoIndexAntiSymmetricImplD; // Double
NAMESPACE_END(Grid);

View File

@ -1,455 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
namespace QCD {
int WilsonKernelsStatic::Opt = WilsonKernelsStatic::OptGeneric;
int WilsonKernelsStatic::Comms = WilsonKernelsStatic::CommsAndCompute;
template <class Impl>
WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
////////////////////////////////////////////
// Generic implementation; move to different file?
////////////////////////////////////////////
#define GENERIC_STENCIL_LEG(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if (SE->_is_local) { \
chi_p = &chi; \
if (SE->_permute) { \
spProj(tmp, in._odata[SE->_offset]); \
permute(chi, tmp, ptype); \
} else { \
spProj(chi, in._odata[SE->_offset]); \
} \
} else { \
chi_p = &buf[SE->_offset]; \
} \
Impl::multLink(Uchi, U._odata[sU], *chi_p, Dir, SE, st); \
Recon(result, Uchi);
#define GENERIC_STENCIL_LEG_INT(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if (SE->_is_local) { \
chi_p = &chi; \
if (SE->_permute) { \
spProj(tmp, in._odata[SE->_offset]); \
permute(chi, tmp, ptype); \
} else { \
spProj(chi, in._odata[SE->_offset]); \
} \
} else if ( st.same_node[Dir] ) { \
chi_p = &buf[SE->_offset]; \
} \
if (SE->_is_local || st.same_node[Dir] ) { \
Impl::multLink(Uchi, U._odata[sU], *chi_p, Dir, SE, st); \
Recon(result, Uchi); \
}
#define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
chi_p = &buf[SE->_offset]; \
Impl::multLink(Uchi, U._odata[sU], *chi_p, Dir, SE, st); \
Recon(result, Uchi); \
nmu++; \
}
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \
if (gamma == Dir) { \
if (SE->_is_local && SE->_permute) { \
spProj(tmp, in._odata[SE->_offset]); \
permute(chi, tmp, ptype); \
} else if (SE->_is_local) { \
spProj(chi, in._odata[SE->_offset]); \
} else { \
chi = buf[SE->_offset]; \
} \
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); \
Recon(result, Uchi); \
}
////////////////////////////////////////////////////////////////////
// All legs kernels ; comms then compute
////////////////////////////////////////////////////////////////////
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
SiteHalfSpinor Uchi;
SiteSpinor result;
StencilEntry *SE;
int ptype;
GENERIC_STENCIL_LEG(Xp,spProjXp,spReconXp);
GENERIC_STENCIL_LEG(Yp,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG(Zp,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG(Tp,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG(Xm,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG(Ym,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG(Zm,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG(Tm,spProjTm,accumReconTm);
vstream(out._odata[sF], result);
};
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
SiteHalfSpinor Uchi;
SiteSpinor result;
StencilEntry *SE;
int ptype;
GENERIC_STENCIL_LEG(Xm,spProjXp,spReconXp);
GENERIC_STENCIL_LEG(Ym,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG(Zm,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG(Tm,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG(Xp,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG(Yp,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG(Zp,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG(Tp,spProjTm,accumReconTm);
vstream(out._odata[sF], result);
};
////////////////////////////////////////////////////////////////////
// Interior kernels
////////////////////////////////////////////////////////////////////
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
SiteHalfSpinor Uchi;
SiteSpinor result;
StencilEntry *SE;
int ptype;
result=zero;
GENERIC_STENCIL_LEG_INT(Xp,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_INT(Yp,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_INT(Zp,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_INT(Tp,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_INT(Xm,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_INT(Ym,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_INT(Zm,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_INT(Tm,spProjTm,accumReconTm);
vstream(out._odata[sF], result);
};
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
SiteHalfSpinor Uchi;
SiteSpinor result;
StencilEntry *SE;
int ptype;
result=zero;
GENERIC_STENCIL_LEG_INT(Xm,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_INT(Ym,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_INT(Zm,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_INT(Tm,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_INT(Xp,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_INT(Yp,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_INT(Zp,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_INT(Tp,spProjTm,accumReconTm);
vstream(out._odata[sF], result);
};
////////////////////////////////////////////////////////////////////
// Exterior kernels
////////////////////////////////////////////////////////////////////
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
SiteHalfSpinor Uchi;
SiteSpinor result;
StencilEntry *SE;
int ptype;
int nmu=0;
result=zero;
GENERIC_STENCIL_LEG_EXT(Xp,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_EXT(Yp,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_EXT(Zp,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_EXT(Tp,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_EXT(Xm,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_EXT(Ym,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_EXT(Zm,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_EXT(Tm,spProjTm,accumReconTm);
if ( nmu ) {
out._odata[sF] = out._odata[sF] + result;
}
};
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
SiteHalfSpinor Uchi;
SiteSpinor result;
StencilEntry *SE;
int ptype;
int nmu=0;
result=zero;
GENERIC_STENCIL_LEG_EXT(Xm,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_EXT(Ym,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_EXT(Zm,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_EXT(Tm,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_EXT(Xp,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_EXT(Yp,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_EXT(Zp,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_EXT(Tp,spProjTm,accumReconTm);
if ( nmu ) {
out._odata[sF] = out._odata[sF] + result;
}
};
template <class Impl>
void WilsonKernels<Impl>::DhopDir( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteSpinor result;
SiteHalfSpinor Uchi;
StencilEntry *SE;
int ptype;
SE = st.GetEntry(ptype, dir, sF);
GENERIC_DHOPDIR_LEG(Xp,spProjXp,spReconXp);
GENERIC_DHOPDIR_LEG(Yp,spProjYp,spReconYp);
GENERIC_DHOPDIR_LEG(Zp,spProjZp,spReconZp);
GENERIC_DHOPDIR_LEG(Tp,spProjTp,spReconTp);
GENERIC_DHOPDIR_LEG(Xm,spProjXm,spReconXm);
GENERIC_DHOPDIR_LEG(Ym,spProjYm,spReconYm);
GENERIC_DHOPDIR_LEG(Zm,spProjZm,spReconZm);
GENERIC_DHOPDIR_LEG(Tm,spProjTm,spReconTm);
vstream(out._odata[sF], result);
}
/*******************************************************************************
* Conserved current utilities for Wilson fermions, for contracting propagators
* to make a conserved current sink or inserting the conserved current
* sequentially. Common to both 4D and 5D.
******************************************************************************/
// N.B. Functions below assume a -1/2 factor within U.
#define WilsonCurrentFwd(expr, mu) ((expr - Gamma::gmu[mu]*expr))
#define WilsonCurrentBwd(expr, mu) ((expr + Gamma::gmu[mu]*expr))
/*******************************************************************************
* Name: ContractConservedCurrentSiteFwd
* Operation: (1/2) * q2[x] * U(x) * (g[mu] - 1) * q1[x + mu]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in_1 shifted in +ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd(
const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeField &U,
unsigned int sU,
unsigned int mu,
bool switch_sign)
{
SitePropagator result, tmp;
Gamma g5(Gamma::Algebra::Gamma5);
Impl::multLinkProp(tmp, U._odata[sU], q_in_1, mu);
result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu);
if (switch_sign)
{
q_out -= result;
}
else
{
q_out += result;
}
}
/*******************************************************************************
* Name: ContractConservedCurrentSiteBwd
* Operation: (1/2) * q2[x + mu] * U^dag(x) * (g[mu] + 1) * q1[x]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in_2 shifted in +ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd(
const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeField &U,
unsigned int sU,
unsigned int mu,
bool switch_sign)
{
SitePropagator result, tmp;
Gamma g5(Gamma::Algebra::Gamma5);
Impl::multLinkProp(tmp, U._odata[sU], q_in_1, mu + Nd);
result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu);
if (switch_sign)
{
q_out += result;
}
else
{
q_out -= result;
}
}
// G-parity requires more specialised implementation.
#define NO_CURR_SITE(Impl) \
template <> \
void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd( \
const SitePropagator &q_in_1, \
const SitePropagator &q_in_2, \
SitePropagator &q_out, \
DoubledGaugeField &U, \
unsigned int sU, \
unsigned int mu, \
bool switch_sign) \
{ \
assert(0); \
} \
template <> \
void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd( \
const SitePropagator &q_in_1, \
const SitePropagator &q_in_2, \
SitePropagator &q_out, \
DoubledGaugeField &U, \
unsigned int mu, \
unsigned int sU, \
bool switch_sign) \
{ \
assert(0); \
}
NO_CURR_SITE(GparityWilsonImplF);
NO_CURR_SITE(GparityWilsonImplD);
NO_CURR_SITE(GparityWilsonImplFH);
NO_CURR_SITE(GparityWilsonImplDF);
/*******************************************************************************
* Name: SeqConservedCurrentSiteFwd
* Operation: (1/2) * U(x) * (g[mu] - 1) * q[x + mu]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in shifted in +ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeField &U,
unsigned int sU,
unsigned int mu,
vInteger t_mask,
bool switch_sign)
{
SitePropagator result;
Impl::multLinkProp(result, U._odata[sU], q_in, mu);
result = WilsonCurrentFwd(result, mu);
// Zero any unwanted timeslice entries.
result = predicatedWhere(t_mask, result, 0.*result);
if (switch_sign)
{
q_out -= result;
}
else
{
q_out += result;
}
}
/*******************************************************************************
* Name: SeqConservedCurrentSiteFwd
* Operation: (1/2) * U^dag(x) * (g[mu] + 1) * q[x - mu]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in shifted in -ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeField &U,
unsigned int sU,
unsigned int mu,
vInteger t_mask,
bool switch_sign)
{
SitePropagator result;
Impl::multLinkProp(result, U._odata[sU], q_in, mu + Nd);
result = WilsonCurrentBwd(result, mu);
// Zero any unwanted timeslice entries.
result = predicatedWhere(t_mask, result, 0.*result);
if (switch_sign)
{
q_out += result;
}
else
{
q_out -= result;
}
}
FermOpTemplateInstantiate(WilsonKernels);
AdjointFermOpTemplateInstantiate(WilsonKernels);
TwoIndexFermOpTemplateInstantiate(WilsonKernels);
}}

View File

@ -27,19 +27,17 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_DHOP_H
#define GRID_QCD_DHOP_H
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Helper routines that implement Wilson stencil for a single site.
// Common to both the WilsonFermion and WilsonFermion5D
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Helper routines that implement Wilson stencil for a single site.
// Common to both the WilsonFermion and WilsonFermion5D
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class WilsonKernelsStatic {
public:
public:
enum { OptGeneric, OptHandUnroll, OptInlineAsm };
enum { CommsAndCompute, CommsThenCompute };
static int Opt;
@ -47,235 +45,123 @@ class WilsonKernelsStatic {
};
template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic {
public:
public:
INHERIT_IMPL_TYPES(Impl);
typedef FermionOperator<Impl> Base;
public:
template <bool EnableBool = true>
typename std::enable_if<Impl::isFundamental==true && Nc == 3 &&EnableBool, void>::type
DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1)
{
bgq_l1p_optimisation(1);
switch(Opt) {
#if defined(AVX512) || defined (QPX)
case OptInlineAsm:
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSite (st,lo,U,buf,sF,sU,Ls,Ns,in,out);
else if (interior) WilsonKernels<Impl>::AsmDhopSiteInt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
else if (exterior) WilsonKernels<Impl>::AsmDhopSiteExt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
else assert(0);
break;
#endif
case OptHandUnroll:
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSite(st,lo,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::HandDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::HandDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
sF++;
}
sU++;
}
break;
case OptGeneric:
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
else assert(0);
sF++;
}
sU++;
}
break;
default:
assert(0);
}
bgq_l1p_optimisation(0);
}
template <bool EnableBool = true>
typename std::enable_if<(Impl::isFundamental==false || (Impl::isFundamental==true && Nc != 3)) && EnableBool, void>::type
DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1 ) {
// no kernel choice
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
else assert(0);
sF++;
}
sU++;
}
}
template <bool EnableBool = true>
typename std::enable_if<Impl::isFundamental==true && Nc == 3 && EnableBool,void>::type
DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1)
{
bgq_l1p_optimisation(1);
switch(Opt) {
#if defined(AVX512) || defined (QPX)
case OptInlineAsm:
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSiteDag (st,lo,U,buf,sF,sU,Ls,Ns,in,out);
else if (interior) WilsonKernels<Impl>::AsmDhopSiteDagInt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
else if (exterior) WilsonKernels<Impl>::AsmDhopSiteDagExt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
else assert(0);
break;
#endif
case OptHandUnroll:
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::HandDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::HandDhopSiteDagExt(st,lo,U,buf,sF,sU,in,out);
else assert(0);
sF++;
}
sU++;
}
break;
case OptGeneric:
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::GenericDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteDagExt(st,lo,U,buf,sF,sU,in,out);
else assert(0);
sF++;
}
sU++;
}
break;
default:
assert(0);
}
bgq_l1p_optimisation(0);
}
static void DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ;
template <bool EnableBool = true>
typename std::enable_if<(Impl::isFundamental==false || (Impl::isFundamental==true && Nc != 3)) && EnableBool,void>::type
DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1) {
static void DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ;
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::GenericDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteDagExt(st,lo,U,buf,sF,sU,in,out);
else assert(0);
sF++;
}
sU++;
}
}
static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma);
void DhopDir(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma);
//////////////////////////////////////////////////////////////////////////////
// Utilities for inserting Wilson conserved current.
//////////////////////////////////////////////////////////////////////////////
void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
static void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeField &U,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
bool switch_sign = false);
void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1,
static void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeField &U,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
bool switch_sign = false);
void SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
static void SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeField &U,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
vInteger t_mask,
vPredicate t_mask,
bool switch_sign = false);
void SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
static void SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeField &U,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
vInteger t_mask,
vPredicate t_mask,
bool switch_sign = false);
private:
// Specialised variants
void GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
static accelerator void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
void GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
// Specialised variants
static accelerator void GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
void GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
static accelerator void GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator void GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
void GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void AsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
void AsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
void AsmDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
void AsmDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
void AsmDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
void AsmDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
void HandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void HandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
static accelerator void GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator void GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
void HandDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void HandDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void HandDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void HandDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
public:
static accelerator void GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
WilsonKernels(const ImplParams &p = ImplParams());
static void AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
static void AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
static void AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
static void AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
static void AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
static void AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
// Keep Hand unrolled temporarily
static accelerator void HandDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator void HandDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator void HandDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator void HandDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator void HandDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
static accelerator void HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
public:
WilsonKernels(const ImplParams &p = ImplParams()) : Base(p){};
};
}}
NAMESPACE_END(Grid);
#endif

View File

@ -1,127 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernelsAsm.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
namespace QCD {
///////////////////////////////////////////////////////////
// Default to no assembler implementation
///////////////////////////////////////////////////////////
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
#include <Grid/qcd/action/fermion/WilsonKernelsAsmAvx512.h>
#include <Grid/qcd/action/fermion/WilsonKernelsAsmQPX.h>
#define INSTANTIATE_ASM(A)\
template void WilsonKernels<A>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
\
template void WilsonKernels<A>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
\
template void WilsonKernels<A>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
\
template void WilsonKernels<A>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
INSTANTIATE_ASM(WilsonImplF);
INSTANTIATE_ASM(WilsonImplD);
INSTANTIATE_ASM(ZWilsonImplF);
INSTANTIATE_ASM(ZWilsonImplD);
INSTANTIATE_ASM(GparityWilsonImplF);
INSTANTIATE_ASM(GparityWilsonImplD);
INSTANTIATE_ASM(DomainWallVec5dImplF);
INSTANTIATE_ASM(DomainWallVec5dImplD);
INSTANTIATE_ASM(ZDomainWallVec5dImplF);
INSTANTIATE_ASM(ZDomainWallVec5dImplD);
INSTANTIATE_ASM(WilsonImplFH);
INSTANTIATE_ASM(WilsonImplDF);
INSTANTIATE_ASM(ZWilsonImplFH);
INSTANTIATE_ASM(ZWilsonImplDF);
INSTANTIATE_ASM(GparityWilsonImplFH);
INSTANTIATE_ASM(GparityWilsonImplDF);
INSTANTIATE_ASM(DomainWallVec5dImplFH);
INSTANTIATE_ASM(DomainWallVec5dImplDF);
INSTANTIATE_ASM(ZDomainWallVec5dImplFH);
INSTANTIATE_ASM(ZDomainWallVec5dImplDF);
}}

View File

@ -1,650 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernelsAsmAvx512.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#if defined(AVX512)
///////////////////////////////////////////////////////////
// If we are AVX512 specialise the single precision routine
///////////////////////////////////////////////////////////
#include <simd/Intel512wilson.h>
#include <simd/Intel512single.h>
static Vector<vComplexF> signsF;
template<typename vtype>
int setupSigns(Vector<vtype>& signs ){
Vector<vtype> bother(2);
signs = bother;
vrsign(signs[0]);
visign(signs[1]);
return 1;
}
static int signInitF = setupSigns(signsF);
#define MAYBEPERM(A,perm) if (perm) { A ; }
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
#define COMPLEX_SIGNS(isigns) vComplexF *isigns = &signsF[0];
/////////////////////////////////////////////////////////////////
// XYZT vectorised, undag Kernel, single
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef MAYBEPERM
#undef MULT_2SPIN
#define MAYBEPERM(A,B)
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
/////////////////////////////////////////////////////////////////
// Ls vectorised, undag Kernel, single
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
#undef MULT_2SPIN
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LSNOPF(ptr,pf)
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// Ls vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef COMPLEX_SIGNS
#undef MAYBEPERM
#undef MULT_2SPIN
///////////////////////////////////////////////////////////
// If we are AVX512 specialise the double precision routine
///////////////////////////////////////////////////////////
#include <simd/Intel512double.h>
static Vector<vComplexD> signsD;
static int signInitD = setupSigns(signsD);
#define MAYBEPERM(A,perm) if (perm) { A ; }
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
#define COMPLEX_SIGNS(isigns) vComplexD *isigns = &signsD[0];
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
/////////////////////////////////////////////////////////////////
// XYZT vectorised, undag Kernel, single
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef MAYBEPERM
#undef MULT_2SPIN
#define MAYBEPERM(A,B)
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
/////////////////////////////////////////////////////////////////
// Ls vectorised, undag Kernel, single
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
#undef MULT_2SPIN
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LSNOPF(ptr,pf)
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// Ls vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef COMPLEX_SIGNS
#undef MAYBEPERM
#undef MULT_2SPIN
#endif //AVX512

View File

@ -1,99 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc
Copyright (C) 2015
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/WilsonTMFermion.h>
namespace Grid {
namespace QCD {
/*
* BF sequence
*
void bfmbase<Float>::MooeeInv(Fermion_t psi,
Fermion_t chi,
int dag, int cb)
double m = this->mass;
double tm = this->twistedmass;
double mtil = 4.0+this->mass;
double sq = mtil*mtil + tm*tm;
double a = mtil/sq;
double b = -tm /sq;
if(dag) b=-b;
axpibg5x(chi,psi,a,b);
void bfmbase<Float>::Mooee(Fermion_t psi,
Fermion_t chi,
int dag,int cb)
double a = 4.0+this->mass;
double b = this->twistedmass;
if(dag) b=-b;
axpibg5x(chi,psi,a,b);
*/
template<class Impl>
void WilsonTMFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
RealD a = 4.0+this->mass;
RealD b = this->mu;
out.checkerboard = in.checkerboard;
axpibg5x(out,in,a,b);
}
template<class Impl>
void WilsonTMFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
RealD a = 4.0+this->mass;
RealD b = -this->mu;
out.checkerboard = in.checkerboard;
axpibg5x(out,in,a,b);
}
template<class Impl>
void WilsonTMFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
RealD m = this->mass;
RealD tm = this->mu;
RealD mtil = 4.0+this->mass;
RealD sq = mtil*mtil+tm*tm;
RealD a = mtil/sq;
RealD b = -tm /sq;
axpibg5x(out,in,a,b);
}
template<class Impl>
void WilsonTMFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out) {
RealD m = this->mass;
RealD tm = this->mu;
RealD mtil = 4.0+this->mass;
RealD sq = mtil*mtil+tm*tm;
RealD a = mtil/sq;
RealD b = tm /sq;
axpibg5x(out,in,a,b);
}
FermOpTemplateInstantiate(WilsonTMFermion);
}
}

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -23,55 +23,52 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_WILSON_TM_FERMION_H
#define GRID_QCD_WILSON_TM_FERMION_H
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/WilsonFermion.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class WilsonTMFermion : public WilsonFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class WilsonTMFermion : public WilsonFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
WilsonTMFermion(GaugeField &_Umu,
GridCartesian &Fgrid,
GridRedBlackCartesian &Hgrid,
RealD _mass,
RealD _mu,
const ImplParams &p= ImplParams()
) :
WilsonFermion<Impl>(_Umu,
Fgrid,
Hgrid,
_mass,p)
virtual void Instantiatable(void) {};
// Constructors
WilsonTMFermion(GaugeField &_Umu,
GridCartesian &Fgrid,
GridRedBlackCartesian &Hgrid,
RealD _mass,
RealD _mu,
const ImplParams &p= ImplParams()
) :
WilsonFermion<Impl>(_Umu,
Fgrid,
Hgrid,
_mass,p)
{
mu = _mu;
}
{
mu = _mu;
}
// allow override for twisted mass and clover
virtual void Mooee(const FermionField &in, FermionField &out) ;
virtual void MooeeDag(const FermionField &in, FermionField &out) ;
virtual void MooeeInv(const FermionField &in, FermionField &out) ;
virtual void MooeeInvDag(const FermionField &in, FermionField &out) ;
// allow override for twisted mass and clover
virtual void Mooee(const FermionField &in, FermionField &out) ;
virtual void MooeeDag(const FermionField &in, FermionField &out) ;
virtual void MooeeInv(const FermionField &in, FermionField &out) ;
virtual void MooeeInvDag(const FermionField &in, FermionField &out) ;
private:
RealD mu; // TwistedMass parameter
private:
RealD mu; // TwistedMass parameter
};
};
NAMESPACE_END(Grid);
}}
#endif

View File

@ -30,126 +30,123 @@ Author: paboyle <paboyle@ph.ed.ac.uk> ; NB Christoph did similar in GPT
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/WilsonFermion.h>
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template<class Impl>
class WilsonTMFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
WilsonTMFermion5D(GaugeField &_Umu,
GridCartesian &Fgrid,
GridRedBlackCartesian &Frbgrid,
GridCartesian &Ugrid,
GridRedBlackCartesian &Urbgrid,
const std::vector<RealD> _mass,
const std::vector<RealD> _mu,
const ImplParams &p= ImplParams()
) :
WilsonFermion5D<Impl>(_Umu,
Fgrid,
Frbgrid,
Ugrid,
Urbgrid,
4.0,p)
{
update(_mass,_mu);
}
virtual void Meooe(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
this->DhopEO(in, out, DaggerNo);
} else {
this->DhopOE(in, out, DaggerNo);
}
}
virtual void MeooeDag(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
this->DhopEO(in, out, DaggerYes);
} else {
this->DhopOE(in, out, DaggerYes);
}
}
// allow override for twisted mass and clover
virtual void Mooee(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
//axpibg5x(out,in,a,b); // out = a*in + b*i*G5*in
for (int s=0;s<(int)this->mass.size();s++) {
ComplexD a = 4.0+this->mass[s];
ComplexD b(0.0,this->mu[s]);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual void MooeeDag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
for (int s=0;s<(int)this->mass.size();s++) {
ComplexD a = 4.0+this->mass[s];
ComplexD b(0.0,-this->mu[s]);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual void MooeeInv(const FermionField &in, FermionField &out) {
for (int s=0;s<(int)this->mass.size();s++) {
RealD m = this->mass[s];
RealD tm = this->mu[s];
RealD mtil = 4.0+this->mass[s];
RealD sq = mtil*mtil+tm*tm;
ComplexD a = mtil/sq;
ComplexD b(0.0, -tm /sq);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual void MooeeInvDag(const FermionField &in, FermionField &out) {
for (int s=0;s<(int)this->mass.size();s++) {
RealD m = this->mass[s];
RealD tm = this->mu[s];
RealD mtil = 4.0+this->mass[s];
RealD sq = mtil*mtil+tm*tm;
ComplexD a = mtil/sq;
ComplexD b(0.0,tm /sq);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual RealD M(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
this->Dhop(in, out, DaggerNo);
FermionField tmp(out._grid);
for (int s=0;s<(int)this->mass.size();s++) {
ComplexD a = 4.0+this->mass[s];
ComplexD b(0.0,this->mu[s]);
axpbg5y_ssp(tmp,a,in,b,in,s,s);
}
return axpy_norm(out, 1.0, tmp, out);
}
// needed for fast PV
void update(const std::vector<RealD>& _mass, const std::vector<RealD>& _mu) {
assert(_mass.size() == _mu.size());
assert(_mass.size() == this->FermionGrid()->_fdimensions[0]);
this->mass = _mass;
this->mu = _mu;
}
private:
std::vector<RealD> mu;
std::vector<RealD> mass;
};
template<class Impl>
class WilsonTMFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
WilsonTMFermion5D(GaugeField &_Umu,
GridCartesian &Fgrid,
GridRedBlackCartesian &Frbgrid,
GridCartesian &Ugrid,
GridRedBlackCartesian &Urbgrid,
const std::vector<RealD> _mass,
const std::vector<RealD> _mu,
const ImplParams &p= ImplParams()
) :
WilsonFermion5D<Impl>(_Umu,
Fgrid,
Frbgrid,
Ugrid,
Urbgrid,
4.0,p)
typedef WilsonTMFermion5D<WilsonImplF> WilsonTMFermion5DF;
typedef WilsonTMFermion5D<WilsonImplD> WilsonTMFermion5DD;
{
update(_mass,_mu);
}
virtual void Meooe(const FermionField &in, FermionField &out) {
if (in.Checkerboard() == Odd) {
this->DhopEO(in, out, DaggerNo);
} else {
this->DhopOE(in, out, DaggerNo);
}
}
virtual void MeooeDag(const FermionField &in, FermionField &out) {
if (in.Checkerboard() == Odd) {
this->DhopEO(in, out, DaggerYes);
} else {
this->DhopOE(in, out, DaggerYes);
}
}
// allow override for twisted mass and clover
virtual void Mooee(const FermionField &in, FermionField &out) {
out.Checkerboard() = in.Checkerboard();
//axpibg5x(out,in,a,b); // out = a*in + b*i*G5*in
for (int s=0;s<(int)this->mass.size();s++) {
ComplexD a = 4.0+this->mass[s];
ComplexD b(0.0,this->mu[s]);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual void MooeeDag(const FermionField &in, FermionField &out) {
out.Checkerboard() = in.Checkerboard();
for (int s=0;s<(int)this->mass.size();s++) {
ComplexD a = 4.0+this->mass[s];
ComplexD b(0.0,-this->mu[s]);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual void MooeeInv(const FermionField &in, FermionField &out) {
for (int s=0;s<(int)this->mass.size();s++) {
RealD m = this->mass[s];
RealD tm = this->mu[s];
RealD mtil = 4.0+this->mass[s];
RealD sq = mtil*mtil+tm*tm;
ComplexD a = mtil/sq;
ComplexD b(0.0, -tm /sq);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual void MooeeInvDag(const FermionField &in, FermionField &out) {
for (int s=0;s<(int)this->mass.size();s++) {
RealD m = this->mass[s];
RealD tm = this->mu[s];
RealD mtil = 4.0+this->mass[s];
RealD sq = mtil*mtil+tm*tm;
ComplexD a = mtil/sq;
ComplexD b(0.0,tm /sq);
axpbg5y_ssp(out,a,in,b,in,s,s);
}
}
virtual RealD M(const FermionField &in, FermionField &out) {
out.Checkerboard() = in.Checkerboard();
this->Dhop(in, out, DaggerNo);
FermionField tmp(out.Grid());
for (int s=0;s<(int)this->mass.size();s++) {
ComplexD a = 4.0+this->mass[s];
ComplexD b(0.0,this->mu[s]);
axpbg5y_ssp(tmp,a,in,b,in,s,s);
}
return axpy_norm(out, 1.0, tmp, out);
}
// needed for fast PV
void update(const std::vector<RealD>& _mass, const std::vector<RealD>& _mu) {
assert(_mass.size() == _mu.size());
assert(_mass.size() == this->FermionGrid()->_fdimensions[0]);
this->mass = _mass;
this->mu = _mu;
}
private:
std::vector<RealD> mu;
std::vector<RealD> mass;
};
typedef WilsonTMFermion5D<WilsonImplF> WilsonTMFermion5DF;
typedef WilsonTMFermion5D<WilsonImplD> WilsonTMFermion5DD;
}}
NAMESPACE_END(Grid);

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,56 +24,50 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_ZMOBIUS_FERMION_H
#define GRID_QCD_ZMOBIUS_FERMION_H
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
NAMESPACE_BEGIN(Grid);
namespace QCD {
template<class Impl>
class ZMobiusFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
template<class Impl>
class ZMobiusFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
ZMobiusFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
std::vector<ComplexD> &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) :
virtual void Instantiatable(void) {};
// Constructors
ZMobiusFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
std::vector<ComplexD> &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) :
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
RealD eps = 1.0;
std::cout<<GridLogMessage << "ZMobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" gamma passed in"<<std::endl;
std::vector<Coeff_t> zgamma(this->Ls);
for(int s=0;s<this->Ls;s++){
zgamma[s] = gamma[s];
}
// Call base setter
this->SetCoefficientsInternal(1.0,zgamma,b,c);
}
};
{
// RealD eps = 1.0;
std::cout<<GridLogMessage << "ZMobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" gamma passed in"<<std::endl;
Vector<Coeff_t> zgamma(this->Ls);
for(int s=0;s<this->Ls;s++){
zgamma[s] = gamma[s];
}
// Call base setter
this->SetCoefficientsInternal(1.0,zgamma,b,c);
}
}
#endif
};
NAMESPACE_END(Grid);

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -26,19 +26,19 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
namespace Grid {
namespace QCD {
/*
* Dense matrix versions of routines
*/
NAMESPACE_BEGIN(Grid);
/*
* Dense matrix versions of routines
*/
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
{
@ -54,10 +54,10 @@ template<class Impl>
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
{
int Ls=this->Ls;
int LLs = psi._grid->_rdimensions[0];
int vol = psi._grid->oSites()/LLs;
int LLs = psi.Grid()->_rdimensions[0];
int vol = psi.Grid()->oSites()/LLs;
chi.checkerboard=psi.checkerboard;
chi.Checkerboard()=psi.Checkerboard();
assert(Ls==LLs);
@ -96,15 +96,14 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
}
// For the non-vectorised s-direction this is simple
for(auto site=0;site<vol;site++){
thread_loop( (auto site=0;site<vol;site++), {
SiteSpinor SiteChi;
SiteHalfSpinor SitePplus;
SiteHalfSpinor SitePminus;
for(int s1=0;s1<Ls;s1++){
SiteChi =zero;
SiteChi =Zero();
for(int s2=0;s2<Ls;s2++){
int lex2 = s2+Ls*site;
@ -120,7 +119,7 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
}
chi[s1+Ls*site] = SiteChi*0.5;
}
}
});
}
#ifdef CAYLEY_DPERP_DENSE
@ -153,4 +152,4 @@ template void CayleyFermion5D<ZWilsonImplFH>::MooeeInternal(const FermionField &
template void CayleyFermion5D<ZWilsonImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
#endif
}}
NAMESPACE_END(Grid);

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -26,26 +26,24 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus fowards
// Pplus backwards
// Pminus fowards
// Pplus backwards
template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
Vector<Coeff_t> &lower,
Vector<Coeff_t> &diag,
Vector<Coeff_t> &upper)
{
Coeff_t one(1.0);
int Ls=this->Ls;
@ -66,9 +64,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
Vector<Coeff_t> &lower,
Vector<Coeff_t> &diag,
Vector<Coeff_t> &upper)
{
Coeff_t one(1.0);
int Ls=this->Ls;
@ -91,7 +89,7 @@ void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard=psi.checkerboard;
chi.Checkerboard()=psi.Checkerboard();
int Ls=this->Ls;
// Apply (L^{\prime})^{-1}
axpby_ssp (chi,one,psi, czero,psi,0,0); // chi[0]=psi[0]
@ -120,7 +118,7 @@ void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard=psi.checkerboard;
chi.Checkerboard()=psi.Checkerboard();
int Ls=this->Ls;
// Apply (U^{\prime})^{-dagger}
axpby_ssp (chi,one,psi, czero,psi,0,0); // chi[0]=psi[0]
@ -145,20 +143,19 @@ void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &
#ifdef CAYLEY_DPERP_LINALG
INSTANTIATE_DPERP(WilsonImplF);
INSTANTIATE_DPERP(WilsonImplD);
INSTANTIATE_DPERP(GparityWilsonImplF);
INSTANTIATE_DPERP(GparityWilsonImplD);
INSTANTIATE_DPERP(ZWilsonImplF);
INSTANTIATE_DPERP(ZWilsonImplD);
INSTANTIATE_DPERP(WilsonImplF);
INSTANTIATE_DPERP(WilsonImplD);
INSTANTIATE_DPERP(GparityWilsonImplF);
INSTANTIATE_DPERP(GparityWilsonImplD);
INSTANTIATE_DPERP(ZWilsonImplF);
INSTANTIATE_DPERP(ZWilsonImplD);
INSTANTIATE_DPERP(WilsonImplFH);
INSTANTIATE_DPERP(WilsonImplDF);
INSTANTIATE_DPERP(GparityWilsonImplFH);
INSTANTIATE_DPERP(GparityWilsonImplDF);
INSTANTIATE_DPERP(ZWilsonImplFH);
INSTANTIATE_DPERP(ZWilsonImplDF);
INSTANTIATE_DPERP(WilsonImplFH);
INSTANTIATE_DPERP(WilsonImplDF);
INSTANTIATE_DPERP(GparityWilsonImplFH);
INSTANTIATE_DPERP(GparityWilsonImplDF);
INSTANTIATE_DPERP(ZWilsonImplFH);
INSTANTIATE_DPERP(ZWilsonImplDF);
#endif
}
}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,158 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
NAMESPACE_BEGIN(Grid);
/*
* Dense matrix versions of routines
*/
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{
int Ls = this->Ls;
int LLs = psi.Grid()->_rdimensions[0];
int vol = psi.Grid()->oSites()/LLs;
chi.Checkerboard() = psi.Checkerboard();
assert(Ls==LLs);
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
for(int s=0;s<Ls;s++){
Pplus(s,s) = this->bee[s];
Pminus(s,s) = this->bee[s];
}
for(int s=0; s<Ls-1; s++){
Pminus(s,s+1) = -this->cee[s];
}
for(int s=0; s<Ls-1; s++){
Pplus(s+1,s) = -this->cee[s+1];
}
Pplus (0,Ls-1) = this->dp;
Pminus(Ls-1,0) = this->dm;
Eigen::MatrixXd PplusMat ;
Eigen::MatrixXd PminusMat;
if(inv) {
PplusMat = Pplus.inverse();
PminusMat = Pminus.inverse();
} else {
PplusMat = Pplus;
PminusMat = Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
// For the non-vectorised s-direction this is simple
for(auto site=0; site<vol; site++){
SiteSpinor SiteChi;
SiteHalfSpinor SitePplus;
SiteHalfSpinor SitePminus;
for(int s1=0; s1<Ls; s1++){
SiteChi = Zero();
for(int s2=0; s2<Ls; s2++){
int lex2 = s2 + Ls*site;
if(PplusMat(s1,s2) != 0.0){
spProj5p(SitePplus,psi[lex2]);
accumRecon5p(SiteChi, PplusMat(s1,s2)*SitePplus);
}
if(PminusMat(s1,s2) != 0.0){
spProj5m(SitePminus, psi[lex2]);
accumRecon5m(SiteChi, PminusMat(s1,s2)*SitePminus);
}
}
chi[s1+Ls*site] = SiteChi*0.5;
}
}
}
#ifdef DOMAIN_WALL_EOFA_DPERP_DENSE
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
template void DomainWallEOFAFermion<GparityWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<GparityWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
template void DomainWallEOFAFermion<GparityWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<GparityWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
#endif
NAMESPACE_END(Grid);

View File

@ -0,0 +1,167 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
NAMESPACE_BEGIN(Grid);
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus fowards
// Pplus backwards
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.Checkerboard() = psi.Checkerboard();
int Ls = this->Ls;
FermionField tmp(psi.Grid());
// Apply (L^{\prime})^{-1}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls], chi, s, Ls-1);
}
axpby_ssp_pminus(tmp, czero, chi, one/this->dee[Ls-1], chi, Ls-1, Ls-1);
axpby_ssp_pplus(chi, one, tmp, one/this->dee[Ls], chi, Ls-1, Ls-1);
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls]
}
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.Checkerboard() = psi.Checkerboard();
int Ls = this->Ls;
FermionField tmp(psi.Grid());
// Apply (U^{\prime})^{-dagger}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
}
axpby_ssp_pminus(tmp, czero, chi, one/conjugate(this->dee[Ls-1]), chi, Ls-1, Ls-1);
axpby_ssp_pplus(chi, one, tmp, one/conjugate(this->dee[Ls]), chi, Ls-1, Ls-1);
// Apply L^{-dagger}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls]
}
}
#ifdef DOMAIN_WALL_EOFA_DPERP_LINALG
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
#endif
NAMESPACE_END(Grid);

View File

@ -0,0 +1,183 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermiondense.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
NAMESPACE_BEGIN(Grid);
/*
* Dense matrix versions of routines
*/
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{
int Ls = this->Ls;
int LLs = psi.Grid()->_rdimensions[0];
int vol = psi.Grid()->oSites()/LLs;
int pm = this->pm;
RealD shift = this->shift;
RealD alpha = this->alpha;
RealD k = this->k;
RealD mq1 = this->mq1;
chi.Checkerboard() = psi.Checkerboard();
assert(Ls==LLs);
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
for(int s=0;s<Ls;s++){
Pplus(s,s) = this->bee[s];
Pminus(s,s) = this->bee[s];
}
for(int s=0; s<Ls-1; s++){
Pminus(s,s+1) = -this->cee[s];
}
for(int s=0; s<Ls-1; s++){
Pplus(s+1,s) = -this->cee[s+1];
}
Pplus (0,Ls-1) = mq1*this->cee[0];
Pminus(Ls-1,0) = mq1*this->cee[Ls-1];
if(shift != 0.0){
Coeff_t N = 2.0 * ( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) );
for(int s=0; s<Ls; ++s){
if(pm == 1){ Pplus(s,Ls-1) += shift * k * N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1); }
else{ Pminus(Ls-1-s,Ls-1) -= shift * k * N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1); }
}
}
Eigen::MatrixXd PplusMat ;
Eigen::MatrixXd PminusMat;
if(inv){
PplusMat = Pplus.inverse();
PminusMat = Pminus.inverse();
} else {
PplusMat = Pplus;
PminusMat = Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
// For the non-vectorised s-direction this is simple
for(auto site=0; site<vol; site++){
SiteSpinor SiteChi;
SiteHalfSpinor SitePplus;
SiteHalfSpinor SitePminus;
for(int s1=0; s1<Ls; s1++){
SiteChi = Zero();
for(int s2=0; s2<Ls; s2++){
int lex2 = s2 + Ls*site;
if(PplusMat(s1,s2) != 0.0){
spProj5p(SitePplus,psi[lex2]);
accumRecon5p(SiteChi, PplusMat(s1,s2)*SitePplus);
}
if(PminusMat(s1,s2) != 0.0){
spProj5m(SitePminus, psi[lex2]);
accumRecon5m(SiteChi, PminusMat(s1,s2)*SitePminus);
}
}
chi[s1+Ls*site] = SiteChi*0.5;
}
}
}
#ifdef MOBIUS_EOFA_DPERP_DENSE
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
template void MobiusEOFAFermion<GparityWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<GparityWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
template void MobiusEOFAFermion<GparityWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<GparityWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
#endif
NAMESPACE_END(Grid);

View File

@ -0,0 +1,289 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionssp.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
NAMESPACE_BEGIN(Grid);
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus fowards
// Pplus backwards
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi,
FermionField& chi, Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper,
Vector<Coeff_t>& shift_coeffs)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
}
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); }
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); }
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi,
FermionField& chi, Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper,
Vector<Coeff_t>& shift_coeffs)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
}
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); }
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); }
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.Checkerboard() = psi.Checkerboard();
int Ls = this->Ls;
// Apply (L^{\prime})^{-1}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1);
}
axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1);
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls]
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.Checkerboard() = psi.Checkerboard();
int Ls = this->Ls;
FermionField tmp(psi.Grid());
// Apply (L^{\prime})^{-1}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
axpby_ssp(tmp, czero, tmp, this->MooeeInv_shift_lc[0], psi, 0, 0);
for(int s=1; s<Ls; s++){
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
axpby_ssp(tmp, one, tmp, this->MooeeInv_shift_lc[s], psi, 0, s);
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1);
}
axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1);
// Apply U^{-1} and add shift term
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); }
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls]
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); }
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.Checkerboard() = psi.Checkerboard();
int Ls = this->Ls;
// Apply (U^{\prime})^{-dagger}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
}
axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1);
// Apply L^{-dagger}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls]
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.Checkerboard() = psi.Checkerboard();
int Ls = this->Ls;
FermionField tmp(psi.Grid());
// Apply (U^{\prime})^{-dagger} and accumulate (MooeeInvDag_shift_lc)_{j} \psi_{j} in tmp[0]
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
axpby_ssp(tmp, czero, tmp, this->MooeeInvDag_shift_lc[0], psi, 0, 0);
for(int s=1; s<Ls; s++){
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
axpby_ssp(tmp, one, tmp, this->MooeeInvDag_shift_lc[s], psi, 0, s);
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
}
axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1);
// Apply L^{-dagger} and add shift
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); }
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls]
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); }
}
}
#ifdef MOBIUS_EOFA_DPERP_LINALG
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
#endif
NAMESPACE_END(Grid);

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -23,13 +23,12 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef G5_HERMITIAN_LINOP
#define G5_HERMITIAN_LINOP
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////////////////////////////
// Wrap an already herm matrix
@ -46,12 +45,12 @@ public:
HermOp(in,out);
}
void OpDiag (const Field &in, Field &out) {
Field tmp(in._grid);
Field tmp(in.Grid());
_Mat.Mdiag(in,tmp);
G5R5(out,tmp);
}
void OpDir (const Field &in, Field &out,int dir,int disp) {
Field tmp(in._grid);
Field tmp(in.Grid());
_Mat.Mdir(in,tmp,dir,disp);
G5R5(out,tmp);
}
@ -68,7 +67,7 @@ public:
n2=real(dot);
}
void HermOp(const Field &in, Field &out){
Field tmp(in._grid);
Field tmp(in.Grid());
_Mat.M(in,tmp);
G5R5(out,tmp);
}
@ -80,7 +79,7 @@ class Gamma5HermitianLinearOperator : public LinearOperatorBase<Field> {
Matrix &_Mat;
Gamma g5;
public:
Gamma5HermitianLinearOperator(Matrix &Mat): _Mat(Mat), g5(Gamma::Algebra::Gamma5) {};
Gamma5HermitianLinearOperator(Matrix &Mat): _Mat(Mat), g5(Gamma::Algebra::Gamma5) {};
void Op (const Field &in, Field &out){
HermOp(in,out);
}
@ -88,12 +87,12 @@ public:
HermOp(in,out);
}
void OpDiag (const Field &in, Field &out) {
Field tmp(in._grid);
Field tmp(in.Grid());
_Mat.Mdiag(in,tmp);
out=g5*tmp;
}
void OpDir (const Field &in, Field &out,int dir,int disp) {
Field tmp(in._grid);
Field tmp(in.Grid());
_Mat.Mdir(in,tmp,dir,disp);
out=g5*tmp;
}
@ -110,12 +109,11 @@ public:
n2=real(dot);
}
void HermOp(const Field &in, Field &out){
Field tmp(in._grid);
Field tmp(in.Grid());
_Mat.M(in,tmp);
out=g5*tmp;
}
};
}}
NAMESPACE_END(Grid);
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -26,31 +26,30 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template<class Impl>
CayleyFermion5D<Impl>::CayleyFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,const ImplParams &p) :
WilsonFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_M5,p),
mass(_mass)
{
}
template<class Impl>
CayleyFermion5D<Impl>::CayleyFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,const ImplParams &p) :
WilsonFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_M5,p),
mass(_mass)
{
}
///////////////////////////////////////////////////////////////
// Physical surface field utilities
@ -61,8 +60,8 @@ void CayleyFermion5D<Impl>::ExportPhysicalFermionSolution(const FermionField &so
int Ls = this->Ls;
FermionField tmp(this->FermionGrid());
tmp = solution5d;
conformable(solution5d._grid,this->FermionGrid());
conformable(exported4d._grid,this->GaugeGrid());
conformable(solution5d.Grid(),this->FermionGrid());
conformable(exported4d.Grid(),this->GaugeGrid());
axpby_ssp_pminus(tmp, 0., solution5d, 1., solution5d, 0, 0);
axpby_ssp_pplus (tmp, 1., tmp , 1., solution5d, 0, Ls-1);
ExtractSlice(exported4d, tmp, 0, 0);
@ -71,7 +70,7 @@ template<class Impl>
void CayleyFermion5D<Impl>::P(const FermionField &psi, FermionField &chi)
{
int Ls= this->Ls;
chi=zero;
chi=Zero();
for(int s=0;s<Ls;s++){
axpby_ssp_pminus(chi,1.0,chi,1.0,psi,s,s);
axpby_ssp_pplus (chi,1.0,chi,1.0,psi,s,(s+1)%Ls);
@ -81,7 +80,7 @@ template<class Impl>
void CayleyFermion5D<Impl>::Pdag(const FermionField &psi, FermionField &chi)
{
int Ls= this->Ls;
chi=zero;
chi=Zero();
for(int s=0;s<Ls;s++){
axpby_ssp_pminus(chi,1.0,chi,1.0,psi,s,s);
axpby_ssp_pplus (chi,1.0,chi,1.0,psi,s,(s-1+Ls)%Ls);
@ -93,8 +92,8 @@ void CayleyFermion5D<Impl>::ExportPhysicalFermionSource(const FermionField &solu
int Ls = this->Ls;
FermionField tmp(this->FermionGrid());
tmp = solution5d;
conformable(solution5d._grid,this->FermionGrid());
conformable(exported4d._grid,this->GaugeGrid());
conformable(solution5d.Grid(),this->FermionGrid());
conformable(exported4d.Grid(),this->GaugeGrid());
axpby_ssp_pplus (tmp, 0., solution5d, 1., solution5d, 0, 0);
axpby_ssp_pminus(tmp, 1., tmp , 1., solution5d, 0, Ls-1);
ExtractSlice(exported4d, tmp, 0, 0);
@ -104,9 +103,9 @@ void CayleyFermion5D<Impl>::ImportUnphysicalFermion(const FermionField &input4d,
{
int Ls = this->Ls;
FermionField tmp(this->FermionGrid());
conformable(imported5d._grid,this->FermionGrid());
conformable(input4d._grid ,this->GaugeGrid());
tmp = zero;
conformable(imported5d.Grid(),this->FermionGrid());
conformable(input4d.Grid() ,this->GaugeGrid());
tmp = Zero();
InsertSlice(input4d, tmp, 0 , 0);
InsertSlice(input4d, tmp, Ls-1, 0);
axpby_ssp_pplus (tmp, 0., tmp, 1., tmp, 0, 0);
@ -119,9 +118,9 @@ void CayleyFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &inpu
{
int Ls = this->Ls;
FermionField tmp(this->FermionGrid());
conformable(imported5d._grid,this->FermionGrid());
conformable(input4d._grid ,this->GaugeGrid());
tmp = zero;
conformable(imported5d.Grid(),this->FermionGrid());
conformable(input4d.Grid() ,this->GaugeGrid());
tmp = Zero();
InsertSlice(input4d, tmp, 0 , 0);
InsertSlice(input4d, tmp, Ls-1, 0);
axpby_ssp_pplus (tmp, 0., tmp, 1., tmp, 0, 0);
@ -156,7 +155,7 @@ void CayleyFermion5D<Impl>::DminusDag(const FermionField &psi, FermionField &chi
template<class Impl> void CayleyFermion5D<Impl>::CayleyReport(void)
{
this->Report();
std::vector<int> latt = GridDefaultLatt();
Coordinate latt = GridDefaultLatt();
RealD volume = this->Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
RealD NP = this->_FourDimGrid->_Nprocessors;
if ( M5Dcalls > 0 ) {
@ -164,10 +163,16 @@ template<class Impl> void CayleyFermion5D<Impl>::CayleyReport(void)
std::cout << GridLogMessage << "CayleyFermion5D Number of M5D Calls : " << M5Dcalls << std::endl;
std::cout << GridLogMessage << "CayleyFermion5D ComputeTime/Calls : " << M5Dtime / M5Dcalls << " us" << std::endl;
// Flops = 6.0*(Nc*Ns) *Ls*vol
RealD mflops = 6.0*12*volume*M5Dcalls/M5Dtime/2; // 2 for red black counting
// Flops = 10.0*(Nc*Ns) *Ls*vol
RealD mflops = 10.0*(Nc*Ns)*volume*M5Dcalls/M5Dtime/2; // 2 for red black counting
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
// Bytes = sizeof(Real) * (Nc*Ns*Nreim) * Ls * vol * (read+write) (/2 for red black counting)
// read = 2 ( psi[ss+s+1] and psi[ss+s-1] count as 1 )
// write = 1
RealD Gbytes = sizeof(Real) * (Nc*Ns*2) * volume * 3 /2. * 1.e-9;
std::cout << GridLogMessage << "Average bandwidth (GB/s) : " << Gbytes/M5Dtime*M5Dcalls*1.e6 << std::endl;
}
if ( MooeeInvCalls > 0 ) {
@ -175,11 +180,16 @@ template<class Impl> void CayleyFermion5D<Impl>::CayleyReport(void)
std::cout << GridLogMessage << "#### MooeeInv calls report " << std::endl;
std::cout << GridLogMessage << "CayleyFermion5D Number of MooeeInv Calls : " << MooeeInvCalls << std::endl;
std::cout << GridLogMessage << "CayleyFermion5D ComputeTime/Calls : " << MooeeInvTime / MooeeInvCalls << " us" << std::endl;
#ifdef GRID_NVCC
RealD mflops = ( -16.*Nc*Ns+this->Ls*(1.+18.*Nc*Ns) )*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black counting
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
#else
// Flops = MADD * Ls *Ls *4dvol * spin/colour/complex
RealD mflops = 2.0*24*this->Ls*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black counting
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
#endif
}
}
@ -198,18 +208,18 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<Coeff_t> diag (Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1]=mass;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] =mass;
Vector<Coeff_t> diag (Ls,1.0);
Vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1]=mass;
Vector<Coeff_t> lower(Ls,-1.0); lower[0] =mass;
M5D(psi,chi,chi,lower,diag,upper);
}
template<class Impl>
void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din)
{
int Ls=this->Ls;
std::vector<Coeff_t> diag = bs;
std::vector<Coeff_t> upper= cs;
std::vector<Coeff_t> lower= cs;
Vector<Coeff_t> diag = bs;
Vector<Coeff_t> upper= cs;
Vector<Coeff_t> lower= cs;
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5D(psi,psi,Din,lower,diag,upper);
@ -218,9 +228,9 @@ void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &D
template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<Coeff_t> diag = beo;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
Vector<Coeff_t> diag = beo;
Vector<Coeff_t> upper(Ls);
Vector<Coeff_t> lower(Ls);
for(int i=0;i<Ls;i++) {
upper[i]=-ceo[i];
lower[i]=-ceo[i];
@ -233,9 +243,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<Coeff_t> diag = bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
Vector<Coeff_t> diag = bee;
Vector<Coeff_t> upper(Ls);
Vector<Coeff_t> lower(Ls);
for(int i=0;i<Ls;i++) {
upper[i]=-cee[i];
lower[i]=-cee[i];
@ -248,9 +258,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<Coeff_t> diag = bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
Vector<Coeff_t> diag = bee;
Vector<Coeff_t> upper(Ls);
Vector<Coeff_t> lower(Ls);
for (int s=0;s<Ls;s++){
// Assemble the 5d matrix
@ -278,9 +288,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0);
std::vector<Coeff_t> lower(Ls,-1.0);
Vector<Coeff_t> diag(Ls,1.0);
Vector<Coeff_t> upper(Ls,-1.0);
Vector<Coeff_t> lower(Ls,-1.0);
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5Ddag(psi,chi,chi,lower,diag,upper);
@ -290,9 +300,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din)
{
int Ls=this->Ls;
std::vector<Coeff_t> diag =bs;
std::vector<Coeff_t> upper=cs;
std::vector<Coeff_t> lower=cs;
Vector<Coeff_t> diag =bs;
Vector<Coeff_t> upper=cs;
Vector<Coeff_t> lower=cs;
for (int s=0;s<Ls;s++){
if ( s== 0 ) {
@ -315,9 +325,7 @@ void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField
template<class Impl>
RealD CayleyFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
FermionField Din(psi._grid);
FermionField Din(psi.Grid());
// Assemble Din
Meooe5D(psi,Din);
@ -337,7 +345,7 @@ RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
//D1+ D1- P- -> D1+^dag P+ D2-^dag
//D2- P+ D2+ P-D1-^dag D2+dag
FermionField Din(psi._grid);
FermionField Din(psi.Grid());
// Apply Dw
this->DW(psi,Din,DaggerYes);
@ -353,11 +361,9 @@ RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
template<class Impl>
void CayleyFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
Meooe5D(psi,this->tmp());
if ( psi.checkerboard == Odd ) {
if ( psi.Checkerboard() == Odd ) {
this->DhopEO(this->tmp(),chi,DaggerNo);
} else {
this->DhopOE(this->tmp(),chi,DaggerNo);
@ -368,7 +374,7 @@ template<class Impl>
void CayleyFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
{
// Apply 4d dslash
if ( psi.checkerboard == Odd ) {
if ( psi.Checkerboard() == Odd ) {
this->DhopEO(psi,this->tmp(),DaggerYes);
} else {
this->DhopOE(psi,this->tmp(),DaggerYes);
@ -386,7 +392,7 @@ void CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,in
template<class Impl>
void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
FermionField Din(V._grid);
FermionField Din(V.Grid());
if ( dag == DaggerNo ) {
// U d/du [D_w D5] V = U d/du DW D5 V
@ -401,7 +407,7 @@ void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const
template<class Impl>
void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
FermionField Din(V._grid);
FermionField Din(V.Grid());
if ( dag == DaggerNo ) {
// U d/du [D_w D5] V = U d/du DW D5 V
@ -416,7 +422,7 @@ void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const
template<class Impl>
void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
FermionField Din(V._grid);
FermionField Din(V.Grid());
if ( dag == DaggerNo ) {
// U d/du [D_w D5] V = U d/du DW D5 V
@ -433,7 +439,7 @@ void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
{
std::vector<Coeff_t> gamma(this->Ls);
Vector<Coeff_t> gamma(this->Ls);
for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
SetCoefficientsInternal(1.0,gamma,b,c);
}
@ -441,13 +447,13 @@ void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,Re
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
{
std::vector<Coeff_t> gamma(this->Ls);
Vector<Coeff_t> gamma(this->Ls);
for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
SetCoefficientsInternal(zolo_hi,gamma,b,c);
}
//Zolo
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c)
void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,Vector<Coeff_t> & gamma,RealD b,RealD c)
{
int Ls=this->Ls;
@ -568,12 +574,12 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
dee[Ls-1] += delta_d;
}
int inv=1;
this->MooeeInternalCompute(0,inv,MatpInv,MatmInv);
this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag);
// int inv=1;
// this->MooeeInternalCompute(0,inv,MatpInv,MatmInv);
// this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag);
}
#if 0
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInternalCompute(int dag, int inv,
Vector<iSinglet<Simd> > & Matp,
@ -628,35 +634,32 @@ void CayleyFermion5D<Impl>::MooeeInternalCompute(int dag, int inv,
Matm.resize(Ls*LLs);
for(int s2=0;s2<Ls;s2++){
for(int s1=0;s1<LLs;s1++){
int istride = LLs;
int ostride = 1;
Simd Vp;
Simd Vm;
scalar_type *sp = (scalar_type *)&Vp;
scalar_type *sm = (scalar_type *)&Vm;
for(int l=0;l<Nsimd;l++){
if ( switcheroo<Coeff_t>::iscomplex() ) {
sp[l] = PplusMat (l*istride+s1*ostride,s2);
sm[l] = PminusMat(l*istride+s1*ostride,s2);
} else {
// if real
scalar_type tmp;
tmp = PplusMat (l*istride+s1*ostride,s2);
sp[l] = scalar_type(tmp.real(),tmp.real());
tmp = PminusMat(l*istride+s1*ostride,s2);
sm[l] = scalar_type(tmp.real(),tmp.real());
for(int s1=0;s1<LLs;s1++){
int istride = LLs;
int ostride = 1;
Simd Vp;
Simd Vm;
scalar_type *sp = (scalar_type *)&Vp;
scalar_type *sm = (scalar_type *)&Vm;
for(int l=0;l<Nsimd;l++){
if ( switcheroo<Coeff_t>::iscomplex() ) {
sp[l] = PplusMat (l*istride+s1*ostride,s2);
sm[l] = PminusMat(l*istride+s1*ostride,s2);
} else {
// if real
scalar_type tmp;
tmp = PplusMat (l*istride+s1*ostride,s2);
sp[l] = scalar_type(tmp.real(),tmp.real());
tmp = PminusMat(l*istride+s1*ostride,s2);
sm[l] = scalar_type(tmp.real(),tmp.real());
}
}
}
Matp[LLs*s2+s1] = Vp;
Matm[LLs*s2+s1] = Vm;
}}
Matp[LLs*s2+s1] = Vp;
Matm[LLs*s2+s1] = Vm;
}}
}
FermOpTemplateInstantiate(CayleyFermion5D);
GparityFermOpTemplateInstantiate(CayleyFermion5D);
}}
#endif
NAMESPACE_END(Grid);

View File

@ -0,0 +1,235 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
NAMESPACE_BEGIN(Grid);
// Pminus fowards
// Pplus backwards..
template<class Impl>
void
CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
const FermionField &phi_i,
FermionField &chi_i,
Vector<Coeff_t> &lower,
Vector<Coeff_t> &diag,
Vector<Coeff_t> &upper)
{
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
int Ls =this->Ls;
// 10 = 3 complex mult + 2 complex add
// Flops = 10.0*(Nc*Ns) *Ls*vol (/2 for red black counting)
M5Dcalls++;
M5Dtime-=usecond();
uint64_t nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss= sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1, tmp2;
for(int s=0;s<Ls;s++){
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5m(tmp1,psi(idx_u));
spProj5p(tmp2,psi(idx_l));
coalescedWrite(chi[ss+s],diag[s]*phi(ss+s)+upper[s]*tmp1+lower[s]*tmp2);
}
});
M5Dtime+=usecond();
}
template<class Impl>
void
CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
const FermionField &phi_i,
FermionField &chi_i,
Vector<Coeff_t> &lower,
Vector<Coeff_t> &diag,
Vector<Coeff_t> &upper)
{
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
int Ls=this->Ls;
// Flops = 6.0*(Nc*Ns) *Ls*vol
M5Dcalls++;
M5Dtime-=usecond();
uint64_t nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss=sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1,tmp2;
for(int s=0;s<Ls;s++){
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5p(tmp1,psi(idx_u));
spProj5m(tmp2,psi(idx_l));
coalescedWrite(chi[ss+s],diag[s]*phi(ss+s)+upper[s]*tmp1+lower[s]*tmp2);
}
});
M5Dtime+=usecond();
}
template<class Impl>
void
CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi_i, FermionField &chi_i)
{
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
auto psi = psi_i.View();
auto chi = chi_i.View();
int Ls=this->Ls;
auto plee = & lee [0];
auto pdee = & dee [0];
auto puee = & uee [0];
auto pleem = & leem[0];
auto pueem = & ueem[0];
MooeeInvCalls++;
MooeeInvTime-=usecond();
uint64_t nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss=sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp;
// flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops
// Apply (L^{\prime})^{-1}
coalescedWrite(chi[ss],psi(ss)); // chi[0]=psi[0]
for(int s=1;s<Ls;s++){
spProj5p(tmp,chi(ss+s-1));
coalescedWrite(chi[ss+s] , psi(ss+s)-plee[s-1]*tmp);
}
// L_m^{-1}
for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -pleem[s]P_- chi
spProj5m(tmp,chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pleem[s]*tmp);
}
// U_m^{-1} D^{-1}
for (int s=0;s<Ls-1;s++){
// Chi[s] + 1/d chi[s]
spProj5p(tmp,chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s)-(pueem[s]/pdee[Ls-1])*tmp);
}
coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1));
// Apply U^{-1}
for (int s=Ls-2;s>=0;s--){
spProj5m(tmp,chi(ss+s+1));
coalescedWrite(chi[ss+s], chi(ss+s) - puee[s]*tmp);
}
});
MooeeInvTime+=usecond();
}
template<class Impl>
void
CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi_i, FermionField &chi_i)
{
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
int Ls=this->Ls;
auto psi = psi_i.View();
auto chi = chi_i.View();
auto plee = & lee [0];
auto pdee = & dee [0];
auto puee = & uee [0];
auto pleem = & leem[0];
auto pueem = & ueem[0];
assert(psi.Checkerboard() == psi.Checkerboard());
MooeeInvCalls++;
MooeeInvTime-=usecond();
uint64_t nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss=sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp;
// Apply (U^{\prime})^{-dagger}
coalescedWrite(chi[ss],psi(ss));
for (int s=1;s<Ls;s++){
spProj5m(tmp,chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s)-conjugate(puee[s-1])*tmp);
}
// U_m^{-\dagger}
for (int s=0;s<Ls-1;s++){
spProj5p(tmp,chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - conjugate(pueem[s])*tmp);
}
// L_m^{-\dagger} D^{-dagger}
for (int s=0;s<Ls-1;s++){
spProj5m(tmp,chi(ss+Ls-1));
coalescedWrite(chi[ss+s], conjugate(1.0/pdee[s])*chi(ss+s)-conjugate(pleem[s]/pdee[Ls-1])*tmp);
}
coalescedWrite(chi[ss+Ls-1], conjugate(1.0/pdee[Ls-1])*chi(ss+Ls-1));
// Apply L^{-dagger}
for (int s=Ls-2;s>=0;s--){
spProj5p(tmp,chi(ss+s+1));
coalescedWrite(chi[ss+s], chi(ss+s) - conjugate(plee[s])*tmp);
}
});
MooeeInvTime+=usecond();
}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,831 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
NAMESPACE_BEGIN(Grid);
/*
* Dense matrix versions of routines
*/
template<class Impl>
void
CayleyFermion5D<Impl>::MooeeInvDag(const FermionField &psi, FermionField &chi)
{
EnableIf<Impl::LsVectorised&&EnableBool,int> sfinae=0;
this->MooeeInternal(psi,chi,DaggerYes,InverseYes);
}
template<class Impl>
void
CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
{
EnableIf<Impl::LsVectorised&&EnableBool,int> sfinae=0;
this->MooeeInternal(psi,chi,DaggerNo,InverseYes);
}
template<class Impl>
void
CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
const FermionField &phi_i,
FermionField &chi_i,
Vector<Coeff_t> &lower,
Vector<Coeff_t> &diag,
Vector<Coeff_t> &upper)
{
EnableIf<Impl::LsVectorised&&EnableBool,int> sfinae=0;
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
const int nsimd= Simd::Nsimd();
Vector<iSinglet<Simd> > u(LLs);
Vector<iSinglet<Simd> > l(LLs);
Vector<iSinglet<Simd> > d(LLs);
assert(Ls/LLs==nsimd);
assert(phi.Checkerboard() == psi.Checkerboard());
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type * u_p = (scalar_type *)&u[0];
scalar_type * l_p = (scalar_type *)&l[0];
scalar_type * d_p = (scalar_type *)&d[0];
for(int o=0;o<LLs;o++){ // outer
for(int i=0;i<nsimd;i++){ //inner
int s = o+i*LLs;
int ss = o*nsimd+i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
M5Dcalls++;
M5Dtime-=usecond();
assert(Nc==3);
thread_loop( (int ss=0;ss<grid->oSites();ss+=LLs),{ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0;v<LLs;v++){
int vp=(v+1)%LLs;
int vm=(v+LLs-1)%LLs;
spProj5m(hp,psi[ss+vp]);
spProj5p(hm,psi[ss+vm]);
if ( vp<=v ) rotate(hp,hp,1);
if ( vm>=v ) rotate(hm,hm,nsimd-1);
hp=0.5*hp;
hm=0.5*hm;
spRecon5m(fp,hp);
spRecon5p(fm,hm);
chi[ss+v] = d[v]*phi[ss+v];
chi[ss+v] = chi[ss+v] +u[v]*fp;
chi[ss+v] = chi[ss+v] +l[v]*fm;
}
#else
for(int v=0;v<LLs;v++){
vprefetch(psi[ss+v+LLs]);
int vp= (v==LLs-1) ? 0 : v+1;
int vm= (v==0 ) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(2)(0);
Simd hp_01 = psi[ss+vp]()(2)(1);
Simd hp_02 = psi[ss+vp]()(2)(2);
Simd hp_10 = psi[ss+vp]()(3)(0);
Simd hp_11 = psi[ss+vp]()(3)(1);
Simd hp_12 = psi[ss+vp]()(3)(2);
Simd hm_00 = psi[ss+vm]()(0)(0);
Simd hm_01 = psi[ss+vm]()(0)(1);
Simd hm_02 = psi[ss+vm]()(0)(2);
Simd hm_10 = psi[ss+vm]()(1)(0);
Simd hm_11 = psi[ss+vm]()(1)(1);
Simd hm_12 = psi[ss+vm]()(1)(2);
if ( vp<=v ) {
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if ( vm>=v ) {
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
// Can force these to real arithmetic and save 2x.
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_12);
vstream(chi[ss+v]()(0)(0),p_00);
vstream(chi[ss+v]()(0)(1),p_01);
vstream(chi[ss+v]()(0)(2),p_02);
vstream(chi[ss+v]()(1)(0),p_10);
vstream(chi[ss+v]()(1)(1),p_11);
vstream(chi[ss+v]()(1)(2),p_12);
vstream(chi[ss+v]()(2)(0),p_20);
vstream(chi[ss+v]()(2)(1),p_21);
vstream(chi[ss+v]()(2)(2),p_22);
vstream(chi[ss+v]()(3)(0),p_30);
vstream(chi[ss+v]()(3)(1),p_31);
vstream(chi[ss+v]()(3)(2),p_32);
}
#endif
});
M5Dtime+=usecond();
}
template<class Impl>
void
CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
const FermionField &phi_i,
FermionField &chi_i,
Vector<Coeff_t> &lower,
Vector<Coeff_t> &diag,
Vector<Coeff_t> &upper)
{
EnableIf<Impl::LsVectorised&&EnableBool,int> sfinae=0;
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
auto psi=psi_i.View();
auto phi=phi_i.View();
auto chi=chi_i.View();
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
int nsimd= Simd::Nsimd();
Vector<iSinglet<Simd> > u(LLs);
Vector<iSinglet<Simd> > l(LLs);
Vector<iSinglet<Simd> > d(LLs);
assert(Ls/LLs==nsimd);
assert(phi.Checkerboard() == psi.Checkerboard());
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type * u_p = (scalar_type *)&u[0];
scalar_type * l_p = (scalar_type *)&l[0];
scalar_type * d_p = (scalar_type *)&d[0];
for(int o=0;o<LLs;o++){ // outer
for(int i=0;i<nsimd;i++){ //inner
int s = o+i*LLs;
int ss = o*nsimd+i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
M5Dcalls++;
M5Dtime-=usecond();
thread_loop( (int ss=0;ss<grid->oSites();ss+=LLs),{ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0;v<LLs;v++){
int vp=(v+1)%LLs;
int vm=(v+LLs-1)%LLs;
spProj5p(hp,psi[ss+vp]);
spProj5m(hm,psi[ss+vm]);
if ( vp<=v ) rotate(hp,hp,1);
if ( vm>=v ) rotate(hm,hm,nsimd-1);
hp=hp*0.5;
hm=hm*0.5;
spRecon5p(fp,hp);
spRecon5m(fm,hm);
chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
chi[ss+v] = chi[ss+v] +l[v]*fm;
}
#else
for(int v=0;v<LLs;v++){
vprefetch(psi[ss+v+LLs]);
int vp= (v==LLs-1) ? 0 : v+1;
int vm= (v==0 ) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(0)(0);
Simd hp_01 = psi[ss+vp]()(0)(1);
Simd hp_02 = psi[ss+vp]()(0)(2);
Simd hp_10 = psi[ss+vp]()(1)(0);
Simd hp_11 = psi[ss+vp]()(1)(1);
Simd hp_12 = psi[ss+vp]()(1)(2);
Simd hm_00 = psi[ss+vm]()(2)(0);
Simd hm_01 = psi[ss+vm]()(2)(1);
Simd hm_02 = psi[ss+vm]()(2)(2);
Simd hm_10 = psi[ss+vm]()(3)(0);
Simd hm_11 = psi[ss+vm]()(3)(1);
Simd hm_12 = psi[ss+vm]()(3)(2);
if ( vp<=v ) {
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if ( vm>=v ) {
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_12);
vstream(chi[ss+v]()(0)(0),p_00);
vstream(chi[ss+v]()(0)(1),p_01);
vstream(chi[ss+v]()(0)(2),p_02);
vstream(chi[ss+v]()(1)(0),p_10);
vstream(chi[ss+v]()(1)(1),p_11);
vstream(chi[ss+v]()(1)(2),p_12);
vstream(chi[ss+v]()(2)(0),p_20);
vstream(chi[ss+v]()(2)(1),p_21);
vstream(chi[ss+v]()(2)(2),p_22);
vstream(chi[ss+v]()(3)(0),p_30);
vstream(chi[ss+v]()(3)(1),p_31);
vstream(chi[ss+v]()(3)(2),p_32);
}
#endif
});
M5Dtime+=usecond();
}
#ifdef AVX512
#include <simd/Intel512common.h>
#include <simd/Intel512avx.h>
#include <simd/Intel512single.h>
#endif
template<class Impl>
void
CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi_i, FermionField &chi_i,
int LLs, int site,
Vector<iSinglet<Simd> > &Matp,
Vector<iSinglet<Simd> > &Matm)
{
EnableIf<Impl::LsVectorised&&EnableBool,int> sfinae=0;
auto psi = psi_i.View();
auto chi = chi_i.View();
#ifndef AVX512
{
SiteHalfSpinor BcastP;
SiteHalfSpinor BcastM;
SiteHalfSpinor SiteChiP;
SiteHalfSpinor SiteChiM;
// Ls*Ls * 2 * 12 * vol flops
for(int s1=0;s1<LLs;s1++){
for(int s2=0;s2<LLs;s2++){
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
int s=s2+l*LLs;
int lex=s2+LLs*site;
if ( s2==0 && l==0) {
SiteChiP=Zero();
SiteChiM=Zero();
}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vbroadcast(BcastP()(sp )(co),psi[lex]()(sp)(co),l);
}}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vbroadcast(BcastM()(sp )(co),psi[lex]()(sp+2)(co),l);
}}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
SiteChiP()(sp)(co)=real_madd(Matp[LLs*s+s1]()()(),BcastP()(sp)(co),SiteChiP()(sp)(co)); // 1100 us.
SiteChiM()(sp)(co)=real_madd(Matm[LLs*s+s1]()()(),BcastM()(sp)(co),SiteChiM()(sp)(co)); // each found by commenting out
}}
}}
{
int lex = s1+LLs*site;
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
}}
}
}
}
#else
{
// pointers
// MASK_REGS;
#define Chi_00 %%zmm1
#define Chi_01 %%zmm2
#define Chi_02 %%zmm3
#define Chi_10 %%zmm4
#define Chi_11 %%zmm5
#define Chi_12 %%zmm6
#define Chi_20 %%zmm7
#define Chi_21 %%zmm8
#define Chi_22 %%zmm9
#define Chi_30 %%zmm10
#define Chi_31 %%zmm11
#define Chi_32 %%zmm12
#define BCAST0 %%zmm13
#define BCAST1 %%zmm14
#define BCAST2 %%zmm15
#define BCAST3 %%zmm16
#define BCAST4 %%zmm17
#define BCAST5 %%zmm18
#define BCAST6 %%zmm19
#define BCAST7 %%zmm20
#define BCAST8 %%zmm21
#define BCAST9 %%zmm22
#define BCAST10 %%zmm23
#define BCAST11 %%zmm24
int incr=LLs*LLs*sizeof(iSinglet<Simd>);
for(int s1=0;s1<LLs;s1++){
for(int s2=0;s2<LLs;s2++){
int lex=s2+LLs*site;
uint64_t a0 = (uint64_t)&Matp[LLs*s2+s1]; // should be cacheable
uint64_t a1 = (uint64_t)&Matm[LLs*s2+s1];
uint64_t a2 = (uint64_t)&psi[lex];
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
if ( (s2+l)==0 ) {
asm (
VPREFETCH1(0,%2) VPREFETCH1(0,%1)
VPREFETCH1(12,%2) VPREFETCH1(13,%2)
VPREFETCH1(14,%2) VPREFETCH1(15,%2)
VBCASTCDUP(0,%2,BCAST0)
VBCASTCDUP(1,%2,BCAST1)
VBCASTCDUP(2,%2,BCAST2)
VBCASTCDUP(3,%2,BCAST3)
VBCASTCDUP(4,%2,BCAST4) VMULMEM (0,%0,BCAST0,Chi_00)
VBCASTCDUP(5,%2,BCAST5) VMULMEM (0,%0,BCAST1,Chi_01)
VBCASTCDUP(6,%2,BCAST6) VMULMEM (0,%0,BCAST2,Chi_02)
VBCASTCDUP(7,%2,BCAST7) VMULMEM (0,%0,BCAST3,Chi_10)
VBCASTCDUP(8,%2,BCAST8) VMULMEM (0,%0,BCAST4,Chi_11)
VBCASTCDUP(9,%2,BCAST9) VMULMEM (0,%0,BCAST5,Chi_12)
VBCASTCDUP(10,%2,BCAST10) VMULMEM (0,%1,BCAST6,Chi_20)
VBCASTCDUP(11,%2,BCAST11) VMULMEM (0,%1,BCAST7,Chi_21)
VMULMEM (0,%1,BCAST8,Chi_22)
VMULMEM (0,%1,BCAST9,Chi_30)
VMULMEM (0,%1,BCAST10,Chi_31)
VMULMEM (0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
} else {
asm (
VBCASTCDUP(0,%2,BCAST0) VMADDMEM (0,%0,BCAST0,Chi_00)
VBCASTCDUP(1,%2,BCAST1) VMADDMEM (0,%0,BCAST1,Chi_01)
VBCASTCDUP(2,%2,BCAST2) VMADDMEM (0,%0,BCAST2,Chi_02)
VBCASTCDUP(3,%2,BCAST3) VMADDMEM (0,%0,BCAST3,Chi_10)
VBCASTCDUP(4,%2,BCAST4) VMADDMEM (0,%0,BCAST4,Chi_11)
VBCASTCDUP(5,%2,BCAST5) VMADDMEM (0,%0,BCAST5,Chi_12)
VBCASTCDUP(6,%2,BCAST6) VMADDMEM (0,%1,BCAST6,Chi_20)
VBCASTCDUP(7,%2,BCAST7) VMADDMEM (0,%1,BCAST7,Chi_21)
VBCASTCDUP(8,%2,BCAST8) VMADDMEM (0,%1,BCAST8,Chi_22)
VBCASTCDUP(9,%2,BCAST9) VMADDMEM (0,%1,BCAST9,Chi_30)
VBCASTCDUP(10,%2,BCAST10) VMADDMEM (0,%1,BCAST10,Chi_31)
VBCASTCDUP(11,%2,BCAST11) VMADDMEM (0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
}
a0 = a0+incr;
a1 = a1+incr;
a2 = a2+sizeof(typename Simd::scalar_type);
}}
{
int lexa = s1+LLs*site;
asm (
VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01) VSTORE(2 ,%0,Chi_02)
VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11) VSTORE(5 ,%0,Chi_12)
VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21) VSTORE(8 ,%0,Chi_22)
VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31) VSTORE(11,%0,Chi_32)
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
}
}
}
#undef Chi_00
#undef Chi_01
#undef Chi_02
#undef Chi_10
#undef Chi_11
#undef Chi_12
#undef Chi_20
#undef Chi_21
#undef Chi_22
#undef Chi_30
#undef Chi_31
#undef Chi_32
#undef BCAST0
#undef BCAST1
#undef BCAST2
#undef BCAST3
#undef BCAST4
#undef BCAST5
#undef BCAST6
#undef BCAST7
#undef BCAST8
#undef BCAST9
#undef BCAST10
#undef BCAST11
#endif
};
// Z-mobius version
template<class Impl>
void
CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi_i, FermionField &chi_i,
int LLs, int site, Vector<iSinglet<Simd> > &Matp, Vector<iSinglet<Simd> > &Matm)
{
EnableIf<Impl::LsVectorised,int> sfinae=0;
#ifndef AVX512
{
auto psi = psi_i.View();
auto chi = chi_i.View();
SiteHalfSpinor BcastP;
SiteHalfSpinor BcastM;
SiteHalfSpinor SiteChiP;
SiteHalfSpinor SiteChiM;
// Ls*Ls * 2 * 12 * vol flops
for(int s1=0;s1<LLs;s1++){
for(int s2=0;s2<LLs;s2++){
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
int s=s2+l*LLs;
int lex=s2+LLs*site;
if ( s2==0 && l==0) {
SiteChiP=Zero();
SiteChiM=Zero();
}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vbroadcast(BcastP()(sp )(co),psi[lex]()(sp)(co),l);
}}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vbroadcast(BcastM()(sp )(co),psi[lex]()(sp+2)(co),l);
}}
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
SiteChiP()(sp)(co)=SiteChiP()(sp)(co)+ Matp[LLs*s+s1]()()()*BcastP()(sp)(co);
SiteChiM()(sp)(co)=SiteChiM()(sp)(co)+ Matm[LLs*s+s1]()()()*BcastM()(sp)(co);
}}
}}
{
int lex = s1+LLs*site;
for(int sp=0;sp<2;sp++){
for(int co=0;co<Nc;co++){
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
}}
}
}
}
#else
{
auto psi = psi_i.View();
auto chi = chi_i.View();
// pointers
// MASK_REGS;
#define Chi_00 %zmm0
#define Chi_01 %zmm1
#define Chi_02 %zmm2
#define Chi_10 %zmm3
#define Chi_11 %zmm4
#define Chi_12 %zmm5
#define Chi_20 %zmm6
#define Chi_21 %zmm7
#define Chi_22 %zmm8
#define Chi_30 %zmm9
#define Chi_31 %zmm10
#define Chi_32 %zmm11
#define pChi_00 %%zmm0
#define pChi_01 %%zmm1
#define pChi_02 %%zmm2
#define pChi_10 %%zmm3
#define pChi_11 %%zmm4
#define pChi_12 %%zmm5
#define pChi_20 %%zmm6
#define pChi_21 %%zmm7
#define pChi_22 %%zmm8
#define pChi_30 %%zmm9
#define pChi_31 %%zmm10
#define pChi_32 %%zmm11
#define BCAST_00 %zmm12
#define SHUF_00 %zmm13
#define BCAST_01 %zmm14
#define SHUF_01 %zmm15
#define BCAST_02 %zmm16
#define SHUF_02 %zmm17
#define BCAST_10 %zmm18
#define SHUF_10 %zmm19
#define BCAST_11 %zmm20
#define SHUF_11 %zmm21
#define BCAST_12 %zmm22
#define SHUF_12 %zmm23
#define Mp %zmm24
#define Mps %zmm25
#define Mm %zmm26
#define Mms %zmm27
#define N 8
int incr=LLs*LLs*sizeof(iSinglet<Simd>);
for(int s1=0;s1<LLs;s1++){
for(int s2=0;s2<LLs;s2++){
int lex=s2+LLs*site;
uint64_t a0 = (uint64_t)&Matp[LLs*s2+s1]; // should be cacheable
uint64_t a1 = (uint64_t)&Matm[LLs*s2+s1];
uint64_t a2 = (uint64_t)&psi[lex];
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
if ( (s2+l)==0 ) {
LOAD64(%r8,a0);
LOAD64(%r9,a1);
LOAD64(%r10,a2);
asm (
VLOAD(0,%r8,Mp)// i r
VLOAD(0,%r9,Mm)
VSHUF(Mp,Mps) // r i
VSHUF(Mm,Mms)
VPREFETCH1(12,%r10) VPREFETCH1(13,%r10)
VPREFETCH1(14,%r10) VPREFETCH1(15,%r10)
VMULIDUP(0*N,%r10,Mps,Chi_00)
VMULIDUP(1*N,%r10,Mps,Chi_01)
VMULIDUP(2*N,%r10,Mps,Chi_02)
VMULIDUP(3*N,%r10,Mps,Chi_10)
VMULIDUP(4*N,%r10,Mps,Chi_11)
VMULIDUP(5*N,%r10,Mps,Chi_12)
VMULIDUP(6*N ,%r10,Mms,Chi_20)
VMULIDUP(7*N ,%r10,Mms,Chi_21)
VMULIDUP(8*N ,%r10,Mms,Chi_22)
VMULIDUP(9*N ,%r10,Mms,Chi_30)
VMULIDUP(10*N,%r10,Mms,Chi_31)
VMULIDUP(11*N,%r10,Mms,Chi_32)
VMADDSUBRDUP(0*N,%r10,Mp,Chi_00)
VMADDSUBRDUP(1*N,%r10,Mp,Chi_01)
VMADDSUBRDUP(2*N,%r10,Mp,Chi_02)
VMADDSUBRDUP(3*N,%r10,Mp,Chi_10)
VMADDSUBRDUP(4*N,%r10,Mp,Chi_11)
VMADDSUBRDUP(5*N,%r10,Mp,Chi_12)
VMADDSUBRDUP(6*N ,%r10,Mm,Chi_20)
VMADDSUBRDUP(7*N ,%r10,Mm,Chi_21)
VMADDSUBRDUP(8*N ,%r10,Mm,Chi_22)
VMADDSUBRDUP(9*N ,%r10,Mm,Chi_30)
VMADDSUBRDUP(10*N,%r10,Mm,Chi_31)
VMADDSUBRDUP(11*N,%r10,Mm,Chi_32)
);
} else {
LOAD64(%r8,a0);
LOAD64(%r9,a1);
LOAD64(%r10,a2);
asm (
VLOAD(0,%r8,Mp)
VSHUF(Mp,Mps)
VLOAD(0,%r9,Mm)
VSHUF(Mm,Mms)
VMADDSUBIDUP(0*N,%r10,Mps,Chi_00) // Mri * Pii +- Cir
VMADDSUBIDUP(1*N,%r10,Mps,Chi_01)
VMADDSUBIDUP(2*N,%r10,Mps,Chi_02)
VMADDSUBIDUP(3*N,%r10,Mps,Chi_10)
VMADDSUBIDUP(4*N,%r10,Mps,Chi_11)
VMADDSUBIDUP(5*N,%r10,Mps,Chi_12)
VMADDSUBIDUP(6 *N,%r10,Mms,Chi_20)
VMADDSUBIDUP(7 *N,%r10,Mms,Chi_21)
VMADDSUBIDUP(8 *N,%r10,Mms,Chi_22)
VMADDSUBIDUP(9 *N,%r10,Mms,Chi_30)
VMADDSUBIDUP(10*N,%r10,Mms,Chi_31)
VMADDSUBIDUP(11*N,%r10,Mms,Chi_32)
VMADDSUBRDUP(0*N,%r10,Mp,Chi_00) // Cir = Mir * Prr +- ( Mri * Pii +- Cir)
VMADDSUBRDUP(1*N,%r10,Mp,Chi_01) // Ci = MiPr + Ci + MrPi ; Cr = MrPr - ( MiPi - Cr)
VMADDSUBRDUP(2*N,%r10,Mp,Chi_02)
VMADDSUBRDUP(3*N,%r10,Mp,Chi_10)
VMADDSUBRDUP(4*N,%r10,Mp,Chi_11)
VMADDSUBRDUP(5*N,%r10,Mp,Chi_12)
VMADDSUBRDUP(6 *N,%r10,Mm,Chi_20)
VMADDSUBRDUP(7 *N,%r10,Mm,Chi_21)
VMADDSUBRDUP(8 *N,%r10,Mm,Chi_22)
VMADDSUBRDUP(9 *N,%r10,Mm,Chi_30)
VMADDSUBRDUP(10*N,%r10,Mm,Chi_31)
VMADDSUBRDUP(11*N,%r10,Mm,Chi_32)
);
}
a0 = a0+incr;
a1 = a1+incr;
a2 = a2+sizeof(typename Simd::scalar_type);
}}
{
int lexa = s1+LLs*site;
/*
SiteSpinor tmp;
asm (
VSTORE(0,%0,pChi_00) VSTORE(1 ,%0,pChi_01) VSTORE(2 ,%0,pChi_02)
VSTORE(3,%0,pChi_10) VSTORE(4 ,%0,pChi_11) VSTORE(5 ,%0,pChi_12)
VSTORE(6,%0,pChi_20) VSTORE(7 ,%0,pChi_21) VSTORE(8 ,%0,pChi_22)
VSTORE(9,%0,pChi_30) VSTORE(10,%0,pChi_31) VSTORE(11,%0,pChi_32)
: : "r" ((uint64_t)&tmp) : "memory" );
*/
asm (
VSTORE(0,%0,pChi_00) VSTORE(1 ,%0,pChi_01) VSTORE(2 ,%0,pChi_02)
VSTORE(3,%0,pChi_10) VSTORE(4 ,%0,pChi_11) VSTORE(5 ,%0,pChi_12)
VSTORE(6,%0,pChi_20) VSTORE(7 ,%0,pChi_21) VSTORE(8 ,%0,pChi_22)
VSTORE(9,%0,pChi_30) VSTORE(10,%0,pChi_31) VSTORE(11,%0,pChi_32)
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
// if ( 1 || (site==0) ) {
// std::cout<<site << " s1 "<<s1<<"\n\t"<<tmp << "\n't" << chi[lexa] <<"\n\t"<<tmp-chi[lexa]<<std::endl;
// }
}
}
}
#undef Chi_00
#undef Chi_01
#undef Chi_02
#undef Chi_10
#undef Chi_11
#undef Chi_12
#undef Chi_20
#undef Chi_21
#undef Chi_22
#undef Chi_30
#undef Chi_31
#undef Chi_32
#undef BCAST0
#undef BCAST1
#undef BCAST2
#undef BCAST3
#undef BCAST4
#undef BCAST5
#undef BCAST6
#undef BCAST7
#undef BCAST8
#undef BCAST9
#undef BCAST10
#undef BCAST11
#endif
};
template<class Impl>
void
CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
{
EnableIf<Impl::LsVectorised,int> sfinae=0;
chi.Checkerboard()=psi.Checkerboard();
int Ls=this->Ls;
int LLs = psi.Grid()->_rdimensions[0];
int vol = psi.Grid()->oSites()/LLs;
Vector<iSinglet<Simd> > Matp;
Vector<iSinglet<Simd> > Matm;
Vector<iSinglet<Simd> > *_Matp;
Vector<iSinglet<Simd> > *_Matm;
// MooeeInternalCompute(dag,inv,Matp,Matm);
if ( inv && dag ) {
_Matp = &MatpInvDag;
_Matm = &MatmInvDag;
}
if ( inv && (!dag) ) {
_Matp = &MatpInv;
_Matm = &MatmInv;
}
if ( !inv ) {
MooeeInternalCompute(dag,inv,Matp,Matm);
_Matp = &Matp;
_Matm = &Matm;
}
assert(_Matp->size()==Ls*LLs);
MooeeInvCalls++;
MooeeInvTime-=usecond();
if ( switcheroo<Coeff_t>::iscomplex() ) {
thread_loop( (auto site=0;site<vol;site++),{
MooeeInternalZAsm(psi,chi,LLs,site,*_Matp,*_Matm);
});
} else {
thread_loop( (auto site=0;site<vol;site++),{
MooeeInternalAsm(psi,chi,LLs,site,*_Matp,*_Matm);
});
}
MooeeInvTime+=usecond();
}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,321 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/ContinuedFractionFermion5D.h>
#pragma once
NAMESPACE_BEGIN(Grid);
template<class Impl>
void ContinuedFractionFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale)
{
SetCoefficientsZolotarev(1.0/scale,zdata);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata)
{
// How to check Ls matches??
// std::cout<<GridLogMessage << Ls << " Ls"<<std::endl;
// std::cout<<GridLogMessage << zdata->n << " - n"<<std::endl;
// std::cout<<GridLogMessage << zdata->da << " -da "<<std::endl;
// std::cout<<GridLogMessage << zdata->db << " -db"<<std::endl;
// std::cout<<GridLogMessage << zdata->dn << " -dn"<<std::endl;
// std::cout<<GridLogMessage << zdata->dd << " -dd"<<std::endl;
int Ls = this->Ls;
assert(zdata->db==Ls);// Beta has Ls coeffs
R=(1+this->mass)/(1-this->mass);
Beta.resize(Ls);
cc.resize(Ls);
cc_d.resize(Ls);
sqrt_cc.resize(Ls);
for(int i=0; i < Ls ; i++){
Beta[i] = zdata -> beta[i];
cc[i] = 1.0/Beta[i];
cc_d[i]=std::sqrt(cc[i]);
}
cc_d[Ls-1]=1.0;
for(int i=0; i < Ls-1 ; i++){
sqrt_cc[i]= std::sqrt(cc[i]*cc[i+1]);
}
sqrt_cc[Ls-2]=std::sqrt(cc[Ls-2]);
ZoloHiInv =1.0/zolo_hi;
dw_diag = (4.0-this->M5)*ZoloHiInv;
See.resize(Ls);
Aee.resize(Ls);
int sign=1;
for(int s=0;s<Ls;s++){
Aee[s] = sign * Beta[s] * dw_diag;
sign = - sign;
}
Aee[Ls-1] += R;
See[0] = Aee[0];
for(int s=1;s<Ls;s++){
See[s] = Aee[s] - 1.0/See[s-1];
}
for(int s=0;s<Ls;s++){
std::cout<<GridLogMessage <<"s = "<<s<<" Beta "<<Beta[s]<<" Aee "<<Aee[s] <<" See "<<See[s] <<std::endl;
}
}
template<class Impl>
RealD ContinuedFractionFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;
FermionField D(psi.Grid());
this->DW(psi,D,DaggerNo);
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==0 ) {
ag5xpby_ssp(chi,cc[0]*Beta[0]*sign*ZoloHiInv,D,sqrt_cc[0],psi,s,s+1); // Multiplies Dw by G5 so Hw
} else if ( s==(Ls-1) ){
RealD R=(1.0+mass)/(1.0-mass);
ag5xpby_ssp(chi,Beta[s]*ZoloHiInv,D,sqrt_cc[s-1],psi,s,s-1);
ag5xpby_ssp(chi,R,psi,1.0,chi,s,s);
} else {
ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*ZoloHiInv,D,sqrt_cc[s],psi,s,s+1);
axpby_ssp(chi,1.0,chi,sqrt_cc[s-1],psi,s,s-1);
}
sign=-sign;
}
return norm2(chi);
}
template<class Impl>
RealD ContinuedFractionFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
{
// This matrix is already hermitian. (g5 Dw) = Dw dag g5 = (g5 Dw)dag
// The rest of matrix is symmetric.
// Can ignore "dag"
return M(psi,chi);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
int Ls = this->Ls;
this->DhopDir(psi,chi,dir,disp); // Dslash on diagonal. g5 Dslash is hermitian
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(chi,Beta[s]*ZoloHiInv,chi,0.0,chi,s,s);
} else {
ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*ZoloHiInv,chi,0.0,chi,s,s);
}
sign=-sign;
}
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;
// Apply 4d dslash
if ( psi.Checkerboard() == Odd ) {
this->DhopEO(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
} else {
this->DhopOE(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
}
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(chi,Beta[s]*ZoloHiInv,chi,0.0,chi,s,s);
} else {
ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*ZoloHiInv,chi,0.0,chi,s,s);
}
sign=-sign;
}
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
{
this->Meooe(psi,chi);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==0 ) {
ag5xpby_ssp(chi,cc[0]*Beta[0]*sign*dw_diag,psi,sqrt_cc[0],psi,s,s+1); // Multiplies Dw by G5 so Hw
} else if ( s==(Ls-1) ){
// Drop the CC here.
double R=(1+mass)/(1-mass);
ag5xpby_ssp(chi,Beta[s]*dw_diag,psi,sqrt_cc[s-1],psi,s,s-1);
ag5xpby_ssp(chi,R,psi,1.0,chi,s,s);
} else {
ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*dw_diag,psi,sqrt_cc[s],psi,s,s+1);
axpby_ssp(chi,1.0,chi,sqrt_cc[s-1],psi,s,s-1);
}
sign=-sign;
}
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
{
this->Mooee(psi,chi);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;
// Apply Linv
axpby_ssp(chi,1.0/cc_d[0],psi,0.0,psi,0,0);
for(int s=1;s<Ls;s++){
axpbg5y_ssp(chi,1.0/cc_d[s],psi,-1.0/See[s-1],chi,s,s-1);
}
// Apply Dinv
for(int s=0;s<Ls;s++){
ag5xpby_ssp(chi,1.0/See[s],chi,0.0,chi,s,s); //only appearance of See[0]
}
// Apply Uinv = (Linv)^T
axpby_ssp(chi,1.0/cc_d[Ls-1],chi,0.0,chi,Ls-1,Ls-1);
for(int s=Ls-2;s>=0;s--){
axpbg5y_ssp(chi,1.0/cc_d[s],chi,-1.0*cc_d[s+1]/See[s]/cc_d[s],chi,s,s+1);
}
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
{
this->MooeeInv(psi,chi);
}
// force terms; five routines; default to Dhop on diagonal
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V.Grid());
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(D,Beta[s]*ZoloHiInv,U,0.0,U,s,s);
} else {
ag5xpby_ssp(D,cc[s]*Beta[s]*sign*ZoloHiInv,U,0.0,U,s,s);
}
sign=-sign;
}
this->DhopDeriv(mat,D,V,DaggerNo);
};
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V.Grid());
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(D,Beta[s]*ZoloHiInv,U,0.0,U,s,s);
} else {
ag5xpby_ssp(D,cc[s]*Beta[s]*sign*ZoloHiInv,U,0.0,U,s,s);
}
sign=-sign;
}
this->DhopDerivOE(mat,D,V,DaggerNo);
};
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V.Grid());
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(D,Beta[s]*ZoloHiInv,U,0.0,U,s,s);
} else {
ag5xpby_ssp(D,cc[s]*Beta[s]*sign*ZoloHiInv,U,0.0,U,s,s);
}
sign=-sign;
}
this->DhopDerivEO(mat,D,V,DaggerNo);
};
// Constructors
template<class Impl>
ContinuedFractionFermion5D<Impl>::ContinuedFractionFermion5D(
GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5,const ImplParams &p) :
WilsonFermion5D<Impl>(_Umu,
FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid,M5,p),
mass(_mass)
{
int Ls = this->Ls;
assert((Ls&0x1)==1); // Odd Ls required
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d)
{
int Ls = this->Ls;
conformable(solution5d.Grid(),this->FermionGrid());
conformable(exported4d.Grid(),this->GaugeGrid());
ExtractSlice(exported4d, solution5d, Ls-1, Ls-1);
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d)
{
int Ls = this->Ls;
conformable(imported5d.Grid(),this->FermionGrid());
conformable(input4d.Grid() ,this->GaugeGrid());
FermionField tmp(this->FermionGrid());
tmp=Zero();
InsertSlice(input4d, tmp, Ls-1, Ls-1);
tmp=Gamma(Gamma::Algebra::Gamma5)*tmp;
this->Dminus(tmp,imported5d);
}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,227 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
NAMESPACE_BEGIN(Grid);
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus fowards
// Pplus backwards..
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i,
Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
int Ls = this->Ls;
GridBase* grid = psi_i.Grid();
auto phi = phi_i.View();
auto psi = psi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
auto nloop=grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
auto ss=sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
for(int s=0; s<Ls; s++){
spinor tmp1, tmp2;
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5m(tmp1, psi(idx_u));
spProj5p(tmp2, psi(idx_l));
coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
}
});
this->M5Dtime += usecond();
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i,
Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
int Ls = this->Ls;
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
auto nloop=grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
typedef decltype(coalescedRead(psi[0])) spinor;
auto ss=sss*Ls;
for(int s=0; s<Ls; s++){
spinor tmp1, tmp2;
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5p(tmp1, psi(idx_u));
spProj5m(tmp2, psi(idx_l));
coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
}
});
this->M5Dtime += usecond();
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi_i, FermionField& chi_i)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
auto psi=psi_i.View();
auto chi=chi_i.View();
int Ls = this->Ls;
auto plee = & this->lee[0];
auto pdee = & this->dee[0];
auto puee = & this->uee[0];
auto pleem = & this->leem[0];
auto pueem = & this->ueem[0];
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
uint64_t nloop=grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
auto ss=sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1,tmp2;
// flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops
// Apply (L^{\prime})^{-1}
coalescedWrite(chi[ss],psi(ss)); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
spProj5p(tmp1, chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s) - plee[s-1]*tmp1);
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp1, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pleem[s]*tmp1);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pueem[s]/pdee[Ls])*tmp1);
}
spProj5m(tmp2, chi(ss+Ls-1));
coalescedWrite(chi[ss+Ls-1],(1.0/pdee[Ls])*tmp1 + (1.0/pdee[Ls-1])*tmp2);
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
spProj5m(tmp1, chi(ss+s+1));
coalescedWrite(chi[ss+s], chi(ss+s) - puee[s]*tmp1);
}
});
this->MooeeInvTime += usecond();
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi_i, FermionField& chi_i)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
auto psi = psi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls;
assert(psi.Checkerboard() == psi.Checkerboard());
Vector<Coeff_t> ueec(Ls);
Vector<Coeff_t> deec(Ls+1);
Vector<Coeff_t> leec(Ls);
Vector<Coeff_t> ueemc(Ls);
Vector<Coeff_t> leemc(Ls);
for(int s=0; s<ueec.size(); s++){
ueec[s] = conjugate(this->uee[s]);
deec[s] = conjugate(this->dee[s]);
leec[s] = conjugate(this->lee[s]);
ueemc[s] = conjugate(this->ueem[s]);
leemc[s] = conjugate(this->leem[s]);
}
deec[Ls] = conjugate(this->dee[Ls]);
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
auto nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1,tmp2;
auto ss=sss*Ls;
// Apply (U^{\prime})^{-dagger}
coalescedWrite(chi[ss], psi(ss));
for(int s=1; s<Ls; s++){
spProj5m(tmp1, chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s) - ueec[s-1]*tmp1);
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp1, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - ueemc[s]*tmp1);
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+s] ,(1.0/deec[s])*chi(ss+s) - (leemc[s]/deec[Ls-1])*tmp1);
}
spProj5p(tmp2, chi(ss+Ls-1));
coalescedWrite(chi[ss+Ls-1], (1.0/deec[Ls-1])*tmp1 + (1.0/deec[Ls])*tmp2);
// Apply L^{-dagger}
for(int s=Ls-2; s>=0; s--){
spProj5p(tmp1, chi(ss+s+1));
coalescedWrite(chi[ss+s],chi(ss+s) - leec[s]*tmp1);
}
});
this->MooeeInvTime += usecond();
}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,321 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
NAMESPACE_BEGIN(Grid);
template<class Impl>
DomainWallEOFAFermion<Impl>::DomainWallEOFAFermion(
GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3,
RealD _shift, int _pm, RealD _M5, const ImplParams &p) :
AbstractEOFAFermion<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3,
_shift, _pm, _M5, 1.0, 0.0, p)
{
RealD eps = 1.0;
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);
assert(zdata->n == this->Ls);
std::cout << GridLogMessage << "DomainWallEOFAFermion with Ls=" << this->Ls << std::endl;
this->SetCoefficientsTanh(zdata, 1.0, 0.0);
Approx::zolotarev_free(zdata);
}
/***************************************************************
* Additional EOFA operators only called outside the inverter.
* Since speed is not essential, simple axpby-style
* implementations should be fine.
***************************************************************/
template<class Impl>
void DomainWallEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din, int sign, int dag)
{
int Ls = this->Ls;
Din = Zero();
if((sign == 1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, Ls-1, 0); }
else if((sign == -1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); }
else if((sign == 1 ) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, Ls-1); }
else if((sign == -1) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); }
}
// This is just the identity for DWF
template<class Impl>
void DomainWallEOFAFermion<Impl>::Dtilde(const FermionField& psi, FermionField& chi){ chi = psi; }
// This is just the identity for DWF
template<class Impl>
void DomainWallEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& chi){ chi = psi; }
/*****************************************************************************************************/
template<class Impl>
RealD DomainWallEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
{
FermionField Din(psi.Grid());
this->Meooe5D(psi, Din);
this->DW(Din, chi, DaggerNo);
axpby(chi, 1.0, 1.0, chi, psi);
this->M5D(psi, chi);
return(norm2(chi));
}
template<class Impl>
RealD DomainWallEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
{
FermionField Din(psi.Grid());
this->DW(psi, Din, DaggerYes);
this->MeooeDag5D(Din, chi);
this->M5Ddag(psi, chi);
axpby(chi, 1.0, 1.0, chi, psi);
return(norm2(chi));
}
/********************************************************************
* Performance critical fermion operators called inside the inverter
********************************************************************/
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
int pm = this->pm;
RealD shift = this->shift;
RealD mq1 = this->mq1;
RealD mq2 = this->mq2;
RealD mq3 = this->mq3;
// coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} )
Coeff_t shiftp(0.0), shiftm(0.0);
if(shift != 0.0){
if(pm == 1){ shiftp = shift*(mq3-mq2); }
else{ shiftm = -shift*(mq3-mq2); }
}
Vector<Coeff_t> diag(Ls,1.0);
Vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftm;
Vector<Coeff_t> lower(Ls,-1.0); lower[0] = mq1 + shiftp;
#if(0)
std::cout << GridLogMessage << "DomainWallEOFAFermion::M5D(FF&,FF&):" << std::endl;
for(int i=0; i<diag.size(); ++i){
std::cout << GridLogMessage << "diag[" << i << "] =" << diag[i] << std::endl;
}
for(int i=0; i<upper.size(); ++i){
std::cout << GridLogMessage << "upper[" << i << "] =" << upper[i] << std::endl;
}
for(int i=0; i<lower.size(); ++i){
std::cout << GridLogMessage << "lower[" << i << "] =" << lower[i] << std::endl;
}
#endif
this->M5D(psi, chi, chi, lower, diag, upper);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
int pm = this->pm;
RealD shift = this->shift;
RealD mq1 = this->mq1;
RealD mq2 = this->mq2;
RealD mq3 = this->mq3;
// coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} )
Coeff_t shiftp(0.0), shiftm(0.0);
if(shift != 0.0){
if(pm == 1){ shiftp = shift*(mq3-mq2); }
else{ shiftm = -shift*(mq3-mq2); }
}
Vector<Coeff_t> diag(Ls,1.0);
Vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftp;
Vector<Coeff_t> lower(Ls,-1.0); lower[0] = mq1 + shiftm;
this->M5Ddag(psi, chi, chi, lower, diag, upper);
}
// half checkerboard operations
template<class Impl>
void DomainWallEOFAFermion<Impl>::Mooee(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
Vector<Coeff_t> diag = this->bee;
Vector<Coeff_t> upper(Ls);
Vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
upper[s] = -this->cee[s];
lower[s] = -this->cee[s];
}
upper[Ls-1] = this->dm;
lower[0] = this->dp;
this->M5D(psi, psi, chi, lower, diag, upper);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeDag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
Vector<Coeff_t> diag = this->bee;
Vector<Coeff_t> upper(Ls);
Vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
upper[s] = -this->cee[s];
lower[s] = -this->cee[s];
}
upper[Ls-1] = this->dp;
lower[0] = this->dm;
this->M5Ddag(psi, psi, chi, lower, diag, upper);
}
/****************************************************************************************/
//Zolo
template<class Impl>
void DomainWallEOFAFermion<Impl>::SetCoefficientsInternal(RealD zolo_hi, Vector<Coeff_t>& gamma, RealD b, RealD c)
{
int Ls = this->Ls;
int pm = this->pm;
RealD mq1 = this->mq1;
RealD mq2 = this->mq2;
RealD mq3 = this->mq3;
RealD shift = this->shift;
////////////////////////////////////////////////////////
// Constants for the preconditioned matrix Cayley form
////////////////////////////////////////////////////////
this->bs.resize(Ls);
this->cs.resize(Ls);
this->aee.resize(Ls);
this->aeo.resize(Ls);
this->bee.resize(Ls);
this->beo.resize(Ls);
this->cee.resize(Ls);
this->ceo.resize(Ls);
for(int i=0; i<Ls; ++i){
this->bee[i] = 4.0 - this->M5 + 1.0;
this->cee[i] = 1.0;
}
for(int i=0; i<Ls; ++i){
this->aee[i] = this->cee[i];
this->bs[i] = this->beo[i] = 1.0;
this->cs[i] = this->ceo[i] = 0.0;
}
//////////////////////////////////////////
// EOFA shift terms
//////////////////////////////////////////
if(pm == 1){
this->dp = mq1*this->cee[0] + shift*(mq3-mq2);
this->dm = mq1*this->cee[Ls-1];
} else if(this->pm == -1) {
this->dp = mq1*this->cee[0];
this->dm = mq1*this->cee[Ls-1] - shift*(mq3-mq2);
} else {
this->dp = mq1*this->cee[0];
this->dm = mq1*this->cee[Ls-1];
}
//////////////////////////////////////////
// LDU decomposition of eeoo
//////////////////////////////////////////
this->dee.resize(Ls+1);
this->lee.resize(Ls);
this->leem.resize(Ls);
this->uee.resize(Ls);
this->ueem.resize(Ls);
for(int i=0; i<Ls; ++i){
if(i < Ls-1){
this->lee[i] = -this->cee[i+1]/this->bee[i]; // sub-diag entry on the ith column
this->leem[i] = this->dm/this->bee[i];
for(int j=0; j<i; j++){ this->leem[i] *= this->aee[j]/this->bee[j]; }
this->dee[i] = this->bee[i];
this->uee[i] = -this->aee[i]/this->bee[i]; // up-diag entry on the ith row
this->ueem[i] = this->dp / this->bee[0];
for(int j=1; j<=i; j++){ this->ueem[i] *= this->cee[j]/this->bee[j]; }
} else {
this->lee[i] = 0.0;
this->leem[i] = 0.0;
this->uee[i] = 0.0;
this->ueem[i] = 0.0;
}
}
{
Coeff_t delta_d = 1.0 / this->bee[0];
for(int j=1; j<Ls-1; j++){ delta_d *= this->cee[j] / this->bee[j]; }
this->dee[Ls-1] = this->bee[Ls-1] + this->cee[0] * this->dm * delta_d;
this->dee[Ls] = this->bee[Ls-1] + this->cee[Ls-1] * this->dp * delta_d;
}
}
// Recompute Cayley-form coefficients for different shift
template<class Impl>
void DomainWallEOFAFermion<Impl>::RefreshShiftCoefficients(RealD new_shift)
{
this->shift = new_shift;
Approx::zolotarev_data *zdata = Approx::higham(1.0, this->Ls);
this->SetCoefficientsTanh(zdata, 1.0, 0.0);
}
NAMESPACE_END(Grid);

View File

@ -24,22 +24,17 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h>
#include <Grid/perfmon/PerfCount.h>
namespace Grid {
namespace QCD {
// S-direction is INNERMOST and takes no part in the parity.
const std::vector<int>
ImprovedStaggeredFermion5DStatic::directions({1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4});
const std::vector<int>
ImprovedStaggeredFermion5DStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, 3, 3, -3, -3, -3, -3});
#pragma once
// 5d lattice for DWF.
NAMESPACE_BEGIN(Grid);
// 5d lattice for DWF.
template<class Impl>
ImprovedStaggeredFermion5D<Impl>::ImprovedStaggeredFermion5D(GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
@ -53,9 +48,9 @@ ImprovedStaggeredFermion5D<Impl>::ImprovedStaggeredFermion5D(GridCartesian
_FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
_FourDimGrid (&FourDimGrid),
_FourDimRedBlackGrid(&FourDimRedBlackGrid),
Stencil (&FiveDimGrid,npoint,Even,directions,displacements),
StencilEven(&FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
StencilOdd (&FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd
Stencil (&FiveDimGrid,npoint,Even,directions,displacements,p),
StencilEven(&FiveDimRedBlackGrid,npoint,Even,directions,displacements,p), // source is Even
StencilOdd (&FiveDimRedBlackGrid,npoint,Odd ,directions,displacements,p), // source is Odd
mass(_mass),
c1(_c1),
c2(_c2),
@ -108,8 +103,8 @@ ImprovedStaggeredFermion5D<Impl>::ImprovedStaggeredFermion5D(GridCartesian
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
for(int d=0;d<4;d++){
assert(FourDimGrid._simd_layout[d]=1);
assert(FourDimRedBlackGrid._simd_layout[d]=1);
assert(FourDimGrid._simd_layout[d]==1);
assert(FourDimRedBlackGrid._simd_layout[d]==1);
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
}
@ -226,24 +221,27 @@ void ImprovedStaggeredFermion5D<Impl>::DhopDir(const FermionField &in, FermionFi
Compressor compressor;
Stencil.HaloExchange(in,compressor);
parallel_for(int ss=0;ss<Umu._grid->oSites();ss++){
auto Umu_v = Umu.View();
auto UUUmu_v = UUUmu.View();
auto in_v = in.View();
auto out_v = out.View();
thread_for( ss,Umu.Grid()->oSites(),{
for(int s=0;s<Ls;s++){
int sU=ss;
int sF = s+Ls*sU;
Kernels::DhopDir(Stencil, Umu, UUUmu, Stencil.CommBuf(), sF, sU, in, out, dir, disp);
Kernels::DhopDirKernel(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sF, sU, in_v, out_v, dir, disp);
}
}
});
};
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
DoubledGaugeField & UUU,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
DoubledGaugeField & U,
DoubledGaugeField & UUU,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
{
// No force terms in multi-rhs solver staggered
assert(0);
@ -251,18 +249,18 @@ void ImprovedStaggeredFermion5D<Impl>::DerivInternal(StencilImpl & st,
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DhopDeriv(GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
const FermionField &A,
const FermionField &B,
int dag)
{
assert(0);
}
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
const FermionField &A,
const FermionField &B,
int dag)
{
assert(0);
}
@ -270,9 +268,9 @@ void ImprovedStaggeredFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
const FermionField &A,
const FermionField &B,
int dag)
{
assert(0);
}
@ -301,8 +299,8 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl &
Compressor compressor;
int LLs = in._grid->_rdimensions[0];
int len = U._grid->oSites();
int LLs = in.Grid()->_rdimensions[0];
int len = U.Grid()->oSites();
DhopFaceTime-=usecond();
st.Prepare();
@ -328,7 +326,7 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl &
double start = usecond();
nthreads -= ncomms;
int ttid = tid - ncomms;
int n = U._grid->oSites(); // 4d vol
int n = U.Grid()->oSites(); // 4d vol
int chunk = n / nthreads;
int rem = n % nthreads;
int myblock, myn;
@ -341,17 +339,22 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl &
}
// do the compute
auto U_v = U.View();
auto UUU_v = UUU.View();
auto in_v = in.View();
auto out_v = out.View();
if (dag == DaggerYes) {
for (int ss = myblock; ss < myblock+myn; ++ss) {
int sU = ss;
// Interior = 1; Exterior = 0; must implement for staggered
Kernels::DhopSiteDag(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out,1,0); //<---------
Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,1,0); //<---------
}
} else {
for (int ss = myblock; ss < myblock+myn; ++ss) {
// Interior = 1; Exterior = 0;
int sU = ss;
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out,1,0); //<------------
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,1,0); //<------------
}
}
ptime = usecond() - start;
@ -372,18 +375,23 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl &
DhopFaceTime+=usecond();
DhopComputeTime2-=usecond();
auto U_v = U.View();
auto UUU_v = UUU.View();
auto in_v = in.View();
auto out_v = out.View();
if (dag == DaggerYes) {
int sz=st.surface_list.size();
parallel_for (int ss = 0; ss < sz; ss++) {
thread_for( ss,sz,{
int sU = st.surface_list[ss];
Kernels::DhopSiteDag(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out,0,1); //<----------
}
Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,0,1); //<----------
});
} else {
int sz=st.surface_list.size();
parallel_for (int ss = 0; ss < sz; ss++) {
thread_for( ss,sz,{
int sU = st.surface_list[ss];
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out,0,1);//<----------
}
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,0,1);//<----------
});
}
DhopComputeTime2+=usecond();
#else
@ -398,7 +406,7 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
const FermionField &in, FermionField &out,int dag)
{
Compressor compressor;
int LLs = in._grid->_rdimensions[0];
int LLs = in.Grid()->_rdimensions[0];
@ -410,16 +418,20 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
DhopComputeTime -= usecond();
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
auto U_v = U.View();
auto UUU_v = UUU.View();
auto in_v = in.View();
auto out_v = out.View();
if (dag == DaggerYes) {
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
thread_for( ss,U.Grid()->oSites(),{
int sU=ss;
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), LLs, sU,in, out);
}
Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), LLs, sU,in_v, out_v);
});
} else {
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
thread_for( ss,U.Grid()->oSites(),{
int sU=ss;
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out);
}
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v);
});
}
DhopComputeTime += usecond();
DhopTotalTime += usecond();
@ -432,50 +444,17 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
}
/*CHANGE END*/
/* ORG
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
DoubledGaugeField & U,DoubledGaugeField & UUU,
const FermionField &in, FermionField &out,int dag)
{
Compressor compressor;
int LLs = in._grid->_rdimensions[0];
DhopTotalTime -= usecond();
DhopCommTime -= usecond();
st.HaloExchange(in,compressor);
DhopCommTime += usecond();
DhopComputeTime -= usecond();
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
if (dag == DaggerYes) {
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU=ss;
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), LLs, sU,in, out);
}
} else {
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU=ss;
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out);
}
}
DhopComputeTime += usecond();
DhopTotalTime += usecond();
}
*/
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
{
DhopCalls+=1;
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
conformable(in._grid,out._grid); // drops the cb check
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
conformable(in.Grid(),out.Grid()); // drops the cb check
assert(in.checkerboard==Even);
out.checkerboard = Odd;
assert(in.Checkerboard()==Even);
out.Checkerboard() = Odd;
DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,UUUmuOdd,in,out,dag);
}
@ -483,11 +462,11 @@ template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
{
DhopCalls+=1;
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
conformable(in._grid,out._grid); // drops the cb check
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
conformable(in.Grid(),out.Grid()); // drops the cb check
assert(in.checkerboard==Odd);
out.checkerboard = Even;
assert(in.Checkerboard()==Odd);
out.Checkerboard() = Even;
DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,UUUmuEven,in,out,dag);
}
@ -495,10 +474,10 @@ template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
{
DhopCalls+=2;
conformable(in._grid,FermionGrid()); // verifies full grid
conformable(in._grid,out._grid);
conformable(in.Grid(),FermionGrid()); // verifies full grid
conformable(in.Grid(),out.Grid());
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
DhopInternal(Stencil,Lebesgue,Umu,UUUmu,in,out,dag);
}
@ -506,7 +485,7 @@ void ImprovedStaggeredFermion5D<Impl>::Dhop(const FermionField &in, FermionField
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::Report(void)
{
std::vector<int> latt = GridDefaultLatt();
Coordinate latt = GridDefaultLatt();
RealD volume = Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
RealD NP = _FourDimGrid->_Nprocessors;
RealD NN = _FourDimGrid->NodeCount();
@ -564,21 +543,21 @@ void ImprovedStaggeredFermion5D<Impl>::Mdir(const FermionField &in, FermionField
}
template <class Impl>
RealD ImprovedStaggeredFermion5D<Impl>::M(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
Dhop(in, out, DaggerNo);
return axpy_norm(out, mass, in, out);
}
template <class Impl>
RealD ImprovedStaggeredFermion5D<Impl>::Mdag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
Dhop(in, out, DaggerYes);
return axpy_norm(out, mass, in, out);
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::Meooe(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
if (in.Checkerboard() == Odd) {
DhopEO(in, out, DaggerNo);
} else {
DhopOE(in, out, DaggerNo);
@ -586,7 +565,7 @@ void ImprovedStaggeredFermion5D<Impl>::Meooe(const FermionField &in, FermionFiel
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
if (in.Checkerboard() == Odd) {
DhopEO(in, out, DaggerYes);
} else {
DhopOE(in, out, DaggerYes);
@ -595,27 +574,27 @@ void ImprovedStaggeredFermion5D<Impl>::MeooeDag(const FermionField &in, FermionF
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::Mooee(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
typename FermionField::scalar_type scal(mass);
out = scal * in;
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
Mooee(in, out);
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
out = (1.0 / (mass)) * in;
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::MooeeInvDag(const FermionField &in,
FermionField &out) {
out.checkerboard = in.checkerboard;
FermionField &out) {
out.Checkerboard() = in.Checkerboard();
MooeeInv(in, out);
}
@ -624,31 +603,28 @@ void ImprovedStaggeredFermion5D<Impl>::MooeeInvDag(const FermionField &in,
////////////////////////////////////////////////////////
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu)
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu)
{
assert(0);
assert(0);
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
void ImprovedStaggeredFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx)
{
assert(0);
assert(0);
}
FermOpStaggeredTemplateInstantiate(ImprovedStaggeredFermion5D);
FermOpStaggeredVec5dTemplateInstantiate(ImprovedStaggeredFermion5D);
}}
NAMESPACE_END(Grid);

View File

@ -28,40 +28,35 @@ directory
/* END LEGAL */
#include <Grid/Grid.h>
namespace Grid {
namespace QCD {
#pragma once
const std::vector<int>
ImprovedStaggeredFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
const std::vector<int>
ImprovedStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, 3, 3, -3, -3, -3, -3});
NAMESPACE_BEGIN(Grid);
/////////////////////////////////
// Constructor and gauge import
/////////////////////////////////
template <class Impl>
ImprovedStaggeredFermion<Impl>::ImprovedStaggeredFermion(GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid,
RealD _mass,
RealD _c1, RealD _c2,RealD _u0,
const ImplParams &p)
: Kernels(p),
_grid(&Fgrid),
_cbgrid(&Hgrid),
Stencil(&Fgrid, npoint, Even, directions, displacements),
StencilEven(&Hgrid, npoint, Even, directions, displacements), // source is Even
StencilOdd(&Hgrid, npoint, Odd, directions, displacements), // source is Odd
mass(_mass),
Lebesgue(_grid),
LebesgueEvenOdd(_cbgrid),
Umu(&Fgrid),
UmuEven(&Hgrid),
UmuOdd(&Hgrid),
UUUmu(&Fgrid),
UUUmuEven(&Hgrid),
UUUmuOdd(&Hgrid) ,
_tmp(&Hgrid)
: Kernels(p),
_grid(&Fgrid),
_cbgrid(&Hgrid),
Stencil(&Fgrid, npoint, Even, directions, displacements,p),
StencilEven(&Hgrid, npoint, Even, directions, displacements,p), // source is Even
StencilOdd(&Hgrid, npoint, Odd, directions, displacements,p), // source is Odd
mass(_mass),
Lebesgue(_grid),
LebesgueEvenOdd(_cbgrid),
Umu(&Fgrid),
UmuEven(&Hgrid),
UmuOdd(&Hgrid),
UUUmu(&Fgrid),
UUUmuEven(&Hgrid),
UUUmuOdd(&Hgrid) ,
_tmp(&Hgrid)
{
int vol4;
int LLs=1;
@ -85,17 +80,17 @@ ImprovedStaggeredFermion<Impl>::ImprovedStaggeredFermion(GaugeField &_Uthin, Gau
ImportGauge(_Uthin,_Ufat);
}
////////////////////////////////////////////////////////////
// Momentum space propagator should be
// https://arxiv.org/pdf/hep-lat/9712010.pdf
//
// mom space action.
// gamma_mu i ( c1 sin pmu + c2 sin 3 pmu ) + m
//
// must track through staggered flavour/spin reduction in literature to
// turn to free propagator for the one component chi field, a la page 4/5
// of above link to implmement fourier based solver.
////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////
// Momentum space propagator should be
// https://arxiv.org/pdf/hep-lat/9712010.pdf
//
// mom space action.
// gamma_mu i ( c1 sin pmu + c2 sin 3 pmu ) + m
//
// must track through staggered flavour/spin reduction in literature to
// turn to free propagator for the one component chi field, a la page 4/5
// of above link to implmement fourier based solver.
////////////////////////////////////////////////////////////
template <class Impl>
void ImprovedStaggeredFermion<Impl>::ImportGaugeSimple(const GaugeField &_Utriple,const GaugeField &_Ufat)
{
@ -177,21 +172,21 @@ void ImprovedStaggeredFermion<Impl>::ImportGauge(const GaugeField &_Uthin,const
template <class Impl>
RealD ImprovedStaggeredFermion<Impl>::M(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
Dhop(in, out, DaggerNo);
return axpy_norm(out, mass, in, out);
}
template <class Impl>
RealD ImprovedStaggeredFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
Dhop(in, out, DaggerYes);
return axpy_norm(out, mass, in, out);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
if (in.Checkerboard() == Odd) {
DhopEO(in, out, DaggerNo);
} else {
DhopOE(in, out, DaggerNo);
@ -199,7 +194,7 @@ void ImprovedStaggeredFermion<Impl>::Meooe(const FermionField &in, FermionField
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
if (in.Checkerboard() == Odd) {
DhopEO(in, out, DaggerYes);
} else {
DhopOE(in, out, DaggerYes);
@ -208,27 +203,27 @@ void ImprovedStaggeredFermion<Impl>::MeooeDag(const FermionField &in, FermionFie
template <class Impl>
void ImprovedStaggeredFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
typename FermionField::scalar_type scal(mass);
out = scal * in;
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
Mooee(in, out);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
out = (1.0 / (mass)) * in;
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::MooeeInvDag(const FermionField &in,
FermionField &out) {
out.checkerboard = in.checkerboard;
FermionField &out) {
out.Checkerboard() = in.Checkerboard();
MooeeInv(in, out);
}
@ -244,8 +239,8 @@ void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGauge
Compressor compressor;
FermionField Btilde(B._grid);
FermionField Atilde(B._grid);
FermionField Btilde(B.Grid());
FermionField Atilde(B.Grid());
Atilde = A;
st.HaloExchange(B, compressor);
@ -255,10 +250,13 @@ void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGauge
////////////////////////
// Call the single hop
////////////////////////
PARALLEL_FOR_LOOP
for (int sss = 0; sss < B._grid->oSites(); sss++) {
Kernels::DhopDir(st, U, UUU, st.CommBuf(), sss, sss, B, Btilde, mu,1);
}
auto U_v = U.View();
auto UUU_v = UUU.View();
auto B_v = B.View();
auto Btilde_v = Btilde.View();
thread_for(sss,B.Grid()->oSites(),{
Kernels::DhopDirKernel(st, U_v, UUU_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1);
});
// Force in three link terms
//
@ -288,11 +286,11 @@ void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGauge
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
conformable(U._grid, _grid);
conformable(U._grid, V._grid);
conformable(U._grid, mat._grid);
conformable(U.Grid(), _grid);
conformable(U.Grid(), V.Grid());
conformable(U.Grid(), mat.Grid());
mat.checkerboard = U.checkerboard;
mat.Checkerboard() = U.Checkerboard();
DerivInternal(Stencil, Umu, UUUmu, mat, U, V, dag);
}
@ -300,13 +298,13 @@ void ImprovedStaggeredFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionFie
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
conformable(U._grid, _cbgrid);
conformable(U._grid, V._grid);
conformable(U._grid, mat._grid);
conformable(U.Grid(), _cbgrid);
conformable(U.Grid(), V.Grid());
conformable(U.Grid(), mat.Grid());
assert(V.checkerboard == Even);
assert(U.checkerboard == Odd);
mat.checkerboard = Odd;
assert(V.Checkerboard() == Even);
assert(U.Checkerboard() == Odd);
mat.Checkerboard() = Odd;
DerivInternal(StencilEven, UmuOdd, UUUmuOdd, mat, U, V, dag);
}
@ -314,48 +312,51 @@ void ImprovedStaggeredFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionF
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
conformable(U._grid, _cbgrid);
conformable(U._grid, V._grid);
conformable(U._grid, mat._grid);
conformable(U.Grid(), _cbgrid);
conformable(U.Grid(), V.Grid());
conformable(U.Grid(), mat.Grid());
assert(V.checkerboard == Odd);
assert(U.checkerboard == Even);
mat.checkerboard = Even;
assert(V.Checkerboard() == Odd);
assert(U.Checkerboard() == Even);
mat.Checkerboard() = Even;
DerivInternal(StencilOdd, UmuEven, UUUmuEven, mat, U, V, dag);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag) {
void ImprovedStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
{
DhopCalls+=2;
conformable(in._grid, _grid); // verifies full grid
conformable(in._grid, out._grid);
conformable(in.Grid(), _grid); // verifies full grid
conformable(in.Grid(), out.Grid());
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
DhopInternal(Stencil, Lebesgue, Umu, UUUmu, in, out, dag);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag) {
void ImprovedStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
{
DhopCalls+=1;
conformable(in._grid, _cbgrid); // verifies half grid
conformable(in._grid, out._grid); // drops the cb check
conformable(in.Grid(), _cbgrid); // verifies half grid
conformable(in.Grid(), out.Grid()); // drops the cb check
assert(in.checkerboard == Even);
out.checkerboard = Odd;
assert(in.Checkerboard() == Even);
out.Checkerboard() = Odd;
DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, UUUmuOdd, in, out, dag);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField &out, int dag) {
void ImprovedStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField &out, int dag)
{
DhopCalls+=1;
conformable(in._grid, _cbgrid); // verifies half grid
conformable(in._grid, out._grid); // drops the cb check
conformable(in.Grid(), _cbgrid); // verifies half grid
conformable(in.Grid(), out.Grid()); // drops the cb check
assert(in.checkerboard == Odd);
out.checkerboard = Even;
assert(in.Checkerboard() == Odd);
out.Checkerboard() = Even;
DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, UUUmuEven, in, out, dag);
}
@ -370,11 +371,13 @@ void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionFiel
Compressor compressor;
Stencil.HaloExchange(in, compressor);
PARALLEL_FOR_LOOP
for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopDir(Stencil, Umu, UUUmu, Stencil.CommBuf(), sss, sss, in, out, dir, disp);
}
auto Umu_v = Umu.View();
auto UUUmu_v = UUUmu.View();
auto in_v = in.View();
auto out_v = out.View();
thread_for( sss, in.Grid()->oSites(),{
Kernels::DhopDirKernel(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp);
});
};
template <class Impl>
@ -400,7 +403,7 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
{
#ifdef GRID_OMP
Compressor compressor;
int len = U._grid->oSites();
int len = U.Grid()->oSites();
const int LLs = 1;
DhopTotalTime -= usecond();
@ -439,17 +442,21 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
}
// do the compute
auto U_v = U.View();
auto UUU_v = UUU.View();
auto in_v = in.View();
auto out_v = out.View();
if (dag == DaggerYes) {
for (int ss = myblock; ss < myblock+myn; ++ss) {
int sU = ss;
// Interior = 1; Exterior = 0; must implement for staggered
Kernels::DhopSiteDag(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,1,0);
Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,1,0);
}
} else {
for (int ss = myblock; ss < myblock+myn; ++ss) {
// Interior = 1; Exterior = 0;
int sU = ss;
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,1,0);
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,1,0);
}
}
} else {
@ -464,17 +471,23 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
DhopFaceTime -= usecond();
DhopComputeTime2 -= usecond();
if (dag == DaggerYes) {
int sz=st.surface_list.size();
parallel_for (int ss = 0; ss < sz; ss++) {
int sU = st.surface_list[ss];
Kernels::DhopSiteDag(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,0,1);
}
} else {
int sz=st.surface_list.size();
parallel_for (int ss = 0; ss < sz; ss++) {
int sU = st.surface_list[ss];
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,0,1);
{
auto U_v = U.View();
auto UUU_v = UUU.View();
auto in_v = in.View();
auto out_v = out.View();
if (dag == DaggerYes) {
int sz=st.surface_list.size();
thread_for(ss,sz,{
int sU = st.surface_list[ss];
Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,0,1);
});
} else {
int sz=st.surface_list.size();
thread_for(ss,sz,{
int sU = st.surface_list[ss];
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,0,1);
});
}
}
DhopComputeTime2 += usecond();
@ -500,15 +513,19 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalSerialComms(StencilImpl &st, Le
st.HaloExchange(in, compressor);
DhopCommTime += usecond();
auto U_v = U.View();
auto UUU_v = UUU.View();
auto in_v = in.View();
auto out_v = out.View();
DhopComputeTime -= usecond();
if (dag == DaggerYes) {
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
}
thread_for(sss, in.Grid()->oSites(),{
Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v);
});
} else {
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopSite(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
}
thread_for(sss, in.Grid()->oSites(),{
Kernels::DhopSite(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v);
});
}
DhopComputeTime += usecond();
DhopTotalTime += usecond();
@ -520,7 +537,7 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalSerialComms(StencilImpl &st, Le
template<class Impl>
void ImprovedStaggeredFermion<Impl>::Report(void)
{
std::vector<int> latt = GridDefaultLatt();
Coordinate latt = _grid->GlobalDimensions();
RealD volume = 1; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
RealD NP = _grid->_Nprocessors;
RealD NN = _grid->NodeCount();
@ -574,31 +591,25 @@ void ImprovedStaggeredFermion<Impl>::ZeroCounters(void)
////////////////////////////////////////////////////////
template <class Impl>
void ImprovedStaggeredFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu)
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu)
{
assert(0);
assert(0);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
void ImprovedStaggeredFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx)
{
assert(0);
assert(0);
}
FermOpStaggeredTemplateInstantiate(ImprovedStaggeredFermion);
//AdjointFermOpTemplateInstantiate(ImprovedStaggeredFermion);
//TwoIndexFermOpTemplateInstantiate(ImprovedStaggeredFermion);
}}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,453 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermioncache.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
NAMESPACE_BEGIN(Grid);
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
Vector<Coeff_t> &lower, Vector<Coeff_t> &diag, Vector<Coeff_t> &upper)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss = sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1;
spinor tmp2;
for(int s=0; s<Ls; s++){
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5m(tmp1, psi(idx_u));
spProj5p(tmp2, psi(idx_l));
coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
}
});
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
Vector<Coeff_t> &lower, Vector<Coeff_t> &diag, Vector<Coeff_t> &upper,
Vector<Coeff_t> &shift_coeffs)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
auto pm = this->pm;
int shift_s = (pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
assert(phi.Checkerboard() == psi.Checkerboard());
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss = sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1;
spinor tmp2;
spinor tmp;
for(int s=0; s<Ls; s++){
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5m(tmp1, psi(idx_u));
spProj5p(tmp2, psi(idx_l));
if(pm == 1){ spProj5p(tmp, psi(ss+shift_s)); }
else { spProj5m(tmp, psi(ss+shift_s)); }
coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 +lower[s]*tmp2 + shift_coeffs[s]*tmp);
}
});
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
Vector<Coeff_t> &lower, Vector<Coeff_t> &diag, Vector<Coeff_t> &upper)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(), {
uint64_t ss = sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1, tmp2;
for(int s=0; s<Ls; s++){
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5p(tmp1, psi(idx_u));
spProj5m(tmp2, psi(idx_l));
coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
}
});
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
Vector<Coeff_t> &lower, Vector<Coeff_t> &diag, Vector<Coeff_t> &upper,
Vector<Coeff_t> &shift_coeffs)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
auto pm = this->pm;
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss = sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1, tmp2, tmp;
tmp1=Zero();
coalescedWrite(chi[ss+Ls-1],tmp1);
for(int s=0; s<Ls; s++){
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5p(tmp1, psi(idx_u));
spProj5m(tmp2, psi(idx_l));
if(s==(Ls-1)) coalescedWrite(chi[ss+s], chi(ss+s)+ diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
else coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
if(pm == 1){ spProj5p(tmp, psi(ss+s)); }
else { spProj5m(tmp, psi(ss+s)); }
coalescedWrite(chi[ss+shift_s],chi(ss+shift_s)+shift_coeffs[s]*tmp);
}
});
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi_i, FermionField &chi_i)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
auto psi = psi_i.View();
auto chi = chi_i.View();
auto plee = & this->lee [0];
auto pdee = & this->dee [0];
auto puee = & this->uee [0];
auto pleem= & this->leem[0];
auto pueem= & this->ueem[0];
if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; }
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss = sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp;
// Apply (L^{\prime})^{-1}
coalescedWrite(chi[ss], psi(ss)); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
spProj5p(tmp, chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s) - plee[s-1]*tmp);
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pleem[s]*tmp);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp, chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pueem[s]/pdee[Ls-1])*tmp);
}
coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1));
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
spProj5m(tmp, chi(ss+s+1));
coalescedWrite(chi[ss+s], chi(ss+s) - puee[s]*tmp);
}
});
this->MooeeInvTime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi_i, FermionField &chi_i)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
auto psi = psi_i.View();
auto chi = chi_i.View();
auto pm = this->pm;
auto plee = & this->lee [0];
auto pdee = & this->dee [0];
auto puee = & this->uee [0];
auto pleem= & this->leem[0];
auto pueem= & this->ueem[0];
auto pMooeeInv_shift_lc = &MooeeInv_shift_lc[0];
auto pMooeeInv_shift_norm = &MooeeInv_shift_norm[0];
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss = sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1,tmp2,tmp2_spProj;
// Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2
coalescedWrite(chi[ss], psi(ss)); // chi[0]=psi[0]
tmp2 = pMooeeInv_shift_lc[0]*psi(ss);
for(int s=1; s<Ls; s++){
spProj5p(tmp1, chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s) - plee[s-1]*tmp1);
tmp2 = tmp2 + pMooeeInv_shift_lc[s]*psi(ss+s);
}
if(pm == 1){ spProj5p(tmp2_spProj, tmp2);}
else { spProj5m(tmp2_spProj, tmp2); }
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp1, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pleem[s]*tmp1);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pueem[s]/pdee[Ls-1])*tmp1);
}
// chi[ss+Ls-1] = (1.0/pdee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1));
spProj5m(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) + pMooeeInv_shift_norm[Ls-1]*tmp2_spProj);
// Apply U^{-1} and add shift term
for(int s=Ls-2; s>=0; s--){
coalescedWrite(chi[ss+s] , chi(ss+s) - puee[s]*tmp1);
spProj5m(tmp1, chi(ss+s));
coalescedWrite(chi[ss+s], chi(ss+s) + pMooeeInv_shift_norm[s]*tmp2_spProj);
}
});
this->MooeeInvTime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi_i, FermionField &chi_i)
{
if(this->shift != 0.0){ MooeeInvDag_shift(psi_i,chi_i); return; }
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
auto psi = psi_i.View();
auto chi = chi_i.View();
auto plee = & this->lee [0];
auto pdee = & this->dee [0];
auto puee = & this->uee [0];
auto pleem= & this->leem[0];
auto pueem= & this->ueem[0];
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss = sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp;
// Apply (U^{\prime})^{-dag}
coalescedWrite(chi[ss], psi(ss));
for(int s=1; s<Ls; s++){
spProj5m(tmp, chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s) - puee[s-1]*tmp);
}
// U_m^{-\dag}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pueem[s]*tmp);
}
// L_m^{-\dag} D^{-dag}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp, chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pleem[s]/pdee[Ls-1])*tmp);
}
coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1));
// Apply L^{-dag}
for(int s=Ls-2; s>=0; s--){
spProj5p(tmp, chi(ss+s+1));
coalescedWrite(chi[ss+s], chi(ss+s) - plee[s]*tmp);
}
});
this->MooeeInvTime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi_i, FermionField &chi_i)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
auto psi = psi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls;
auto pm = this->pm;
auto plee = & this->lee [0];
auto pdee = & this->dee [0];
auto puee = & this->uee [0];
auto pleem= & this->leem[0];
auto pueem= & this->ueem[0];
auto pMooeeInvDag_shift_lc = &MooeeInvDag_shift_lc[0];
auto pMooeeInvDag_shift_norm = &MooeeInvDag_shift_norm[0];
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss = sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1,tmp2,tmp2_spProj;
// Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2
coalescedWrite(chi[ss], psi(ss));
tmp2 = pMooeeInvDag_shift_lc[0]*psi(ss);
for(int s=1; s<Ls; s++){
spProj5m(tmp1, chi(ss+s-1));
coalescedWrite(chi[ss+s],psi(ss+s) - puee[s-1]*tmp1);
tmp2 = tmp2 + pMooeeInvDag_shift_lc[s]*psi(ss+s);
}
if(pm == 1){ spProj5p(tmp2_spProj, tmp2);}
else { spProj5m(tmp2_spProj, tmp2);}
// U_m^{-\dag}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp1, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pueem[s]*tmp1);
}
// L_m^{-\dag} D^{-dag}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pleem[s]/pdee[Ls-1])*tmp1);
}
coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1));
spProj5p(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) + pMooeeInvDag_shift_norm[Ls-1]*tmp2_spProj);
// Apply L^{-dag}
for(int s=Ls-2; s>=0; s--){
coalescedWrite(chi[ss+s], chi(ss+s) - plee[s]*tmp1);
spProj5p(tmp1, chi(ss+s));
coalescedWrite(chi[ss+s], chi(ss+s) + pMooeeInvDag_shift_norm[s]*tmp2_spProj);
}
});
this->MooeeInvTime += usecond();
}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,407 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
NAMESPACE_BEGIN(Grid);
template<class Impl>
MobiusEOFAFermion<Impl>::MobiusEOFAFermion(
GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3,
RealD _shift, int _pm, RealD _M5,
RealD _b, RealD _c, const ImplParams &p) :
AbstractEOFAFermion<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3,
_shift, _pm, _M5, _b, _c, p)
{
int Ls = this->Ls;
RealD eps = 1.0;
Approx::zolotarev_data *zdata = Approx::higham(eps, this->Ls);
assert(zdata->n == this->Ls);
std::cout << GridLogMessage << "MobiusEOFAFermion (b=" << _b <<
",c=" << _c << ") with Ls=" << Ls << std::endl;
this->SetCoefficientsTanh(zdata, _b, _c);
std::cout << GridLogMessage << "EOFA parameters: (mq1=" << _mq1 <<
",mq2=" << _mq2 << ",mq3=" << _mq3 << ",shift=" << _shift <<
",pm=" << _pm << ")" << std::endl;
Approx::zolotarev_free(zdata);
if(_shift != 0.0){
SetCoefficientsPrecondShiftOps();
} else {
Mooee_shift.resize(Ls, 0.0);
MooeeInv_shift_lc.resize(Ls, 0.0);
MooeeInv_shift_norm.resize(Ls, 0.0);
MooeeInvDag_shift_lc.resize(Ls, 0.0);
MooeeInvDag_shift_norm.resize(Ls, 0.0);
}
}
/****************************************************************
* Additional EOFA operators only called outside the inverter.
* Since speed is not essential, simple axpby-style
* implementations should be fine.
***************************************************************/
template<class Impl>
void MobiusEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din, int sign, int dag)
{
int Ls = this->Ls;
RealD alpha = this->alpha;
Din = Zero();
if((sign == 1) && (dag == 0)) { // \Omega_{+}
for(int s=0; s<Ls; ++s){
axpby_ssp(Din, 0.0, psi, 2.0*std::pow(1.0-alpha,Ls-s-1)/std::pow(1.0+alpha,Ls-s), psi, s, 0);
}
} else if((sign == -1) && (dag == 0)) { // \Omega_{-}
for(int s=0; s<Ls; ++s){
axpby_ssp(Din, 0.0, psi, 2.0*std::pow(1.0-alpha,s)/std::pow(1.0+alpha,s+1), psi, s, 0);
}
} else if((sign == 1 ) && (dag == 1)) { // \Omega_{+}^{\dagger}
for(int sp=0; sp<Ls; ++sp){
axpby_ssp(Din, 1.0, Din, 2.0*std::pow(1.0-alpha,Ls-sp-1)/std::pow(1.0+alpha,Ls-sp), psi, 0, sp);
}
} else if((sign == -1) && (dag == 1)) { // \Omega_{-}^{\dagger}
for(int sp=0; sp<Ls; ++sp){
axpby_ssp(Din, 1.0, Din, 2.0*std::pow(1.0-alpha,sp)/std::pow(1.0+alpha,sp+1), psi, 0, sp);
}
}
}
// This is the operator relating the usual Ddwf to TWQCD's EOFA Dirac operator (arXiv:1706.05843, Eqn. 6).
// It also relates the preconditioned and unpreconditioned systems described in Appendix B.2.
template<class Impl>
void MobiusEOFAFermion<Impl>::Dtilde(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
RealD b = 0.5 * ( 1.0 + this->alpha );
RealD c = 0.5 * ( 1.0 - this->alpha );
RealD mq1 = this->mq1;
for(int s=0; s<Ls; ++s){
if(s == 0) {
axpby_ssp_pminus(chi, b, psi, -c, psi, s, s+1);
axpby_ssp_pplus (chi, 1.0, chi, mq1*c, psi, s, Ls-1);
} else if(s == (Ls-1)) {
axpby_ssp_pminus(chi, b, psi, mq1*c, psi, s, 0);
axpby_ssp_pplus (chi, 1.0, chi, -c, psi, s, s-1);
} else {
axpby_ssp_pminus(chi, b, psi, -c, psi, s, s+1);
axpby_ssp_pplus (chi, 1.0, chi, -c, psi, s, s-1);
}
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
RealD m = this->mq1;
RealD c = 0.5 * this->alpha;
RealD d = 0.5;
RealD DtInv_p(0.0), DtInv_m(0.0);
RealD N = std::pow(c+d,Ls) + m*std::pow(c-d,Ls);
FermionField tmp(this->FermionGrid());
for(int s=0; s<Ls; ++s){
for(int sp=0; sp<Ls; ++sp){
DtInv_p = m * std::pow(-1.0,s-sp+1) * std::pow(c-d,Ls+s-sp) / std::pow(c+d,s-sp+1) / N;
DtInv_p += (s < sp) ? 0.0 : std::pow(-1.0,s-sp) * std::pow(c-d,s-sp) / std::pow(c+d,s-sp+1);
DtInv_m = m * std::pow(-1.0,sp-s+1) * std::pow(c-d,Ls+sp-s) / std::pow(c+d,sp-s+1) / N;
DtInv_m += (s > sp) ? 0.0 : std::pow(-1.0,sp-s) * std::pow(c-d,sp-s) / std::pow(c+d,sp-s+1);
if(sp == 0){
axpby_ssp_pplus (tmp, 0.0, tmp, DtInv_p, psi, s, sp);
axpby_ssp_pminus(tmp, 0.0, tmp, DtInv_m, psi, s, sp);
} else {
axpby_ssp_pplus (tmp, 1.0, tmp, DtInv_p, psi, s, sp);
axpby_ssp_pminus(tmp, 1.0, tmp, DtInv_m, psi, s, sp);
}
}}
}
/*****************************************************************************************************/
template<class Impl>
RealD MobiusEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
{
FermionField Din(psi.Grid());
this->Meooe5D(psi, Din);
this->DW(Din, chi, DaggerNo);
axpby(chi, 1.0, 1.0, chi, psi);
this->M5D(psi, chi);
return(norm2(chi));
}
template<class Impl>
RealD MobiusEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
{
FermionField Din(psi.Grid());
this->DW(psi, Din, DaggerYes);
this->MeooeDag5D(Din, chi);
this->M5Ddag(psi, chi);
axpby(chi, 1.0, 1.0, chi, psi);
return(norm2(chi));
}
/********************************************************************
* Performance critical fermion operators called inside the inverter
********************************************************************/
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
Vector<Coeff_t> diag(Ls,1.0);
Vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = this->mq1;
Vector<Coeff_t> lower(Ls,-1.0); lower[0] = this->mq1;
// no shift term
if(this->shift == 0.0){ this->M5D(psi, chi, chi, lower, diag, upper); }
// fused M + shift operation
else{ this->M5D_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); }
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
Vector<Coeff_t> diag(Ls,1.0);
Vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = this->mq1;
Vector<Coeff_t> lower(Ls,-1.0); lower[0] = this->mq1;
// no shift term
if(this->shift == 0.0){ this->M5Ddag(psi, chi, chi, lower, diag, upper); }
// fused M + shift operation
else{ this->M5Ddag_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); }
}
// half checkerboard operations
template<class Impl>
void MobiusEOFAFermion<Impl>::Mooee(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
// coefficients of Mooee
Vector<Coeff_t> diag = this->bee;
Vector<Coeff_t> upper(Ls);
Vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
upper[s] = -this->cee[s];
lower[s] = -this->cee[s];
}
upper[Ls-1] *= -this->mq1;
lower[0] *= -this->mq1;
// no shift term
if(this->shift == 0.0){ this->M5D(psi, psi, chi, lower, diag, upper); }
// fused M + shift operation
else { this->M5D_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); }
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeDag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
// coefficients of MooeeDag
Vector<Coeff_t> diag = this->bee;
Vector<Coeff_t> upper(Ls);
Vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
if(s==0) {
upper[s] = -this->cee[s+1];
lower[s] = this->mq1*this->cee[Ls-1];
} else if(s==(Ls-1)) {
upper[s] = this->mq1*this->cee[0];
lower[s] = -this->cee[s-1];
} else {
upper[s] = -this->cee[s+1];
lower[s] = -this->cee[s-1];
}
}
// no shift term
if(this->shift == 0.0){ this->M5Ddag(psi, psi, chi, lower, diag, upper); }
// fused M + shift operation
else{ this->M5Ddag_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); }
}
/****************************************************************************************/
// Computes coefficients for applying Cayley preconditioned shift operators
// (Mooee + \Delta) --> Mooee_shift
// (Mooee + \Delta)^{-1} --> MooeeInv_shift_lc, MooeeInv_shift_norm
// (Mooee + \Delta)^{-dag} --> MooeeInvDag_shift_lc, MooeeInvDag_shift_norm
// For the latter two cases, the operation takes the form
// [ (Mooee + \Delta)^{-1} \psi ]_{i} = Mooee_{ij} \psi_{j} +
// ( MooeeInv_shift_norm )_{i} ( \sum_{j} [ MooeeInv_shift_lc ]_{j} P_{pm} \psi_{j} )
template<class Impl>
void MobiusEOFAFermion<Impl>::SetCoefficientsPrecondShiftOps()
{
int Ls = this->Ls;
int pm = this->pm;
RealD alpha = this->alpha;
RealD k = this->k;
RealD mq1 = this->mq1;
RealD shift = this->shift;
// Initialize
Mooee_shift.resize(Ls);
MooeeInv_shift_lc.resize(Ls);
MooeeInv_shift_norm.resize(Ls);
MooeeInvDag_shift_lc.resize(Ls);
MooeeInvDag_shift_norm.resize(Ls);
// Construct Mooee_shift
int idx(0);
Coeff_t N = ( (pm == 1) ? 1.0 : -1.0 ) * (2.0*shift*k) *
( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) );
for(int s=0; s<Ls; ++s){
idx = (pm == 1) ? (s) : (Ls-1-s);
Mooee_shift[idx] = N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1);
}
// Tridiagonal solve for MooeeInvDag_shift_lc
{
Coeff_t m(0.0);
Vector<Coeff_t> d = Mooee_shift;
Vector<Coeff_t> u(Ls,0.0);
Vector<Coeff_t> y(Ls,0.0);
Vector<Coeff_t> q(Ls,0.0);
if(pm == 1){ u[0] = 1.0; }
else{ u[Ls-1] = 1.0; }
// Tridiagonal matrix algorithm + Sherman-Morrison formula
//
// We solve
// ( Mooee' + u \otimes v ) MooeeInvDag_shift_lc = Mooee_shift
// where Mooee' is the tridiagonal part of Mooee_{+}, and
// u = (1,0,...,0) and v = (0,...,0,mq1*cee[0]) are chosen
// so that the outer-product u \otimes v gives the (0,Ls-1)
// entry of Mooee_{+}.
//
// We do this as two solves: Mooee'*y = d and Mooee'*q = u,
// and then construct the solution to the original system
// MooeeInvDag_shift_lc = y - <v,y> / ( 1 + <v,q> ) q
if(pm == 1){
for(int s=1; s<Ls; ++s){
m = -this->cee[s] / this->bee[s-1];
d[s] -= m*d[s-1];
u[s] -= m*u[s-1];
}
}
y[Ls-1] = d[Ls-1] / this->bee[Ls-1];
q[Ls-1] = u[Ls-1] / this->bee[Ls-1];
for(int s=Ls-2; s>=0; --s){
if(pm == 1){
y[s] = d[s] / this->bee[s];
q[s] = u[s] / this->bee[s];
} else {
y[s] = ( d[s] + this->cee[s]*y[s+1] ) / this->bee[s];
q[s] = ( u[s] + this->cee[s]*q[s+1] ) / this->bee[s];
}
}
// Construct MooeeInvDag_shift_lc
for(int s=0; s<Ls; ++s){
if(pm == 1){
MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[0]*y[Ls-1] /
(1.0+mq1*this->cee[0]*q[Ls-1]) * q[s];
} else {
MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[Ls-1]*y[0] /
(1.0+mq1*this->cee[Ls-1]*q[0]) * q[s];
}
}
// Compute remaining coefficients
N = (pm == 1) ? (1.0 + MooeeInvDag_shift_lc[Ls-1]) : (1.0 + MooeeInvDag_shift_lc[0]);
for(int s=0; s<Ls; ++s){
// MooeeInv_shift_lc
if(pm == 1){ MooeeInv_shift_lc[s] = pow(this->bee[s],s) * pow(this->cee[s],Ls-1-s); }
else { MooeeInv_shift_lc[s] = pow(this->bee[s],Ls-1-s) * pow(this->cee[s],s); }
// MooeeInv_shift_norm
MooeeInv_shift_norm[s] = -MooeeInvDag_shift_lc[s] /
( pow(this->bee[s],Ls) + mq1*pow(this->cee[s],Ls) ) / N;
// MooeeInvDag_shift_norm
if(pm == 1){ MooeeInvDag_shift_norm[s] = -pow(this->bee[s],s) * pow(this->cee[s],(Ls-1-s)) /
( pow(this->bee[s],Ls) + mq1*pow(this->cee[s],Ls) ) / N; }
else{ MooeeInvDag_shift_norm[s] = -pow(this->bee[s],(Ls-1-s)) * pow(this->cee[s],s) /
( pow(this->bee[s],Ls) + mq1*pow(this->cee[s],Ls) ) / N; }
}
}
}
// Recompute coefficients for a different value of shift constant
template<class Impl>
void MobiusEOFAFermion<Impl>::RefreshShiftCoefficients(RealD new_shift)
{
this->shift = new_shift;
if(new_shift != 0.0){
SetCoefficientsPrecondShiftOps();
} else {
int Ls = this->Ls;
Mooee_shift.resize(Ls,0.0);
MooeeInv_shift_lc.resize(Ls,0.0);
MooeeInv_shift_norm.resize(Ls,0.0);
MooeeInvDag_shift_lc.resize(Ls,0.0);
MooeeInvDag_shift_norm.resize(Ls,0.0);
}
}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,450 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/PartialFractionFermion5D.h>
NAMESPACE_BEGIN(Grid);
template<class Impl>
void PartialFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
// this does both dag and undag but is trivial; make a common helper routing
int Ls = this->Ls;
this->DhopDir(psi,chi,dir,disp);
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(chi,-scale,chi,0.0,chi,s,s);
ag5xpby_ssp(chi, scale,chi,0.0,chi,s+1,s+1);
}
ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::Meooe_internal(const FermionField &psi, FermionField &chi,int dag)
{
int Ls = this->Ls;
if ( psi.Checkerboard() == Odd ) {
this->DhopEO(psi,chi,DaggerNo);
} else {
this->DhopOE(psi,chi,DaggerNo);
}
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(chi,-scale,chi,0.0,chi,s,s);
ag5xpby_ssp(chi, scale,chi,0.0,chi,s+1,s+1);
}
ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::Mooee_internal(const FermionField &psi, FermionField &chi,int dag)
{
// again dag and undag are trivially related
int sign = dag ? (-1) : 1;
int Ls = this->Ls;
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
RealD pp = p[nblock-1-b];
RealD qq = q[nblock-1-b];
// Do each 2x2 block aligned at s and multiplies Dw site diagonal by G5 so Hw
ag5xpby_ssp(chi,-dw_diag*scale,psi,amax*sqrt(qq)*scale,psi, s ,s+1);
ag5xpby_ssp(chi, dw_diag*scale,psi,amax*sqrt(qq)*scale,psi, s+1,s);
axpby_ssp (chi, 1.0, chi,sqrt(amax*pp)*scale*sign,psi,s+1,Ls-1);
}
{
RealD R=(1+mass)/(1-mass);
//R g5 psi[Ls-1] + p[0] H
ag5xpbg5y_ssp(chi,R*scale,psi,p[nblock]*scale*dw_diag/amax,psi,Ls-1,Ls-1);
for(int b=0;b<nblock;b++){
int s = 2*b+1;
RealD pp = p[nblock-1-b];
axpby_ssp(chi,1.0,chi,-sqrt(amax*pp)*scale*sign,psi,Ls-1,s);
}
}
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeInv_internal(const FermionField &psi, FermionField &chi,int dag)
{
int sign = dag ? (-1) : 1;
int Ls = this->Ls;
FermionField tmp(psi.Grid());
///////////////////////////////////////////////////////////////////////////////////////
//Linv
///////////////////////////////////////////////////////////////////////////////////////
int nblock=(Ls-1)/2;
axpy(chi,0.0,psi,psi); // Identity piece
for(int b=0;b<nblock;b++){
int s = 2*b;
RealD pp = p[nblock-1-b];
RealD qq = q[nblock-1-b];
RealD coeff1=sign*sqrt(amax*amax*amax*pp*qq) / ( dw_diag*dw_diag + amax*amax* qq);
RealD coeff2=sign*sqrt(amax*pp)*dw_diag / ( dw_diag*dw_diag + amax*amax* qq); // Implicit g5 here
axpby_ssp (chi,1.0,chi,coeff1,psi,Ls-1,s);
axpbg5y_ssp(chi,1.0,chi,coeff2,psi,Ls-1,s+1);
}
///////////////////////////////////////////////////////////////////////////////////////
//Dinv (note D isn't really diagonal -- just diagonal enough that we can still invert)
// Compute Seeinv (coeff of gamma5)
///////////////////////////////////////////////////////////////////////////////////////
RealD R=(1+mass)/(1-mass);
RealD Seeinv = R + p[nblock]*dw_diag/amax;
for(int b=0;b<nblock;b++){
Seeinv += p[nblock-1-b]*dw_diag/amax / ( dw_diag*dw_diag/amax/amax + q[nblock-1-b]);
}
Seeinv = 1.0/Seeinv;
for(int b=0;b<nblock;b++){
int s = 2*b;
RealD pp = p[nblock-1-b];
RealD qq = q[nblock-1-b];
RealD coeff1=dw_diag / ( dw_diag*dw_diag + amax*amax* qq); // Implicit g5 here
RealD coeff2=amax*sqrt(qq) / ( dw_diag*dw_diag + amax*amax* qq);
ag5xpby_ssp (tmp,-coeff1,chi,coeff2,chi,s,s+1);
ag5xpby_ssp (tmp, coeff1,chi,coeff2,chi,s+1,s);
}
ag5xpby_ssp (tmp, Seeinv,chi,0.0,chi,Ls-1,Ls-1);
///////////////////////////////////////////////////////////////////////////////////////
// Uinv
///////////////////////////////////////////////////////////////////////////////////////
for(int b=0;b<nblock;b++){
int s = 2*b;
RealD pp = p[nblock-1-b];
RealD qq = q[nblock-1-b];
RealD coeff1=-sign*sqrt(amax*amax*amax*pp*qq) / ( dw_diag*dw_diag + amax*amax* qq);
RealD coeff2=-sign*sqrt(amax*pp)*dw_diag / ( dw_diag*dw_diag + amax*amax* qq); // Implicit g5 here
axpby_ssp (chi,1.0/scale,tmp,coeff1/scale,tmp,s,Ls-1);
axpbg5y_ssp(chi,1.0/scale,tmp,coeff2/scale,tmp,s+1,Ls-1);
}
axpby_ssp (chi, 1.0/scale,tmp,0.0,tmp,Ls-1,Ls-1);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::M_internal(const FermionField &psi, FermionField &chi,int dag)
{
FermionField D(psi.Grid());
int Ls = this->Ls;
int sign = dag ? (-1) : 1;
// For partial frac Hw case (b5=c5=1) chroma quirkily computes
//
// Conventions for partfrac appear to be a mess.
// Tony's Nara lectures have
//
// BlockDiag( H/p_i 1 | 1 )
// ( 1 p_i H / q_i^2 | 0 )
// ---------------------------------
// ( -1 0 | R +p0 H )
//
//Chroma ( -2H 2sqrt(q_i) | 0 )
// (2 sqrt(q_i) 2H | 2 sqrt(p_i) )
// ---------------------------------
// ( 0 -2 sqrt(p_i) | 2 R gamma_5 + p0 2H
//
// Edwards/Joo/Kennedy/Wenger
//
// Here, the "beta's" selected by chroma to scale the unphysical bulk constraint fields
// incorporate the approx scale factor. This is obtained by propagating the
// scale on "H" out to the off diagonal elements as follows:
//
// BlockDiag( H/p_i 1 | 1 )
// ( 1 p_i H / q_i^2 | 0 )
// ---------------------------------
// ( -1 0 | R + p_0 H )
//
// becomes:
// BlockDiag( H/ sp_i 1 | 1 )
// ( 1 sp_i H / s^2q_i^2 | 0 )
// ---------------------------------
// ( -1 0 | R + p_0/s H )
//
//
// This is implemented in Chroma by
// p0' = p0/approxMax
// p_i' = p_i*approxMax
// q_i' = q_i*approxMax*approxMax
//
// After the equivalence transform is applied the matrix becomes
//
//Chroma ( -2H sqrt(q'_i) | 0 )
// (sqrt(q'_i) 2H | sqrt(p'_i) )
// ---------------------------------
// ( 0 -sqrt(p'_i) | 2 R gamma_5 + p'0 2H
//
// = ( -2H sqrt(q_i)amax | 0 )
// (sqrt(q_i)amax 2H | sqrt(p_i*amax) )
// ---------------------------------
// ( 0 -sqrt(p_i)*amax | 2 R gamma_5 + p0/amax 2H
//
this->DW(psi,D,DaggerNo);
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
double pp = p[nblock-1-b];
double qq = q[nblock-1-b];
// Do each 2x2 block aligned at s and
ag5xpby_ssp(chi,-1.0*scale,D,amax*sqrt(qq)*scale,psi, s ,s+1); // Multiplies Dw by G5 so Hw
ag5xpby_ssp(chi, 1.0*scale,D,amax*sqrt(qq)*scale,psi, s+1,s);
// Pick up last column
axpby_ssp (chi, 1.0, chi,sqrt(amax*pp)*scale*sign,psi,s+1,Ls-1);
}
{
double R=(1+this->mass)/(1-this->mass);
//R g5 psi[Ls] + p[0] H
ag5xpbg5y_ssp(chi,R*scale,psi,p[nblock]*scale/amax,D,Ls-1,Ls-1);
for(int b=0;b<nblock;b++){
int s = 2*b+1;
double pp = p[nblock-1-b];
axpby_ssp(chi,1.0,chi,-sqrt(amax*pp)*scale*sign,psi,Ls-1,s);
}
}
}
template<class Impl>
RealD PartialFractionFermion5D<Impl>::M (const FermionField &in, FermionField &out)
{
M_internal(in,out,DaggerNo);
return norm2(out);
}
template<class Impl>
RealD PartialFractionFermion5D<Impl>::Mdag (const FermionField &in, FermionField &out)
{
M_internal(in,out,DaggerYes);
return norm2(out);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::Meooe (const FermionField &in, FermionField &out)
{
Meooe_internal(in,out,DaggerNo);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MeooeDag (const FermionField &in, FermionField &out)
{
Meooe_internal(in,out,DaggerYes);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::Mooee (const FermionField &in, FermionField &out)
{
Mooee_internal(in,out,DaggerNo);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeDag (const FermionField &in, FermionField &out)
{
Mooee_internal(in,out,DaggerYes);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeInv (const FermionField &in, FermionField &out)
{
MooeeInv_internal(in,out,DaggerNo);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeInvDag (const FermionField &in, FermionField &out)
{
MooeeInv_internal(in,out,DaggerYes);
}
// force terms; five routines; default to Dhop on diagonal
template<class Impl>
void PartialFractionFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V.Grid());
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(D,-scale,U,0.0,U,s,s);
ag5xpby_ssp(D, scale,U,0.0,U,s+1,s+1);
}
ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
this->DhopDeriv(mat,D,V,DaggerNo);
};
template<class Impl>
void PartialFractionFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V.Grid());
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(D,-scale,U,0.0,U,s,s);
ag5xpby_ssp(D, scale,U,0.0,U,s+1,s+1);
}
ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
this->DhopDerivOE(mat,D,V,DaggerNo);
};
template<class Impl>
void PartialFractionFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V.Grid());
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(D,-scale,U,0.0,U,s,s);
ag5xpby_ssp(D, scale,U,0.0,U,s+1,s+1);
}
ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
this->DhopDerivEO(mat,D,V,DaggerNo);
};
template<class Impl>
void PartialFractionFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale){
SetCoefficientsZolotarev(1.0/scale,zdata);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata){
// check on degree matching
// std::cout<<GridLogMessage << Ls << " Ls"<<std::endl;
// std::cout<<GridLogMessage << zdata->n << " - n"<<std::endl;
// std::cout<<GridLogMessage << zdata->da << " -da "<<std::endl;
// std::cout<<GridLogMessage << zdata->db << " -db"<<std::endl;
// std::cout<<GridLogMessage << zdata->dn << " -dn"<<std::endl;
// std::cout<<GridLogMessage << zdata->dd << " -dd"<<std::endl;
int Ls = this->Ls;
assert(Ls == (2*zdata->da -1) );
// Part frac
// RealD R;
R=(1+mass)/(1-mass);
dw_diag = (4.0-this->M5);
// std::vector<RealD> p;
// std::vector<RealD> q;
p.resize(zdata->da);
q.resize(zdata->dd);
for(int n=0;n<zdata->da;n++){
p[n] = zdata -> alpha[n];
}
for(int n=0;n<zdata->dd;n++){
q[n] = -zdata -> ap[n];
}
scale= part_frac_chroma_convention ? 2.0 : 1.0; // Chroma conventions annoy me
amax=zolo_hi;
}
template<class Impl>
void PartialFractionFermion5D<Impl>::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d)
{
int Ls = this->Ls;
conformable(solution5d.Grid(),this->FermionGrid());
conformable(exported4d.Grid(),this->GaugeGrid());
ExtractSlice(exported4d, solution5d, Ls-1, Ls-1);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d)
{
int Ls = this->Ls;
conformable(imported5d.Grid(),this->FermionGrid());
conformable(input4d.Grid() ,this->GaugeGrid());
FermionField tmp(this->FermionGrid());
tmp=Zero();
InsertSlice(input4d, tmp, Ls-1, Ls-1);
tmp=Gamma(Gamma::Algebra::Gamma5)*tmp;
this->Dminus(tmp,imported5d);
}
// Constructors
template<class Impl>
PartialFractionFermion5D<Impl>::PartialFractionFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5,
const ImplParams &p) :
WilsonFermion5D<Impl>(_Umu,
FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid,M5,p),
mass(_mass)
{
int Ls = this->Ls;
assert((Ls&0x1)==1); // Odd Ls required
int nrational=Ls-1;
Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);
// NB: chroma uses a cast to "float" for the zolotarev range(!?).
// this creates a real difference in the operator which I do not like but we can replicate here
// to demonstrate compatibility
// RealD eps = (zolo_lo / zolo_hi);
// zdata = bfm_zolotarev(eps,nrational,0);
SetCoefficientsTanh(zdata,1.0);
Approx::zolotarev_free(zdata);
}
NAMESPACE_END(Grid);

View File

@ -26,6 +26,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/Grid.h>
#ifdef AVX512
@ -586,11 +588,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
VADD(UChi_00,UChi_10,UChi_00) \
VADD(UChi_01,UChi_11,UChi_01) \
VADD(UChi_02,UChi_12,UChi_02) ); \
asm ( \
VSTORE(0,%0,pUChi_00) \
VSTORE(1,%0,pUChi_01) \
VSTORE(2,%0,pUChi_02) \
: : "r" (out) : "memory" );
asm ( \
VSTORE(0,%0,pUChi_00) \
VSTORE(1,%0,pUChi_01) \
VSTORE(2,%0,pUChi_02) \
: : "r" (out) : "memory" );
// FIXME is sign right in the VSUB ?
#define nREDUCEa(out) \
@ -613,20 +615,20 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
permute##dir(Chi_1,Chi_1);\
permute##dir(Chi_2,Chi_2);
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag)
DoubledGaugeFieldView &U,
DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
{
assert(0);
};
//#define CONDITIONAL_MOVE(l,o,out) if ( l ) { out = (uint64_t) &in._odata[o] ; } else { out =(uint64_t) &buf[o]; }
//#define CONDITIONAL_MOVE(l,o,out) if ( l ) { out = (uint64_t) &in[o] ; } else { out =(uint64_t) &buf[o]; }
#define CONDITIONAL_MOVE(l,o,out) { const SiteSpinor *ptr = l? in_p : buf; out = (uint64_t) &ptr[o]; }
@ -673,22 +675,23 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
CONDITIONAL_MOVE(l3,o3,addr3); \
PF_CHI(addr3); \
\
gauge0 =(uint64_t)&UU._odata[sU]( X ); \
gauge1 =(uint64_t)&UU._odata[sU]( Y ); \
gauge2 =(uint64_t)&UU._odata[sU]( Z ); \
gauge3 =(uint64_t)&UU._odata[sU]( T );
gauge0 =(uint64_t)&UU[sU]( X ); \
gauge1 =(uint64_t)&UU[sU]( Y ); \
gauge2 =(uint64_t)&UU[sU]( Z ); \
gauge3 =(uint64_t)&UU[sU]( T );
// This is the single precision 5th direction vectorised kernel
#include <Grid/simd/Intel512single.h>
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag)
DoubledGaugeFieldView &U,
DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
{
#ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3;
uint64_t addr0,addr1,addr2,addr3;
const SiteSpinor *in_p; in_p = &in._odata[0];
const SiteSpinor *in_p; in_p = &in[0];
int o0,o1,o2,o3; // offsets
int l0,l1,l2,l3; // local
@ -719,7 +722,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
LOAD_CHI(addr0,addr1,addr2,addr3);
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
addr0 = (uint64_t) &out._odata[sF];
addr0 = (uint64_t) &out[sF];
if ( dag ) {
nREDUCE(addr0);
} else {
@ -734,14 +737,15 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
#include <Grid/simd/Intel512double.h>
template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag)
DoubledGaugeFieldView &U,
DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionFieldView &in, FermionFieldView &out, int dag)
{
#ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3;
uint64_t addr0,addr1,addr2,addr3;
const SiteSpinor *in_p; in_p = &in._odata[0];
const SiteSpinor *in_p; in_p = &in[0];
int o0,o1,o2,o3; // offsets
int l0,l1,l2,l3; // local
@ -771,7 +775,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
LOAD_CHI(addr0,addr1,addr2,addr3);
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
addr0 = (uint64_t) &out._odata[sF];
addr0 = (uint64_t) &out[sF];
if ( dag ) {
nREDUCE(addr0);
} else {
@ -818,14 +822,15 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
#include <Grid/simd/Intel512single.h>
template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag)
DoubledGaugeFieldView &U,
DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
{
#ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3;
uint64_t addr0,addr1,addr2,addr3;
const SiteSpinor *in_p; in_p = &in._odata[0];
const SiteSpinor *in_p; in_p = &in[0];
int o0,o1,o2,o3; // offsets
int l0,l1,l2,l3; // local
@ -872,7 +877,7 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
PERMUTE23;
MULT_ADD_XYZT(gauge2,gauge3);
addr0 = (uint64_t) &out._odata[sF];
addr0 = (uint64_t) &out[sF];
if ( dag ) {
nREDUCEa(addr0);
} else {
@ -886,14 +891,15 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
#include <Grid/simd/Intel512double.h>
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag)
DoubledGaugeFieldView &U,
DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
{
#ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3;
uint64_t addr0,addr1,addr2,addr3;
const SiteSpinor *in_p; in_p = &in._odata[0];
const SiteSpinor *in_p; in_p = &in[0];
int o0,o1,o2,o3; // offsets
int l0,l1,l2,l3; // local
@ -940,7 +946,7 @@ template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st,
PERMUTE23;
MULT_ADD_XYZT(gauge2,gauge3);
addr0 = (uint64_t) &out._odata[sF];
addr0 = (uint64_t) &out[sF];
if ( dag ) {
nREDUCEa(addr0);
} else {
@ -952,17 +958,5 @@ template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st,
#endif
}
#define KERNEL_INSTANTIATE(CLASS,FUNC,IMPL) \
template void CLASS<IMPL>::FUNC(StencilImpl &st, LebesgueOrder &lo, \
DoubledGaugeField &U, \
DoubledGaugeField &UUU, \
SiteSpinor *buf, int LLs, \
int sU, const FermionField &in, FermionField &out,int dag);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplD);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplF);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplD);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplF);
}}
NAMESPACE_END(Grid);

View File

@ -28,6 +28,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */
#include <Grid/Grid.h>
#pragma once
NAMESPACE_BEGIN(Grid);
#define LOAD_CHI(b) \
const SiteSpinor & ref (b[offset]); \
@ -38,7 +41,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
// To splat or not to splat depends on the implementation
#define MULT(A,UChi) \
auto & ref(U._odata[sU](A)); \
auto & ref(U[sU](A)); \
Impl::loadLinkElement(U_00,ref()(0,0)); \
Impl::loadLinkElement(U_10,ref()(1,0)); \
Impl::loadLinkElement(U_20,ref()(2,0)); \
@ -59,7 +62,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
UChi ## _2 += U_22*Chi_2;
#define MULT_ADD(U,A,UChi) \
auto & ref(U._odata[sU](A)); \
auto & ref(U[sU](A)); \
Impl::loadLinkElement(U_00,ref()(0,0)); \
Impl::loadLinkElement(U_10,ref()(1,0)); \
Impl::loadLinkElement(U_20,ref()(2,0)); \
@ -92,7 +95,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
local = SE->_is_local; \
perm = SE->_permute; \
if ( local ) { \
LOAD_CHI(in._odata); \
LOAD_CHI(in); \
if ( perm) { \
PERMUTE_DIR(Perm); \
} \
@ -120,14 +123,14 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
local = SE->_is_local; \
perm = SE->_permute; \
if ( local ) { \
LOAD_CHI(in._odata); \
LOAD_CHI(in); \
if ( perm) { \
PERMUTE_DIR(Perm); \
} \
} else if ( st.same_node[Dir] ) { \
LOAD_CHI(buf); \
} \
if (SE->_is_local || st.same_node[Dir] ) { \
if (local || st.same_node[Dir] ) { \
MULT_ADD(U,Dir,even); \
}
@ -135,22 +138,18 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
SE=st.GetEntry(ptype,Dir+skew,sF); \
offset = SE->_offset; \
local = SE->_is_local; \
perm = SE->_permute; \
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
if ((!local) && (!st.same_node[Dir]) ) { \
nmu++; \
{ LOAD_CHI(buf); } \
{ MULT_ADD(U,Dir,even); } \
}
namespace Grid {
namespace QCD {
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,DoubledGaugeField &UUU,
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag)
const FermionFieldView &in, FermionFieldView &out,int dag)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -213,16 +212,16 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
result()()(1) = even_1 + odd_1;
result()()(2) = even_2 + odd_2;
}
vstream(out._odata[sF],result);
vstream(out[sF],result);
}
}
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag)
const FermionFieldView &in, FermionFieldView &out,int dag)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -249,7 +248,7 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
Simd U_22;
SiteSpinor result;
int offset,local,perm, ptype;
int offset, ptype, local, perm;
StencilEntry *SE;
int skew;
@ -257,8 +256,8 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
for(int s=0;s<LLs;s++){
int sF=s+LLs*sU;
even_0 = zero; even_1 = zero; even_2 = zero;
odd_0 = zero; odd_1 = zero; odd_2 = zero;
even_0 = Zero(); even_1 = Zero(); even_2 = Zero();
odd_0 = Zero(); odd_1 = Zero(); odd_2 = Zero();
skew = 0;
HAND_STENCIL_LEG_INT(U,Xp,3,skew,even);
@ -289,16 +288,16 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
result()()(1) = even_1 + odd_1;
result()()(2) = even_2 + odd_2;
}
vstream(out._odata[sF],result);
vstream(out[sF],result);
}
}
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag)
const FermionFieldView &in, FermionFieldView &out,int dag)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -325,7 +324,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
Simd U_22;
SiteSpinor result;
int offset,local,perm, ptype;
int offset, ptype, local;
StencilEntry *SE;
int skew;
@ -333,8 +332,8 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
for(int s=0;s<LLs;s++){
int sF=s+LLs*sU;
even_0 = zero; even_1 = zero; even_2 = zero;
odd_0 = zero; odd_1 = zero; odd_2 = zero;
even_0 = Zero(); even_1 = Zero(); even_2 = Zero();
odd_0 = Zero(); odd_1 = Zero(); odd_2 = Zero();
int nmu=0;
skew = 0;
HAND_STENCIL_LEG_EXT(U,Xp,3,skew,even);
@ -366,34 +365,29 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
result()()(1) = even_1 + odd_1;
result()()(2) = even_2 + odd_2;
}
out._odata[sF] = out._odata[sF] + result;
out[sF] = out[sF] + result;
}
}
}
#define DHOP_SITE_HAND_INSTANTIATE(IMPL) \
template void StaggeredKernels<IMPL>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \
DoubledGaugeField &U,DoubledGaugeField &UUU, \
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \
SiteSpinor *buf, int LLs, int sU, \
const FermionField &in, FermionField &out, int dag); \
const FermionFieldView &in, FermionFieldView &out, int dag); \
\
template void StaggeredKernels<IMPL>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, \
DoubledGaugeField &U,DoubledGaugeField &UUU, \
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \
SiteSpinor *buf, int LLs, int sU, \
const FermionField &in, FermionField &out, int dag); \
const FermionFieldView &in, FermionFieldView &out, int dag); \
\
template void StaggeredKernels<IMPL>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, \
DoubledGaugeField &U,DoubledGaugeField &UUU, \
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \
SiteSpinor *buf, int LLs, int sU, \
const FermionField &in, FermionField &out, int dag); \
const FermionFieldView &in, FermionFieldView &out, int dag); \
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplD);
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplF);
DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplD);
DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplF);
#undef LOAD_CHI
NAMESPACE_END(Grid);
}
}

View File

@ -28,40 +28,38 @@ directory
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
namespace Grid {
namespace QCD {
#pragma once
int StaggeredKernelsStatic::Opt= StaggeredKernelsStatic::OptGeneric;
int StaggeredKernelsStatic::Comms = StaggeredKernelsStatic::CommsAndCompute;
NAMESPACE_BEGIN(Grid);
#define GENERIC_STENCIL_LEG(U,Dir,skew,multLink) \
SE = st.GetEntry(ptype, Dir+skew, sF); \
if (SE->_is_local ) { \
if (SE->_permute) { \
chi_p = &chi; \
permute(chi, in._odata[SE->_offset], ptype); \
permute(chi, in[SE->_offset], ptype); \
} else { \
chi_p = &in._odata[SE->_offset]; \
chi_p = &in[SE->_offset]; \
} \
} else { \
chi_p = &buf[SE->_offset]; \
} \
multLink(Uchi, U._odata[sU], *chi_p, Dir);
multLink(Uchi, U[sU], *chi_p, Dir);
#define GENERIC_STENCIL_LEG_INT(U,Dir,skew,multLink) \
SE = st.GetEntry(ptype, Dir+skew, sF); \
if (SE->_is_local ) { \
if (SE->_permute) { \
chi_p = &chi; \
permute(chi, in._odata[SE->_offset], ptype); \
permute(chi, in[SE->_offset], ptype); \
} else { \
chi_p = &in._odata[SE->_offset]; \
chi_p = &in[SE->_offset]; \
} \
} else if ( st.same_node[Dir] ) { \
chi_p = &buf[SE->_offset]; \
} \
if (SE->_is_local || st.same_node[Dir] ) { \
multLink(Uchi, U._odata[sU], *chi_p, Dir); \
multLink(Uchi, U[sU], *chi_p, Dir); \
}
#define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink) \
@ -69,7 +67,7 @@ int StaggeredKernelsStatic::Comms = StaggeredKernelsStatic::CommsAndCompute;
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
nmu++; \
chi_p = &buf[SE->_offset]; \
multLink(Uchi, U._odata[sU], *chi_p, Dir); \
multLink(Uchi, U[sU], *chi_p, Dir); \
}
template <class Impl>
@ -81,9 +79,9 @@ StaggeredKernels<Impl>::StaggeredKernels(const ImplParams &p) : Base(p){};
////////////////////////////////////////////////////////////////////////////////////
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out, int dag) {
const FermionFieldView &in, FermionFieldView &out, int dag) {
const SiteSpinor *chi_p;
SiteSpinor chi;
SiteSpinor Uchi;
@ -114,7 +112,7 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
if ( dag ) {
Uchi = - Uchi;
}
vstream(out._odata[sF], Uchi);
vstream(out[sF], Uchi);
}
};
@ -123,9 +121,9 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
///////////////////////////////////////////////////
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag) {
const FermionFieldView &in, FermionFieldView &out,int dag) {
const SiteSpinor *chi_p;
SiteSpinor chi;
SiteSpinor Uchi;
@ -136,7 +134,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &
for(int s=0;s<LLs;s++){
int sF=LLs*sU+s;
skew = 0;
Uchi=zero;
Uchi=Zero();
GENERIC_STENCIL_LEG_INT(U,Xp,skew,Impl::multLinkAdd);
GENERIC_STENCIL_LEG_INT(U,Yp,skew,Impl::multLinkAdd);
GENERIC_STENCIL_LEG_INT(U,Zp,skew,Impl::multLinkAdd);
@ -157,7 +155,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &
if ( dag ) {
Uchi = - Uchi;
}
vstream(out._odata[sF], Uchi);
vstream(out[sF], Uchi);
}
};
@ -167,11 +165,11 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &
///////////////////////////////////////////////////
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,int dag) {
const FermionFieldView &in, FermionFieldView &out,int dag) {
const SiteSpinor *chi_p;
SiteSpinor chi;
// SiteSpinor chi;
SiteSpinor Uchi;
StencilEntry *SE;
int ptype;
@ -181,7 +179,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &
for(int s=0;s<LLs;s++){
int sF=LLs*sU+s;
skew = 0;
Uchi=zero;
Uchi=Zero();
GENERIC_STENCIL_LEG_EXT(U,Xp,skew,Impl::multLinkAdd);
GENERIC_STENCIL_LEG_EXT(U,Yp,skew,Impl::multLinkAdd);
GENERIC_STENCIL_LEG_EXT(U,Zp,skew,Impl::multLinkAdd);
@ -202,9 +200,9 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &
if ( nmu ) {
if ( dag ) {
out._odata[sF] = out._odata[sF] - Uchi;
out[sF] = out[sF] - Uchi;
} else {
out._odata[sF] = out._odata[sF] + Uchi;
out[sF] = out[sF] + Uchi;
}
}
}
@ -215,9 +213,9 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &
////////////////////////////////////////////////////////////////////////////////////
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,
const FermionFieldView &in, FermionFieldView &out,
int interior,int exterior)
{
int dag=1;
@ -225,9 +223,9 @@ void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, Dou
};
template <class Impl>
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out,
const FermionFieldView &in, FermionFieldView &out,
int interior,int exterior)
{
int dag=0;
@ -235,9 +233,9 @@ void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, Double
};
template <class Impl>
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out,
int sU, const FermionFieldView &in, FermionFieldView &out,
int dag,int interior,int exterior)
{
switch(Opt) {
@ -277,8 +275,8 @@ void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, Double
};
template <class Impl>
void StaggeredKernels<Impl>::DhopDir( StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out, int dir, int disp)
void StaggeredKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int disp)
{
// Disp should be either +1,-1,+3,-3
// What about "dag" ?
@ -287,8 +285,6 @@ void StaggeredKernels<Impl>::DhopDir( StencilImpl &st, DoubledGaugeField &U, Do
assert(0);
}
FermOpStaggeredTemplateInstantiate(StaggeredKernels);
FermOpStaggeredVec5dTemplateInstantiate(StaggeredKernels);
NAMESPACE_END(Grid);
}}

View File

@ -26,23 +26,21 @@
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
//#include <Grid/Eigen/Dense>
#include <Grid/qcd/spin/Dirac.h>
namespace Grid
{
namespace QCD
{
#include <Grid/Grid.h>
#include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/action/fermion/WilsonCloverFermion.h>
NAMESPACE_BEGIN(Grid);
// *NOT* EO
template <class Impl>
RealD WilsonCloverFermion<Impl>::M(const FermionField &in, FermionField &out)
{
FermionField temp(out._grid);
FermionField temp(out.Grid());
// Wilson term
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
this->Dhop(in, out, DaggerNo);
// Clover term
@ -55,10 +53,10 @@ RealD WilsonCloverFermion<Impl>::M(const FermionField &in, FermionField &out)
template <class Impl>
RealD WilsonCloverFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
{
FermionField temp(out._grid);
FermionField temp(out.Grid());
// Wilson term
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
this->Dhop(in, out, DaggerYes);
// Clover term
@ -72,7 +70,7 @@ template <class Impl>
void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
{
WilsonFermion<Impl>::ImportGauge(_Umu);
GridBase *grid = _Umu._grid;
GridBase *grid = _Umu.Grid();
typename Impl::GaugeLinkField Bx(grid), By(grid), Bz(grid), Ex(grid), Ey(grid), Ez(grid);
// Compute the field strength terms mu>nu
@ -93,27 +91,29 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
CloverTerm += fillCloverZT(Ez) * csw_t;
CloverTerm += diag_mass;
int lvol = _Umu._grid->lSites();
int lvol = _Umu.Grid()->lSites();
int DimRep = Impl::Dimension;
Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
std::vector<int> lcoor;
typename SiteCloverType::scalar_object Qx = zero, Qxinv = zero;
Coordinate lcoor;
typename SiteCloverType::scalar_object Qx = Zero(), Qxinv = Zero();
for (int site = 0; site < lvol; site++)
{
grid->LocalIndexToLocalCoor(site, lcoor);
EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
peekLocalSite(Qx, CloverTerm, lcoor);
Qxinv = zero;
Qxinv = Zero();
//if (csw!=0){
for (int j = 0; j < Ns; j++)
for (int k = 0; k < Ns; k++)
for (int a = 0; a < DimRep; a++)
for (int b = 0; b < DimRep; b++)
EigenCloverOp(a + j * DimRep, b + k * DimRep) = Qx()(j, k)(a, b);
for (int b = 0; b < DimRep; b++){
auto zz = Qx()(j, k)(a, b);
EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex<double>(zz);
}
// if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl;
EigenInvCloverOp = EigenCloverOp.inverse();
@ -169,15 +169,15 @@ void WilsonCloverFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField
template <class Impl>
void WilsonCloverFermion<Impl>::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv)
{
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
CloverFieldType *Clover;
assert(in.checkerboard == Odd || in.checkerboard == Even);
assert(in.Checkerboard() == Odd || in.Checkerboard() == Even);
if (dag)
{
if (in._grid->_isCheckerBoarded)
if (in.Grid()->_isCheckerBoarded)
{
if (in.checkerboard == Odd)
if (in.Checkerboard() == Odd)
{
Clover = (inv) ? &CloverTermInvDagOdd : &CloverTermDagOdd;
}
@ -195,10 +195,10 @@ void WilsonCloverFermion<Impl>::MooeeInternal(const FermionField &in, FermionFie
}
else
{
if (in._grid->_isCheckerBoarded)
if (in.Grid()->_isCheckerBoarded)
{
if (in.checkerboard == Odd)
if (in.Checkerboard() == Odd)
{
// std::cout << "Calling clover term Odd" << std::endl;
Clover = (inv) ? &CloverTermInvOdd : &CloverTermOdd;
@ -209,7 +209,7 @@ void WilsonCloverFermion<Impl>::MooeeInternal(const FermionField &in, FermionFie
Clover = (inv) ? &CloverTermInvEven : &CloverTermEven;
}
out = *Clover * in;
// std::cout << GridLogMessage << "*Clover.checkerboard " << (*Clover).checkerboard << std::endl;
// std::cout << GridLogMessage << "*Clover.Checkerboard() " << (*Clover).Checkerboard() << std::endl;
}
else
{
@ -235,9 +235,4 @@ void WilsonCloverFermion<Impl>::MeeDeriv(GaugeField &mat, const FermionField &U,
assert(0); // not implemented yet
}
FermOpTemplateInstantiate(WilsonCloverFermion);
AdjointFermOpTemplateInstantiate(WilsonCloverFermion);
TwoIndexFermOpTemplateInstantiate(WilsonCloverFermion);
//GparityFermOpTemplateInstantiate(WilsonCloverFermion);
}
}
NAMESPACE_END(Grid);

View File

@ -36,13 +36,8 @@ Author: Vera Guelpers <V.M.Guelpers@soton.ac.uk>
#include <Grid/qcd/action/fermion/WilsonFermion5D.h>
#include <Grid/perfmon/PerfCount.h>
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
// S-direction is INNERMOST and takes no part in the parity.
const std::vector<int> WilsonFermion5DStatic::directions ({1,2,3,4, 1, 2, 3, 4});
const std::vector<int> WilsonFermion5DStatic::displacements({1,1,1,1,-1,-1,-1,-1});
// 5d lattice for DWF.
template<class Impl>
WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
@ -56,9 +51,9 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
_FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
_FourDimGrid (&FourDimGrid),
_FourDimRedBlackGrid(&FourDimRedBlackGrid),
Stencil (_FiveDimGrid,npoint,Even,directions,displacements),
StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd
Stencil (_FiveDimGrid,npoint,Even,directions,displacements,p),
StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements,p), // source is Even
StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements,p), // source is Odd
M5(_M5),
Umu(_FourDimGrid),
UmuEven(_FourDimRedBlackGrid),
@ -105,8 +100,8 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
for(int d=0;d<4;d++){
assert(FourDimGrid._simd_layout[d]=1);
assert(FourDimRedBlackGrid._simd_layout[d]=1);
assert(FourDimGrid._simd_layout[d]==1);
assert(FourDimRedBlackGrid._simd_layout[d]==1);
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
}
@ -141,7 +136,7 @@ void WilsonFermion5D<Impl>::Report(void)
RealD NP = _FourDimGrid->_Nprocessors;
RealD NN = _FourDimGrid->NodeCount();
RealD volume = Ls;
std::vector<int> latt = _FourDimGrid->GlobalDimensions();
Coordinate latt = _FourDimGrid->GlobalDimensions();
for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
if ( DhopCalls > 0 ) {
@ -221,7 +216,7 @@ void WilsonFermion5D<Impl>::ZeroCounters(void) {
template<class Impl>
void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
{
GaugeField HUmu(_Umu._grid);
GaugeField HUmu(_Umu.Grid());
HUmu = _Umu*(-0.5);
Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
pickCheckerboard(Even,UmuEven,Umu);
@ -235,51 +230,43 @@ void WilsonFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,in
// assert( (disp==1)||(disp==-1) );
// assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t;
Compressor compressor(DaggerNo);
Stencil.HaloExchange(in,compressor);
int skip = (disp==1) ? 0 : 1;
int dirdisp = dir+skip*4;
int gamma = dir+(1-skip)*4;
assert(dirdisp<=7);
assert(dirdisp>=0);
Compressor compressor(DaggerNo);
Stencil.HaloExchange(in,compressor);
uint64_t Nsite = Umu.Grid()->oSites();
Kernels::DhopDirKernel(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out,dirdisp,gamma);
parallel_for(int ss=0;ss<Umu._grid->oSites();ss++){
for(int s=0;s<Ls;s++){
int sU=ss;
int sF = s+Ls*sU;
Kernels::DhopDir(Stencil,Umu,Stencil.CommBuf(),sF,sU,in,out,dirdisp,gamma);
}
}
};
template<class Impl>
void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
DoubledGaugeField & U,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
{
DerivCalls++;
assert((dag==DaggerNo) ||(dag==DaggerYes));
conformable(st._grid,A._grid);
conformable(st._grid,B._grid);
conformable(st.Grid(),A.Grid());
conformable(st.Grid(),B.Grid());
Compressor compressor(dag);
FermionField Btilde(B._grid);
FermionField Atilde(B._grid);
FermionField Btilde(B.Grid());
FermionField Atilde(B.Grid());
DerivCommTime-=usecond();
st.HaloExchange(B,compressor);
DerivCommTime+=usecond();
Atilde=A;
int LLs = B._grid->_rdimensions[0];
int LLs = B.Grid()->_rdimensions[0];
DerivComputeTime-=usecond();
@ -295,21 +282,11 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
////////////////////////
DerivDhopComputeTime -= usecond();
parallel_for (int sss = 0; sss < U._grid->oSites(); sss++) {
for (int s = 0; s < Ls; s++) {
int sU = sss;
int sF = s + Ls * sU;
assert(sF < B._grid->oSites());
assert(sU < U._grid->oSites());
int Usites = U.Grid()->oSites();
Kernels::DhopDir(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma);
Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, Usites, B, Btilde, mu,gamma);
////////////////////////////
// spin trace outer product
////////////////////////////
}
}
////////////////////////////
// spin trace outer product
////////////////////////////
@ -325,12 +302,13 @@ void WilsonFermion5D<Impl>::DhopDeriv(GaugeField &mat,
const FermionField &B,
int dag)
{
conformable(A._grid,FermionGrid());
conformable(A._grid,B._grid);
conformable(A.Grid(),FermionGrid());
conformable(A.Grid(),B.Grid());
//conformable(GaugeGrid(),mat._grid);// this is not general! leaving as a comment
//conformable(GaugeGrid(),mat.Grid());// this is not general! leaving as a comment
mat.checkerboard = A.checkerboard;
mat.Checkerboard() = A.Checkerboard();
// mat.checkerboard = A.checkerboard;
DerivInternal(Stencil,Umu,mat,A,B,dag);
}
@ -341,12 +319,12 @@ void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
const FermionField &B,
int dag)
{
conformable(A._grid,FermionRedBlackGrid());
conformable(A._grid,B._grid);
conformable(A.Grid(),FermionRedBlackGrid());
conformable(A.Grid(),B.Grid());
assert(B.checkerboard==Odd);
assert(A.checkerboard==Even);
mat.checkerboard = Even;
assert(B.Checkerboard()==Odd);
assert(A.Checkerboard()==Even);
mat.Checkerboard() = Even;
DerivInternal(StencilOdd,UmuEven,mat,A,B,dag);
}
@ -358,12 +336,12 @@ void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
const FermionField &B,
int dag)
{
conformable(A._grid,FermionRedBlackGrid());
conformable(A._grid,B._grid);
conformable(A.Grid(),FermionRedBlackGrid());
conformable(A.Grid(),B.Grid());
assert(B.checkerboard==Even);
assert(A.checkerboard==Odd);
mat.checkerboard = Odd;
assert(B.Checkerboard()==Even);
assert(A.Checkerboard()==Odd);
mat.Checkerboard() = Odd;
DerivInternal(StencilEven,UmuOdd,mat,A,B,dag);
}
@ -374,11 +352,9 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
const FermionField &in, FermionField &out,int dag)
{
DhopTotalTime-=usecond();
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
DhopInternalOverlappedComms(st,lo,U,in,out,dag);
else
#endif
DhopInternalSerialComms(st,lo,U,in,out,dag);
DhopTotalTime+=usecond();
}
@ -389,131 +365,84 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
DoubledGaugeField & U,
const FermionField &in, FermionField &out,int dag)
{
#ifdef GRID_OMP
// assert((dag==DaggerNo) ||(dag==DaggerYes));
Compressor compressor(dag);
int LLs = in._grid->_rdimensions[0];
int len = U._grid->oSites();
int LLs = in.Grid()->_rdimensions[0];
int len = U.Grid()->oSites();
/////////////////////////////
// Start comms // Gather intranode and extra node differentiated??
/////////////////////////////
DhopFaceTime-=usecond();
st.HaloExchangeOptGather(in,compressor);
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
DhopFaceTime+=usecond();
double ctime=0;
double ptime=0;
DhopCommTime -=usecond();
std::vector<std::vector<CommsRequest_t> > requests;
st.CommunicateBegin(requests);
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Ugly explicit thread mapping introduced for OPA reasons.
//////////////////////////////////////////////////////////////////////////////////////////////////////
#pragma omp parallel reduction(max:ctime) reduction(max:ptime)
{
int tid = omp_get_thread_num();
int nthreads = omp_get_num_threads();
int ncomms = CartesianCommunicator::nCommThreads;
if (ncomms == -1) ncomms = 1;
assert(nthreads > ncomms);
if (tid >= ncomms) {
double start = usecond();
nthreads -= ncomms;
int ttid = tid - ncomms;
int n = U._grid->oSites();
int chunk = n / nthreads;
int rem = n % nthreads;
int myblock, myn;
if (ttid < rem) {
myblock = ttid * chunk + ttid;
myn = chunk+1;
} else {
myblock = ttid*chunk + rem;
myn = chunk;
}
/////////////////////////////
// Overlap with comms
/////////////////////////////
DhopFaceTime-=usecond();
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
DhopFaceTime+=usecond();
// do the compute
if (dag == DaggerYes) {
for (int ss = myblock; ss < myblock+myn; ++ss) {
int sU = ss;
int sF = LLs * sU;
Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0);
}
} else {
for (int ss = myblock; ss < myblock+myn; ++ss) {
int sU = ss;
int sF = LLs * sU;
Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0);
}
}
ptime = usecond() - start;
} else {
double start = usecond();
st.CommunicateThreaded();
ctime = usecond() - start;
}
/////////////////////////////
// do the compute interior
/////////////////////////////
int Opt = WilsonKernelsStatic::Opt; // Why pass this. Kernels should know
DhopComputeTime-=usecond();
if (dag == DaggerYes) {
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
} else {
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
}
DhopCommTime += ctime;
DhopComputeTime+=ptime;
DhopComputeTime+=usecond();
// First to enter, last to leave timing
st.CollateThreads();
/////////////////////////////
// Complete comms
/////////////////////////////
st.CommunicateComplete(requests);
DhopCommTime +=usecond();
/////////////////////////////
// do the compute exterior
/////////////////////////////
DhopFaceTime-=usecond();
st.CommsMerge(compressor);
DhopFaceTime+=usecond();
DhopComputeTime2-=usecond();
if (dag == DaggerYes) {
int sz=st.surface_list.size();
parallel_for (int ss = 0; ss < sz; ss++) {
int sU = st.surface_list[ss];
int sF = LLs * sU;
Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
}
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
} else {
int sz=st.surface_list.size();
parallel_for (int ss = 0; ss < sz; ss++) {
int sU = st.surface_list[ss];
int sF = LLs * sU;
Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
}
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
}
DhopComputeTime2+=usecond();
#else
assert(0);
#endif
}
template<class Impl>
void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo,
DoubledGaugeField & U,
const FermionField &in, FermionField &out,int dag)
DoubledGaugeField & U,
const FermionField &in,
FermionField &out,int dag)
{
// assert((dag==DaggerNo) ||(dag==DaggerYes));
Compressor compressor(dag);
int LLs = in._grid->_rdimensions[0];
int LLs = in.Grid()->_rdimensions[0];
DhopCommTime-=usecond();
st.HaloExchangeOpt(in,compressor);
DhopCommTime+=usecond();
DhopComputeTime-=usecond();
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
int Opt = WilsonKernelsStatic::Opt;
if (dag == DaggerYes) {
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU = ss;
int sF = LLs * sU;
Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
}
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out);
} else {
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU = ss;
int sF = LLs * sU;
Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
}
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out);
}
DhopComputeTime+=usecond();
}
@ -523,11 +452,11 @@ template<class Impl>
void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
{
DhopCalls++;
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
conformable(in._grid,out._grid); // drops the cb check
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
conformable(in.Grid(),out.Grid()); // drops the cb check
assert(in.checkerboard==Even);
out.checkerboard = Odd;
assert(in.Checkerboard()==Even);
out.Checkerboard() = Odd;
DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,in,out,dag);
}
@ -535,11 +464,11 @@ template<class Impl>
void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
{
DhopCalls++;
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
conformable(in._grid,out._grid); // drops the cb check
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
conformable(in.Grid(),out.Grid()); // drops the cb check
assert(in.checkerboard==Odd);
out.checkerboard = Even;
assert(in.Checkerboard()==Odd);
out.Checkerboard() = Even;
DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,in,out,dag);
}
@ -547,17 +476,17 @@ template<class Impl>
void WilsonFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
{
DhopCalls+=2;
conformable(in._grid,FermionGrid()); // verifies full grid
conformable(in._grid,out._grid);
conformable(in.Grid(),FermionGrid()); // verifies full grid
conformable(in.Grid(),out.Grid());
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
DhopInternal(Stencil,Lebesgue,Umu,in,out,dag);
}
template<class Impl>
void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag)
{
out.checkerboard=in.checkerboard;
out.Checkerboard()=in.Checkerboard();
Dhop(in,out,dag); // -0.5 is included
axpy(out,4.0-M5,in,out);
}
@ -569,7 +498,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
GridBase *_grid = _FourDimGrid;
GridBase *_5dgrid = _FiveDimGrid;
conformable(_5dgrid,out._grid);
conformable(_5dgrid,out.Grid());
FermionField PRsource(_5dgrid);
FermionField PLsource(_5dgrid);
@ -580,7 +509,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
FermionField bufL_4d(_grid);
FermionField bufR_4d(_grid);
unsigned int Ls = in._grid->_rdimensions[0];
unsigned int Ls = in.Grid()->_rdimensions[0];
typedef typename FermionField::vector_type vector_type;
typedef typename FermionField::scalar_type ScalComplex;
@ -596,12 +525,12 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
Gamma g5(Gamma::Algebra::Gamma5);
std::vector<int> latt_size = _grid->_fdimensions;
Coordinate latt_size = _grid->_fdimensions;
LatComplex sk(_grid); sk = zero;
LatComplex sk2(_grid); sk2= zero;
LatComplex W(_grid); W= zero;
LatComplex a(_grid); a= zero;
LatComplex sk(_grid); sk = Zero();
LatComplex sk2(_grid); sk2= Zero();
LatComplex W(_grid); W= Zero();
LatComplex a(_grid); a= Zero();
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
LatComplex cosha(_grid);
LatComplex kmu(_grid);
@ -643,9 +572,9 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
// FIXME Need a Lattice acosh
for(int idx=0;idx<_grid->lSites();idx++){
std::vector<int> lcoor(Nd);
Coordinate lcoor(Nd);
Tcomplex cc;
RealD sgn;
// RealD sgn;
_grid->LocalIndexToLocalCoor(idx,lcoor);
peekLocalSite(cc,cosha,lcoor);
assert((double)real(cc)>=1.0);
@ -678,8 +607,8 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
//calculate GR, GL
for(unsigned int ss=1;ss<=Ls;ss++)
{
bufR_4d = zero;
bufL_4d = zero;
bufR_4d = Zero();
bufL_4d = Zero();
for(unsigned int tt=1;tt<=Ls;tt++)
{
//possible sign if W<0
@ -688,7 +617,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
unsigned int f = (ss > tt) ? ss-tt : tt-ss; //f = abs(ss-tt)
//GR
buf1_4d = zero;
buf1_4d = Zero();
ExtractSlice(buf1_4d, PRsource, (tt-1), 0);
//G(s,t)
bufR_4d = bufR_4d + A * exp(a*Ls) * exp(-a*f) * signW * buf1_4d + A * exp(-a*Ls) * exp(a*f) * signW * buf1_4d;
@ -702,7 +631,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
bufR_4d = bufR_4d + Amm * exp(-a*ss) * exp(-a*tt) * signW * buf1_4d ;
//GL
buf2_4d = zero;
buf2_4d = Zero();
ExtractSlice(buf2_4d, PLsource, (tt-1), 0);
//G(s,t)
bufL_4d = bufL_4d + A * exp(a*Ls) * exp(-a*f) * signW * buf2_4d + A * exp(-a*Ls) * exp(a*f) * signW * buf2_4d;
@ -722,13 +651,13 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
//calculate propagator
for(unsigned int ss=1;ss<=Ls;ss++)
{
bufR_4d = zero;
bufL_4d = zero;
bufR_4d = Zero();
bufL_4d = Zero();
//(i*gamma_mu*sin(p_mu) - W)*(GL*P- source)
buf1_4d = zero;
buf1_4d = Zero();
ExtractSlice(buf1_4d, GL, (ss-1), 0);
buf2_4d = zero;
buf2_4d = Zero();
for(int mu=0;mu<Nd;mu++) {
LatticeCoordinate(kmu,mu);
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
@ -738,9 +667,9 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
bufL_4d = buf2_4d - W * buf1_4d;
//(i*gamma_mu*sin(p_mu) - W)*(GR*P+ source)
buf1_4d = zero;
buf1_4d = Zero();
ExtractSlice(buf1_4d, GR, (ss-1), 0);
buf2_4d = zero;
buf2_4d = Zero();
for(int mu=0;mu<Nd;mu++) {
LatticeCoordinate(kmu,mu);
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
@ -781,7 +710,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const Fe
{
// what type LatticeComplex
GridBase *_grid = _FourDimGrid;
conformable(_grid,out._grid);
conformable(_grid,out.Grid());
typedef typename FermionField::vector_type vector_type;
typedef typename FermionField::scalar_type ScalComplex;
@ -795,17 +724,17 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const Fe
Gamma::Algebra::GammaT
};
std::vector<int> latt_size = _grid->_fdimensions;
Coordinate latt_size = _grid->_fdimensions;
FermionField num (_grid); num = zero;
FermionField num (_grid); num = Zero();
LatComplex sk(_grid); sk = zero;
LatComplex sk2(_grid); sk2= zero;
LatComplex W(_grid); W= zero;
LatComplex a(_grid); a= zero;
LatComplex sk(_grid); sk = Zero();
LatComplex sk2(_grid); sk2= Zero();
LatComplex W(_grid); W= Zero();
LatComplex a(_grid); a= Zero();
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
LatComplex denom(_grid); denom= zero;
LatComplex denom(_grid); denom= Zero();
LatComplex cosha(_grid);
LatComplex kmu(_grid);
LatComplex Wea(_grid);
@ -838,9 +767,9 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const Fe
// FIXME Need a Lattice acosh
for(int idx=0;idx<_grid->lSites();idx++){
std::vector<int> lcoor(Nd);
Coordinate lcoor(Nd);
Tcomplex cc;
RealD sgn;
// RealD sgn;
_grid->LocalIndexToLocalCoor(idx,lcoor);
peekLocalSite(cc,cosha,lcoor);
assert((double)real(cc)>=1.0);
@ -868,7 +797,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const Fe
};
GridBase *_grid = _FourDimGrid;
conformable(_grid,out._grid);
conformable(_grid,out.Grid());
typedef typename FermionField::vector_type vector_type;
typedef typename FermionField::scalar_type ScalComplex;
@ -876,18 +805,18 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const Fe
typedef Lattice<iSinglet<vector_type> > LatComplex;
std::vector<int> latt_size = _grid->_fdimensions;
Coordinate latt_size = _grid->_fdimensions;
LatComplex sk(_grid); sk = zero;
LatComplex sk2(_grid); sk2= zero;
LatComplex sk(_grid); sk = Zero();
LatComplex sk2(_grid); sk2= Zero();
LatComplex w_k(_grid); w_k= zero;
LatComplex b_k(_grid); b_k= zero;
LatComplex w_k(_grid); w_k= Zero();
LatComplex b_k(_grid); b_k= Zero();
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
FermionField num (_grid); num = zero;
LatComplex denom(_grid); denom= zero;
FermionField num (_grid); num = Zero();
LatComplex denom(_grid); denom= Zero();
LatComplex kmu(_grid);
ScalComplex ci(0.0,1.0);
@ -928,7 +857,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const Fe
// Helper macro to reverse Simd vector. Fixme: slow, generic implementation.
#define REVERSE_LS(qSite, qSiteRev, Nsimd) \
{ \
std::vector<typename SitePropagator::scalar_object> qSiteVec(Nsimd); \
ExtractBuffer<typename SitePropagator::scalar_object> qSiteVec(Nsimd); \
extract(qSite, qSiteVec); \
for (int i = 0; i < Nsimd / 2; ++i) \
{ \
@ -946,31 +875,35 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const Fe
template<class vobj>
Lattice<vobj> spProj5p(const Lattice<vobj> & in)
{
GridBase *grid=in._grid;
GridBase *grid=in.Grid();
Gamma G5(Gamma::Algebra::Gamma5);
Lattice<vobj> ret(grid);
parallel_for(int ss=0;ss<grid->oSites();ss++){
ret._odata[ss] = in._odata[ss] + G5*in._odata[ss];
}
auto ret_v = ret.View();
auto in_v = in.View();
thread_for(ss,grid->oSites(),{
ret_v[ss] = in_v[ss] + G5*in_v[ss];
});
return ret;
}
template<class vobj>
Lattice<vobj> spProj5m(const Lattice<vobj> & in)
{
Gamma G5(Gamma::Algebra::Gamma5);
GridBase *grid=in._grid;
GridBase *grid=in.Grid();
Lattice<vobj> ret(grid);
parallel_for(int ss=0;ss<grid->oSites();ss++){
ret._odata[ss] = in._odata[ss] - G5*in._odata[ss];
}
auto ret_v = ret.View();
auto in_v = in.View();
thread_for(ss,grid->oSites(),{
ret_v[ss] = in_v[ss] - G5*in_v[ss];
});
return ret;
}
template <class Impl>
void WilsonFermion5D<Impl>::ContractJ5q(FermionField &q_in,ComplexField &J5q)
{
conformable(GaugeGrid(), J5q._grid);
conformable(q_in._grid, FermionGrid());
conformable(GaugeGrid(), J5q.Grid());
conformable(q_in.Grid(), FermionGrid());
// 4d field
int Ls = this->Ls;
@ -990,8 +923,8 @@ void WilsonFermion5D<Impl>::ContractJ5q(FermionField &q_in,ComplexField &J5q)
template <class Impl>
void WilsonFermion5D<Impl>::ContractJ5q(PropagatorField &q_in,ComplexField &J5q)
{
conformable(GaugeGrid(), J5q._grid);
conformable(q_in._grid, FermionGrid());
conformable(GaugeGrid(), J5q.Grid());
conformable(q_in.Grid(), FermionGrid());
// 4d field
int Ls = this->Ls;
@ -1015,20 +948,26 @@ void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
Current curr_type,
unsigned int mu)
{
conformable(q_in_1._grid, FermionGrid());
conformable(q_in_1._grid, q_in_2._grid);
conformable(_FourDimGrid, q_out._grid);
conformable(q_in_1.Grid(), FermionGrid());
conformable(q_in_1.Grid(), q_in_2.Grid());
conformable(_FourDimGrid, q_out.Grid());
PropagatorField tmp1(FermionGrid()), tmp2(FermionGrid());
unsigned int LLs = q_in_1._grid->_rdimensions[0];
q_out = zero;
unsigned int LLs = q_in_1.Grid()->_rdimensions[0];
q_out = Zero();
// Forward, need q1(x + mu, s), q2(x, Ls - 1 - s). Backward, need q1(x, s),
// q2(x + mu, Ls - 1 - s). 5D lattice so shift 4D coordinate mu by one.
tmp1 = Cshift(q_in_1, mu + 1, 1);
tmp2 = Cshift(q_in_2, mu + 1, 1);
parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU)
{
auto q_in_1_v = q_in_1.View();
auto q_in_2_v = q_in_2.View();
auto tmp1_v = tmp1.View();
auto tmp2_v = tmp2.View();
auto q_out_v = q_out.View();
auto Umu_v = Umu.View();
thread_for(sU, Umu.Grid()->oSites(),{
unsigned int sF1 = sU * LLs;
unsigned int sF2 = (sU + 1) * LLs - 1;
@ -1042,26 +981,26 @@ void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
// sites correctly.
if (Impl::LsVectorised)
{
REVERSE_LS(q_in_2._odata[sF2], qSite2, Ls / LLs);
REVERSE_LS(tmp2._odata[sF2], qmuSite2, Ls / LLs);
REVERSE_LS(q_in_2_v[sF2], qSite2, Ls / LLs);
REVERSE_LS(tmp2_v[sF2], qmuSite2, Ls / LLs);
}
else
{
qSite2 = q_in_2._odata[sF2];
qmuSite2 = tmp2._odata[sF2];
qSite2 = q_in_2_v[sF2];
qmuSite2 = tmp2_v[sF2];
}
Kernels::ContractConservedCurrentSiteFwd(tmp1._odata[sF1],
Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sF1],
qSite2,
q_out._odata[sU],
Umu, sU, mu, axial_sign);
Kernels::ContractConservedCurrentSiteBwd(q_in_1._odata[sF1],
q_out_v[sU],
Umu_v, sU, mu, axial_sign);
Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sF1],
qmuSite2,
q_out._odata[sU],
Umu, sU, mu, axial_sign);
q_out_v[sU],
Umu_v, sU, mu, axial_sign);
sF1++;
sF2--;
}
}
});
}
@ -1074,18 +1013,21 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
unsigned int tmax,
ComplexField &lattice_cmplx)
{
conformable(q_in._grid, FermionGrid());
conformable(q_in._grid, q_out._grid);
conformable(q_in.Grid(), FermionGrid());
conformable(q_in.Grid(), q_out.Grid());
PropagatorField tmp(GaugeGrid()),tmp2(GaugeGrid());
unsigned int tshift = (mu == Tp) ? 1 : 0;
unsigned int LLs = q_in._grid->_rdimensions[0];
unsigned int LLs = q_in.Grid()->_rdimensions[0];
unsigned int LLt = GridDefaultLatt()[Tp];
q_out = zero;
q_out = Zero();
LatticeInteger coords(_FourDimGrid);
LatticeCoordinate(coords, Tp);
auto q_out_v = q_out.View();
auto tmp2_v = tmp2.View();
auto coords_v= coords.View();
auto Umu_v = Umu.View();
for (unsigned int s = 0; s < LLs; ++s)
{
bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2)));
@ -1098,59 +1040,51 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
tmp = Cshift(tmp2, mu, 1); //q(x+mu,s)
tmp2 = tmp*lattice_cmplx; //q(x+mu,s)*A(x)
parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU)
{
thread_for(sU, Umu.Grid()->oSites(),{
// Compute the sequential conserved current insertion only if our simd
// object contains a timeslice we need.
vInteger t_mask = ((coords._odata[sU] >= tmin) &&
(coords._odata[sU] <= tmax));
Integer timeSlices = Reduce(t_mask);
vPredicate t_mask;
t_mask() = ((coords_v[sU] >= tmin) && (coords_v[sU] <= tmax));
Integer timeSlices = Reduce(t_mask());
if (timeSlices > 0)
{
unsigned int sF = sU * LLs + s;
Kernels::SeqConservedCurrentSiteFwd(tmp2._odata[sU],
q_out._odata[sF], Umu, sU,
mu, t_mask, switch_sgn);
Kernels::SeqConservedCurrentSiteFwd(tmp2_v[sU],
q_out_v[sF], Umu_v, sU,
mu, t_mask, switch_sgn);
}
}
});
//backward direction: Need q(x - mu, s)*A(x-mu)
ExtractSlice(tmp2, q_in, s, 0); //q(x,s)
tmp = lattice_cmplx*tmp2; //q(x,s)*A(x)
tmp2 = Cshift(tmp, mu, -1); //q(x-mu,s)*A(x-mu,s)
parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU)
thread_for(sU, Umu.Grid()->oSites(),
{
vInteger t_mask = ((coords._odata[sU] >= (tmin + tshift)) &&
(coords._odata[sU] <= (tmax + tshift)));
vPredicate t_mask;
t_mask()= ((coords_v[sU] >= (tmin + tshift)) && (coords_v[sU] <= (tmax + tshift)));
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
unsigned int t0 = 0;
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords._odata[sU] == t0 ));
Integer timeSlices = Reduce(t_mask);
if (timeSlices > 0)
{
unsigned int sF = sU * LLs + s;
Kernels::SeqConservedCurrentSiteBwd(tmp2._odata[sU],
q_out._odata[sF], Umu, sU,
mu, t_mask, axial_sign);
}
}
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
unsigned int t0 = 0;
if((tmax==LLt-1) && (tshift==1)) t_mask() = (t_mask() || (coords_v[sU] == t0 ));
Integer timeSlices = Reduce(t_mask());
if (timeSlices > 0) {
unsigned int sF = sU * LLs + s;
Kernels::SeqConservedCurrentSiteBwd(tmp2_v[sU],
q_out_v[sF], Umu_v, sU,
mu, t_mask, axial_sign);
}
});
}
}
FermOpTemplateInstantiate(WilsonFermion5D);
GparityFermOpTemplateInstantiate(WilsonFermion5D);
}}
NAMESPACE_END(Grid);

View File

@ -1,4 +1,3 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -29,16 +28,11 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/WilsonFermion.h>
namespace Grid {
namespace QCD {
const std::vector<int> WilsonFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3});
const std::vector<int> WilsonFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1});
int WilsonFermionStatic::HandOptDslash;
NAMESPACE_BEGIN(Grid);
/////////////////////////////////
// Constructor and gauge import
@ -49,18 +43,19 @@ WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
GridRedBlackCartesian &Hgrid, RealD _mass,
const ImplParams &p,
const WilsonAnisotropyCoefficients &anis)
: Kernels(p),
_grid(&Fgrid),
_cbgrid(&Hgrid),
Stencil(&Fgrid, npoint, Even, directions, displacements),
StencilEven(&Hgrid, npoint, Even, directions,displacements), // source is Even
StencilOdd(&Hgrid, npoint, Odd, directions,displacements), // source is Odd
mass(_mass),
Lebesgue(_grid),
LebesgueEvenOdd(_cbgrid),
Umu(&Fgrid),
UmuEven(&Hgrid),
UmuOdd(&Hgrid),
:
Kernels(p),
_grid(&Fgrid),
_cbgrid(&Hgrid),
Stencil(&Fgrid, npoint, Even, directions, displacements,p),
StencilEven(&Hgrid, npoint, Even, directions,displacements,p), // source is Even
StencilOdd(&Hgrid, npoint, Odd, directions,displacements,p), // source is Odd
mass(_mass),
Lebesgue(_grid),
LebesgueEvenOdd(_cbgrid),
Umu(&Fgrid),
UmuEven(&Hgrid),
UmuOdd(&Hgrid),
_tmp(&Hgrid),
anisotropyCoeff(anis)
{
@ -76,8 +71,9 @@ WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
}
template <class Impl>
void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu) {
GaugeField HUmu(_Umu._grid);
void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu)
{
GaugeField HUmu(_Umu.Grid());
//Here multiply the anisotropy coefficients
if (anisotropyCoeff.isAnisotropic)
@ -107,21 +103,21 @@ void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu) {
template <class Impl>
RealD WilsonFermion<Impl>::M(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
Dhop(in, out, DaggerNo);
return axpy_norm(out, diag_mass, in, out);
}
template <class Impl>
RealD WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
Dhop(in, out, DaggerYes);
return axpy_norm(out, diag_mass, in, out);
}
template <class Impl>
void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
if (in.Checkerboard() == Odd) {
DhopEO(in, out, DaggerNo);
} else {
DhopOE(in, out, DaggerNo);
@ -130,7 +126,7 @@ void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
template <class Impl>
void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
if (in.Checkerboard() == Odd) {
DhopEO(in, out, DaggerYes);
} else {
DhopOE(in, out, DaggerYes);
@ -139,26 +135,26 @@ void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
template <class Impl>
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
typename FermionField::scalar_type scal(diag_mass);
out = scal * in;
}
template <class Impl>
void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
Mooee(in, out);
}
template<class Impl>
void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
out = (1.0/(diag_mass))*in;
}
template<class Impl>
void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
MooeeInv(in,out);
}
template<class Impl>
@ -169,7 +165,7 @@ void WilsonFermion<Impl>::MomentumSpacePropagator(FermionField &out, const Fermi
typedef Lattice<iSinglet<vector_type> > LatComplex;
// what type LatticeComplex
conformable(_grid,out._grid);
conformable(_grid,out.Grid());
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
@ -178,13 +174,13 @@ void WilsonFermion<Impl>::MomentumSpacePropagator(FermionField &out, const Fermi
Gamma::Algebra::GammaT
};
std::vector<int> latt_size = _grid->_fdimensions;
Coordinate latt_size = _grid->_fdimensions;
FermionField num (_grid); num = zero;
LatComplex wilson(_grid); wilson= zero;
FermionField num (_grid); num = Zero();
LatComplex wilson(_grid); wilson= Zero();
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
LatComplex denom(_grid); denom= zero;
LatComplex denom(_grid); denom= Zero();
LatComplex kmu(_grid);
ScalComplex ci(0.0,1.0);
// momphase = n * 2pi / L
@ -229,9 +225,9 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
Compressor compressor(dag);
FermionField Btilde(B._grid);
FermionField Atilde(B._grid);
Atilde = A;//redundant
FermionField Btilde(B.Grid());
FermionField Atilde(B.Grid());
Atilde = A;
st.HaloExchange(B, compressor);
@ -242,12 +238,8 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
int gamma = mu;
if (!dag) gamma += Nd;
////////////////////////
// Call the single hop
////////////////////////
parallel_for (int sss = 0; sss < B._grid->oSites(); sss++) {
Kernels::DhopDir(st, U, st.CommBuf(), sss, sss, B, Btilde, mu, gamma);
}
int Ls=1;
Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, B.Grid()->oSites(), B, Btilde, mu, gamma);
//////////////////////////////////////////////////
// spin trace outer product
@ -258,70 +250,70 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
template <class Impl>
void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
conformable(U._grid, _grid);
conformable(U._grid, V._grid);
conformable(U._grid, mat._grid);
conformable(U.Grid(), _grid);
conformable(U.Grid(), V.Grid());
conformable(U.Grid(), mat.Grid());
mat.checkerboard = U.checkerboard;
mat.Checkerboard() = U.Checkerboard();
DerivInternal(Stencil, Umu, mat, U, V, dag);
}
template <class Impl>
void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
conformable(U._grid, _cbgrid);
conformable(U._grid, V._grid);
//conformable(U._grid, mat._grid); not general, leaving as a comment (Guido)
conformable(U.Grid(), _cbgrid);
conformable(U.Grid(), V.Grid());
//conformable(U.Grid(), mat.Grid()); not general, leaving as a comment (Guido)
// Motivation: look at the SchurDiff operator
assert(V.checkerboard == Even);
assert(U.checkerboard == Odd);
mat.checkerboard = Odd;
assert(V.Checkerboard() == Even);
assert(U.Checkerboard() == Odd);
mat.Checkerboard() = Odd;
DerivInternal(StencilEven, UmuOdd, mat, U, V, dag);
}
template <class Impl>
void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
conformable(U._grid, _cbgrid);
conformable(U._grid, V._grid);
//conformable(U._grid, mat._grid);
conformable(U.Grid(), _cbgrid);
conformable(U.Grid(), V.Grid());
//conformable(U.Grid(), mat.Grid());
assert(V.checkerboard == Odd);
assert(U.checkerboard == Even);
mat.checkerboard = Even;
assert(V.Checkerboard() == Odd);
assert(U.Checkerboard() == Even);
mat.Checkerboard() = Even;
DerivInternal(StencilOdd, UmuEven, mat, U, V, dag);
}
template <class Impl>
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag) {
conformable(in._grid, _grid); // verifies full grid
conformable(in._grid, out._grid);
conformable(in.Grid(), _grid); // verifies full grid
conformable(in.Grid(), out.Grid());
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
DhopInternal(Stencil, Lebesgue, Umu, in, out, dag);
}
template <class Impl>
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag) {
conformable(in._grid, _cbgrid); // verifies half grid
conformable(in._grid, out._grid); // drops the cb check
conformable(in.Grid(), _cbgrid); // verifies half grid
conformable(in.Grid(), out.Grid()); // drops the cb check
assert(in.checkerboard == Even);
out.checkerboard = Odd;
assert(in.Checkerboard() == Even);
out.Checkerboard() = Odd;
DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag);
}
template <class Impl>
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag) {
conformable(in._grid, _cbgrid); // verifies half grid
conformable(in._grid, out._grid); // drops the cb check
conformable(in.Grid(), _cbgrid); // verifies half grid
conformable(in.Grid(), out.Grid()); // drops the cb check
assert(in.checkerboard == Odd);
out.checkerboard = Even;
assert(in.Checkerboard() == Odd);
out.Checkerboard() = Even;
DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag);
}
@ -332,7 +324,8 @@ void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int di
}
template <class Impl>
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) {
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)
{
int skip = (disp == 1) ? 0 : 1;
int dirdisp = dir + skip * 4;
int gamma = dir + (1 - skip) * 4;
@ -341,16 +334,16 @@ void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int
};
template <class Impl>
void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag) {
void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag)
{
Compressor compressor(dag);
Stencil.HaloExchange(in, compressor);
int Ls=1;
int Nsite=in.oSites();
Kernels::DhopDirKernel(Stencil, Umu, Stencil.CommBuf(), Ls, Nsite, in, out, dirdisp, gamma);
};
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopDir(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out, dirdisp, gamma);
}
}
/*Change starts*/
template <class Impl>
void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
@ -367,71 +360,51 @@ void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
template <class Impl>
void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out, int dag) {
DoubledGaugeField &U,
const FermionField &in,
FermionField &out, int dag) {
assert((dag == DaggerNo) || (dag == DaggerYes));
#ifdef GRID_OMP
Compressor compressor;
int len = U._grid->oSites();
const int LLs = 1;
st.Prepare();
st.HaloGather(in,compressor);
st.CommsMergeSHM(compressor);
#pragma omp parallel
{
int tid = omp_get_thread_num();
int nthreads = omp_get_num_threads();
int ncomms = CartesianCommunicator::nCommThreads;
if (ncomms == -1) ncomms = 1;
assert(nthreads > ncomms);
if (tid >= ncomms) {
nthreads -= ncomms;
int ttid = tid - ncomms;
int n = len;
int chunk = n / nthreads;
int rem = n % nthreads;
int myblock, myn;
if (ttid < rem) {
myblock = ttid * chunk + ttid;
myn = chunk+1;
} else {
myblock = ttid*chunk + rem;
myn = chunk;
}
// do the compute
if (dag == DaggerYes) {
for (int sss = myblock; sss < myblock+myn; ++sss) {
Kernels::DhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
}
} else {
for (int sss = myblock; sss < myblock+myn; ++sss) {
Kernels::DhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
}
} //else
} else {
st.CommunicateThreaded();
}
Compressor compressor(dag);
int len = U.Grid()->oSites();
/////////////////////////////
// Start comms // Gather intranode and extra node differentiated??
/////////////////////////////
std::vector<std::vector<CommsRequest_t> > requests;
st.Prepare();
st.HaloGather(in,compressor);
st.CommunicateBegin(requests);
/////////////////////////////
// Overlap with comms
/////////////////////////////
st.CommsMergeSHM(compressor);
/////////////////////////////
// do the compute interior
/////////////////////////////
int Opt = WilsonKernelsStatic::Opt;
if (dag == DaggerYes) {
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
}
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
} else {
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
}
}
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
}
} //pragma
#else
assert(0);
#endif
/////////////////////////////
// Complete comms
/////////////////////////////
st.CommunicateComplete(requests);
st.CommsMerge(compressor);
/////////////////////////////
// do the compute exterior
/////////////////////////////
if (dag == DaggerYes) {
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
} else {
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
}
};
@ -444,14 +417,11 @@ void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo,
Compressor compressor(dag);
st.HaloExchange(in, compressor);
int Opt = WilsonKernelsStatic::Opt;
if (dag == DaggerYes) {
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
}
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
} else {
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
}
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
}
};
/*Change ends */
@ -468,28 +438,33 @@ void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
Current curr_type,
unsigned int mu)
{
Gamma g5(Gamma::Algebra::Gamma5);
conformable(_grid, q_in_1._grid);
conformable(_grid, q_in_2._grid);
conformable(_grid, q_out._grid);
PropagatorField tmp1(_grid), tmp2(_grid);
q_out = zero;
Gamma g5(Gamma::Algebra::Gamma5);
conformable(_grid, q_in_1.Grid());
conformable(_grid, q_in_2.Grid());
conformable(_grid, q_out.Grid());
PropagatorField tmp1(_grid), tmp2(_grid);
q_out = Zero();
// Forward, need q1(x + mu), q2(x). Backward, need q1(x), q2(x + mu).
// Inefficient comms method but not performance critical.
tmp1 = Cshift(q_in_1, mu, 1);
tmp2 = Cshift(q_in_2, mu, 1);
parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU)
{
Kernels::ContractConservedCurrentSiteFwd(tmp1._odata[sU],
q_in_2._odata[sU],
q_out._odata[sU],
Umu, sU, mu);
Kernels::ContractConservedCurrentSiteBwd(q_in_1._odata[sU],
tmp2._odata[sU],
q_out._odata[sU],
Umu, sU, mu);
}
// Forward, need q1(x + mu), q2(x). Backward, need q1(x), q2(x + mu).
// Inefficient comms method but not performance critical.
tmp1 = Cshift(q_in_1, mu, 1);
tmp2 = Cshift(q_in_2, mu, 1);
auto tmp1_v = tmp1.View();
auto tmp2_v = tmp2.View();
auto q_in_1_v=q_in_1.View();
auto q_in_2_v=q_in_2.View();
auto q_out_v = q_out.View();
auto Umu_v = Umu.View();
thread_for(sU, Umu.Grid()->oSites(),{
Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sU],
q_in_2_v[sU],
q_out_v[sU],
Umu_v, sU, mu);
Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sU],
tmp2_v[sU],
q_out_v[sU],
Umu_v, sU, mu);
});
}
@ -502,61 +477,61 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
unsigned int tmax,
ComplexField &lattice_cmplx)
{
conformable(_grid, q_in._grid);
conformable(_grid, q_out._grid);
PropagatorField tmpFwd(_grid), tmpBwd(_grid), tmp(_grid);
unsigned int tshift = (mu == Tp) ? 1 : 0;
unsigned int LLt = GridDefaultLatt()[Tp];
conformable(_grid, q_in.Grid());
conformable(_grid, q_out.Grid());
q_out = zero;
LatticeInteger coords(_grid);
LatticeCoordinate(coords, Tp);
// Lattice<iSinglet<Simd>> ph(_grid), coor(_grid);
Complex i(0.0,1.0);
PropagatorField tmpFwd(_grid), tmpBwd(_grid), tmp(_grid);
unsigned int tshift = (mu == Tp) ? 1 : 0;
unsigned int LLt = GridDefaultLatt()[Tp];
// Need q(x + mu) and q(x - mu).
tmp = Cshift(q_in, mu, 1);
tmpFwd = tmp*lattice_cmplx;
tmp = lattice_cmplx*q_in;
tmpBwd = Cshift(tmp, mu, -1);
q_out = Zero();
LatticeInteger coords(_grid);
LatticeCoordinate(coords, Tp);
parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU)
{
// Compute the sequential conserved current insertion only if our simd
// object contains a timeslice we need.
vInteger t_mask = ((coords._odata[sU] >= tmin) &&
(coords._odata[sU] <= tmax));
Integer timeSlices = Reduce(t_mask);
// Need q(x + mu) and q(x - mu).
tmp = Cshift(q_in, mu, 1);
tmpFwd = tmp*lattice_cmplx;
tmp = lattice_cmplx*q_in;
tmpBwd = Cshift(tmp, mu, -1);
if (timeSlices > 0)
{
Kernels::SeqConservedCurrentSiteFwd(tmpFwd._odata[sU],
q_out._odata[sU],
Umu, sU, mu, t_mask);
}
auto coords_v = coords.View();
auto tmpFwd_v = tmpFwd.View();
auto tmpBwd_v = tmpBwd.View();
auto Umu_v = Umu.View();
auto q_out_v = q_out.View();
// Repeat for backward direction.
t_mask = ((coords._odata[sU] >= (tmin + tshift)) &&
(coords._odata[sU] <= (tmax + tshift)));
thread_for(sU, Umu.Grid()->oSites(), {
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
unsigned int t0 = 0;
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords._odata[sU] == t0 ));
// Compute the sequential conserved current insertion only if our simd
// object contains a timeslice we need.
vPredicate t_mask;
t_mask() = ((coords_v[sU] >= tmin) && (coords_v[sU] <= tmax));
Integer timeSlices = Reduce(t_mask());
timeSlices = Reduce(t_mask);
if (timeSlices > 0)
{
Kernels::SeqConservedCurrentSiteBwd(tmpBwd._odata[sU],
q_out._odata[sU],
Umu, sU, mu, t_mask);
}
if (timeSlices > 0) {
Kernels::SeqConservedCurrentSiteFwd(tmpFwd_v[sU],
q_out_v[sU],
Umu_v, sU, mu, t_mask);
}
// Repeat for backward direction.
t_mask() = ((coords_v[sU] >= (tmin + tshift)) &&
(coords_v[sU] <= (tmax + tshift)));
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
unsigned int t0 = 0;
if((tmax==LLt-1) && (tshift==1)) t_mask() = (t_mask() || (coords_v[sU] == t0 ));
timeSlices = Reduce(t_mask());
if (timeSlices > 0) {
Kernels::SeqConservedCurrentSiteBwd(tmpBwd_v[sU],
q_out_v[sU],
Umu_v, sU, mu, t_mask);
}
});
}
FermOpTemplateInstantiate(WilsonFermion);
AdjointFermOpTemplateInstantiate(WilsonFermion);
TwoIndexFermOpTemplateInstantiate(WilsonFermion);
GparityFermOpTemplateInstantiate(WilsonFermion);
}
}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,716 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernelsAsmAvx512.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#if defined(AVX512)
///////////////////////////////////////////////////////////
// If we are AVX512 specialise the single precision routine
///////////////////////////////////////////////////////////
#include <simd/Intel512wilson.h>
#include <simd/Intel512single.h>
/// Switch off the 5d vectorised code optimisations
#undef DWFVEC5D
static Vector<vComplexF> signsF;
template<typename vtype>
int setupSigns(Vector<vtype>& signs ){
Vector<vtype> bother(2);
signs = bother;
vrsign(signs[0]);
visign(signs[1]);
return 1;
}
static int signInitF = setupSigns(signsF);
#define MAYBEPERM(A,perm) if (perm) { A ; }
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
#define COMPLEX_SIGNS(isigns) vComplexF *isigns = &signsF[0];
/////////////////////////////////////////////////////////////////
// XYZT vectorised, undag Kernel, single
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef MAYBEPERM
#undef MULT_2SPIN
#define MAYBEPERM(A,B)
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
/////////////////////////////////////////////////////////////////
// Ls vectorised, undag Kernel, single
/////////////////////////////////////////////////////////////////
#ifdef DWFVEC5D
#undef KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
#undef MULT_2SPIN
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LSNOPF(ptr,pf)
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// Ls vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif // VEC 5D
#undef COMPLEX_SIGNS
#undef MAYBEPERM
#undef MULT_2SPIN
///////////////////////////////////////////////////////////
// If we are AVX512 specialise the double precision routine
///////////////////////////////////////////////////////////
#include <simd/Intel512double.h>
static Vector<vComplexD> signsD;
static int signInitD = setupSigns(signsD);
#define MAYBEPERM(A,perm) if (perm) { A ; }
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
#define COMPLEX_SIGNS(isigns) vComplexD *isigns = &signsD[0];
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
/////////////////////////////////////////////////////////////////
// XYZT vectorised, undag Kernel, single
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef MAYBEPERM
#undef MULT_2SPIN
#define MAYBEPERM(A,B)
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
/////////////////////////////////////////////////////////////////
// Ls vectorised, undag Kernel, single
/////////////////////////////////////////////////////////////////
#ifdef DWFVEC5D
#undef KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
#undef MULT_2SPIN
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LSNOPF(ptr,pf)
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// Ls vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif // VEC 5D
#undef COMPLEX_SIGNS
#undef MAYBEPERM
#undef MULT_2SPIN
#undef Chi_00
#undef Chi_01
#undef Chi_02
#undef Chi_10
#undef Chi_11
#undef Chi_12
#undef Chi_20
#undef Chi_21
#undef Chi_22
#undef Chi_30
#undef Chi_31
#undef Chi_32
#undef UChi_00
#undef UChi_01
#undef UChi_02
#undef UChi_10
#undef UChi_11
#undef UChi_12
#undef UChi_20
#undef UChi_21
#undef UChi_22
#undef UChi_30
#undef UChi_31
#undef UChi_32
#undef Psi_00
#undef Psi_01
#undef Psi_02
#undef Psi_10
#undef Psi_11
#undef Psi_12
#undef Psi_20
#undef Psi_21
#undef Psi_22
#undef Psi_30
#undef Psi_31
#undef Psi_32
#undef Phi_00
#undef Phi_01
#undef Phi_02
#undef Phi_10
#undef Phi_11
#undef Phi_12
#undef Phi_20
#undef Phi_21
#undef Phi_22
#undef Phi_30
#undef Phi_31
#undef Phi_32
#endif //AVX512

View File

@ -130,16 +130,18 @@
int local,perm, ptype;
uint64_t base;
uint64_t basep;
const uint64_t plocal =(uint64_t) & in._odata[0];
const uint64_t plocal =(uint64_t) & in[0];
COMPLEX_SIGNS(isigns);
MASK_REGS;
int nmax=U._grid->oSites();
int nmax=U.oSites();
for(int site=0;site<Ns;site++) {
#ifndef EXTERIOR
int sU =lo.Reorder(ssU);
// int sU =lo.Reorder(ssU);
int sU =ssU;
int ssn=ssU+1; if(ssn>=nmax) ssn=0;
int sUn=lo.Reorder(ssn);
// int sUn=lo.Reorder(ssn);
int sUn=ssn;
LOCK_GAUGE(0);
#else
int sU =ssU;
@ -166,7 +168,7 @@
if (nmu==0) break;
// if (nmu!=0) std::cout << "EXT "<<sU<<std::endl;
#endif
base = (uint64_t) &out._odata[ss];
base = (uint64_t) &out[ss];
basep= st.GetPFInfo(nent,plocal); nent++;
RESULT(base,basep);
}

View File

@ -0,0 +1,86 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernelsAsm.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/FermionCore.h>
NAMESPACE_BEGIN(Grid);
///////////////////////////////////////////////////////////
// Default to no assembler implementation
// Will specialise to
///////////////////////////////////////////////////////////
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{
assert(0);
}
NAMESPACE_END(Grid);

View File

@ -28,7 +28,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#if defined(QPX)
@ -52,18 +52,18 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
WilsonKernels<WilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
WilsonKernels<WilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef MAYBEPERM
#undef MULT_2SPIN
@ -75,18 +75,18 @@ WilsonKernels<WilsonImplF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,Do
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSite(StencilView &st, DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// Ls vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
template<> void
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef MAYBEPERM
#undef MULT_2SPIN
@ -104,9 +104,9 @@ WilsonKernels<DomainWallVec5dImplF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrde
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
WilsonKernels<WilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
@ -115,9 +115,9 @@ WilsonKernels<WilsonImplD>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,Doubl
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
WilsonKernels<WilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
#undef MAYBEPERM
@ -129,9 +129,9 @@ WilsonKernels<WilsonImplD>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,Do
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSite(StencilView &st, DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
@ -139,9 +139,9 @@ WilsonKernels<DomainWallVec5dImplD>::AsmDhopSite(StencilImpl &st,LebesgueOrder &
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
template<> void
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
WilsonKernels<DomainWallVec5dImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
#undef MAYBEPERM

View File

@ -26,6 +26,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/FermionCore.h>
#define REGISTER
@ -45,7 +48,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
Chimu_32=ref(F)(3)(2)
#define LOAD_CHIMU(DIR,F,PERM) \
{ const SiteSpinor & ref (in._odata[offset]); LOAD_CHIMU_BODY(F); }
{ const SiteSpinor & ref (in[offset]); LOAD_CHIMU_BODY(F); }
#define LOAD_CHI_BODY(F) \
Chi_00 = ref(F)(0)(0);\
@ -92,9 +95,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
g = F; \
direction = st._directions[DIR]; \
distance = st._distances[DIR]; \
sl = st._grid->_simd_layout[direction]; \
sl = st._simd_layout[direction]; \
inplace_twist = 0; \
if(SE->_around_the_world && this->Params.twists[DIR % 4]){ \
if(SE->_around_the_world && st.parameters.twists[DIR % 4]){ \
if(sl == 1){ \
g = (F+1) % 2; \
}else{ \
@ -103,7 +106,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
}
#define LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
{ const SiteSpinor &ref(in._odata[offset]); \
{ const SiteSpinor &ref(in[offset]); \
LOAD_CHI_SETUP(DIR,F); \
if(!inplace_twist){ \
LOAD_CHIMU_BODY(g); \
@ -201,10 +204,10 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define MULT_2SPIN(A,F) \
{auto & ref(U._odata[sU](A)); MULT_2SPIN_BODY; }
{auto & ref(U[sU](A)); MULT_2SPIN_BODY; }
#define MULT_2SPIN_GPARITY(A,F) \
{auto & ref(U._odata[sU](F)(A)); MULT_2SPIN_BODY; }
{auto & ref(U[sU](F)(A)); MULT_2SPIN_BODY; }
#define PERMUTE_DIR(dir) \
@ -468,8 +471,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
SE=st.GetEntry(ptype,DIR,ss); \
offset = SE->_offset; \
local = SE->_is_local; \
perm = SE->_permute; \
perm = SE->_permute; \
if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \
LOAD_CHI_IMPL(DIR,F,PERM); \
MULT_2SPIN_IMPL(DIR,F); \
@ -479,7 +481,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define HAND_RESULT(ss,F) \
{ \
SiteSpinor & ref (out._odata[ss]); \
SiteSpinor & ref (out[ss]); \
vstream(ref(F)(0)(0),result_00); \
vstream(ref(F)(0)(1),result_01); \
vstream(ref(F)(0)(2),result_02); \
@ -496,7 +498,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define HAND_RESULT_EXT(ss,F) \
if (nmu){ \
SiteSpinor & ref (out._odata[ss]); \
SiteSpinor & ref (out[ss]); \
ref(F)(0)(0)+=result_00; \
ref(F)(0)(1)+=result_01; \
ref(F)(0)(2)+=result_02; \
@ -545,18 +547,18 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
Simd U_21;
#define ZERO_RESULT \
result_00=zero; \
result_01=zero; \
result_02=zero; \
result_10=zero; \
result_11=zero; \
result_12=zero; \
result_20=zero; \
result_21=zero; \
result_22=zero; \
result_30=zero; \
result_31=zero; \
result_32=zero;
result_00=Zero(); \
result_01=Zero(); \
result_02=Zero(); \
result_10=Zero(); \
result_11=Zero(); \
result_12=Zero(); \
result_20=Zero(); \
result_21=Zero(); \
result_22=Zero(); \
result_30=Zero(); \
result_31=Zero(); \
result_32=Zero();
#define Chimu_00 Chi_00
#define Chimu_01 Chi_01
@ -571,21 +573,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define Chimu_31 UChi_11
#define Chimu_32 UChi_12
namespace Grid {
namespace QCD {
template<class Impl> void
WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
int offset,local,perm, ptype;
StencilEntry *SE;
NAMESPACE_BEGIN(Grid);
#define HAND_DOP_SITE(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
@ -598,21 +586,6 @@ WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGauge
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT(ss,F)
HAND_DOP_SITE(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
StencilEntry *SE;
int offset,local,perm, ptype;
#define HAND_DOP_SITE_DAG(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
@ -624,22 +597,6 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,Doub
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT(ss,F)
HAND_DOP_SITE_DAG(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
int offset,local,perm, ptype;
StencilEntry *SE;
#define HAND_DOP_SITE_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
ZERO_RESULT; \
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
@ -652,21 +609,6 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT(ss,F)
HAND_DOP_SITE_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
StencilEntry *SE;
int offset,local,perm, ptype;
#define HAND_DOP_SITE_DAG_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
ZERO_RESULT; \
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
@ -678,23 +620,6 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,D
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT(ss,F)
HAND_DOP_SITE_DAG_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
int offset,local,perm, ptype;
StencilEntry *SE;
int nmu=0;
#define HAND_DOP_SITE_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
ZERO_RESULT; \
@ -708,22 +633,6 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT_EXT(ss,F)
HAND_DOP_SITE_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
StencilEntry *SE;
int offset,local,perm, ptype;
int nmu=0;
#define HAND_DOP_SITE_DAG_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
ZERO_RESULT; \
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
@ -736,13 +645,10 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT_EXT(ss,F)
HAND_DOP_SITE_DAG_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
#define HAND_SPECIALISE_GPARITY(IMPL) \
template<> void \
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
template<> void \
WilsonKernels<IMPL>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
@ -756,9 +662,9 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \
\
template<> \
void WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
@ -772,9 +678,9 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \
\
template<> void \
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
@ -788,9 +694,9 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \
\
template<> \
void WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
@ -805,8 +711,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
} \
\
template<> void \
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
WilsonKernels<IMPL>::HandDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
@ -814,16 +720,16 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
\
HAND_DECLARATIONS(ignore); \
\
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
int offset,perm, ptype, g, direction, distance, sl, inplace_twist; \
StencilEntry *SE; \
int nmu=0; \
HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
nmu = 0; \
HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \
template<> \
void WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
@ -832,47 +738,11 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
HAND_DECLARATIONS(ignore); \
\
StencilEntry *SE; \
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
int offset,perm, ptype, g, direction, distance, sl, inplace_twist; \
int nmu=0; \
HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
nmu = 0; \
HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
}
HAND_SPECIALISE_GPARITY(GparityWilsonImplF);
HAND_SPECIALISE_GPARITY(GparityWilsonImplD);
HAND_SPECIALISE_GPARITY(GparityWilsonImplFH);
HAND_SPECIALISE_GPARITY(GparityWilsonImplDF);
////////////// Wilson ; uses this implementation /////////////////////
#define INSTANTIATE_THEM(A) \
template void WilsonKernels<A>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out);
INSTANTIATE_THEM(GparityWilsonImplF);
INSTANTIATE_THEM(GparityWilsonImplD);
INSTANTIATE_THEM(GparityWilsonImplFH);
INSTANTIATE_THEM(GparityWilsonImplDF);
}}
NAMESPACE_END(Grid);

View File

@ -26,12 +26,58 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/FermionCore.h>
#undef LOAD_CHIMU
#undef LOAD_CHI
#undef MULT_2SPIN
#undef PERMUTE_DIR
#undef XP_PROJ
#undef YP_PROJ
#undef ZP_PROJ
#undef TP_PROJ
#undef XM_PROJ
#undef YM_PROJ
#undef ZM_PROJ
#undef TM_PROJ
#undef XP_RECON
#undef XP_RECON_ACCUM
#undef XM_RECON
#undef XM_RECON_ACCUM
#undef YP_RECON_ACCUM
#undef YM_RECON_ACCUM
#undef ZP_RECON_ACCUM
#undef ZM_RECON_ACCUM
#undef TP_RECON_ACCUM
#undef TM_RECON_ACCUM
#undef ZERO_RESULT
#undef Chimu_00
#undef Chimu_01
#undef Chimu_02
#undef Chimu_10
#undef Chimu_11
#undef Chimu_12
#undef Chimu_20
#undef Chimu_21
#undef Chimu_22
#undef Chimu_30
#undef Chimu_31
#undef Chimu_32
#undef HAND_STENCIL_LEG
#undef HAND_STENCIL_LEG_INT
#undef HAND_STENCIL_LEG_EXT
#undef HAND_RESULT
#undef HAND_RESULT_INT
#undef HAND_RESULT_EXT
#define REGISTER
#define LOAD_CHIMU \
{const SiteSpinor & ref (in._odata[offset]); \
{const SiteSpinor & ref (in[offset]); \
Chimu_00=ref()(0)(0);\
Chimu_01=ref()(0)(1);\
Chimu_02=ref()(0)(2);\
@ -56,7 +102,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
// To splat or not to splat depends on the implementation
#define MULT_2SPIN(A)\
{auto & ref(U._odata[sU](A)); \
{auto & ref(U[sU](A)); \
Impl::loadLinkElement(U_00,ref()(0,0)); \
Impl::loadLinkElement(U_10,ref()(1,0)); \
Impl::loadLinkElement(U_20,ref()(2,0)); \
@ -355,7 +401,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define HAND_RESULT(ss) \
{ \
SiteSpinor & ref (out._odata[ss]); \
SiteSpinor & ref (out[ss]); \
vstream(ref()(0)(0),result_00); \
vstream(ref()(0)(1),result_01); \
vstream(ref()(0)(2),result_02); \
@ -372,7 +418,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define HAND_RESULT_EXT(ss) \
if (nmu){ \
SiteSpinor & ref (out._odata[ss]); \
SiteSpinor & ref (out[ss]); \
ref()(0)(0)+=result_00; \
ref()(0)(1)+=result_01; \
ref()(0)(2)+=result_02; \
@ -421,18 +467,18 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
Simd U_21;
#define ZERO_RESULT \
result_00=zero; \
result_01=zero; \
result_02=zero; \
result_10=zero; \
result_11=zero; \
result_12=zero; \
result_20=zero; \
result_21=zero; \
result_22=zero; \
result_30=zero; \
result_31=zero; \
result_32=zero;
result_00=Zero(); \
result_01=Zero(); \
result_02=Zero(); \
result_10=Zero(); \
result_11=Zero(); \
result_12=Zero(); \
result_20=Zero(); \
result_21=Zero(); \
result_22=Zero(); \
result_30=Zero(); \
result_31=Zero(); \
result_32=Zero();
#define Chimu_00 Chi_00
#define Chimu_01 Chi_01
@ -447,12 +493,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define Chimu_31 UChi_11
#define Chimu_32 UChi_12
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template<class Impl> void
WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
WilsonKernels<Impl>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
@ -475,8 +520,8 @@ WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGauge
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
void WilsonKernels<Impl>::HandDhopSiteDag(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -498,8 +543,8 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,Doub
}
template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
@ -522,8 +567,8 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -545,8 +590,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,D
}
template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
@ -554,7 +599,7 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
HAND_DECLARATIONS(ignore);
int offset,local,perm, ptype;
int offset, ptype;
StencilEntry *SE;
int nmu=0;
ZERO_RESULT;
@ -570,8 +615,8 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -579,7 +624,7 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
HAND_DECLARATIONS(ignore);
StencilEntry *SE;
int offset,local,perm, ptype;
int offset, ptype;
int nmu=0;
ZERO_RESULT;
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
@ -595,37 +640,45 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
////////////// Wilson ; uses this implementation /////////////////////
#define INSTANTIATE_THEM(A) \
template void WilsonKernels<A>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out);
INSTANTIATE_THEM(WilsonImplF);
INSTANTIATE_THEM(WilsonImplD);
INSTANTIATE_THEM(ZWilsonImplF);
INSTANTIATE_THEM(ZWilsonImplD);
INSTANTIATE_THEM(DomainWallVec5dImplF);
INSTANTIATE_THEM(DomainWallVec5dImplD);
INSTANTIATE_THEM(ZDomainWallVec5dImplF);
INSTANTIATE_THEM(ZDomainWallVec5dImplD);
INSTANTIATE_THEM(WilsonImplFH);
INSTANTIATE_THEM(WilsonImplDF);
INSTANTIATE_THEM(ZWilsonImplFH);
INSTANTIATE_THEM(ZWilsonImplDF);
INSTANTIATE_THEM(DomainWallVec5dImplFH);
INSTANTIATE_THEM(DomainWallVec5dImplDF);
INSTANTIATE_THEM(ZDomainWallVec5dImplFH);
INSTANTIATE_THEM(ZDomainWallVec5dImplDF);
INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplF);
INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplD);
}}
NAMESPACE_END(Grid);
#undef LOAD_CHIMU
#undef LOAD_CHI
#undef MULT_2SPIN
#undef PERMUTE_DIR
#undef XP_PROJ
#undef YP_PROJ
#undef ZP_PROJ
#undef TP_PROJ
#undef XM_PROJ
#undef YM_PROJ
#undef ZM_PROJ
#undef TM_PROJ
#undef XP_RECON
#undef XP_RECON_ACCUM
#undef XM_RECON
#undef XM_RECON_ACCUM
#undef YP_RECON_ACCUM
#undef YM_RECON_ACCUM
#undef ZP_RECON_ACCUM
#undef ZM_RECON_ACCUM
#undef TP_RECON_ACCUM
#undef TM_RECON_ACCUM
#undef ZERO_RESULT
#undef Chimu_00
#undef Chimu_01
#undef Chimu_02
#undef Chimu_10
#undef Chimu_11
#undef Chimu_12
#undef Chimu_20
#undef Chimu_21
#undef Chimu_22
#undef Chimu_30
#undef Chimu_31
#undef Chimu_32
#undef HAND_STENCIL_LEG
#undef HAND_STENCIL_LEG_INT
#undef HAND_STENCIL_LEG_EXT
#undef HAND_RESULT
#undef HAND_RESULT_INT
#undef HAND_RESULT_EXT

View File

@ -0,0 +1,551 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/FermionCore.h>
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////
// Generic implementation; move to different file?
////////////////////////////////////////////
accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip)
{
#ifdef __CUDA_ARCH__
static_assert(sizeof(StencilEntry)==sizeof(uint4),"Unexpected Stencil Entry Size");
uint4 * mem_pun = (uint4 *)mem; // force 128 bit loads
uint4 * chip_pun = (uint4 *)&chip;
* chip_pun = * mem_pun;
#else
chip = *mem;
#endif
return;
}
#define GENERIC_STENCIL_LEG(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if (SE->_is_local) { \
int perm= SE->_permute; \
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
spProj(chi,tmp); \
} else { \
chi = coalescedRead(buf[SE->_offset],lane); \
} \
synchronise(); \
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
Recon(result, Uchi);
#define GENERIC_STENCIL_LEG_INT(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if (SE->_is_local) { \
int perm= SE->_permute; \
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
spProj(chi,tmp); \
} else if ( st.same_node[Dir] ) { \
chi = coalescedRead(buf[SE->_offset],lane); \
} \
synchronise(); \
if (SE->_is_local || st.same_node[Dir] ) { \
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
Recon(result, Uchi); \
} \
synchronise();
#define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
auto chi = coalescedRead(buf[SE->_offset],lane); \
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
Recon(result, Uchi); \
nmu++; \
} \
synchronise();
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \
if (gamma == Dir) { \
if (SE->_is_local ) { \
int perm= SE->_permute; \
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
spProj(chi,tmp); \
} else { \
chi = coalescedRead(buf[SE->_offset],lane); \
} \
synchronise(); \
Impl::multLink(Uchi, U[sU], chi, dir, SE, st); \
Recon(result, Uchi); \
synchronise(); \
}
////////////////////////////////////////////////////////////////////
// All legs kernels ; comms then compute
////////////////////////////////////////////////////////////////////
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
typedef decltype(coalescedRead(in[0])) calcSpinor;
calcHalfSpinor chi;
// calcHalfSpinor *chi_p;
calcHalfSpinor Uchi;
calcSpinor result;
StencilEntry *SE;
int ptype;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=SIMTlane(Nsimd);
GENERIC_STENCIL_LEG(Xp,spProjXp,spReconXp);
GENERIC_STENCIL_LEG(Yp,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG(Zp,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG(Tp,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG(Xm,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG(Ym,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG(Zm,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG(Tm,spProjTm,accumReconTm);
coalescedWrite(out[sF],result,lane);
};
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
typedef decltype(coalescedRead(in[0])) calcSpinor;
calcHalfSpinor chi;
// calcHalfSpinor *chi_p;
calcHalfSpinor Uchi;
calcSpinor result;
StencilEntry *SE;
int ptype;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=SIMTlane(Nsimd);
GENERIC_STENCIL_LEG(Xm,spProjXp,spReconXp);
GENERIC_STENCIL_LEG(Ym,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG(Zm,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG(Tm,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG(Xp,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG(Yp,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG(Zp,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG(Tp,spProjTm,accumReconTm);
coalescedWrite(out[sF], result,lane);
};
////////////////////////////////////////////////////////////////////
// Interior kernels
////////////////////////////////////////////////////////////////////
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
typedef decltype(coalescedRead(in[0])) calcSpinor;
calcHalfSpinor chi;
// calcHalfSpinor *chi_p;
calcHalfSpinor Uchi;
calcSpinor result;
StencilEntry *SE;
int ptype;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=SIMTlane(Nsimd);
result=Zero();
GENERIC_STENCIL_LEG_INT(Xp,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_INT(Yp,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_INT(Zp,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_INT(Tp,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_INT(Xm,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_INT(Ym,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_INT(Zm,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_INT(Tm,spProjTm,accumReconTm);
coalescedWrite(out[sF], result,lane);
};
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
typedef decltype(coalescedRead(in[0])) calcSpinor;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=SIMTlane(Nsimd);
calcHalfSpinor chi;
// calcHalfSpinor *chi_p;
calcHalfSpinor Uchi;
calcSpinor result;
StencilEntry *SE;
int ptype;
result=Zero();
GENERIC_STENCIL_LEG_INT(Xm,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_INT(Ym,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_INT(Zm,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_INT(Tm,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_INT(Xp,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_INT(Yp,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_INT(Zp,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_INT(Tp,spProjTm,accumReconTm);
coalescedWrite(out[sF], result,lane);
};
////////////////////////////////////////////////////////////////////
// Exterior kernels
////////////////////////////////////////////////////////////////////
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
typedef decltype(coalescedRead(in[0])) calcSpinor;
// calcHalfSpinor *chi_p;
calcHalfSpinor Uchi;
calcSpinor result;
StencilEntry *SE;
int ptype;
int nmu=0;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=SIMTlane(Nsimd);
result=Zero();
GENERIC_STENCIL_LEG_EXT(Xp,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_EXT(Yp,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_EXT(Zp,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_EXT(Tp,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_EXT(Xm,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_EXT(Ym,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_EXT(Zm,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_EXT(Tm,spProjTm,accumReconTm);
if ( nmu ) {
auto out_t = coalescedRead(out[sF],lane);
out_t = out_t + result;
coalescedWrite(out[sF],out_t,lane);
}
};
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
typedef decltype(coalescedRead(in[0])) calcSpinor;
// calcHalfSpinor *chi_p;
calcHalfSpinor Uchi;
calcSpinor result;
StencilEntry *SE;
int ptype;
int nmu=0;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=SIMTlane(Nsimd);
result=Zero();
GENERIC_STENCIL_LEG_EXT(Xm,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_EXT(Ym,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_EXT(Zm,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_EXT(Tm,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_EXT(Xp,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_EXT(Yp,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_EXT(Zp,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_EXT(Tp,spProjTm,accumReconTm);
if ( nmu ) {
auto out_t = coalescedRead(out[sF],lane);
out_t = out_t + result;
coalescedWrite(out[sF],out_t,lane);
}
};
template <class Impl>
void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma)
{
typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
typedef decltype(coalescedRead(in[0])) calcSpinor;
calcHalfSpinor chi;
calcSpinor result;
calcHalfSpinor Uchi;
StencilEntry *SE;
int ptype;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=SIMTlane(Nsimd);
SE = st.GetEntry(ptype, dir, sF);
if (gamma == Xp) {
if (SE->_is_local ) {
int perm= SE->_permute;
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);
spProjXp(chi,tmp);
} else {
chi = coalescedRead(buf[SE->_offset],lane);
}
Impl::multLink(Uchi, U[sU], chi, dir, SE, st);
spReconXp(result, Uchi);
}
GENERIC_DHOPDIR_LEG(Yp,spProjYp,spReconYp);
GENERIC_DHOPDIR_LEG(Zp,spProjZp,spReconZp);
GENERIC_DHOPDIR_LEG(Tp,spProjTp,spReconTp);
GENERIC_DHOPDIR_LEG(Xm,spProjXm,spReconXm);
GENERIC_DHOPDIR_LEG(Ym,spProjYm,spReconYm);
GENERIC_DHOPDIR_LEG(Zm,spProjZm,spReconZm);
GENERIC_DHOPDIR_LEG(Tm,spProjTm,spReconTm);
coalescedWrite(out[sF], result,lane);
}
template <class Impl>
void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma)
{
assert(dirdisp<=7);
assert(dirdisp>=0);
auto U_v = U.View();
auto in_v = in.View();
auto out_v = out.View();
auto st_v = st.View();
accelerator_for(ss,Nsite,Simd::Nsimd(),{
for(int s=0;s<Ls;s++){
int sU=ss;
int sF = s+Ls*sU;
DhopDirK(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_v,dirdisp,gamma);
}
});
}
#define KERNEL_CALLNB(A) \
const uint64_t NN = Nsite*Ls; \
accelerator_forNB( ss, NN, Simd::Nsimd(), { \
int sF = ss; \
int sU = ss/Ls; \
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
});
#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
#define ASM_CALL(A) \
thread_for( ss, Nsite, { \
int sU = ss; \
int sF = ss*Ls; \
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,Ls,1,in_v,out_v); \
});
template <class Impl>
void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior,int exterior)
{
auto U_v = U.View();
auto in_v = in.View();
auto out_v = out.View();
auto st_v = st.View();
if( interior && exterior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;}
#ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); printf("."); return;}
#endif
} else if( interior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALLNB(GenericDhopSiteInt); return;}
#ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALLNB(HandDhopSiteInt); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); printf("-"); return;}
#endif
} else if( exterior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;}
#ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); printf("+"); return;}
#endif
}
assert(0 && " Kernel optimisation case not covered ");
}
template <class Impl>
void WilsonKernels<Impl>::DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior,int exterior)
{
auto U_v = U.View();
auto in_v = in.View();
auto out_v = out.View();
auto st_v = st.View();
if( interior && exterior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDag); return;}
#ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDag); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDag); return;}
#endif
} else if( interior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagInt); return;}
#ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagInt); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagInt); return;}
#endif
} else if( exterior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagExt); return;}
#ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagExt); return;}
#endif
}
assert(0 && " Kernel optimisation case not covered ");
}
/*******************************************************************************
* Conserved current utilities for Wilson fermions, for contracting propagators
* to make a conserved current sink or inserting the conserved current
* sequentially. Common to both 4D and 5D.
******************************************************************************/
// N.B. Functions below assume a -1/2 factor within U.
#define WilsonCurrentFwd(expr, mu) ((expr - Gamma::gmu[mu]*expr))
#define WilsonCurrentBwd(expr, mu) ((expr + Gamma::gmu[mu]*expr))
/*******************************************************************************
* Name: ContractConservedCurrentSiteFwd
* Operation: (1/2) * q2[x] * U(x) * (g[mu] - 1) * q1[x + mu]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in_1 shifted in +ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
bool switch_sign)
{
SitePropagator result, tmp;
Gamma g5(Gamma::Algebra::Gamma5);
Impl::multLink(tmp, U[sU], q_in_1, mu);
result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu);
if (switch_sign) {
q_out -= result;
} else {
q_out += result;
}
}
/*******************************************************************************
* Name: ContractConservedCurrentSiteBwd
* Operation: (1/2) * q2[x + mu] * U^dag(x) * (g[mu] + 1) * q1[x]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in_2 shifted in +ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
bool switch_sign)
{
SitePropagator result, tmp;
Gamma g5(Gamma::Algebra::Gamma5);
Impl::multLink(tmp, U[sU], q_in_1, mu + Nd);
result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu);
if (switch_sign) {
q_out += result;
} else {
q_out -= result;
}
}
/*******************************************************************************
* Name: SeqConservedCurrentSiteFwd
* Operation: (1/2) * U(x) * (g[mu] - 1) * q[x + mu]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in shifted in +ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
vPredicate t_mask,
bool switch_sign)
{
SitePropagator result;
Impl::multLink(result, U[sU], q_in, mu);
result = WilsonCurrentFwd(result, mu);
// Zero any unwanted timeslice entries.
result = predicatedWhere(t_mask, result, 0.*result);
if (switch_sign) {
q_out -= result;
} else {
q_out += result;
}
}
/*******************************************************************************
* Name: SeqConservedCurrentSiteFwd
* Operation: (1/2) * U^dag(x) * (g[mu] + 1) * q[x - mu]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in shifted in -ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
vPredicate t_mask,
bool switch_sign)
{
SitePropagator result;
Impl::multLink(result, U[sU], q_in, mu + Nd);
result = WilsonCurrentBwd(result, mu);
// Zero any unwanted timeslice entries.
result = predicatedWhere(t_mask, result, 0.*result);
if (switch_sign) {
q_out += result;
} else {
q_out -= result;
}
}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,97 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc
Copyright (C) 2015
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/WilsonTMFermion.h>
#pragma once
NAMESPACE_BEGIN(Grid);
/*
* BF sequence
*
void bfmbase<Float>::MooeeInv(Fermion_t psi,
Fermion_t chi,
int dag, int cb)
double m = this->mass;
double tm = this->twistedmass;
double mtil = 4.0+this->mass;
double sq = mtil*mtil + tm*tm;
double a = mtil/sq;
double b = -tm /sq;
if(dag) b=-b;
axpibg5x(chi,psi,a,b);
void bfmbase<Float>::Mooee(Fermion_t psi,
Fermion_t chi,
int dag,int cb)
double a = 4.0+this->mass;
double b = this->twistedmass;
if(dag) b=-b;
axpibg5x(chi,psi,a,b);
*/
template<class Impl>
void WilsonTMFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
RealD a = 4.0+this->mass;
RealD b = this->mu;
out.Checkerboard() = in.Checkerboard();
axpibg5x(out,in,a,b);
}
template<class Impl>
void WilsonTMFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
RealD a = 4.0+this->mass;
RealD b = -this->mu;
out.Checkerboard() = in.Checkerboard();
axpibg5x(out,in,a,b);
}
template<class Impl>
void WilsonTMFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
RealD m = this->mass;
RealD tm = this->mu;
RealD mtil = 4.0+m;
RealD sq = mtil*mtil+tm*tm;
RealD a = mtil/sq;
RealD b = -tm /sq;
axpibg5x(out,in,a,b);
}
template<class Impl>
void WilsonTMFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out) {
RealD m = this->mass;
RealD tm = this->mu;
RealD mtil = 4.0+m;
RealD sq = mtil*mtil+tm*tm;
RealD a = mtil/sq;
RealD b = tm /sq;
axpibg5x(out,in,a,b);
}
NAMESPACE_END(Grid);

Some files were not shown because too many files have changed in this diff Show More