mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Merge branch 'develop' into feature/hirep
This commit is contained in:
		@@ -60,6 +60,12 @@ namespace QCD {
 | 
			
		||||
    static const int SpinIndex   = 1;
 | 
			
		||||
    static const int LorentzIndex= 0;
 | 
			
		||||
 | 
			
		||||
    // Also should make these a named enum type
 | 
			
		||||
    static const int DaggerNo=0;
 | 
			
		||||
    static const int DaggerYes=1;
 | 
			
		||||
    static const int InverseNo=0;
 | 
			
		||||
    static const int InverseYes=1;
 | 
			
		||||
 | 
			
		||||
    // Useful traits is this a spin index
 | 
			
		||||
    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
 | 
			
		||||
@@ -485,25 +491,25 @@ namespace QCD {
 | 
			
		||||
} // Grid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/utils/SpaceTimeGrid.h>
 | 
			
		||||
#include <Grid/qcd/spin/Dirac.h>
 | 
			
		||||
#include <Grid/qcd/spin/TwoSpinor.h>
 | 
			
		||||
#include <Grid/qcd/utils/LinalgUtils.h>
 | 
			
		||||
#include <Grid/qcd/utils/CovariantCshift.h>
 | 
			
		||||
	
 | 
			
		||||
#include <Grid/qcd/utils/SUn.h>
 | 
			
		||||
#include <Grid/qcd/utils/SUnAdjoint.h>
 | 
			
		||||
	
 | 
			
		||||
#include <Grid/qcd/representations/hmc_types.h>
 | 
			
		||||
	
 | 
			
		||||
#include <Grid/qcd/action/Actions.h>
 | 
			
		||||
 | 
			
		||||
#include <qcd/spin/Dirac.h>
 | 
			
		||||
#include <Grid/qcd/smearing/Smearing.h>
 | 
			
		||||
 | 
			
		||||
#include <qcd/spin/TwoSpinor.h>
 | 
			
		||||
#include <Grid/qcd/hmc/integrators/Integrator.h>
 | 
			
		||||
#include <Grid/qcd/hmc/integrators/Integrator_algorithm.h>
 | 
			
		||||
#include <Grid/qcd/hmc/HMC.h>
 | 
			
		||||
 | 
			
		||||
#include <qcd/utils/SpaceTimeGrid.h>
 | 
			
		||||
#include <qcd/utils/LinalgUtils.h>
 | 
			
		||||
#include <qcd/utils/CovariantCshift.h>
 | 
			
		||||
#include <qcd/utils/SUn.h>
 | 
			
		||||
#include <qcd/utils/SUnAdjoint.h>
 | 
			
		||||
 | 
			
		||||
#include <qcd/representations/hmc_types.h>
 | 
			
		||||
 | 
			
		||||
#include <qcd/action/Actions.h>
 | 
			
		||||
 | 
			
		||||
#include <qcd/smearing/Smearing.h>
 | 
			
		||||
 | 
			
		||||
#include <qcd/hmc/integrators/Integrator.h>
 | 
			
		||||
#include <qcd/hmc/integrators/Integrator_algorithm.h>
 | 
			
		||||
#include <qcd/hmc/HMC.h>
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -40,25 +40,25 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
// Abstract base interface
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
#include <qcd/action/ActionBase.h>
 | 
			
		||||
#include <qcd/action/ActionParams.h>
 | 
			
		||||
#include <Grid/qcd/action/ActionBase.h>
 | 
			
		||||
#include <Grid/qcd/action/ActionParams.h>
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
// Utility functions
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
#include <qcd/action/gauge/GaugeImpl.h>
 | 
			
		||||
#include <qcd/utils/WilsonLoops.h>
 | 
			
		||||
#include <Grid/qcd/action/gauge/GaugeImpl.h>
 | 
			
		||||
#include <Grid/qcd/utils/WilsonLoops.h>
 | 
			
		||||
 | 
			
		||||
#include <qcd/action/fermion/WilsonCompressor.h>     //used by all wilson type fermions
 | 
			
		||||
#include <qcd/action/fermion/FermionOperatorImpl.h>
 | 
			
		||||
#include <qcd/action/fermion/FermionOperator.h>
 | 
			
		||||
#include <qcd/action/fermion/WilsonKernels.h>        //used by all wilson type fermions
 | 
			
		||||
#include <Grid/qcd/action/fermion/WilsonCompressor.h>     //used by all wilson type fermions
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionOperatorImpl.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionOperator.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/WilsonKernels.h>        //used by all wilson type fermions
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
// Gauge Actions
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
#include <qcd/action/gauge/WilsonGaugeAction.h>
 | 
			
		||||
#include <qcd/action/gauge/PlaqPlusRectangleAction.h>
 | 
			
		||||
#include <Grid/qcd/action/gauge/WilsonGaugeAction.h>
 | 
			
		||||
#include <Grid/qcd/action/gauge/PlaqPlusRectangleAction.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
@@ -107,7 +107,8 @@ typedef SymanzikGaugeAction<ConjugateGimplD>        ConjugateSymanzikGaugeAction
 | 
			
		||||
// for EVERY .cc file. This define centralises the list and restores global push of impl cases
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
#define FermOpTemplateInstantiate(A) \
 | 
			
		||||
 | 
			
		||||
#define FermOp4dVecTemplateInstantiate(A) \
 | 
			
		||||
  template class A<WilsonImplF>;		\
 | 
			
		||||
  template class A<WilsonImplD>;		\
 | 
			
		||||
  template class A<GparityWilsonImplF>;		\
 | 
			
		||||
@@ -117,35 +118,44 @@ typedef SymanzikGaugeAction<ConjugateGimplD>        ConjugateSymanzikGaugeAction
 | 
			
		||||
  template class A<WilsonAdjImplF>; \
 | 
			
		||||
  template class A<WilsonAdjImplD>; 
 | 
			
		||||
 | 
			
		||||
#define FermOp5dVecTemplateInstantiate(A) \
 | 
			
		||||
  template class A<DomainWallVec5dImplF>;	\
 | 
			
		||||
  template class A<DomainWallVec5dImplD>;	
 | 
			
		||||
 | 
			
		||||
#define FermOpTemplateInstantiate(A) \
 | 
			
		||||
 FermOp4dVecTemplateInstantiate(A) \
 | 
			
		||||
 FermOp5dVecTemplateInstantiate(A) 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define GparityFermOpTemplateInstantiate(A) 
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
// Fermion operators / actions
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
#include <qcd/action/fermion/WilsonFermion.h>       // 4d wilson like
 | 
			
		||||
#include <qcd/action/fermion/WilsonTMFermion.h>       // 4d wilson like
 | 
			
		||||
#include <qcd/action/fermion/WilsonFermion5D.h>     // 5d base used by all 5d overlap types
 | 
			
		||||
#include <Grid/qcd/action/fermion/WilsonFermion.h>       // 4d wilson like
 | 
			
		||||
#include <Grid/qcd/action/fermion/WilsonTMFermion.h>       // 4d wilson like
 | 
			
		||||
#include <Grid/qcd/action/fermion/WilsonFermion5D.h>     // 5d base used by all 5d overlap types
 | 
			
		||||
 | 
			
		||||
//#include <qcd/action/fermion/CloverFermion.h>
 | 
			
		||||
//#include <Grid/qcd/action/fermion/CloverFermion.h>
 | 
			
		||||
 | 
			
		||||
#include <qcd/action/fermion/CayleyFermion5D.h>     // Cayley types
 | 
			
		||||
#include <qcd/action/fermion/DomainWallFermion.h>
 | 
			
		||||
#include <qcd/action/fermion/DomainWallFermion.h>
 | 
			
		||||
#include <qcd/action/fermion/MobiusFermion.h>
 | 
			
		||||
#include <qcd/action/fermion/ScaledShamirFermion.h>
 | 
			
		||||
#include <qcd/action/fermion/MobiusZolotarevFermion.h>
 | 
			
		||||
#include <qcd/action/fermion/ShamirZolotarevFermion.h>
 | 
			
		||||
#include <qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h>
 | 
			
		||||
#include <qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>     // Cayley types
 | 
			
		||||
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/MobiusFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/ScaledShamirFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h>
 | 
			
		||||
 | 
			
		||||
#include <qcd/action/fermion/ContinuedFractionFermion5D.h>               // Continued fraction
 | 
			
		||||
#include <qcd/action/fermion/OverlapWilsonContfracTanhFermion.h>
 | 
			
		||||
#include <qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/ContinuedFractionFermion5D.h>               // Continued fraction
 | 
			
		||||
#include <Grid/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h>
 | 
			
		||||
 | 
			
		||||
#include <qcd/action/fermion/PartialFractionFermion5D.h>                 // Partial fraction
 | 
			
		||||
#include <qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
 | 
			
		||||
#include <qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/PartialFractionFermion5D.h>                 // Partial fraction
 | 
			
		||||
#include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h>
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// More maintainable to maintain the following typedef list centrally, as more "impl" targets
 | 
			
		||||
@@ -230,21 +240,21 @@ typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD;
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
#include <qcd/action/fermion/g5HermitianLinop.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/g5HermitianLinop.h>
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////
 | 
			
		||||
// Pseudo fermion combinations for HMC
 | 
			
		||||
////////////////////////////////////////
 | 
			
		||||
#include <qcd/action/pseudofermion/EvenOddSchurDifferentiable.h>
 | 
			
		||||
#include <Grid/qcd/action/pseudofermion/EvenOddSchurDifferentiable.h>
 | 
			
		||||
 | 
			
		||||
#include <qcd/action/pseudofermion/TwoFlavour.h>
 | 
			
		||||
#include <qcd/action/pseudofermion/TwoFlavourRatio.h>
 | 
			
		||||
#include <qcd/action/pseudofermion/TwoFlavourEvenOdd.h>
 | 
			
		||||
#include <qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h>
 | 
			
		||||
#include <Grid/qcd/action/pseudofermion/TwoFlavour.h>
 | 
			
		||||
#include <Grid/qcd/action/pseudofermion/TwoFlavourRatio.h>
 | 
			
		||||
#include <Grid/qcd/action/pseudofermion/TwoFlavourEvenOdd.h>
 | 
			
		||||
#include <Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h>
 | 
			
		||||
 | 
			
		||||
#include <qcd/action/pseudofermion/OneFlavourRational.h>
 | 
			
		||||
#include <qcd/action/pseudofermion/OneFlavourRationalRatio.h>
 | 
			
		||||
#include <qcd/action/pseudofermion/OneFlavourEvenOddRational.h>
 | 
			
		||||
#include <qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h>
 | 
			
		||||
#include <Grid/qcd/action/pseudofermion/OneFlavourRational.h>
 | 
			
		||||
#include <Grid/qcd/action/pseudofermion/OneFlavourRationalRatio.h>
 | 
			
		||||
#include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRational.h>
 | 
			
		||||
#include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h>
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -28,7 +28,10 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
@@ -45,486 +48,342 @@ namespace QCD {
 | 
			
		||||
		   FourDimGrid,
 | 
			
		||||
 	 	   FourDimRedBlackGrid,_M5,p),
 | 
			
		||||
   mass(_mass)
 | 
			
		||||
 {
 | 
			
		||||
 }
 | 
			
		||||
 { }
 | 
			
		||||
 | 
			
		||||
 template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::Meooe5D    (const FermionField &psi, FermionField &Din)
 | 
			
		||||
  {
 | 
			
		||||
    // Assemble Din
 | 
			
		||||
    int Ls=this->Ls;
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      if ( s==0 ) {
 | 
			
		||||
	//	Din = bs psi[s] + cs[s] psi[s+1}
 | 
			
		||||
	axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
 | 
			
		||||
	//      Din+= -mass*cs[s] psi[s+1}
 | 
			
		||||
	axpby_ssp_pplus (Din,1.0,Din,-mass*cs[s],psi,s,Ls-1);
 | 
			
		||||
      } else if ( s==(Ls-1)) { 
 | 
			
		||||
	axpby_ssp_pminus(Din,bs[s],psi,-mass*cs[s],psi,s,0);
 | 
			
		||||
	axpby_ssp_pplus (Din,1.0,Din,cs[s],psi,s,s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
	axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
 | 
			
		||||
	axpby_ssp_pplus(Din,1.0,Din,cs[s],psi,s,s-1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
template<class Impl>  
 | 
			
		||||
void CayleyFermion5D<Impl>::M5D   (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  std::vector<RealD> diag (Ls,1.0);
 | 
			
		||||
  std::vector<RealD> upper(Ls,-1.0); upper[Ls-1]=mass;
 | 
			
		||||
  std::vector<RealD> lower(Ls,-1.0); lower[0]   =mass;
 | 
			
		||||
  M5D(psi,chi,chi,lower,diag,upper);
 | 
			
		||||
}
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::Meooe5D    (const FermionField &psi, FermionField &Din)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  std::vector<RealD> diag = bs;
 | 
			
		||||
  std::vector<RealD> upper= cs;
 | 
			
		||||
  std::vector<RealD> lower= cs; 
 | 
			
		||||
  upper[Ls-1]=-mass*upper[Ls-1];
 | 
			
		||||
  lower[0]   =-mass*lower[0];
 | 
			
		||||
  M5D(psi,psi,Din,lower,diag,upper);
 | 
			
		||||
}
 | 
			
		||||
template<class Impl> void CayleyFermion5D<Impl>::Meo5D     (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  std::vector<RealD> diag = beo;
 | 
			
		||||
  std::vector<RealD> upper(Ls);
 | 
			
		||||
  std::vector<RealD> lower(Ls);
 | 
			
		||||
  for(int i=0;i<Ls;i++) {
 | 
			
		||||
    upper[i]=-ceo[i];
 | 
			
		||||
    lower[i]=-ceo[i];
 | 
			
		||||
  }
 | 
			
		||||
 template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::MeooeDag5D    (const FermionField &psi, FermionField &Din)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls=this->Ls;
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      if ( s==0 ) {
 | 
			
		||||
	axpby_ssp_pplus (Din,bs[s],psi,cs[s+1],psi,s,s+1);
 | 
			
		||||
	axpby_ssp_pminus(Din,1.0,Din,-mass*cs[Ls-1],psi,s,Ls-1);
 | 
			
		||||
      } else if ( s==(Ls-1)) { 
 | 
			
		||||
	axpby_ssp_pplus (Din,bs[s],psi,-mass*cs[0],psi,s,0);
 | 
			
		||||
	axpby_ssp_pminus(Din,1.0,Din,cs[s-1],psi,s,s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
	axpby_ssp_pplus (Din,bs[s],psi,cs[s+1],psi,s,s+1);
 | 
			
		||||
	axpby_ssp_pminus(Din,1.0,Din,cs[s-1],psi,s,s-1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  upper[Ls-1]=-mass*upper[Ls-1];
 | 
			
		||||
  lower[0]   =-mass*lower[0];
 | 
			
		||||
  M5D(psi,psi,chi,lower,diag,upper);
 | 
			
		||||
}
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::Mooee       (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  std::vector<RealD> diag = bee;
 | 
			
		||||
  std::vector<RealD> upper(Ls);
 | 
			
		||||
  std::vector<RealD> lower(Ls);
 | 
			
		||||
  for(int i=0;i<Ls;i++) {
 | 
			
		||||
    upper[i]=-cee[i];
 | 
			
		||||
    lower[i]=-cee[i];
 | 
			
		||||
  }
 | 
			
		||||
  upper[Ls-1]=-mass*upper[Ls-1];
 | 
			
		||||
  lower[0]   =-mass*lower[0];
 | 
			
		||||
  M5D(psi,psi,chi,lower,diag,upper);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
  // override multiply
 | 
			
		||||
 template<class Impl>
 | 
			
		||||
  RealD CayleyFermion5D<Impl>::M    (const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls=this->Ls;
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MooeeDag    (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  std::vector<RealD> diag = bee;
 | 
			
		||||
  std::vector<RealD> upper(Ls);
 | 
			
		||||
  std::vector<RealD> lower(Ls);
 | 
			
		||||
 | 
			
		||||
    FermionField Din(psi._grid);
 | 
			
		||||
 | 
			
		||||
    // Assemble Din
 | 
			
		||||
    /*
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      if ( s==0 ) {
 | 
			
		||||
	//	Din = bs psi[s] + cs[s] psi[s+1}
 | 
			
		||||
	axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
 | 
			
		||||
	//      Din+= -mass*cs[s] psi[s+1}
 | 
			
		||||
	axpby_ssp_pplus (Din,1.0,Din,-mass*cs[s],psi,s,Ls-1);
 | 
			
		||||
      } else if ( s==(Ls-1)) { 
 | 
			
		||||
	axpby_ssp_pminus(Din,bs[s],psi,-mass*cs[s],psi,s,0);
 | 
			
		||||
	axpby_ssp_pplus (Din,1.0,Din,cs[s],psi,s,s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
	axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
 | 
			
		||||
	axpby_ssp_pplus(Din,1.0,Din,cs[s],psi,s,s-1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    */
 | 
			
		||||
    Meooe5D(psi,Din);
 | 
			
		||||
 | 
			
		||||
    this->DW(Din,chi,DaggerNo);
 | 
			
		||||
    // ((b D_W + D_w hop terms +1) on s-diag
 | 
			
		||||
    axpby(chi,1.0,1.0,chi,psi); 
 | 
			
		||||
 | 
			
		||||
    // Call Mooee??
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      if ( s==0 ){
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s+1);
 | 
			
		||||
	axpby_ssp_pplus (chi,1.0,chi,mass,psi,s,Ls-1);
 | 
			
		||||
      } else if ( s==(Ls-1)) {
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0,chi,mass,psi,s,0);
 | 
			
		||||
	axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s+1);
 | 
			
		||||
	axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s-1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    return norm2(chi);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 template<class Impl>
 | 
			
		||||
  RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    // Under adjoint
 | 
			
		||||
    //D1+        D1- P-    ->   D1+^dag   P+ D2-^dag
 | 
			
		||||
    //D2- P+     D2+            P-D1-^dag D2+dag
 | 
			
		||||
 | 
			
		||||
    FermionField Din(psi._grid);
 | 
			
		||||
    // Apply Dw
 | 
			
		||||
    this->DW(psi,Din,DaggerYes); 
 | 
			
		||||
 | 
			
		||||
    MeooeDag5D(Din,chi);
 | 
			
		||||
 | 
			
		||||
    int Ls=this->Ls;
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
 | 
			
		||||
      // Collect the terms in DW
 | 
			
		||||
      //	Chi = bs Din[s] + cs[s] Din[s+1}
 | 
			
		||||
      //    Chi+= -mass*cs[s] psi[s+1}
 | 
			
		||||
      /*
 | 
			
		||||
      if ( s==0 ) {
 | 
			
		||||
	axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1);
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0,chi,-mass*cs[Ls-1],Din,s,Ls-1);
 | 
			
		||||
      } else if ( s==(Ls-1)) { 
 | 
			
		||||
	axpby_ssp_pplus (chi,bs[s],Din,-mass*cs[0],Din,s,0);
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0,chi,cs[s-1],Din,s,s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
	axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1);
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0,chi,cs[s-1],Din,s,s-1);
 | 
			
		||||
      }
 | 
			
		||||
      */
 | 
			
		||||
 | 
			
		||||
      // FIXME just call MooeeDag??
 | 
			
		||||
 | 
			
		||||
      // Collect the terms indept of DW
 | 
			
		||||
      if ( s==0 ){
 | 
			
		||||
	axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s+1);
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0,chi,mass,psi,s,Ls-1);
 | 
			
		||||
      } else if ( s==(Ls-1)) {
 | 
			
		||||
	axpby_ssp_pplus (chi,1.0,chi,mass,psi,s,0);
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
	axpby_ssp_pplus(chi,1.0,chi,-1.0,psi,s,s+1);
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s-1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    // ((b D_W + D_w hop terms +1) on s-diag
 | 
			
		||||
    axpby (chi,1.0,1.0,chi,psi); 
 | 
			
		||||
    return norm2(chi);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // half checkerboard operations
 | 
			
		||||
 template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::Meooe       (const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls=this->Ls;
 | 
			
		||||
 | 
			
		||||
    FermionField tmp(psi._grid);
 | 
			
		||||
  for (int s=0;s<Ls;s++){
 | 
			
		||||
    // Assemble the 5d matrix
 | 
			
		||||
    Meooe5D(psi,tmp); 
 | 
			
		||||
#if 0
 | 
			
		||||
    std::cout << "Meooe Test replacement norm2 tmp = " <<norm2(tmp)<<std::endl;
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      if ( s==0 ) {
 | 
			
		||||
	//	tmp = bs psi[s] + cs[s] psi[s+1}
 | 
			
		||||
	//      tmp+= -mass*cs[s] psi[s+1}
 | 
			
		||||
	axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi ,s, s+1);
 | 
			
		||||
	axpby_ssp_pplus(tmp,1.0,tmp,mass*ceo[s],psi,s,Ls-1);
 | 
			
		||||
      } else if ( s==(Ls-1)) { 
 | 
			
		||||
	axpby_ssp_pminus(tmp,beo[s],psi,mass*ceo[s],psi,s,0);
 | 
			
		||||
	axpby_ssp_pplus(tmp,1.0,tmp,-ceo[s],psi,s,s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
	axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi,s,s+1);
 | 
			
		||||
	axpby_ssp_pplus (tmp,1.0,tmp,-ceo[s],psi,s,s-1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    std::cout << "Meooe Test replacement norm2 tmp old = " <<norm2(tmp)<<std::endl;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    // Apply 4d dslash
 | 
			
		||||
    if ( psi.checkerboard == Odd ) {
 | 
			
		||||
      this->DhopEO(tmp,chi,DaggerNo);
 | 
			
		||||
    if ( s==0 ) {
 | 
			
		||||
      upper[s] = -cee[s+1] ;
 | 
			
		||||
      lower[s] = mass*cee[Ls-1];
 | 
			
		||||
    } else if ( s==(Ls-1)) { 
 | 
			
		||||
      upper[s] = mass*cee[0];
 | 
			
		||||
      lower[s] = -cee[s-1];
 | 
			
		||||
    } else {
 | 
			
		||||
      this->DhopOE(tmp,chi,DaggerNo);
 | 
			
		||||
      upper[s]=-cee[s+1];
 | 
			
		||||
      lower[s]=-cee[s-1];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::MeooeDag    (const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    FermionField tmp(psi._grid);
 | 
			
		||||
    // Apply 4d dslash
 | 
			
		||||
    if ( psi.checkerboard == Odd ) {
 | 
			
		||||
      this->DhopEO(psi,tmp,DaggerYes);
 | 
			
		||||
    } else {
 | 
			
		||||
      this->DhopOE(psi,tmp,DaggerYes);
 | 
			
		||||
    }
 | 
			
		||||
  M5Ddag(psi,psi,chi,lower,diag,upper);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
    MeooeDag5D(tmp,chi); 
 | 
			
		||||
#if 0
 | 
			
		||||
    std::cout << "Meooe Test replacement norm2 chi new = " <<norm2(chi)<<std::endl;
 | 
			
		||||
    // Assemble the 5d matrix
 | 
			
		||||
    int Ls=this->Ls;
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      if ( s==0 ) {
 | 
			
		||||
	axpby_ssp_pplus(chi,beo[s],tmp,   -ceo[s+1]  ,tmp,s,s+1);
 | 
			
		||||
	axpby_ssp_pminus(chi,   1.0,chi,mass*ceo[Ls-1],tmp,s,Ls-1);
 | 
			
		||||
      } else if ( s==(Ls-1)) { 
 | 
			
		||||
	axpby_ssp_pplus(chi,beo[s],tmp,mass*ceo[0],tmp,s,0);
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0,chi,-ceo[s-1],tmp,s,s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
	axpby_ssp_pplus(chi,beo[s],tmp,-ceo[s+1],tmp,s,s+1);
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0   ,chi,-ceo[s-1],tmp,s,s-1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    std::cout << "Meooe Test replacement norm2 chi old = " <<norm2(chi)<<std::endl;
 | 
			
		||||
#endif
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  std::vector<RealD> diag(Ls,1.0);
 | 
			
		||||
  std::vector<RealD> upper(Ls,-1.0);
 | 
			
		||||
  std::vector<RealD> lower(Ls,-1.0);
 | 
			
		||||
  upper[Ls-1]=-mass*upper[Ls-1];
 | 
			
		||||
  lower[0]   =-mass*lower[0];
 | 
			
		||||
  M5Ddag(psi,chi,chi,lower,diag,upper);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MeooeDag5D    (const FermionField &psi, FermionField &Din)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  std::vector<RealD> diag =bs;
 | 
			
		||||
  std::vector<RealD> upper=cs;
 | 
			
		||||
  std::vector<RealD> lower=cs;
 | 
			
		||||
  upper[Ls-1]=-mass*upper[Ls-1];
 | 
			
		||||
  lower[0]   =-mass*lower[0];
 | 
			
		||||
  M5Ddag(psi,psi,Din,lower,diag,upper);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
RealD CayleyFermion5D<Impl>::M    (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  
 | 
			
		||||
  FermionField Din(psi._grid);
 | 
			
		||||
  
 | 
			
		||||
  // Assemble Din
 | 
			
		||||
  Meooe5D(psi,Din);
 | 
			
		||||
  
 | 
			
		||||
  this->DW(Din,chi,DaggerNo);
 | 
			
		||||
  // ((b D_W + D_w hop terms +1) on s-diag
 | 
			
		||||
  axpby(chi,1.0,1.0,chi,psi); 
 | 
			
		||||
  
 | 
			
		||||
  M5D(psi,chi);
 | 
			
		||||
  return(norm2(chi));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  // Under adjoint
 | 
			
		||||
  //D1+        D1- P-    ->   D1+^dag   P+ D2-^dag
 | 
			
		||||
  //D2- P+     D2+            P-D1-^dag D2+dag
 | 
			
		||||
  
 | 
			
		||||
  FermionField Din(psi._grid);
 | 
			
		||||
  // Apply Dw
 | 
			
		||||
  this->DW(psi,Din,DaggerYes); 
 | 
			
		||||
  
 | 
			
		||||
  MeooeDag5D(Din,chi);
 | 
			
		||||
  
 | 
			
		||||
  M5Ddag(psi,chi);
 | 
			
		||||
  // ((b D_W + D_w hop terms +1) on s-diag
 | 
			
		||||
  axpby (chi,1.0,1.0,chi,psi); 
 | 
			
		||||
  return norm2(chi);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// half checkerboard operations
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::Meooe       (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  FermionField tmp(psi._grid);
 | 
			
		||||
 | 
			
		||||
  Meooe5D(psi,tmp); 
 | 
			
		||||
 | 
			
		||||
  if ( psi.checkerboard == Odd ) {
 | 
			
		||||
    this->DhopEO(tmp,chi,DaggerNo);
 | 
			
		||||
  } else {
 | 
			
		||||
    this->DhopOE(tmp,chi,DaggerNo);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::Mooee       (const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls=this->Ls;
 | 
			
		||||
    for (int s=0;s<Ls;s++){
 | 
			
		||||
      if ( s==0 ) {
 | 
			
		||||
	axpby_ssp_pminus(chi,bee[s],psi ,-cee[s],psi,s,s+1);
 | 
			
		||||
	axpby_ssp_pplus (chi,1.0,chi,mass*cee[s],psi,s,Ls-1);
 | 
			
		||||
      } else if ( s==(Ls-1)) { 
 | 
			
		||||
	axpby_ssp_pminus(chi,bee[s],psi,mass*cee[s],psi,s,0);
 | 
			
		||||
	axpby_ssp_pplus (chi,1.0,chi,-cee[s],psi,s,s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
	axpby_ssp_pminus(chi,bee[s],psi,-cee[s],psi,s,s+1);
 | 
			
		||||
	axpby_ssp_pplus (chi,1.0,chi,-cee[s],psi,s,s-1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MeooeDag    (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  FermionField tmp(psi._grid);
 | 
			
		||||
  // Apply 4d dslash
 | 
			
		||||
  if ( psi.checkerboard == Odd ) {
 | 
			
		||||
    this->DhopEO(psi,tmp,DaggerYes);
 | 
			
		||||
  } else {
 | 
			
		||||
    this->DhopOE(psi,tmp,DaggerYes);
 | 
			
		||||
  }
 | 
			
		||||
  MeooeDag5D(tmp,chi); 
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 template<class Impl>
 | 
			
		||||
  void  CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
 | 
			
		||||
    int Ls=this->Ls;
 | 
			
		||||
    FermionField tmp(psi._grid);
 | 
			
		||||
    // Assemble the 5d matrix
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      if ( s==0 ) {
 | 
			
		||||
	//	tmp = bs psi[s] + cs[s] psi[s+1}
 | 
			
		||||
	//      tmp+= -mass*cs[s] psi[s+1}
 | 
			
		||||
	axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi ,s, s+1);
 | 
			
		||||
	axpby_ssp_pplus(tmp,1.0,tmp,mass*ceo[s],psi,s,Ls-1);
 | 
			
		||||
      } else if ( s==(Ls-1)) { 
 | 
			
		||||
	axpby_ssp_pminus(tmp,beo[s],psi,mass*ceo[s],psi,s,0);
 | 
			
		||||
	axpby_ssp_pplus(tmp,1.0,tmp,-ceo[s],psi,s,s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
	axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi,s,s+1);
 | 
			
		||||
	axpby_ssp_pplus (tmp,1.0,tmp,-ceo[s],psi,s,s-1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    // Apply 4d dslash fragment
 | 
			
		||||
    this->DhopDir(tmp,chi,dir,disp);
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void  CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
 | 
			
		||||
  FermionField tmp(psi._grid);
 | 
			
		||||
  Meo5D(psi,tmp);
 | 
			
		||||
  // Apply 4d dslash fragment
 | 
			
		||||
  this->DhopDir(tmp,chi,dir,disp);
 | 
			
		||||
}
 | 
			
		||||
// force terms; five routines; default to Dhop on diagonal
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
 | 
			
		||||
{
 | 
			
		||||
  FermionField Din(V._grid);
 | 
			
		||||
  
 | 
			
		||||
  if ( dag == DaggerNo ) {
 | 
			
		||||
    //      U d/du [D_w D5] V = U d/du DW D5 V
 | 
			
		||||
    Meooe5D(V,Din);
 | 
			
		||||
    this->DhopDeriv(mat,U,Din,dag);
 | 
			
		||||
  } else {
 | 
			
		||||
    //      U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
 | 
			
		||||
    Meooe5D(U,Din);
 | 
			
		||||
    this->DhopDeriv(mat,Din,V,dag);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::MooeeDag    (const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls=this->Ls;
 | 
			
		||||
    for (int s=0;s<Ls;s++){
 | 
			
		||||
      // Assemble the 5d matrix
 | 
			
		||||
      if ( s==0 ) {
 | 
			
		||||
	axpby_ssp_pplus(chi,bee[s],psi,-cee[s+1]  ,psi,s,s+1);
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0,chi,mass*cee[Ls-1],psi,s,Ls-1);
 | 
			
		||||
      } else if ( s==(Ls-1)) { 
 | 
			
		||||
	axpby_ssp_pplus(chi,bee[s],psi,mass*cee[0],psi,s,0);
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0,chi,-cee[s-1],psi,s,s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
	axpby_ssp_pplus(chi,bee[s],psi,-cee[s+1],psi,s,s+1);
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0   ,chi,-cee[s-1],psi,s,s-1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::MooeeInv    (const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls=this->Ls;
 | 
			
		||||
    // Apply (L^{\prime})^{-1}
 | 
			
		||||
    axpby_ssp (chi,1.0,psi,     0.0,psi,0,0);      // chi[0]=psi[0]
 | 
			
		||||
    for (int s=1;s<Ls;s++){
 | 
			
		||||
      axpby_ssp_pplus(chi,1.0,psi,-lee[s-1],chi,s,s-1);// recursion Psi[s] -lee P_+ chi[s-1]
 | 
			
		||||
    }
 | 
			
		||||
    // L_m^{-1} 
 | 
			
		||||
    for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
 | 
			
		||||
      axpby_ssp_pminus(chi,1.0,chi,-leem[s],chi,Ls-1,s);
 | 
			
		||||
    }
 | 
			
		||||
    // U_m^{-1} D^{-1}
 | 
			
		||||
    for (int s=0;s<Ls-1;s++){
 | 
			
		||||
      // Chi[s] + 1/d chi[s] 
 | 
			
		||||
      axpby_ssp_pplus(chi,1.0/dee[s],chi,-ueem[s]/dee[Ls-1],chi,s,Ls-1);
 | 
			
		||||
    }	
 | 
			
		||||
    axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable 
 | 
			
		||||
    
 | 
			
		||||
    // Apply U^{-1}
 | 
			
		||||
    for (int s=Ls-2;s>=0;s--){
 | 
			
		||||
      axpby_ssp_pminus (chi,1.0,chi,-uee[s],chi,s,s+1);  // chi[Ls]
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls=this->Ls;
 | 
			
		||||
    // Apply (U^{\prime})^{-dagger}
 | 
			
		||||
    axpby_ssp (chi,1.0,psi,     0.0,psi,0,0);      // chi[0]=psi[0]
 | 
			
		||||
    for (int s=1;s<Ls;s++){
 | 
			
		||||
      axpby_ssp_pminus(chi,1.0,psi,-uee[s-1],chi,s,s-1);
 | 
			
		||||
    }
 | 
			
		||||
    // U_m^{-\dagger} 
 | 
			
		||||
    for (int s=0;s<Ls-1;s++){
 | 
			
		||||
      axpby_ssp_pplus(chi,1.0,chi,-ueem[s],chi,Ls-1,s);
 | 
			
		||||
    }
 | 
			
		||||
    // L_m^{-\dagger} D^{-dagger}
 | 
			
		||||
    for (int s=0;s<Ls-1;s++){
 | 
			
		||||
      axpby_ssp_pminus(chi,1.0/dee[s],chi,-leem[s]/dee[Ls-1],chi,s,Ls-1);
 | 
			
		||||
    }	
 | 
			
		||||
    axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable 
 | 
			
		||||
    
 | 
			
		||||
    // Apply L^{-dagger}
 | 
			
		||||
    for (int s=Ls-2;s>=0;s--){
 | 
			
		||||
      axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1);  // chi[Ls]
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // force terms; five routines; default to Dhop on diagonal
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::MDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
 | 
			
		||||
  {
 | 
			
		||||
    FermionField Din(V._grid);
 | 
			
		||||
 | 
			
		||||
    if ( dag == DaggerNo ) {
 | 
			
		||||
      //      U d/du [D_w D5] V = U d/du DW D5 V
 | 
			
		||||
      Meooe5D(V,Din);
 | 
			
		||||
      this->DhopDeriv(mat,U,Din,dag);
 | 
			
		||||
    } else {
 | 
			
		||||
      //      U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
 | 
			
		||||
      Meooe5D(U,Din);
 | 
			
		||||
      this->DhopDeriv(mat,Din,V,dag);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
 | 
			
		||||
  {
 | 
			
		||||
    FermionField Din(V._grid);
 | 
			
		||||
 | 
			
		||||
    if ( dag == DaggerNo ) {
 | 
			
		||||
      //      U d/du [D_w D5] V = U d/du DW D5 V
 | 
			
		||||
      Meooe5D(V,Din);
 | 
			
		||||
      this->DhopDerivOE(mat,U,Din,dag);
 | 
			
		||||
    } else {
 | 
			
		||||
      //      U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
 | 
			
		||||
};
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
 | 
			
		||||
{
 | 
			
		||||
  FermionField Din(V._grid);
 | 
			
		||||
  
 | 
			
		||||
  if ( dag == DaggerNo ) {
 | 
			
		||||
    //      U d/du [D_w D5] V = U d/du DW D5 V
 | 
			
		||||
    Meooe5D(V,Din);
 | 
			
		||||
    this->DhopDerivOE(mat,U,Din,dag);
 | 
			
		||||
  } else {
 | 
			
		||||
    //      U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
 | 
			
		||||
      Meooe5D(U,Din);
 | 
			
		||||
      this->DhopDerivOE(mat,Din,V,dag);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
 | 
			
		||||
  {
 | 
			
		||||
    FermionField Din(V._grid);
 | 
			
		||||
 | 
			
		||||
    if ( dag == DaggerNo ) {
 | 
			
		||||
      //      U d/du [D_w D5] V = U d/du DW D5 V
 | 
			
		||||
      Meooe5D(V,Din);
 | 
			
		||||
      this->DhopDerivEO(mat,U,Din,dag);
 | 
			
		||||
    } else {
 | 
			
		||||
      //      U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
 | 
			
		||||
      Meooe5D(U,Din);
 | 
			
		||||
      this->DhopDerivEO(mat,Din,V,dag);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
 | 
			
		||||
{
 | 
			
		||||
  FermionField Din(V._grid);
 | 
			
		||||
  
 | 
			
		||||
  // Tanh
 | 
			
		||||
 template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
 | 
			
		||||
  {
 | 
			
		||||
    SetCoefficientsZolotarev(1.0,zdata,b,c);
 | 
			
		||||
 | 
			
		||||
  if ( dag == DaggerNo ) {
 | 
			
		||||
    //      U d/du [D_w D5] V = U d/du DW D5 V
 | 
			
		||||
    Meooe5D(V,Din);
 | 
			
		||||
    this->DhopDerivEO(mat,U,Din,dag);
 | 
			
		||||
  } else {
 | 
			
		||||
    //      U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
 | 
			
		||||
    Meooe5D(U,Din);
 | 
			
		||||
    this->DhopDerivEO(mat,Din,V,dag);
 | 
			
		||||
  }
 | 
			
		||||
  //Zolo
 | 
			
		||||
 template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls=this->Ls;
 | 
			
		||||
};
 | 
			
		||||
  
 | 
			
		||||
// Tanh
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
 | 
			
		||||
{
 | 
			
		||||
  SetCoefficientsZolotarev(1.0,zdata,b,c);
 | 
			
		||||
}
 | 
			
		||||
//Zolo
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
 | 
			
		||||
    ///////////////////////////////////////////////////////////
 | 
			
		||||
    // The Cayley coeffs (unprec)
 | 
			
		||||
    ///////////////////////////////////////////////////////////
 | 
			
		||||
    omega.resize(Ls);
 | 
			
		||||
    bs.resize(Ls);
 | 
			
		||||
    cs.resize(Ls);
 | 
			
		||||
    as.resize(Ls);
 | 
			
		||||
  ///////////////////////////////////////////////////////////
 | 
			
		||||
  // The Cayley coeffs (unprec)
 | 
			
		||||
  ///////////////////////////////////////////////////////////
 | 
			
		||||
  omega.resize(Ls);
 | 
			
		||||
  bs.resize(Ls);
 | 
			
		||||
  cs.resize(Ls);
 | 
			
		||||
  as.resize(Ls);
 | 
			
		||||
  
 | 
			
		||||
  // 
 | 
			
		||||
  // Ts = (    [bs+cs]Dw        )^-1 (    (bs+cs) Dw         )
 | 
			
		||||
  //     -(g5  -------       -1 )    ( g5 ---------     + 1  )
 | 
			
		||||
  //      (   {2+(bs-cs)Dw}     )    (    2+(bs-cs) Dw       )
 | 
			
		||||
  //
 | 
			
		||||
  //  bs = 1/2( (1/omega_s + 1)*b + (1/omega - 1)*c ) = 1/2(  1/omega(b+c) + (b-c) )
 | 
			
		||||
  //  cs = 1/2( (1/omega_s - 1)*b + (1/omega + 1)*c ) = 1/2(  1/omega(b+c) - (b-c) )
 | 
			
		||||
  //
 | 
			
		||||
  // bs+cs = 0.5*( 1/omega(b+c) + (b-c) + 1/omega(b+c) - (b-c) ) = 1/omega(b+c)
 | 
			
		||||
  // bs-cs = 0.5*( 1/omega(b+c) + (b-c) - 1/omega(b+c) + (b-c) ) = b-c
 | 
			
		||||
  //
 | 
			
		||||
  // So 
 | 
			
		||||
  //
 | 
			
		||||
  // Ts = (    [b+c]Dw/omega_s    )^-1 (    (b+c) Dw /omega_s        )
 | 
			
		||||
  //     -(g5  -------         -1 )    ( g5 ---------           + 1  )
 | 
			
		||||
  //      (   {2+(b-c)Dw}         )    (    2+(b-c) Dw               )
 | 
			
		||||
  //
 | 
			
		||||
  // Ts = (    [b+c]Dw            )^-1 (    (b+c) Dw                 )
 | 
			
		||||
  //     -(g5  -------    -omega_s)    ( g5 ---------      + omega_s )
 | 
			
		||||
  //      (   {2+(b-c)Dw}         )    (    2+(b-c) Dw               )
 | 
			
		||||
  // 
 | 
			
		||||
    
 | 
			
		||||
    // 
 | 
			
		||||
    // Ts = (    [bs+cs]Dw        )^-1 (    (bs+cs) Dw         )
 | 
			
		||||
    //     -(g5  -------       -1 )    ( g5 ---------     + 1  )
 | 
			
		||||
    //      (   {2+(bs-cs)Dw}     )    (    2+(bs-cs) Dw       )
 | 
			
		||||
    //
 | 
			
		||||
    //  bs = 1/2( (1/omega_s + 1)*b + (1/omega - 1)*c ) = 1/2(  1/omega(b+c) + (b-c) )
 | 
			
		||||
    //  cs = 1/2( (1/omega_s - 1)*b + (1/omega + 1)*c ) = 1/2(  1/omega(b+c) - (b-c) )
 | 
			
		||||
    //
 | 
			
		||||
    // bs+cs = 0.5*( 1/omega(b+c) + (b-c) + 1/omega(b+c) - (b-c) ) = 1/omega(b+c)
 | 
			
		||||
    // bs-cs = 0.5*( 1/omega(b+c) + (b-c) - 1/omega(b+c) + (b-c) ) = b-c
 | 
			
		||||
    //
 | 
			
		||||
    // So 
 | 
			
		||||
    //
 | 
			
		||||
    // Ts = (    [b+c]Dw/omega_s    )^-1 (    (b+c) Dw /omega_s        )
 | 
			
		||||
    //     -(g5  -------         -1 )    ( g5 ---------           + 1  )
 | 
			
		||||
    //      (   {2+(b-c)Dw}         )    (    2+(b-c) Dw               )
 | 
			
		||||
    //
 | 
			
		||||
    // Ts = (    [b+c]Dw            )^-1 (    (b+c) Dw                 )
 | 
			
		||||
    //     -(g5  -------    -omega_s)    ( g5 ---------      + omega_s )
 | 
			
		||||
    //      (   {2+(b-c)Dw}         )    (    2+(b-c) Dw               )
 | 
			
		||||
    // 
 | 
			
		||||
  double bpc = b+c;
 | 
			
		||||
  double bmc = b-c;
 | 
			
		||||
  for(int i=0; i < Ls; i++){
 | 
			
		||||
    as[i] = 1.0;
 | 
			
		||||
    omega[i] = ((double)zdata->gamma[i])*zolo_hi; //NB reciprocal relative to Chroma NEF code
 | 
			
		||||
    bs[i] = 0.5*(bpc/omega[i] + bmc);
 | 
			
		||||
    cs[i] = 0.5*(bpc/omega[i] - bmc);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  ////////////////////////////////////////////////////////
 | 
			
		||||
  // Constants for the preconditioned matrix Cayley form
 | 
			
		||||
  ////////////////////////////////////////////////////////
 | 
			
		||||
  bee.resize(Ls);
 | 
			
		||||
  cee.resize(Ls);
 | 
			
		||||
  beo.resize(Ls);
 | 
			
		||||
  ceo.resize(Ls);
 | 
			
		||||
  
 | 
			
		||||
  for(int i=0;i<Ls;i++){
 | 
			
		||||
    bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);
 | 
			
		||||
    cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5));
 | 
			
		||||
    beo[i]=as[i]*bs[i];
 | 
			
		||||
    ceo[i]=-as[i]*cs[i];
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  aee.resize(Ls);
 | 
			
		||||
  aeo.resize(Ls);
 | 
			
		||||
  for(int i=0;i<Ls;i++){
 | 
			
		||||
    aee[i]=cee[i];
 | 
			
		||||
    aeo[i]=ceo[i];
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  //////////////////////////////////////////
 | 
			
		||||
  // LDU decomposition of eeoo
 | 
			
		||||
  //////////////////////////////////////////
 | 
			
		||||
  dee.resize(Ls);
 | 
			
		||||
  lee.resize(Ls);
 | 
			
		||||
  leem.resize(Ls);
 | 
			
		||||
  uee.resize(Ls);
 | 
			
		||||
  ueem.resize(Ls);
 | 
			
		||||
  
 | 
			
		||||
  for(int i=0;i<Ls;i++){
 | 
			
		||||
    
 | 
			
		||||
    double bpc = b+c;
 | 
			
		||||
    double bmc = b-c;
 | 
			
		||||
    for(int i=0; i < Ls; i++){
 | 
			
		||||
      as[i] = 1.0;
 | 
			
		||||
      omega[i] = ((double)zdata->gamma[i])*zolo_hi; //NB reciprocal relative to Chroma NEF code
 | 
			
		||||
      bs[i] = 0.5*(bpc/omega[i] + bmc);
 | 
			
		||||
      cs[i] = 0.5*(bpc/omega[i] - bmc);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////
 | 
			
		||||
    // Constants for the preconditioned matrix Cayley form
 | 
			
		||||
    ////////////////////////////////////////////////////////
 | 
			
		||||
    bee.resize(Ls);
 | 
			
		||||
    cee.resize(Ls);
 | 
			
		||||
    beo.resize(Ls);
 | 
			
		||||
    ceo.resize(Ls);
 | 
			
		||||
    dee[i] = bee[i];
 | 
			
		||||
    
 | 
			
		||||
    for(int i=0;i<Ls;i++){
 | 
			
		||||
      bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);
 | 
			
		||||
      cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5));
 | 
			
		||||
      beo[i]=as[i]*bs[i];
 | 
			
		||||
      ceo[i]=-as[i]*cs[i];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    aee.resize(Ls);
 | 
			
		||||
    aeo.resize(Ls);
 | 
			
		||||
    for(int i=0;i<Ls;i++){
 | 
			
		||||
      aee[i]=cee[i];
 | 
			
		||||
      aeo[i]=ceo[i];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////
 | 
			
		||||
    // LDU decomposition of eeoo
 | 
			
		||||
    //////////////////////////////////////////
 | 
			
		||||
    dee.resize(Ls);
 | 
			
		||||
    lee.resize(Ls);
 | 
			
		||||
    leem.resize(Ls);
 | 
			
		||||
    uee.resize(Ls);
 | 
			
		||||
    ueem.resize(Ls);
 | 
			
		||||
    
 | 
			
		||||
    for(int i=0;i<Ls;i++){
 | 
			
		||||
    if ( i < Ls-1 ) {
 | 
			
		||||
      
 | 
			
		||||
      dee[i] = bee[i];
 | 
			
		||||
      lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column
 | 
			
		||||
      
 | 
			
		||||
      if ( i < Ls-1 ) {
 | 
			
		||||
	
 | 
			
		||||
	lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column
 | 
			
		||||
	    
 | 
			
		||||
	leem[i]=mass*cee[Ls-1]/bee[0];
 | 
			
		||||
	for(int j=0;j<i;j++)  leem[i]*= aee[j]/bee[j+1];
 | 
			
		||||
	
 | 
			
		||||
	uee[i] =-aee[i]/bee[i];   // up-diag entry on the ith row
 | 
			
		||||
	
 | 
			
		||||
	ueem[i]=mass;
 | 
			
		||||
	for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j];
 | 
			
		||||
	ueem[i]*= aee[0]/bee[0];
 | 
			
		||||
	    
 | 
			
		||||
      } else { 
 | 
			
		||||
	lee[i] =0.0;
 | 
			
		||||
	leem[i]=0.0;
 | 
			
		||||
	uee[i] =0.0;
 | 
			
		||||
	ueem[i]=0.0;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
	
 | 
			
		||||
    { 
 | 
			
		||||
      double delta_d=mass*cee[Ls-1];
 | 
			
		||||
      for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
 | 
			
		||||
      dee[Ls-1] += delta_d;
 | 
			
		||||
      leem[i]=mass*cee[Ls-1]/bee[0];
 | 
			
		||||
      for(int j=0;j<i;j++)  leem[i]*= aee[j]/bee[j+1];
 | 
			
		||||
      
 | 
			
		||||
      uee[i] =-aee[i]/bee[i];   // up-diag entry on the ith row
 | 
			
		||||
      
 | 
			
		||||
      ueem[i]=mass;
 | 
			
		||||
      for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j];
 | 
			
		||||
      ueem[i]*= aee[0]/bee[0];
 | 
			
		||||
      
 | 
			
		||||
    } else { 
 | 
			
		||||
      lee[i] =0.0;
 | 
			
		||||
      leem[i]=0.0;
 | 
			
		||||
      uee[i] =0.0;
 | 
			
		||||
      ueem[i]=0.0;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
	
 | 
			
		||||
  { 
 | 
			
		||||
    double delta_d=mass*cee[Ls-1];
 | 
			
		||||
    for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
 | 
			
		||||
    dee[Ls-1] += delta_d;
 | 
			
		||||
  }  
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  FermOpTemplateInstantiate(CayleyFermion5D);
 | 
			
		||||
  GparityFermOpTemplateInstantiate(CayleyFermion5D);
 | 
			
		||||
 
 | 
			
		||||
@@ -51,6 +51,29 @@ namespace Grid {
 | 
			
		||||
      virtual void   MooeeDag    (const FermionField &in, FermionField &out);
 | 
			
		||||
      virtual void   MooeeInv    (const FermionField &in, FermionField &out);
 | 
			
		||||
      virtual void   MooeeInvDag (const FermionField &in, FermionField &out);
 | 
			
		||||
      virtual void   Meo5D (const FermionField &psi, FermionField &chi);
 | 
			
		||||
 | 
			
		||||
      virtual void   M5D   (const FermionField &psi, FermionField &chi);
 | 
			
		||||
      virtual void   M5Ddag(const FermionField &psi, FermionField &chi);
 | 
			
		||||
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      // Instantiate different versions depending on Impl
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      void M5D(const FermionField &psi,
 | 
			
		||||
	       const FermionField &phi, 
 | 
			
		||||
	       FermionField &chi,
 | 
			
		||||
	       std::vector<RealD> &lower,
 | 
			
		||||
	       std::vector<RealD> &diag,
 | 
			
		||||
	       std::vector<RealD> &upper);
 | 
			
		||||
 | 
			
		||||
      void M5Ddag(const FermionField &psi,
 | 
			
		||||
		  const FermionField &phi, 
 | 
			
		||||
		  FermionField &chi,
 | 
			
		||||
		  std::vector<RealD> &lower,
 | 
			
		||||
		  std::vector<RealD> &diag,
 | 
			
		||||
		  std::vector<RealD> &upper);
 | 
			
		||||
      void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv);
 | 
			
		||||
 | 
			
		||||
      virtual void   Instantiatable(void)=0;
 | 
			
		||||
 | 
			
		||||
      // force terms; five routines; default to Dhop on diagonal
 | 
			
		||||
@@ -94,6 +117,8 @@ namespace Grid {
 | 
			
		||||
		      GridRedBlackCartesian &FourDimRedBlackGrid,
 | 
			
		||||
		      RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    protected:
 | 
			
		||||
      void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
 | 
			
		||||
      void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
 | 
			
		||||
@@ -101,5 +126,15 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
// Explicit-instantiation helper. For an implementation type A this expands to
// explicit instantiations of the CayleyFermion5D<A> members M5D, M5Ddag,
// MooeeInv and MooeeInvDag. The final statement carries its own semicolon.
#define INSTANTIATE_DPERP(A)							\
template void CayleyFermion5D< A >::M5D(const FermionField &psi,		\
					const FermionField &phi,		\
					FermionField &chi,			\
					std::vector<RealD> &lower,		\
					std::vector<RealD> &diag,		\
					std::vector<RealD> &upper);		\
template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,		\
					   const FermionField &phi,		\
					   FermionField &chi,			\
					   std::vector<RealD> &lower,		\
					   std::vector<RealD> &diag,		\
					   std::vector<RealD> &upper);		\
template void CayleyFermion5D< A >::MooeeInv    (const FermionField &psi, FermionField &chi); \
template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi);
 | 
			
		||||
 | 
			
		||||
#define CAYLEY_DPERP_CACHE
 | 
			
		||||
#undef  CAYLEY_DPERP_LINALG
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										209
									
								
								lib/qcd/action/fermion/CayleyFermion5Dcache.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										209
									
								
								lib/qcd/action/fermion/CayleyFermion5Dcache.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,209 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/qcd/action/fermion/CayleyFermion5Dcache.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  // FIXME -- make a version of these routines with site loop outermost for cache reuse.
 | 
			
		||||
 | 
			
		||||
  // Pminus forwards
 | 
			
		||||
  // Pplus  backwards..
 | 
			
		||||
template<class Impl>  
 | 
			
		||||
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
 | 
			
		||||
				const FermionField &phi, 
 | 
			
		||||
				FermionField &chi,
 | 
			
		||||
				std::vector<RealD> &lower,
 | 
			
		||||
				std::vector<RealD> &diag,
 | 
			
		||||
				std::vector<RealD> &upper)
 | 
			
		||||
{
 | 
			
		||||
  int Ls =this->Ls;
 | 
			
		||||
  GridBase *grid=psi._grid;
 | 
			
		||||
  assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
  chi.checkerboard=psi.checkerboard;
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      auto tmp = psi._odata[0];
 | 
			
		||||
      if ( s==0 ) {
 | 
			
		||||
 	                            spProj5m(tmp,psi._odata[ss+s+1]);
 | 
			
		||||
	chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
 | 
			
		||||
 | 
			
		||||
	                    spProj5p(tmp,psi._odata[ss+Ls-1]);
 | 
			
		||||
	chi[ss+s]=chi[ss+s]+lower[s]*tmp;
 | 
			
		||||
      } else if ( s==(Ls-1)) {
 | 
			
		||||
	                            spProj5m(tmp,psi._odata[ss+0]);
 | 
			
		||||
	chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
 | 
			
		||||
 | 
			
		||||
 	                    spProj5p(tmp,psi._odata[ss+s-1]);
 | 
			
		||||
	chi[ss+s]=chi[ss+s]+lower[s]*tmp;
 | 
			
		||||
      } else { 
 | 
			
		||||
	                            spProj5m(tmp,psi._odata[ss+s+1]);
 | 
			
		||||
	chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
 | 
			
		||||
 | 
			
		||||
	                    spProj5p(tmp,psi._odata[ss+s-1]);
 | 
			
		||||
	chi[ss+s]=chi[ss+s]+lower[s]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl>  
 | 
			
		||||
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
 | 
			
		||||
				   const FermionField &phi, 
 | 
			
		||||
				   FermionField &chi,
 | 
			
		||||
				   std::vector<RealD> &lower,
 | 
			
		||||
				   std::vector<RealD> &diag,
 | 
			
		||||
				   std::vector<RealD> &upper)
 | 
			
		||||
{
 | 
			
		||||
  int Ls =this->Ls;
 | 
			
		||||
  GridBase *grid=psi._grid;
 | 
			
		||||
  assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
  chi.checkerboard=psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
 | 
			
		||||
    auto tmp = psi._odata[0];
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      if ( s==0 ) {
 | 
			
		||||
	spProj5p(tmp,psi._odata[ss+s+1]);
 | 
			
		||||
	chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
 | 
			
		||||
 | 
			
		||||
	spProj5m(tmp,psi._odata[ss+Ls-1]);
 | 
			
		||||
	chi[ss+s]=chi[ss+s]+lower[s]*tmp;
 | 
			
		||||
      } else if ( s==(Ls-1)) {
 | 
			
		||||
	spProj5p(tmp,psi._odata[ss+0]);
 | 
			
		||||
	chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
 | 
			
		||||
 | 
			
		||||
	spProj5m(tmp,psi._odata[ss+s-1]);
 | 
			
		||||
	chi[ss+s]=chi[ss+s]+lower[s]*tmp;
 | 
			
		||||
      } else { 
 | 
			
		||||
	spProj5p(tmp,psi._odata[ss+s+1]);
 | 
			
		||||
	chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
 | 
			
		||||
 | 
			
		||||
	spProj5m(tmp,psi._odata[ss+s-1]);
 | 
			
		||||
	chi[ss+s]=chi[ss+s]+lower[s]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MooeeInv    (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  GridBase *grid=psi._grid;
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
 | 
			
		||||
  chi.checkerboard=psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
 | 
			
		||||
    auto tmp = psi._odata[0];
 | 
			
		||||
 | 
			
		||||
    // Apply (L^{\prime})^{-1}
 | 
			
		||||
    chi[ss]=psi[ss]; // chi[0]=psi[0]
 | 
			
		||||
    for(int s=1;s<Ls;s++){
 | 
			
		||||
                            spProj5p(tmp,chi[ss+s-1]);  
 | 
			
		||||
      chi[ss+s] = psi[ss+s]-lee[s-1]*tmp;
 | 
			
		||||
    }
 | 
			
		||||
    // L_m^{-1} 
 | 
			
		||||
    for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
 | 
			
		||||
                                   spProj5m(tmp,chi[ss+s]);    
 | 
			
		||||
      chi[ss+Ls-1] = chi[ss+Ls-1] - leem[s]*tmp;
 | 
			
		||||
    }
 | 
			
		||||
    // U_m^{-1} D^{-1}
 | 
			
		||||
    for (int s=0;s<Ls-1;s++){
 | 
			
		||||
      // Chi[s] + 1/d chi[s] 
 | 
			
		||||
                                                spProj5p(tmp,chi[ss+Ls-1]); 
 | 
			
		||||
      chi[ss+s] = (1.0/dee[s])*chi[ss+s]-(ueem[s]/dee[Ls-1])*tmp;
 | 
			
		||||
    }	
 | 
			
		||||
    chi[ss+Ls-1]= (1.0/dee[Ls-1])*chi[ss+Ls-1];
 | 
			
		||||
      
 | 
			
		||||
    // Apply U^{-1}
 | 
			
		||||
    for (int s=Ls-2;s>=0;s--){
 | 
			
		||||
                            spProj5m(tmp,chi[ss+s+1]);  
 | 
			
		||||
      chi[ss+s] = chi[ss+s] - uee[s]*tmp;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  GridBase *grid=psi._grid;
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
 | 
			
		||||
  assert(psi.checkerboard == psi.checkerboard);
 | 
			
		||||
  chi.checkerboard=psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
 | 
			
		||||
 | 
			
		||||
    auto tmp = psi._odata[0];
 | 
			
		||||
 | 
			
		||||
    // Apply (U^{\prime})^{-dagger}
 | 
			
		||||
    chi[ss]=psi[ss];
 | 
			
		||||
    for (int s=1;s<Ls;s++){
 | 
			
		||||
                            spProj5m(tmp,chi[ss+s-1]);
 | 
			
		||||
      chi[ss+s] = psi[ss+s]-uee[s-1]*tmp;
 | 
			
		||||
    }
 | 
			
		||||
    // U_m^{-\dagger} 
 | 
			
		||||
    for (int s=0;s<Ls-1;s++){
 | 
			
		||||
                                   spProj5p(tmp,chi[ss+s]);
 | 
			
		||||
      chi[ss+Ls-1] = chi[ss+Ls-1] - ueem[s]*tmp;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // L_m^{-\dagger} D^{-dagger}
 | 
			
		||||
    for (int s=0;s<Ls-1;s++){
 | 
			
		||||
      spProj5m(tmp,chi[ss+Ls-1]);
 | 
			
		||||
      chi[ss+s] = (1.0/dee[s])*chi[ss+s]-(leem[s]/dee[Ls-1])*tmp;
 | 
			
		||||
    }	
 | 
			
		||||
    chi[ss+Ls-1]= (1.0/dee[Ls-1])*chi[ss+Ls-1];
 | 
			
		||||
  
 | 
			
		||||
    // Apply L^{-dagger}
 | 
			
		||||
    for (int s=Ls-2;s>=0;s--){
 | 
			
		||||
      spProj5p(tmp,chi[ss+s+1]);
 | 
			
		||||
      chi[ss+s] = chi[ss+s] - lee[s]*tmp;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#ifdef CAYLEY_DPERP_CACHE
 | 
			
		||||
  INSTANTIATE_DPERP(WilsonImplF);
 | 
			
		||||
  INSTANTIATE_DPERP(WilsonImplD);
 | 
			
		||||
  INSTANTIATE_DPERP(GparityWilsonImplF);
 | 
			
		||||
  INSTANTIATE_DPERP(GparityWilsonImplD);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
							
								
								
									
										133
									
								
								lib/qcd/action/fermion/CayleyFermion5Ddense.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										133
									
								
								lib/qcd/action/fermion/CayleyFermion5Ddense.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,133 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/qcd/action/fermion/CayleyFermion5Ddense.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/Eigen/Dense>
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
  /*
 | 
			
		||||
   * Dense matrix versions of routines
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  this->MooeeInternal(psi,chi,DaggerYes,InverseYes);
 | 
			
		||||
}
 | 
			
		||||
  
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  this->MooeeInternal(psi,chi,DaggerNo,InverseYes);
 | 
			
		||||
}
 | 
			
		||||
  */
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  int LLs = psi._grid->_rdimensions[0];
 | 
			
		||||
  int vol = psi._grid->oSites()/LLs;
 | 
			
		||||
  
 | 
			
		||||
  chi.checkerboard=psi.checkerboard;
 | 
			
		||||
  
 | 
			
		||||
  assert(Ls==LLs);
 | 
			
		||||
  
 | 
			
		||||
  Eigen::MatrixXd Pplus  = Eigen::MatrixXd::Zero(Ls,Ls);
 | 
			
		||||
  Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
 | 
			
		||||
  
 | 
			
		||||
  for(int s=0;s<Ls;s++){
 | 
			
		||||
    Pplus(s,s) = bee[s];
 | 
			
		||||
    Pminus(s,s)= bee[s];
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  for(int s=0;s<Ls-1;s++){
 | 
			
		||||
    Pminus(s,s+1) = -cee[s];
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  for(int s=0;s<Ls-1;s++){
 | 
			
		||||
    Pplus(s+1,s) = -cee[s+1];
 | 
			
		||||
  }
 | 
			
		||||
  Pplus (0,Ls-1) = mass*cee[0];
 | 
			
		||||
  Pminus(Ls-1,0) = mass*cee[Ls-1];
 | 
			
		||||
  
 | 
			
		||||
  Eigen::MatrixXd PplusMat ;
 | 
			
		||||
  Eigen::MatrixXd PminusMat;
 | 
			
		||||
  
 | 
			
		||||
  if ( inv ) {
 | 
			
		||||
    PplusMat =Pplus.inverse();
 | 
			
		||||
    PminusMat=Pminus.inverse();
 | 
			
		||||
  } else { 
 | 
			
		||||
    PplusMat =Pplus;
 | 
			
		||||
    PminusMat=Pminus;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  if(dag){
 | 
			
		||||
    PplusMat.adjointInPlace();
 | 
			
		||||
    PminusMat.adjointInPlace();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // For the non-vectorised s-direction this is simple
 | 
			
		||||
  
 | 
			
		||||
  for(auto site=0;site<vol;site++){
 | 
			
		||||
    
 | 
			
		||||
    SiteSpinor     SiteChi;
 | 
			
		||||
    SiteHalfSpinor SitePplus;
 | 
			
		||||
    SiteHalfSpinor SitePminus;
 | 
			
		||||
    
 | 
			
		||||
    for(int s1=0;s1<Ls;s1++){
 | 
			
		||||
      SiteChi =zero;
 | 
			
		||||
      for(int s2=0;s2<Ls;s2++){
 | 
			
		||||
	int lex2 = s2+Ls*site;
 | 
			
		||||
	
 | 
			
		||||
	if ( PplusMat(s1,s2) != 0.0 ) {
 | 
			
		||||
	  spProj5p(SitePplus,psi[lex2]);
 | 
			
		||||
	  accumRecon5p(SiteChi,PplusMat (s1,s2)*SitePplus);
 | 
			
		||||
	}
 | 
			
		||||
	
 | 
			
		||||
	if ( PminusMat(s1,s2) != 0.0 ) {
 | 
			
		||||
	  spProj5m(SitePminus,psi[lex2]);
 | 
			
		||||
	  accumRecon5m(SiteChi,PminusMat(s1,s2)*SitePminus);
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
      chi[s1+Ls*site] = SiteChi*0.5;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Explicit instantiations of the dense MooeeInternal for the G-parity and
// plain Wilson implementations, in single (F) and double (D) precision.
template void CayleyFermion5D<GparityWilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi, int dag, int inv);
template void CayleyFermion5D<GparityWilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi, int dag, int inv);
template void CayleyFermion5D<WilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi, int dag, int inv);
template void CayleyFermion5D<WilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
							
								
								
									
										149
									
								
								lib/qcd/action/fermion/CayleyFermion5Dssp.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										149
									
								
								lib/qcd/action/fermion/CayleyFermion5Dssp.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,149 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/qcd/action/fermion/CayleyFermion5Dssp.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  // FIXME -- make a version of these routines with site loop outermost for cache reuse.
 | 
			
		||||
 | 
			
		||||
  // Pminus forwards
 | 
			
		||||
  // Pplus  backwards
 | 
			
		||||
template<class Impl>  
 | 
			
		||||
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
 | 
			
		||||
				const FermionField &phi, 
 | 
			
		||||
				FermionField &chi,
 | 
			
		||||
				std::vector<RealD> &lower,
 | 
			
		||||
				std::vector<RealD> &diag,
 | 
			
		||||
				std::vector<RealD> &upper)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  for(int s=0;s<Ls;s++){
 | 
			
		||||
    if ( s==0 ) {
 | 
			
		||||
      axpby_ssp_pminus(chi,diag[s],phi,upper[s],psi,s,s+1);
 | 
			
		||||
      axpby_ssp_pplus (chi,1.0,chi,lower[s],psi,s,Ls-1);
 | 
			
		||||
    } else if ( s==(Ls-1)) { 
 | 
			
		||||
      axpby_ssp_pminus(chi,diag[s],phi,upper[s],psi,s,0);
 | 
			
		||||
      axpby_ssp_pplus (chi,1.0,chi,lower[s],psi,s,s-1);
 | 
			
		||||
    } else {
 | 
			
		||||
      axpby_ssp_pminus(chi,diag[s],phi,upper[s],psi,s,s+1);
 | 
			
		||||
      axpby_ssp_pplus(chi,1.0,chi,lower[s],psi,s,s-1);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class Impl>  
 | 
			
		||||
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
 | 
			
		||||
				   const FermionField &phi, 
 | 
			
		||||
				   FermionField &chi,
 | 
			
		||||
				   std::vector<RealD> &lower,
 | 
			
		||||
				   std::vector<RealD> &diag,
 | 
			
		||||
				   std::vector<RealD> &upper)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  for(int s=0;s<Ls;s++){
 | 
			
		||||
    if ( s==0 ) {
 | 
			
		||||
      axpby_ssp_pplus (chi,diag[s],phi,upper[s],psi,s,s+1);
 | 
			
		||||
      axpby_ssp_pminus(chi,1.0,chi,lower[s],psi,s,Ls-1);
 | 
			
		||||
    } else if ( s==(Ls-1)) { 
 | 
			
		||||
      axpby_ssp_pplus (chi,diag[s],phi,upper[s],psi,s,0);
 | 
			
		||||
      axpby_ssp_pminus(chi,1.0,chi,lower[s],psi,s,s-1);
 | 
			
		||||
    } else {
 | 
			
		||||
      axpby_ssp_pplus (chi,diag[s],phi,upper[s],psi,s,s+1);
 | 
			
		||||
      axpby_ssp_pminus(chi,1.0,chi,lower[s],psi,s,s-1);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MooeeInv    (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  chi.checkerboard=psi.checkerboard;
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  // Apply (L^{\prime})^{-1}
 | 
			
		||||
  axpby_ssp (chi,1.0,psi,     0.0,psi,0,0);      // chi[0]=psi[0]
 | 
			
		||||
  for (int s=1;s<Ls;s++){
 | 
			
		||||
    axpby_ssp_pplus(chi,1.0,psi,-lee[s-1],chi,s,s-1);// recursion Psi[s] -lee P_+ chi[s-1]
 | 
			
		||||
  }
 | 
			
		||||
  // L_m^{-1} 
 | 
			
		||||
  for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
 | 
			
		||||
    axpby_ssp_pminus(chi,1.0,chi,-leem[s],chi,Ls-1,s);
 | 
			
		||||
  }
 | 
			
		||||
  // U_m^{-1} D^{-1}
 | 
			
		||||
  for (int s=0;s<Ls-1;s++){
 | 
			
		||||
    // Chi[s] + 1/d chi[s] 
 | 
			
		||||
    axpby_ssp_pplus(chi,1.0/dee[s],chi,-ueem[s]/dee[Ls-1],chi,s,Ls-1);
 | 
			
		||||
  }	
 | 
			
		||||
  axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable 
 | 
			
		||||
  
 | 
			
		||||
  // Apply U^{-1}
 | 
			
		||||
  for (int s=Ls-2;s>=0;s--){
 | 
			
		||||
    axpby_ssp_pminus (chi,1.0,chi,-uee[s],chi,s,s+1);  // chi[Ls]
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  chi.checkerboard=psi.checkerboard;
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  // Apply (U^{\prime})^{-dagger}
 | 
			
		||||
  axpby_ssp (chi,1.0,psi,     0.0,psi,0,0);      // chi[0]=psi[0]
 | 
			
		||||
  for (int s=1;s<Ls;s++){
 | 
			
		||||
    axpby_ssp_pminus(chi,1.0,psi,-uee[s-1],chi,s,s-1);
 | 
			
		||||
  }
 | 
			
		||||
  // U_m^{-\dagger} 
 | 
			
		||||
  for (int s=0;s<Ls-1;s++){
 | 
			
		||||
    axpby_ssp_pplus(chi,1.0,chi,-ueem[s],chi,Ls-1,s);
 | 
			
		||||
  }
 | 
			
		||||
  // L_m^{-\dagger} D^{-dagger}
 | 
			
		||||
  for (int s=0;s<Ls-1;s++){
 | 
			
		||||
    axpby_ssp_pminus(chi,1.0/dee[s],chi,-leem[s]/dee[Ls-1],chi,s,Ls-1);
 | 
			
		||||
  }	
 | 
			
		||||
  axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable 
 | 
			
		||||
  
 | 
			
		||||
  // Apply L^{-dagger}
 | 
			
		||||
  for (int s=Ls-2;s>=0;s--){
 | 
			
		||||
    axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1);  // chi[Ls]
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// These instantiations are compiled only when CAYLEY_DPERP_LINALG is defined.
// INSTANTIATE is a macro defined outside this file.
#ifdef CAYLEY_DPERP_LINALG
  INSTANTIATE(WilsonImplF);
  INSTANTIATE(WilsonImplD);
  INSTANTIATE(GparityWilsonImplF);
  INSTANTIATE(GparityWilsonImplD);
#endif
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										305
									
								
								lib/qcd/action/fermion/CayleyFermion5Dvec.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										305
									
								
								lib/qcd/action/fermion/CayleyFermion5Dvec.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,305 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/qcd/action/fermion/CayleyFermion5Dvec.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/Eigen/Dense>
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
  /*
 | 
			
		||||
   * Dense matrix versions of routines
 | 
			
		||||
   */
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  this->MooeeInternal(psi,chi,DaggerYes,InverseYes);
 | 
			
		||||
}
 | 
			
		||||
  
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  this->MooeeInternal(psi,chi,DaggerNo,InverseYes);
 | 
			
		||||
}
 | 
			
		||||
template<class Impl>  
 | 
			
		||||
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
 | 
			
		||||
				const FermionField &phi, 
 | 
			
		||||
				FermionField &chi,
 | 
			
		||||
				std::vector<RealD> &lower,
 | 
			
		||||
				std::vector<RealD> &diag,
 | 
			
		||||
				std::vector<RealD> &upper)
 | 
			
		||||
{
 | 
			
		||||
  GridBase *grid=psi._grid;
 | 
			
		||||
  int Ls   = this->Ls;
 | 
			
		||||
  int LLs  = grid->_rdimensions[0];
 | 
			
		||||
  int nsimd= Simd::Nsimd();
 | 
			
		||||
 | 
			
		||||
  Vector<iSinglet<Simd> > u(LLs);
 | 
			
		||||
  Vector<iSinglet<Simd> > l(LLs);
 | 
			
		||||
  Vector<iSinglet<Simd> > d(LLs);
 | 
			
		||||
 | 
			
		||||
  assert(Ls/LLs==nsimd);
 | 
			
		||||
  assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
 | 
			
		||||
  chi.checkerboard=psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
  // just directly address via type pun
 | 
			
		||||
  typedef typename Simd::scalar_type scalar_type;
 | 
			
		||||
  scalar_type * u_p = (scalar_type *)&u[0];
 | 
			
		||||
  scalar_type * l_p = (scalar_type *)&l[0];
 | 
			
		||||
  scalar_type * d_p = (scalar_type *)&d[0];
 | 
			
		||||
 | 
			
		||||
  for(int o=0;o<LLs;o++){ // outer
 | 
			
		||||
  for(int i=0;i<nsimd;i++){ //inner
 | 
			
		||||
    int s  = o+i*LLs;
 | 
			
		||||
    int ss = o*nsimd+i;
 | 
			
		||||
    u_p[ss] = upper[s];
 | 
			
		||||
    l_p[ss] = lower[s];
 | 
			
		||||
    d_p[ss] = diag[s];
 | 
			
		||||
  }}
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs
 | 
			
		||||
 | 
			
		||||
    alignas(64) SiteHalfSpinor hp;
 | 
			
		||||
    alignas(64) SiteHalfSpinor hm;
 | 
			
		||||
    alignas(64) SiteSpinor fp;
 | 
			
		||||
    alignas(64) SiteSpinor fm;
 | 
			
		||||
 | 
			
		||||
    for(int v=0;v<LLs;v++){
 | 
			
		||||
 | 
			
		||||
      int vp=(v+1)%LLs;
 | 
			
		||||
      int vm=(v+LLs-1)%LLs;
 | 
			
		||||
 | 
			
		||||
      spProj5m(hp,psi[ss+vp]);
 | 
			
		||||
      spProj5p(hm,psi[ss+vm]);
 | 
			
		||||
      
 | 
			
		||||
      if ( vp<=v ) rotate(hp,hp,1);
 | 
			
		||||
      if ( vm>=v ) rotate(hm,hm,nsimd-1);
 | 
			
		||||
 | 
			
		||||
      hp=hp*0.5;
 | 
			
		||||
      hm=hm*0.5;
 | 
			
		||||
      spRecon5m(fp,hp);
 | 
			
		||||
      spRecon5p(fm,hm);
 | 
			
		||||
 | 
			
		||||
      chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
 | 
			
		||||
      chi[ss+v] = chi[ss+v]     +l[v]*fm;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl>  
 | 
			
		||||
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
 | 
			
		||||
				   const FermionField &phi, 
 | 
			
		||||
				   FermionField &chi,
 | 
			
		||||
				   std::vector<RealD> &lower,
 | 
			
		||||
				   std::vector<RealD> &diag,
 | 
			
		||||
				   std::vector<RealD> &upper)
 | 
			
		||||
{
 | 
			
		||||
  GridBase *grid=psi._grid;
 | 
			
		||||
  int Ls   = this->Ls;
 | 
			
		||||
  int LLs  = grid->_rdimensions[0];
 | 
			
		||||
  int nsimd= Simd::Nsimd();
 | 
			
		||||
 | 
			
		||||
  Vector<iSinglet<Simd> > u(LLs);
 | 
			
		||||
  Vector<iSinglet<Simd> > l(LLs);
 | 
			
		||||
  Vector<iSinglet<Simd> > d(LLs);
 | 
			
		||||
 | 
			
		||||
  assert(Ls/LLs==nsimd);
 | 
			
		||||
  assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
 | 
			
		||||
  chi.checkerboard=psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
  // just directly address via type pun
 | 
			
		||||
  typedef typename Simd::scalar_type scalar_type;
 | 
			
		||||
  scalar_type * u_p = (scalar_type *)&u[0];
 | 
			
		||||
  scalar_type * l_p = (scalar_type *)&l[0];
 | 
			
		||||
  scalar_type * d_p = (scalar_type *)&d[0];
 | 
			
		||||
 | 
			
		||||
  for(int o=0;o<LLs;o++){ // outer
 | 
			
		||||
  for(int i=0;i<nsimd;i++){ //inner
 | 
			
		||||
    int s  = o+i*LLs;
 | 
			
		||||
    int ss = o*nsimd+i;
 | 
			
		||||
    u_p[ss] = upper[s];
 | 
			
		||||
    l_p[ss] = lower[s];
 | 
			
		||||
    d_p[ss] = diag[s];
 | 
			
		||||
  }}
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs
 | 
			
		||||
 | 
			
		||||
    alignas(64) SiteHalfSpinor hp;
 | 
			
		||||
    alignas(64) SiteHalfSpinor hm;
 | 
			
		||||
    alignas(64) SiteSpinor fp;
 | 
			
		||||
    alignas(64) SiteSpinor fm;
 | 
			
		||||
 | 
			
		||||
    for(int v=0;v<LLs;v++){
 | 
			
		||||
 | 
			
		||||
      int vp=(v+1)%LLs;
 | 
			
		||||
      int vm=(v+LLs-1)%LLs;
 | 
			
		||||
 | 
			
		||||
      spProj5p(hp,psi[ss+vp]);
 | 
			
		||||
      spProj5m(hm,psi[ss+vm]);
 | 
			
		||||
 | 
			
		||||
      if ( vp<=v ) rotate(hp,hp,1);
 | 
			
		||||
      if ( vm>=v ) rotate(hm,hm,nsimd-1);
 | 
			
		||||
      
 | 
			
		||||
      hp=hp*0.5;
 | 
			
		||||
      hm=hm*0.5;
 | 
			
		||||
      spRecon5p(fp,hp);
 | 
			
		||||
      spRecon5m(fm,hm);
 | 
			
		||||
 | 
			
		||||
      chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
 | 
			
		||||
      chi[ss+v] = chi[ss+v]     +l[v]*fm;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  int LLs = psi._grid->_rdimensions[0];
 | 
			
		||||
  int vol = psi._grid->oSites()/LLs;
 | 
			
		||||
 | 
			
		||||
  chi.checkerboard=psi.checkerboard;
 | 
			
		||||
  
 | 
			
		||||
  Eigen::MatrixXd Pplus  = Eigen::MatrixXd::Zero(Ls,Ls);
 | 
			
		||||
  Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
 | 
			
		||||
  
 | 
			
		||||
  for(int s=0;s<Ls;s++){
 | 
			
		||||
    Pplus(s,s) = bee[s];
 | 
			
		||||
    Pminus(s,s)= bee[s];
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  for(int s=0;s<Ls-1;s++){
 | 
			
		||||
    Pminus(s,s+1) = -cee[s];
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  for(int s=0;s<Ls-1;s++){
 | 
			
		||||
    Pplus(s+1,s) = -cee[s+1];
 | 
			
		||||
  }
 | 
			
		||||
  Pplus (0,Ls-1) = mass*cee[0];
 | 
			
		||||
  Pminus(Ls-1,0) = mass*cee[Ls-1];
 | 
			
		||||
  
 | 
			
		||||
  Eigen::MatrixXd PplusMat ;
 | 
			
		||||
  Eigen::MatrixXd PminusMat;
 | 
			
		||||
  
 | 
			
		||||
  if ( inv ) {
 | 
			
		||||
    PplusMat =Pplus.inverse();
 | 
			
		||||
    PminusMat=Pminus.inverse();
 | 
			
		||||
  } else { 
 | 
			
		||||
    PplusMat =Pplus;
 | 
			
		||||
    PminusMat=Pminus;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  if(dag){
 | 
			
		||||
    PplusMat.adjointInPlace();
 | 
			
		||||
    PminusMat.adjointInPlace();
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  typedef typename SiteHalfSpinor::scalar_type scalar_type;
 | 
			
		||||
  const int Nsimd=Simd::Nsimd();
 | 
			
		||||
  Vector<iSinglet<Simd> > Matp(Ls*LLs);
 | 
			
		||||
  Vector<iSinglet<Simd> > Matm(Ls*LLs);
 | 
			
		||||
 | 
			
		||||
  for(int s2=0;s2<Ls;s2++){
 | 
			
		||||
  for(int s1=0;s1<LLs;s1++){
 | 
			
		||||
    int istride = LLs;
 | 
			
		||||
    int ostride = 1;
 | 
			
		||||
      Simd Vp;
 | 
			
		||||
      Simd Vm;
 | 
			
		||||
      scalar_type *sp = (scalar_type *)&Vp;
 | 
			
		||||
      scalar_type *sm = (scalar_type *)&Vm;
 | 
			
		||||
      for(int l=0;l<Nsimd;l++){
 | 
			
		||||
	sp[l] = PplusMat (l*istride+s1*ostride ,s2);
 | 
			
		||||
	sm[l] = PminusMat(l*istride+s1*ostride,s2);
 | 
			
		||||
      }
 | 
			
		||||
      Matp[LLs*s2+s1] = Vp;
 | 
			
		||||
      Matm[LLs*s2+s1] = Vm;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  // Dynamic allocate on stack to get per thread without serialised heap acces
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  for(auto site=0;site<vol;site++){
 | 
			
		||||
    
 | 
			
		||||
    //    SiteHalfSpinor *SitePplus =(SiteHalfSpinor *) alloca(LLs*sizeof(SiteHalfSpinor));
 | 
			
		||||
    //    SiteHalfSpinor *SitePminus=(SiteHalfSpinor *) alloca(LLs*sizeof(SiteHalfSpinor));
 | 
			
		||||
    //    SiteSpinor     *SiteChi   =(SiteSpinor *)     alloca(LLs*sizeof(SiteSpinor));
 | 
			
		||||
 | 
			
		||||
    Vector<SiteHalfSpinor> SitePplus(LLs);
 | 
			
		||||
    Vector<SiteHalfSpinor> SitePminus(LLs);
 | 
			
		||||
    Vector<SiteHalfSpinor> SiteChiP(LLs);
 | 
			
		||||
    Vector<SiteHalfSpinor> SiteChiM(LLs);
 | 
			
		||||
    Vector<SiteSpinor>     SiteChi(LLs);
 | 
			
		||||
 | 
			
		||||
    SiteHalfSpinor BcastP;
 | 
			
		||||
    SiteHalfSpinor BcastM;
 | 
			
		||||
 | 
			
		||||
    for(int s=0;s<LLs;s++){
 | 
			
		||||
      int lex = s+LLs*site;
 | 
			
		||||
      spProj5p(SitePplus[s] ,psi[lex]);
 | 
			
		||||
      spProj5m(SitePminus[s],psi[lex]);
 | 
			
		||||
      SiteChiP[s]=zero;
 | 
			
		||||
      SiteChiM[s]=zero;
 | 
			
		||||
    }
 | 
			
		||||
      
 | 
			
		||||
    int s=0;
 | 
			
		||||
    for(int  l=0; l<Simd::Nsimd();l++){ // simd lane
 | 
			
		||||
      for(int s2=0;s2<LLs;s2++){ // Column loop of right hand side
 | 
			
		||||
	vbroadcast(BcastP,SitePplus [s2],l);
 | 
			
		||||
	vbroadcast(BcastM,SitePminus[s2],l);
 | 
			
		||||
	for(int s1=0;s1<LLs;s1++){ // Column loop of reduction variables
 | 
			
		||||
	  SiteChiP[s1]=SiteChiP[s1]+Matp[LLs*s+s1]*BcastP;
 | 
			
		||||
	  SiteChiM[s1]=SiteChiM[s1]+Matm[LLs*s+s1]*BcastM;
 | 
			
		||||
	}
 | 
			
		||||
      s++;
 | 
			
		||||
    }}
 | 
			
		||||
 | 
			
		||||
    for(int s=0;s<LLs;s++){
 | 
			
		||||
      int lex = s+LLs*site;
 | 
			
		||||
      spRecon5p(SiteChi[s],SiteChiP[s]);
 | 
			
		||||
      accumRecon5m(SiteChi[s],SiteChiM[s]);
 | 
			
		||||
      chi[lex] = SiteChi[s]*0.5;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
INSTANTIATE_DPERP(DomainWallVec5dImplD);
 | 
			
		||||
INSTANTIATE_DPERP(DomainWallVec5dImplF);
 | 
			
		||||
 | 
			
		||||
template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
 | 
			
		||||
template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
@@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#ifndef  GRID_QCD_DOMAIN_WALL_FERMION_H
 | 
			
		||||
#define  GRID_QCD_DOMAIN_WALL_FERMION_H
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -100,433 +100,440 @@ namespace Grid {
 | 
			
		||||
    typedef typename Impl::SiteSpinor               SiteSpinor;		\
 | 
			
		||||
    typedef typename Impl::SiteHalfSpinor       SiteHalfSpinor;		\
 | 
			
		||||
    typedef typename Impl::Compressor               Compressor;		\
 | 
			
		||||
    typedef typename Impl::StencilImpl             StencilImpl;	  \
 | 
			
		||||
    typedef typename Impl::ImplParams ImplParams;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    typedef typename Impl::StencilImpl             StencilImpl;		\
 | 
			
		||||
    typedef typename Impl::ImplParams ImplParams;			\
 | 
			
		||||
    typedef typename Impl::Coeff_t       Coeff_t;
 | 
			
		||||
    
 | 
			
		||||
#define INHERIT_IMPL_TYPES(Base) \
 | 
			
		||||
  INHERIT_GIMPL_TYPES(Base)      \
 | 
			
		||||
  INHERIT_FIMPL_TYPES(Base)
 | 
			
		||||
    INHERIT_GIMPL_TYPES(Base)	 \
 | 
			
		||||
    INHERIT_FIMPL_TYPES(Base)
 | 
			
		||||
    
 | 
			
		||||
    ///////
 | 
			
		||||
    // Single flavour four spinors with colour index
 | 
			
		||||
    ///////
 | 
			
		||||
    template <class S, class Representation = FundamentalRepresentation,class _Coeff_t = RealD >
 | 
			
		||||
    class WilsonImpl
 | 
			
		||||
      : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
 | 
			
		||||
    public:
 | 
			
		||||
      static const int Dimension = Representation::Dimension;
 | 
			
		||||
      typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
 | 
			
		||||
      
 | 
			
		||||
      //Necessary?
 | 
			
		||||
      constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
 | 
			
		||||
 | 
			
		||||
///////
 | 
			
		||||
// Single flavour four spinors with colour index
 | 
			
		||||
///////
 | 
			
		||||
template <class S, class Representation = FundamentalRepresentation >
 | 
			
		||||
class WilsonImpl
 | 
			
		||||
    : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
 | 
			
		||||
 public:
 | 
			
		||||
  static const int Dimension = Representation::Dimension;
 | 
			
		||||
//  static const int Nrepresentation = Representation::Dimension;
 | 
			
		||||
  typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
 | 
			
		||||
  
 | 
			
		||||
  //Necessary?
 | 
			
		||||
  constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
 | 
			
		||||
      const bool LsVectorised=false;
 | 
			
		||||
 | 
			
		||||
  INHERIT_GIMPL_TYPES(Gimpl);
 | 
			
		||||
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iImplHalfSpinor =
 | 
			
		||||
      iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iImplDoubledGaugeField =
 | 
			
		||||
      iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
 | 
			
		||||
 | 
			
		||||
  typedef iImplSpinor<Simd> SiteSpinor;
 | 
			
		||||
  typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
 | 
			
		||||
  typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
 | 
			
		||||
 | 
			
		||||
  typedef Lattice<SiteSpinor> FermionField;
 | 
			
		||||
  typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
 | 
			
		||||
 | 
			
		||||
  typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
 | 
			
		||||
  typedef WilsonImplParams ImplParams;
 | 
			
		||||
  typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
 | 
			
		||||
 | 
			
		||||
  ImplParams Params;
 | 
			
		||||
 | 
			
		||||
  WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
 | 
			
		||||
 | 
			
		||||
  bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
 | 
			
		||||
 | 
			
		||||
  inline void multLink(SiteHalfSpinor &phi,
 | 
			
		||||
		       const SiteDoubledGaugeField &U,
 | 
			
		||||
                       const SiteHalfSpinor &chi,
 | 
			
		||||
		       int mu,
 | 
			
		||||
		       StencilEntry *SE,
 | 
			
		||||
                       StencilImpl &St) {
 | 
			
		||||
    mult(&phi(), &U(mu), &chi());
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <class ref>
 | 
			
		||||
  inline void loadLinkElement(Simd ®,
 | 
			
		||||
			      ref &memory) {
 | 
			
		||||
    reg = memory;
 | 
			
		||||
  }
 | 
			
		||||
  inline void DoubleStore(GridBase *GaugeGrid,
 | 
			
		||||
			  DoubledGaugeField &Uds,
 | 
			
		||||
                          const GaugeField &Umu) {
 | 
			
		||||
    conformable(Uds._grid, GaugeGrid);
 | 
			
		||||
    conformable(Umu._grid, GaugeGrid);
 | 
			
		||||
    GaugeLinkField U(GaugeGrid);
 | 
			
		||||
    for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
      U = PeekIndex<LorentzIndex>(Umu, mu);
 | 
			
		||||
      PokeIndex<LorentzIndex>(Uds, U, mu);
 | 
			
		||||
      U = adj(Cshift(U, mu, -1));
 | 
			
		||||
      PokeIndex<LorentzIndex>(Uds, U, mu + 4);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void InsertForce4D(GaugeField &mat,
 | 
			
		||||
			    FermionField &Btilde,
 | 
			
		||||
                            FermionField &A,
 | 
			
		||||
			    int mu) {
 | 
			
		||||
    GaugeLinkField link(mat._grid);
 | 
			
		||||
    link = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
 | 
			
		||||
    PokeIndex<LorentzIndex>(mat, link, mu);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void InsertForce5D(GaugeField &mat,
 | 
			
		||||
			    FermionField &Btilde,
 | 
			
		||||
                            FermionField Ã,
 | 
			
		||||
			    int mu) {
 | 
			
		||||
    int Ls = Btilde._grid->_fdimensions[0];
 | 
			
		||||
 | 
			
		||||
    GaugeLinkField tmp(mat._grid);
 | 
			
		||||
    tmp = zero;
 | 
			
		||||
    PARALLEL_FOR_LOOP
 | 
			
		||||
    for (int sss = 0; sss < tmp._grid->oSites(); sss++) {
 | 
			
		||||
      int sU = sss;
 | 
			
		||||
      for (int s = 0; s < Ls; s++) {
 | 
			
		||||
        int sF = s + Ls * sU;
 | 
			
		||||
        tmp[sU] = tmp[sU] + traceIndex<SpinIndex>(outerProduct(
 | 
			
		||||
                                Btilde[sF], Atilde[sF]));  // ordering here
 | 
			
		||||
      typedef _Coeff_t Coeff_t;
 | 
			
		||||
      
 | 
			
		||||
      INHERIT_GIMPL_TYPES(Gimpl);
 | 
			
		||||
      
 | 
			
		||||
      template <typename vtype> using iImplSpinor            = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
 | 
			
		||||
      template <typename vtype> using iImplHalfSpinor        = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
 | 
			
		||||
      template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
 | 
			
		||||
      
 | 
			
		||||
      typedef iImplSpinor<Simd>            SiteSpinor;
 | 
			
		||||
      typedef iImplHalfSpinor<Simd>        SiteHalfSpinor;
 | 
			
		||||
      typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
 | 
			
		||||
      
 | 
			
		||||
      typedef Lattice<SiteSpinor>            FermionField;
 | 
			
		||||
      typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
 | 
			
		||||
      
 | 
			
		||||
      typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
 | 
			
		||||
      typedef WilsonImplParams ImplParams;
 | 
			
		||||
      typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
 | 
			
		||||
      
 | 
			
		||||
      ImplParams Params;
 | 
			
		||||
      
 | 
			
		||||
      WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
 | 
			
		||||
      
 | 
			
		||||
      bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
 | 
			
		||||
      
 | 
			
		||||
      inline void multLink(SiteHalfSpinor &phi,
 | 
			
		||||
			   const SiteDoubledGaugeField &U,
 | 
			
		||||
			   const SiteHalfSpinor &chi,
 | 
			
		||||
			   int mu,
 | 
			
		||||
			   StencilEntry *SE,
 | 
			
		||||
			   StencilImpl &St) {
 | 
			
		||||
	mult(&phi(), &U(mu), &chi());
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      template <class ref>
 | 
			
		||||
      inline void loadLinkElement(Simd ®,
 | 
			
		||||
				  ref &memory) {
 | 
			
		||||
	reg = memory;
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      inline void DoubleStore(GridBase *GaugeGrid,
 | 
			
		||||
			      DoubledGaugeField &Uds,
 | 
			
		||||
			      const GaugeField &Umu) {
 | 
			
		||||
	conformable(Uds._grid, GaugeGrid);
 | 
			
		||||
	conformable(Umu._grid, GaugeGrid);
 | 
			
		||||
	GaugeLinkField U(GaugeGrid);
 | 
			
		||||
	for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
	  U = PeekIndex<LorentzIndex>(Umu, mu);
 | 
			
		||||
	  PokeIndex<LorentzIndex>(Uds, U, mu);
 | 
			
		||||
	  U = adj(Cshift(U, mu, -1));
 | 
			
		||||
	  PokeIndex<LorentzIndex>(Uds, U, mu + 4);
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    PokeIndex<LorentzIndex>(mat, tmp, mu);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
///////
 | 
			
		||||
// Single flavour four spinors with colour index, 5d redblack
 | 
			
		||||
///////
 | 
			
		||||
template <class S, int Nrepresentation = Nc>
 | 
			
		||||
class DomainWallRedBlack5dImpl
 | 
			
		||||
    : public PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > {
 | 
			
		||||
 public:
 | 
			
		||||
  static const int Dimension = Nrepresentation;
 | 
			
		||||
      inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
 | 
			
		||||
	GaugeLinkField link(mat._grid);
 | 
			
		||||
	link = TraceIndex<SpinIndex>(outerProduct(Btilde,A)); 
 | 
			
		||||
	PokeIndex<LorentzIndex>(mat,link,mu);
 | 
			
		||||
      }   
 | 
			
		||||
      
 | 
			
		||||
      inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
 | 
			
		||||
	
 | 
			
		||||
	int Ls=Btilde._grid->_fdimensions[0];
 | 
			
		||||
	
 | 
			
		||||
	GaugeLinkField tmp(mat._grid);
 | 
			
		||||
	tmp = zero;
 | 
			
		||||
	PARALLEL_FOR_LOOP
 | 
			
		||||
	  for(int sss=0;sss<tmp._grid->oSites();sss++){
 | 
			
		||||
	    int sU=sss;
 | 
			
		||||
	    for(int s=0;s<Ls;s++){
 | 
			
		||||
	      int sF = s+Ls*sU;
 | 
			
		||||
	      tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	PokeIndex<LorentzIndex>(mat,tmp,mu);
 | 
			
		||||
	
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
  typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl;
 | 
			
		||||
 | 
			
		||||
  INHERIT_GIMPL_TYPES(Gimpl);
 | 
			
		||||
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iImplHalfSpinor =
 | 
			
		||||
      iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iImplDoubledGaugeField =
 | 
			
		||||
      iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>;
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iImplGaugeField =
 | 
			
		||||
      iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
 | 
			
		||||
 | 
			
		||||
  typedef iImplSpinor<Simd> SiteSpinor;
 | 
			
		||||
  typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
 | 
			
		||||
  typedef Lattice<SiteSpinor> FermionField;
 | 
			
		||||
 | 
			
		||||
  // Make the doubled gauge field a *scalar*
 | 
			
		||||
  typedef iImplDoubledGaugeField<typename Simd::scalar_type>
 | 
			
		||||
    ///////
 | 
			
		||||
    // Single flavour four spinors with colour index, 5d redblack
 | 
			
		||||
    ///////
 | 
			
		||||
    template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD>
 | 
			
		||||
    class DomainWallVec5dImpl :  public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > { 
 | 
			
		||||
    public:
 | 
			
		||||
      
 | 
			
		||||
      static const int Dimension = Nrepresentation;
 | 
			
		||||
      const bool LsVectorised=true;
 | 
			
		||||
      
 | 
			
		||||
      typedef _Coeff_t Coeff_t;      
 | 
			
		||||
      typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl;
 | 
			
		||||
      
 | 
			
		||||
      INHERIT_GIMPL_TYPES(Gimpl);
 | 
			
		||||
      
 | 
			
		||||
      template <typename vtype> using iImplSpinor            = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
 | 
			
		||||
      template <typename vtype> using iImplHalfSpinor        = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
 | 
			
		||||
      template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>;
 | 
			
		||||
      template <typename vtype> using iImplGaugeField        = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;
 | 
			
		||||
      template <typename vtype> using iImplGaugeLink         = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
 | 
			
		||||
      
 | 
			
		||||
      typedef iImplSpinor<Simd> SiteSpinor;
 | 
			
		||||
      typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
 | 
			
		||||
      typedef Lattice<SiteSpinor> FermionField;
 | 
			
		||||
      
 | 
			
		||||
      // Make the doubled gauge field a *scalar*
 | 
			
		||||
      typedef iImplDoubledGaugeField<typename Simd::scalar_type>
 | 
			
		||||
      SiteDoubledGaugeField;  // This is a scalar
 | 
			
		||||
  typedef iImplGaugeField<typename Simd::scalar_type>
 | 
			
		||||
      typedef iImplGaugeField<typename Simd::scalar_type>
 | 
			
		||||
      SiteScalarGaugeField;  // scalar
 | 
			
		||||
  typedef iImplGaugeLink<typename Simd::scalar_type>
 | 
			
		||||
      typedef iImplGaugeLink<typename Simd::scalar_type>
 | 
			
		||||
      SiteScalarGaugeLink;  // scalar
 | 
			
		||||
 | 
			
		||||
  typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
 | 
			
		||||
 | 
			
		||||
  typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
 | 
			
		||||
  typedef WilsonImplParams ImplParams;
 | 
			
		||||
  typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
 | 
			
		||||
 | 
			
		||||
  ImplParams Params;
 | 
			
		||||
 | 
			
		||||
  DomainWallRedBlack5dImpl(const ImplParams &p = ImplParams()) : Params(p){};
 | 
			
		||||
 | 
			
		||||
  bool overlapCommsCompute(void) { return false; };
 | 
			
		||||
 | 
			
		||||
  template <class ref>
 | 
			
		||||
  inline void loadLinkElement(Simd &reg, ref &memory) {
 | 
			
		||||
    vsplat(reg, memory);
 | 
			
		||||
  }
 | 
			
		||||
  inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
 | 
			
		||||
                       const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
 | 
			
		||||
                       StencilImpl &St) {
 | 
			
		||||
    SiteGaugeLink UU;
 | 
			
		||||
    for (int i = 0; i < Nrepresentation; i++) {
 | 
			
		||||
      for (int j = 0; j < Nrepresentation; j++) {
 | 
			
		||||
        vsplat(UU()()(i, j), U(mu)()(i, j));
 | 
			
		||||
      
 | 
			
		||||
      typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
 | 
			
		||||
      
 | 
			
		||||
      typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
 | 
			
		||||
      typedef WilsonImplParams ImplParams;
 | 
			
		||||
      typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
 | 
			
		||||
      
 | 
			
		||||
      ImplParams Params;
 | 
			
		||||
      
 | 
			
		||||
      DomainWallVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){};
 | 
			
		||||
      
 | 
			
		||||
      bool overlapCommsCompute(void) { return false; };
 | 
			
		||||
      
 | 
			
		||||
      template <class ref>
 | 
			
		||||
      inline void loadLinkElement(Simd &reg, ref &memory) {
 | 
			
		||||
	vsplat(reg, memory);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    mult(&phi(), &UU(), &chi());
 | 
			
		||||
  }
 | 
			
		||||
      inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
 | 
			
		||||
			   const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
 | 
			
		||||
			   StencilImpl &St) {
 | 
			
		||||
	SiteGaugeLink UU;
 | 
			
		||||
	for (int i = 0; i < Nrepresentation; i++) {
 | 
			
		||||
	  for (int j = 0; j < Nrepresentation; j++) {
 | 
			
		||||
	    vsplat(UU()()(i, j), U(mu)()(i, j));
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
	mult(&phi(), &UU(), &chi());
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,
 | 
			
		||||
			      const GaugeField &Umu) {
 | 
			
		||||
	SiteScalarGaugeField ScalarUmu;
 | 
			
		||||
	SiteDoubledGaugeField ScalarUds;
 | 
			
		||||
	
 | 
			
		||||
	GaugeLinkField U(Umu._grid);
 | 
			
		||||
	GaugeField Uadj(Umu._grid);
 | 
			
		||||
	for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
	  U = PeekIndex<LorentzIndex>(Umu, mu);
 | 
			
		||||
	  U = adj(Cshift(U, mu, -1));
 | 
			
		||||
	  PokeIndex<LorentzIndex>(Uadj, U, mu);
 | 
			
		||||
	}
 | 
			
		||||
	
 | 
			
		||||
	for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
 | 
			
		||||
	  std::vector<int> lcoor;
 | 
			
		||||
	  GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
 | 
			
		||||
	  
 | 
			
		||||
	  peekLocalSite(ScalarUmu, Umu, lcoor);
 | 
			
		||||
	  for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu);
 | 
			
		||||
	  
 | 
			
		||||
	  peekLocalSite(ScalarUmu, Uadj, lcoor);
 | 
			
		||||
	  for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu);
 | 
			
		||||
	  
 | 
			
		||||
	  pokeLocalSite(ScalarUds, Uds, lcoor);
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,
 | 
			
		||||
				FermionField &A, int mu) {
 | 
			
		||||
	assert(0);
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,
 | 
			
		||||
				FermionField &Atilde, int mu) {
 | 
			
		||||
	assert(0);
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
    
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Flavour doubled spinors; is Gparity the only? what about C*?
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    
 | 
			
		||||
    template <class S, int Nrepresentation,class _Coeff_t = RealD>
 | 
			
		||||
    class GparityWilsonImpl
 | 
			
		||||
      : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > {
 | 
			
		||||
    public:
 | 
			
		||||
      static const int Dimension = Nrepresentation;
 | 
			
		||||
 | 
			
		||||
  inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,
 | 
			
		||||
                          const GaugeField &Umu) {
 | 
			
		||||
    SiteScalarGaugeField ScalarUmu;
 | 
			
		||||
    SiteDoubledGaugeField ScalarUds;
 | 
			
		||||
 | 
			
		||||
    GaugeLinkField U(Umu._grid);
 | 
			
		||||
    GaugeField Uadj(Umu._grid);
 | 
			
		||||
    for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
      U = PeekIndex<LorentzIndex>(Umu, mu);
 | 
			
		||||
      U = adj(Cshift(U, mu, -1));
 | 
			
		||||
      PokeIndex<LorentzIndex>(Uadj, U, mu);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
 | 
			
		||||
      std::vector<int> lcoor;
 | 
			
		||||
      GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
 | 
			
		||||
 | 
			
		||||
      peekLocalSite(ScalarUmu, Umu, lcoor);
 | 
			
		||||
      for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu);
 | 
			
		||||
 | 
			
		||||
      peekLocalSite(ScalarUmu, Uadj, lcoor);
 | 
			
		||||
      for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu);
 | 
			
		||||
 | 
			
		||||
      pokeLocalSite(ScalarUds, Uds, lcoor);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,
 | 
			
		||||
                            FermionField &A, int mu) {
 | 
			
		||||
    assert(0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,
 | 
			
		||||
                            FermionField &Atilde, int mu) {
 | 
			
		||||
    assert(0);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Flavour doubled spinors; is Gparity the only? what about C*?
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
template <class S, int Nrepresentation>
 | 
			
		||||
class GparityWilsonImpl
 | 
			
		||||
    : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > {
 | 
			
		||||
 public:
 | 
			
		||||
  static const int Dimension = Nrepresentation;
 | 
			
		||||
  typedef ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl;
 | 
			
		||||
 | 
			
		||||
  INHERIT_GIMPL_TYPES(Gimpl);
 | 
			
		||||
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iImplSpinor =
 | 
			
		||||
      const bool LsVectorised=false;
 | 
			
		||||
      
 | 
			
		||||
      typedef _Coeff_t Coeff_t;
 | 
			
		||||
      typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
 | 
			
		||||
      
 | 
			
		||||
      INHERIT_GIMPL_TYPES(Gimpl);
 | 
			
		||||
      
 | 
			
		||||
      template <typename vtype>
 | 
			
		||||
      using iImplSpinor =
 | 
			
		||||
      iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp>;
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iImplHalfSpinor =
 | 
			
		||||
      iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp>;
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iImplDoubledGaugeField =
 | 
			
		||||
      iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>, Ngp>;
 | 
			
		||||
      template <typename vtype>
 | 
			
		||||
      using iImplHalfSpinor =
 | 
			
		||||
	iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp>;
 | 
			
		||||
      template <typename vtype>
 | 
			
		||||
      using iImplDoubledGaugeField =
 | 
			
		||||
	iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>, Ngp>;
 | 
			
		||||
      
 | 
			
		||||
      typedef iImplSpinor<Simd> SiteSpinor;
 | 
			
		||||
      typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
 | 
			
		||||
      typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
 | 
			
		||||
      
 | 
			
		||||
      typedef Lattice<SiteSpinor> FermionField;
 | 
			
		||||
      typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
 | 
			
		||||
      
 | 
			
		||||
      typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
 | 
			
		||||
      typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
 | 
			
		||||
 | 
			
		||||
  typedef iImplSpinor<Simd> SiteSpinor;
 | 
			
		||||
  typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
 | 
			
		||||
  typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
 | 
			
		||||
      typedef GparityWilsonImplParams ImplParams;
 | 
			
		||||
      
 | 
			
		||||
      ImplParams Params;
 | 
			
		||||
 | 
			
		||||
  typedef Lattice<SiteSpinor> FermionField;
 | 
			
		||||
  typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
 | 
			
		||||
 | 
			
		||||
  typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
 | 
			
		||||
  typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
 | 
			
		||||
      GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
 | 
			
		||||
 | 
			
		||||
  typedef GparityWilsonImplParams ImplParams;
 | 
			
		||||
      bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
 | 
			
		||||
 | 
			
		||||
  ImplParams Params;
 | 
			
		||||
      // provide the multiply by link that is differentiated between Gparity (with
 | 
			
		||||
      // flavour index) and non-Gparity
 | 
			
		||||
      inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
 | 
			
		||||
			   const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
 | 
			
		||||
			   StencilImpl &St) {
 | 
			
		||||
	typedef SiteHalfSpinor vobj;
 | 
			
		||||
	typedef typename SiteHalfSpinor::scalar_object sobj;
 | 
			
		||||
	
 | 
			
		||||
	vobj vtmp;
 | 
			
		||||
	sobj stmp;
 | 
			
		||||
	
 | 
			
		||||
	GridBase *grid = St._grid;
 | 
			
		||||
	
 | 
			
		||||
	const int Nsimd = grid->Nsimd();
 | 
			
		||||
	
 | 
			
		||||
	int direction = St._directions[mu];
 | 
			
		||||
	int distance = St._distances[mu];
 | 
			
		||||
	int ptype = St._permute_type[mu];
 | 
			
		||||
	int sl = St._grid->_simd_layout[direction];
 | 
			
		||||
	
 | 
			
		||||
	// Fixme X.Y.Z.T hardcode in stencil
 | 
			
		||||
	int mmu = mu % Nd;
 | 
			
		||||
	
 | 
			
		||||
	// assert our assumptions
 | 
			
		||||
	assert((distance == 1) || (distance == -1));  // nearest neighbour stencil hard code
 | 
			
		||||
	assert((sl == 1) || (sl == 2));
 | 
			
		||||
	
 | 
			
		||||
	std::vector<int> icoor;
 | 
			
		||||
	
 | 
			
		||||
	if ( SE->_around_the_world && Params.twists[mmu] ) {
 | 
			
		||||
 | 
			
		||||
  GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
 | 
			
		||||
	  if ( sl == 2 ) {
 | 
			
		||||
 | 
			
		||||
  bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
 | 
			
		||||
	    std::vector<sobj> vals(Nsimd);
 | 
			
		||||
 | 
			
		||||
  // provide the multiply by link that is differentiated between Gparity (with
 | 
			
		||||
  // flavour index) and non-Gparity
 | 
			
		||||
  inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
 | 
			
		||||
                       const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
 | 
			
		||||
                       StencilImpl &St) {
 | 
			
		||||
    typedef SiteHalfSpinor vobj;
 | 
			
		||||
    typedef typename SiteHalfSpinor::scalar_object sobj;
 | 
			
		||||
	    extract(chi,vals);
 | 
			
		||||
	    for(int s=0;s<Nsimd;s++){
 | 
			
		||||
 | 
			
		||||
    vobj vtmp;
 | 
			
		||||
    sobj stmp;
 | 
			
		||||
	      grid->iCoorFromIindex(icoor,s);
 | 
			
		||||
	      
 | 
			
		||||
	      assert((icoor[direction]==0)||(icoor[direction]==1));
 | 
			
		||||
	      
 | 
			
		||||
	      int permute_lane;
 | 
			
		||||
	      if ( distance == 1) {
 | 
			
		||||
		permute_lane = icoor[direction]?1:0;
 | 
			
		||||
	      } else {
 | 
			
		||||
		permute_lane = icoor[direction]?0:1;
 | 
			
		||||
	      }
 | 
			
		||||
	      
 | 
			
		||||
	      if ( permute_lane ) { 
 | 
			
		||||
		stmp(0) = vals[s](1);
 | 
			
		||||
		stmp(1) = vals[s](0);
 | 
			
		||||
		vals[s] = stmp;
 | 
			
		||||
	      }
 | 
			
		||||
	    }
 | 
			
		||||
	    merge(vtmp,vals);
 | 
			
		||||
 | 
			
		||||
    GridBase *grid = St._grid;
 | 
			
		||||
 | 
			
		||||
    const int Nsimd = grid->Nsimd();
 | 
			
		||||
 | 
			
		||||
    int direction = St._directions[mu];
 | 
			
		||||
    int distance = St._distances[mu];
 | 
			
		||||
    int ptype = St._permute_type[mu];
 | 
			
		||||
    int sl = St._grid->_simd_layout[direction];
 | 
			
		||||
 | 
			
		||||
    // Fixme X.Y.Z.T hardcode in stencil
 | 
			
		||||
    int mmu = mu % Nd;
 | 
			
		||||
 | 
			
		||||
    // assert our assumptions
 | 
			
		||||
    assert((distance == 1) ||
 | 
			
		||||
           (distance == -1));  // nearest neighbour stencil hard code
 | 
			
		||||
    assert((sl == 1) || (sl == 2));
 | 
			
		||||
 | 
			
		||||
    std::vector<int> icoor;
 | 
			
		||||
 | 
			
		||||
    if (SE->_around_the_world && Params.twists[mmu]) {
 | 
			
		||||
      if (sl == 2) {
 | 
			
		||||
        std::vector<sobj> vals(Nsimd);
 | 
			
		||||
 | 
			
		||||
        extract(chi, vals);
 | 
			
		||||
        for (int s = 0; s < Nsimd; s++) {
 | 
			
		||||
          grid->iCoorFromIindex(icoor, s);
 | 
			
		||||
 | 
			
		||||
          assert((icoor[direction] == 0) || (icoor[direction] == 1));
 | 
			
		||||
 | 
			
		||||
          int permute_lane;
 | 
			
		||||
          if (distance == 1) {
 | 
			
		||||
            permute_lane = icoor[direction] ? 1 : 0;
 | 
			
		||||
          } else {
 | 
			
		||||
            permute_lane = icoor[direction] ? 0 : 1;
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          if (permute_lane) {
 | 
			
		||||
            stmp(0) = vals[s](1);
 | 
			
		||||
            stmp(1) = vals[s](0);
 | 
			
		||||
            vals[s] = stmp;
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
        merge(vtmp, vals);
 | 
			
		||||
 | 
			
		||||
      } else {
 | 
			
		||||
        vtmp(0) = chi(1);
 | 
			
		||||
        vtmp(1) = chi(0);
 | 
			
		||||
      }
 | 
			
		||||
      mult(&phi(0), &U(0)(mu), &vtmp(0));
 | 
			
		||||
      mult(&phi(1), &U(1)(mu), &vtmp(1));
 | 
			
		||||
 | 
			
		||||
    } else {
 | 
			
		||||
      mult(&phi(0), &U(0)(mu), &chi(0));
 | 
			
		||||
      mult(&phi(1), &U(1)(mu), &chi(1));
 | 
			
		||||
    }
 | 
			
		||||
	  } else { 
 | 
			
		||||
	    vtmp(0) = chi(1);
 | 
			
		||||
	    vtmp(1) = chi(0);
 | 
			
		||||
	  }
 | 
			
		||||
	  mult(&phi(0),&U(0)(mu),&vtmp(0));
 | 
			
		||||
	  mult(&phi(1),&U(1)(mu),&vtmp(1));
 | 
			
		||||
	  
 | 
			
		||||
	} else { 
 | 
			
		||||
	  mult(&phi(0),&U(0)(mu),&chi(0));
 | 
			
		||||
	  mult(&phi(1),&U(1)(mu),&chi(1));
 | 
			
		||||
	}
 | 
			
		||||
	
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,
 | 
			
		||||
                          const GaugeField &Umu) {
 | 
			
		||||
    conformable(Uds._grid, GaugeGrid);
 | 
			
		||||
    conformable(Umu._grid, GaugeGrid);
 | 
			
		||||
 | 
			
		||||
    GaugeLinkField Utmp(GaugeGrid);
 | 
			
		||||
    GaugeLinkField U(GaugeGrid);
 | 
			
		||||
    GaugeLinkField Uconj(GaugeGrid);
 | 
			
		||||
 | 
			
		||||
    Lattice<iScalar<vInteger> > coor(GaugeGrid);
 | 
			
		||||
 | 
			
		||||
    for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
      LatticeCoordinate(coor, mu);
 | 
			
		||||
 | 
			
		||||
      U = PeekIndex<LorentzIndex>(Umu, mu);
 | 
			
		||||
      Uconj = conjugate(U);
 | 
			
		||||
 | 
			
		||||
      // This phase could come from a simple bc 1,1,-1,1 ..
 | 
			
		||||
      int neglink = GaugeGrid->GlobalDimensions()[mu] - 1;
 | 
			
		||||
      if (Params.twists[mu]) {
 | 
			
		||||
        Uconj = where(coor == neglink, -Uconj, Uconj);
 | 
			
		||||
      inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
 | 
			
		||||
      {
 | 
			
		||||
	
 | 
			
		||||
	conformable(Uds._grid,GaugeGrid);
 | 
			
		||||
	conformable(Umu._grid,GaugeGrid);
 | 
			
		||||
	
 | 
			
		||||
	GaugeLinkField Utmp (GaugeGrid);
 | 
			
		||||
	GaugeLinkField U    (GaugeGrid);
 | 
			
		||||
	GaugeLinkField Uconj(GaugeGrid);
 | 
			
		||||
	
 | 
			
		||||
	Lattice<iScalar<vInteger> > coor(GaugeGrid);
 | 
			
		||||
	
 | 
			
		||||
	
 | 
			
		||||
	for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
	  
 | 
			
		||||
	  LatticeCoordinate(coor,mu);
 | 
			
		||||
	  
 | 
			
		||||
	  U     = PeekIndex<LorentzIndex>(Umu,mu);
 | 
			
		||||
	  Uconj = conjugate(U);
 | 
			
		||||
	  
 | 
			
		||||
	  // This phase could come from a simple bc 1,1,-1,1 ..
 | 
			
		||||
	  int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
 | 
			
		||||
	  if ( Params.twists[mu] ) { 
 | 
			
		||||
	    Uconj = where(coor==neglink,-Uconj,Uconj);
 | 
			
		||||
	  }
 | 
			
		||||
	  
 | 
			
		||||
	  
 | 
			
		||||
	  PARALLEL_FOR_LOOP
 | 
			
		||||
	    for(auto ss=U.begin();ss<U.end();ss++){
 | 
			
		||||
	      Uds[ss](0)(mu) = U[ss]();
 | 
			
		||||
	      Uds[ss](1)(mu) = Uconj[ss]();
 | 
			
		||||
	    }
 | 
			
		||||
	  
 | 
			
		||||
	  U     = adj(Cshift(U    ,mu,-1));      // correct except for spanning the boundary
 | 
			
		||||
	  Uconj = adj(Cshift(Uconj,mu,-1));
 | 
			
		||||
	  
 | 
			
		||||
	  Utmp = U;
 | 
			
		||||
	  if ( Params.twists[mu] ) { 
 | 
			
		||||
	    Utmp = where(coor==0,Uconj,Utmp);
 | 
			
		||||
	  }
 | 
			
		||||
	  
 | 
			
		||||
	  PARALLEL_FOR_LOOP
 | 
			
		||||
	    for(auto ss=U.begin();ss<U.end();ss++){
 | 
			
		||||
	      Uds[ss](0)(mu+4) = Utmp[ss]();
 | 
			
		||||
	    }
 | 
			
		||||
	  
 | 
			
		||||
	  Utmp = Uconj;
 | 
			
		||||
	  if ( Params.twists[mu] ) { 
 | 
			
		||||
	    Utmp = where(coor==0,U,Utmp);
 | 
			
		||||
	  }
 | 
			
		||||
	  
 | 
			
		||||
	  PARALLEL_FOR_LOOP
 | 
			
		||||
	    for(auto ss=U.begin();ss<U.end();ss++){
 | 
			
		||||
	      Uds[ss](1)(mu+4) = Utmp[ss]();
 | 
			
		||||
	    }
 | 
			
		||||
	  
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      PARALLEL_FOR_LOOP
 | 
			
		||||
      for (auto ss = U.begin(); ss < U.end(); ss++) {
 | 
			
		||||
        Uds[ss](0)(mu) = U[ss]();
 | 
			
		||||
        Uds[ss](1)(mu) = Uconj[ss]();
 | 
			
		||||
      
 | 
			
		||||
      
 | 
			
		||||
      inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,
 | 
			
		||||
				FermionField &A, int mu) {
 | 
			
		||||
	// DhopDir provides U or Uconj depending on coor/flavour.
 | 
			
		||||
	GaugeLinkField link(mat._grid);
 | 
			
		||||
	// use lorentz for flavour as hack.
 | 
			
		||||
	auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
 | 
			
		||||
	PARALLEL_FOR_LOOP
 | 
			
		||||
	  for (auto ss = tmp.begin(); ss < tmp.end(); ss++) {
 | 
			
		||||
	    link[ss]() = tmp[ss](0, 0) - conjugate(tmp[ss](1, 1));
 | 
			
		||||
	  }
 | 
			
		||||
	PokeIndex<LorentzIndex>(mat, link, mu);
 | 
			
		||||
	return;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      U = adj(Cshift(U, mu, -1));  // correct except for spanning the boundary
 | 
			
		||||
      Uconj = adj(Cshift(Uconj, mu, -1));
 | 
			
		||||
 | 
			
		||||
      Utmp = U;
 | 
			
		||||
      if (Params.twists[mu]) {
 | 
			
		||||
        Utmp = where(coor == 0, Uconj, Utmp);
 | 
			
		||||
      
 | 
			
		||||
      inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,
 | 
			
		||||
				FermionField &Atilde, int mu) {
 | 
			
		||||
	int Ls = Btilde._grid->_fdimensions[0];
 | 
			
		||||
	
 | 
			
		||||
	GaugeLinkField tmp(mat._grid);
 | 
			
		||||
	tmp = zero;
 | 
			
		||||
	PARALLEL_FOR_LOOP
 | 
			
		||||
	  for (int ss = 0; ss < tmp._grid->oSites(); ss++) {
 | 
			
		||||
	    for (int s = 0; s < Ls; s++) {
 | 
			
		||||
	      int sF = s + Ls * ss;
 | 
			
		||||
	      auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF]));
 | 
			
		||||
	      tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	PokeIndex<LorentzIndex>(mat, tmp, mu);
 | 
			
		||||
	return;
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
      PARALLEL_FOR_LOOP
 | 
			
		||||
      for (auto ss = U.begin(); ss < U.end(); ss++) {
 | 
			
		||||
        Uds[ss](0)(mu + 4) = Utmp[ss]();
 | 
			
		||||
      }
 | 
			
		||||
    typedef WilsonImpl<vComplex,  FundamentalRepresentation > WilsonImplR;   // Real.. whichever prec
 | 
			
		||||
    typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF;  // Float
 | 
			
		||||
    typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD;  // Double
 | 
			
		||||
 | 
			
		||||
      Utmp = Uconj;
 | 
			
		||||
      if (Params.twists[mu]) {
 | 
			
		||||
        Utmp = where(coor == 0, U, Utmp);
 | 
			
		||||
      }
 | 
			
		||||
    typedef WilsonImpl<vComplex,  FundamentalRepresentation, ComplexD > ZWilsonImplR; // Real.. whichever prec
 | 
			
		||||
    typedef WilsonImpl<vComplexF, FundamentalRepresentation, ComplexD > ZWilsonImplF; // Float
 | 
			
		||||
    typedef WilsonImpl<vComplexD, FundamentalRepresentation, ComplexD > ZWilsonImplD; // Double
 | 
			
		||||
 | 
			
		||||
      PARALLEL_FOR_LOOP
 | 
			
		||||
      for (auto ss = U.begin(); ss < U.end(); ss++) {
 | 
			
		||||
        Uds[ss](1)(mu + 4) = Utmp[ss]();
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
    typedef WilsonImpl<vComplex,  AdjointRepresentation > WilsonAdjImplR;   // Real.. whichever prec
 | 
			
		||||
    typedef WilsonImpl<vComplexF, AdjointRepresentation > WilsonAdjImplF;  // Float
 | 
			
		||||
    typedef WilsonImpl<vComplexD, AdjointRepresentation > WilsonAdjImplD;  // Double
 | 
			
		||||
    
 | 
			
		||||
    typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
 | 
			
		||||
    typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
 | 
			
		||||
    typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double
 | 
			
		||||
    
 | 
			
		||||
    typedef DomainWallVec5dImpl<vComplex ,Nc,ComplexD> ZDomainWallVec5dImplR; // Real.. whichever prec
 | 
			
		||||
    typedef DomainWallVec5dImpl<vComplexF,Nc,ComplexD> ZDomainWallVec5dImplF; // Float
 | 
			
		||||
    typedef DomainWallVec5dImpl<vComplexD,Nc,ComplexD> ZDomainWallVec5dImplD; // Double
 | 
			
		||||
 | 
			
		||||
  inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,
 | 
			
		||||
                            FermionField &A, int mu) {
 | 
			
		||||
    // DhopDir provides U or Uconj depending on coor/flavour.
 | 
			
		||||
    GaugeLinkField link(mat._grid);
 | 
			
		||||
    // use lorentz for flavour as hack.
 | 
			
		||||
    auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
 | 
			
		||||
    PARALLEL_FOR_LOOP
 | 
			
		||||
    for (auto ss = tmp.begin(); ss < tmp.end(); ss++) {
 | 
			
		||||
      link[ss]() = tmp[ss](0, 0) - conjugate(tmp[ss](1, 1));
 | 
			
		||||
    }
 | 
			
		||||
    PokeIndex<LorentzIndex>(mat, link, mu);
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
  inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,
 | 
			
		||||
                            FermionField &Atilde, int mu) {
 | 
			
		||||
    int Ls = Btilde._grid->_fdimensions[0];
 | 
			
		||||
 | 
			
		||||
    GaugeLinkField tmp(mat._grid);
 | 
			
		||||
    tmp = zero;
 | 
			
		||||
    PARALLEL_FOR_LOOP
 | 
			
		||||
    for (int ss = 0; ss < tmp._grid->oSites(); ss++) {
 | 
			
		||||
      for (int s = 0; s < Ls; s++) {
 | 
			
		||||
        int sF = s + Ls * ss;
 | 
			
		||||
        auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF]));
 | 
			
		||||
        tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    PokeIndex<LorentzIndex>(mat, tmp, mu);
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
typedef WilsonImpl<vComplex,  FundamentalRepresentation> WilsonImplR;   // Real.. whichever prec
 | 
			
		||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation> WilsonImplF;  // Float
 | 
			
		||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation> WilsonImplD;  // Double
 | 
			
		||||
 | 
			
		||||
typedef WilsonImpl<vComplex,  AdjointRepresentation > WilsonAdjImplR;   // Real.. whichever prec
 | 
			
		||||
typedef WilsonImpl<vComplexF, AdjointRepresentation > WilsonAdjImplF;  // Float
 | 
			
		||||
typedef WilsonImpl<vComplexD, AdjointRepresentation > WilsonAdjImplD;  // Double
 | 
			
		||||
 | 
			
		||||
typedef DomainWallRedBlack5dImpl<vComplex, Nc>
 | 
			
		||||
    DomainWallRedBlack5dImplR;  // Real.. whichever prec
 | 
			
		||||
typedef DomainWallRedBlack5dImpl<vComplexF, Nc>
 | 
			
		||||
    DomainWallRedBlack5dImplF;  // Float
 | 
			
		||||
typedef DomainWallRedBlack5dImpl<vComplexD, Nc>
 | 
			
		||||
    DomainWallRedBlack5dImplD;  // Double
 | 
			
		||||
 | 
			
		||||
typedef GparityWilsonImpl<vComplex, Nc>
 | 
			
		||||
    GparityWilsonImplR;  // Real.. whichever prec
 | 
			
		||||
typedef GparityWilsonImpl<vComplexF, Nc> GparityWilsonImplF;  // Float
 | 
			
		||||
typedef GparityWilsonImpl<vComplexD, Nc> GparityWilsonImplD;  // Double
 | 
			
		||||
    typedef GparityWilsonImpl<vComplex, Nc>  GparityWilsonImplR;  // Real.. whichever prec
 | 
			
		||||
    typedef GparityWilsonImpl<vComplexF, Nc> GparityWilsonImplF;  // Float
 | 
			
		||||
    typedef GparityWilsonImpl<vComplexD, Nc> GparityWilsonImplD;  // Double
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#ifndef  GRID_QCD_MOBIUS_FERMION_H
 | 
			
		||||
#define  GRID_QCD_MOBIUS_FERMION_H
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#ifndef  GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H
 | 
			
		||||
#define  GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
 | 
			
		||||
#define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#ifndef  OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H
 | 
			
		||||
#define  OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#ifndef OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H
 | 
			
		||||
#define OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#ifndef OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H
 | 
			
		||||
#define OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#ifndef OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H
 | 
			
		||||
#define OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#ifndef OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H
 | 
			
		||||
#define OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#ifndef  GRID_QCD_SCALED_SHAMIR_FERMION_H
 | 
			
		||||
#define  GRID_QCD_SCALED_SHAMIR_FERMION_H
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#ifndef  GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H
 | 
			
		||||
#define  GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -48,9 +48,9 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
 | 
			
		||||
				       GridRedBlackCartesian &FourDimRedBlackGrid,
 | 
			
		||||
				       RealD _M5,const ImplParams &p) :
 | 
			
		||||
  Kernels(p),
 | 
			
		||||
  _FiveDimGrid(&FiveDimGrid),
 | 
			
		||||
  _FiveDimGrid        (&FiveDimGrid),
 | 
			
		||||
  _FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
 | 
			
		||||
  _FourDimGrid(&FourDimGrid),
 | 
			
		||||
  _FourDimGrid        (&FourDimGrid),
 | 
			
		||||
  _FourDimRedBlackGrid(&FourDimRedBlackGrid),
 | 
			
		||||
  Stencil    (_FiveDimGrid,npoint,Even,directions,displacements),
 | 
			
		||||
  StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
 | 
			
		||||
@@ -62,60 +62,83 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
 | 
			
		||||
  Lebesgue(_FourDimGrid),
 | 
			
		||||
  LebesgueEvenOdd(_FourDimRedBlackGrid)
 | 
			
		||||
{
 | 
			
		||||
  // some assertions
 | 
			
		||||
  assert(FiveDimGrid._ndimension==5);
 | 
			
		||||
  assert(FourDimGrid._ndimension==4);
 | 
			
		||||
  assert(FiveDimRedBlackGrid._ndimension==5);
 | 
			
		||||
  assert(FourDimRedBlackGrid._ndimension==4);
 | 
			
		||||
  assert(FiveDimRedBlackGrid._checker_dim==1);
 | 
			
		||||
  if (Impl::LsVectorised) { 
 | 
			
		||||
 | 
			
		||||
  // Dimension zero of the five-d is the Ls direction
 | 
			
		||||
  Ls=FiveDimGrid._fdimensions[0];
 | 
			
		||||
  assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
 | 
			
		||||
  assert(FiveDimRedBlackGrid._processors[0] ==1);
 | 
			
		||||
  assert(FiveDimRedBlackGrid._simd_layout[0]==1);
 | 
			
		||||
  assert(FiveDimGrid._processors[0]         ==1);
 | 
			
		||||
  assert(FiveDimGrid._simd_layout[0]        ==1);
 | 
			
		||||
    int nsimd = Simd::Nsimd();
 | 
			
		||||
    
 | 
			
		||||
    // some assertions
 | 
			
		||||
    assert(FiveDimGrid._ndimension==5);
 | 
			
		||||
    assert(FiveDimRedBlackGrid._ndimension==5);
 | 
			
		||||
    assert(FiveDimRedBlackGrid._checker_dim==1); // Don't checker the s direction
 | 
			
		||||
    assert(FourDimGrid._ndimension==4);
 | 
			
		||||
 | 
			
		||||
  // Other dimensions must match the decomposition of the four-D fields 
 | 
			
		||||
  for(int d=0;d<4;d++){
 | 
			
		||||
    assert(FourDimRedBlackGrid._fdimensions[d]  ==FourDimGrid._fdimensions[d]);
 | 
			
		||||
    assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
 | 
			
		||||
    // Dimension zero of the five-d is the Ls direction
 | 
			
		||||
    Ls=FiveDimGrid._fdimensions[0];
 | 
			
		||||
    assert(FiveDimGrid._processors[0]         ==1);
 | 
			
		||||
    assert(FiveDimGrid._simd_layout[0]        ==nsimd);
 | 
			
		||||
 | 
			
		||||
    assert(FourDimRedBlackGrid._processors[d]   ==FourDimGrid._processors[d]);
 | 
			
		||||
    assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
 | 
			
		||||
    assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
 | 
			
		||||
    assert(FiveDimRedBlackGrid._processors[0] ==1);
 | 
			
		||||
    assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
 | 
			
		||||
 | 
			
		||||
    assert(FourDimRedBlackGrid._simd_layout[d]  ==FourDimGrid._simd_layout[d]);
 | 
			
		||||
    assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]);
 | 
			
		||||
    // Other dimensions must match the decomposition of the four-D fields 
 | 
			
		||||
    for(int d=0;d<4;d++){
 | 
			
		||||
      assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
 | 
			
		||||
      assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
 | 
			
		||||
      
 | 
			
		||||
      // In the Ls-vectorised branch the four-d grid must have a SIMD layout of 1
      // in every direction. This is a comparison: an assignment here would always
      // pass and would overwrite the layout entry whenever asserts are enabled.
      assert(FourDimGrid._simd_layout[d]==1);
 | 
			
		||||
      // The four-d red-black grid must likewise have a SIMD layout of 1 in every
      // direction. Compare rather than assign, so the check can actually fail and
      // the grid is left unmodified.
      assert(FourDimRedBlackGrid._simd_layout[d]==1);
 | 
			
		||||
      assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
 | 
			
		||||
 | 
			
		||||
    assert(FiveDimGrid._fdimensions[d+1]        ==FourDimGrid._fdimensions[d]);
 | 
			
		||||
    assert(FiveDimGrid._processors[d+1]         ==FourDimGrid._processors[d]);
 | 
			
		||||
    assert(FiveDimGrid._simd_layout[d+1]        ==FourDimGrid._simd_layout[d]);
 | 
			
		||||
      assert(FiveDimGrid._fdimensions[d+1]        ==FourDimGrid._fdimensions[d]);
 | 
			
		||||
      assert(FiveDimGrid._processors[d+1]         ==FourDimGrid._processors[d]);
 | 
			
		||||
      assert(FiveDimGrid._simd_layout[d+1]        ==FourDimGrid._simd_layout[d]);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  } else {
 | 
			
		||||
 | 
			
		||||
    // some assertions
 | 
			
		||||
    assert(FiveDimGrid._ndimension==5);
 | 
			
		||||
    assert(FourDimGrid._ndimension==4);
 | 
			
		||||
    assert(FiveDimRedBlackGrid._ndimension==5);
 | 
			
		||||
    assert(FourDimRedBlackGrid._ndimension==4);
 | 
			
		||||
    assert(FiveDimRedBlackGrid._checker_dim==1);
 | 
			
		||||
    
 | 
			
		||||
    // Dimension zero of the five-d is the Ls direction
 | 
			
		||||
    Ls=FiveDimGrid._fdimensions[0];
 | 
			
		||||
    assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
 | 
			
		||||
    assert(FiveDimRedBlackGrid._processors[0] ==1);
 | 
			
		||||
    assert(FiveDimRedBlackGrid._simd_layout[0]==1);
 | 
			
		||||
    assert(FiveDimGrid._processors[0]         ==1);
 | 
			
		||||
    assert(FiveDimGrid._simd_layout[0]        ==1);
 | 
			
		||||
    
 | 
			
		||||
    // Other dimensions must match the decomposition of the four-D fields 
 | 
			
		||||
    for(int d=0;d<4;d++){
 | 
			
		||||
      assert(FourDimRedBlackGrid._fdimensions[d]  ==FourDimGrid._fdimensions[d]);
 | 
			
		||||
      assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
 | 
			
		||||
      
 | 
			
		||||
      assert(FourDimRedBlackGrid._processors[d]   ==FourDimGrid._processors[d]);
 | 
			
		||||
      assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
 | 
			
		||||
      
 | 
			
		||||
      assert(FourDimRedBlackGrid._simd_layout[d]  ==FourDimGrid._simd_layout[d]);
 | 
			
		||||
      assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]);
 | 
			
		||||
      
 | 
			
		||||
      assert(FiveDimGrid._fdimensions[d+1]        ==FourDimGrid._fdimensions[d]);
 | 
			
		||||
      assert(FiveDimGrid._processors[d+1]         ==FourDimGrid._processors[d]);
 | 
			
		||||
      assert(FiveDimGrid._simd_layout[d+1]        ==FourDimGrid._simd_layout[d]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
  // Allocate the required comms buffer
 | 
			
		||||
  ImportGauge(_Umu);
 | 
			
		||||
}  
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
  /*
 | 
			
		||||
template<class Impl>
 | 
			
		||||
WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
 | 
			
		||||
				       GridCartesian         &FiveDimGrid,
 | 
			
		||||
				       GridRedBlackCartesian &FiveDimRedBlackGrid,
 | 
			
		||||
				       GridCartesian         &FourDimGrid,
 | 
			
		||||
				       RealD _M5,const ImplParams &p) :
 | 
			
		||||
  Kernels(p),
 | 
			
		||||
  _FiveDimGrid        (&FiveDimGrid),
 | 
			
		||||
  _FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
 | 
			
		||||
  _FourDimGrid        (&FourDimGrid),
 | 
			
		||||
  Stencil    (_FiveDimGrid,npoint,Even,directions,displacements),
 | 
			
		||||
  StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
 | 
			
		||||
  StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd
 | 
			
		||||
  M5(_M5),
 | 
			
		||||
  Umu(_FourDimGrid),
 | 
			
		||||
  UmuEven(_FourDimGrid),
 | 
			
		||||
  UmuOdd (_FourDimGrid),
 | 
			
		||||
  Lebesgue(_FourDimGrid),
 | 
			
		||||
  LebesgueEvenOdd(_FourDimGrid)
 | 
			
		||||
{
 | 
			
		||||
  int nsimd = Simd::Nsimd();
 | 
			
		||||
 | 
			
		||||
@@ -148,15 +171,10 @@ WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  {
 | 
			
		||||
    GaugeField HUmu(_Umu._grid);
 | 
			
		||||
    HUmu = _Umu*(-0.5);
 | 
			
		||||
    Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
 | 
			
		||||
    UmuEven=Umu;// Really want a reference.
 | 
			
		||||
    UmuOdd =Umu;
 | 
			
		||||
  }
 | 
			
		||||
}  
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  */
 | 
			
		||||
     
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
 | 
			
		||||
{
 | 
			
		||||
@@ -376,8 +394,6 @@ void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag
 | 
			
		||||
 | 
			
		||||
FermOpTemplateInstantiate(WilsonFermion5D);
 | 
			
		||||
GparityFermOpTemplateInstantiate(WilsonFermion5D);
 | 
			
		||||
template class WilsonFermion5D<DomainWallRedBlack5dImplF>;		
 | 
			
		||||
template class WilsonFermion5D<DomainWallRedBlack5dImplD>;
 | 
			
		||||
  
 | 
			
		||||
}}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -125,12 +125,14 @@ namespace Grid {
 | 
			
		||||
		      double _M5,const ImplParams &p= ImplParams());
 | 
			
		||||
 | 
			
		||||
      // Constructors
 | 
			
		||||
      /*
 | 
			
		||||
      WilsonFermion5D(int simd, 
 | 
			
		||||
		      GaugeField &_Umu,
 | 
			
		||||
		      GridCartesian         &FiveDimGrid,
 | 
			
		||||
		      GridRedBlackCartesian &FiveDimRedBlackGrid,
 | 
			
		||||
		      GridCartesian         &FourDimGrid,
 | 
			
		||||
		      double _M5,const ImplParams &p= ImplParams());
 | 
			
		||||
      */
 | 
			
		||||
 | 
			
		||||
      // DoubleStore
 | 
			
		||||
      void ImportGauge(const GaugeField &_Umu);
 | 
			
		||||
 
 | 
			
		||||
@@ -611,7 +611,5 @@ void WilsonKernels<Impl>::DiracOptDhopDir(
 | 
			
		||||
FermOpTemplateInstantiate(WilsonKernels);
 | 
			
		||||
AdjointFermOpTemplateInstantiate(WilsonKernels);
 | 
			
		||||
 | 
			
		||||
template class WilsonKernels<DomainWallRedBlack5dImplF>;
 | 
			
		||||
template class WilsonKernels<DomainWallRedBlack5dImplD>;
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
}}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -90,7 +90,7 @@ void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrd
 | 
			
		||||
#define VMOVRDUP(A,B,C)                                  VBCASTRDUPf(A,B,C)
 | 
			
		||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
 | 
			
		||||
template<>
 | 
			
		||||
void WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
								   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
								   int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
			
		||||
@@ -110,10 +110,10 @@ template void WilsonKernels<GparityWilsonImplF>::DiracOptAsmDhopSite(StencilImpl
 | 
			
		||||
template void WilsonKernels<GparityWilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, 
 | 
			
		||||
							       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
							       int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);		
 | 
			
		||||
template void WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, 
 | 
			
		||||
template void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, 
 | 
			
		||||
							       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
							       int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);		
 | 
			
		||||
template void WilsonKernels<DomainWallRedBlack5dImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, 
 | 
			
		||||
template void WilsonKernels<DomainWallVec5dImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, 
 | 
			
		||||
							       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
							       int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);		
 | 
			
		||||
}}
 | 
			
		||||
 
 | 
			
		||||
@@ -867,16 +867,16 @@ template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(Stencil
 | 
			
		||||
									 int ss,int sU,const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template void WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
template void WilsonKernels<DomainWallVec5dImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
								      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
								      int ss,int sU,const FermionField &in, FermionField &out);
 | 
			
		||||
template void WilsonKernels<DomainWallRedBlack5dImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
template void WilsonKernels<DomainWallVec5dImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
								      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
								      int ss,int sU,const FermionField &in, FermionField &out);
 | 
			
		||||
template void WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
template void WilsonKernels<DomainWallVec5dImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
									 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
									 int ss,int sU,const FermionField &in, FermionField &out);
 | 
			
		||||
template void WilsonKernels<DomainWallRedBlack5dImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
template void WilsonKernels<DomainWallVec5dImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
									 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
									 int ss,int sU,const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -28,7 +28,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#ifndef  GRID_QCD_WILSON_TM_FERMION_H
 | 
			
		||||
#define  GRID_QCD_WILSON_TM_FERMION_H
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,8 +1,8 @@
 | 
			
		||||
#ifndef HMC_TYPES_H
 | 
			
		||||
#define HMC_TYPES_H
 | 
			
		||||
 | 
			
		||||
#include <qcd/representations/adjoint.h>
 | 
			
		||||
#include <qcd/representations/fundamental.h>
 | 
			
		||||
#include <Grid/qcd/representations/adjoint.h>
 | 
			
		||||
#include <Grid/qcd/representations/fundamental.h>
 | 
			
		||||
#include <tuple>
 | 
			
		||||
#include <utility>
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,9 +1,9 @@
 | 
			
		||||
#ifndef GRID_QCD_SMEARING_H
 | 
			
		||||
#define GRID_QCD_SMEARING_H
 | 
			
		||||
 | 
			
		||||
#include <qcd/smearing/BaseSmearing.h>
 | 
			
		||||
#include <qcd/smearing/APEsmearing.h>
 | 
			
		||||
#include <qcd/smearing/StoutSmearing.h>
 | 
			
		||||
#include <qcd/smearing/GaugeConfiguration.h>
 | 
			
		||||
#include <Grid/qcd/smearing/BaseSmearing.h>
 | 
			
		||||
#include <Grid/qcd/smearing/APEsmearing.h>
 | 
			
		||||
#include <Grid/qcd/smearing/StoutSmearing.h>
 | 
			
		||||
#include <Grid/qcd/smearing/GaugeConfiguration.h>
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -84,7 +84,7 @@ GridRedBlackCartesian *SpaceTimeGrid::makeFiveDimRedBlackGrid(int Ls,const GridC
 | 
			
		||||
 | 
			
		||||
GridCartesian         *SpaceTimeGrid::makeFiveDimDWFGrid(int Ls,const GridCartesian *FourDimGrid)
 | 
			
		||||
{
 | 
			
		||||
  int N4=FourDimGrid->_ndimension;
 | 
			
		||||
  int N4    = FourDimGrid->_ndimension;
 | 
			
		||||
  int nsimd = FourDimGrid->Nsimd();
 | 
			
		||||
 | 
			
		||||
  std::vector<int> latt5(1,Ls);
 | 
			
		||||
@@ -103,11 +103,11 @@ GridRedBlackCartesian *SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(int Ls,const Gr
 | 
			
		||||
{
 | 
			
		||||
  int N4=FourDimGrid->_ndimension;
 | 
			
		||||
  int nsimd = FourDimGrid->Nsimd();
 | 
			
		||||
  int cbd=0;
 | 
			
		||||
  int cbd=1;
 | 
			
		||||
  std::vector<int> latt5(1,Ls);
 | 
			
		||||
  std::vector<int> simd5(1,nsimd);
 | 
			
		||||
  std::vector<int>  mpi5(1,1);
 | 
			
		||||
  std::vector<int>   cb5(1,1);
 | 
			
		||||
  std::vector<int>   cb5(1,0);
 | 
			
		||||
    
 | 
			
		||||
  for(int d=0;d<N4;d++){
 | 
			
		||||
    latt5.push_back(FourDimGrid->_fdimensions[d]);
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user