mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Internal SHM comms in non-simd directions working
Need to fix simd directions
This commit is contained in:
		@@ -33,511 +33,500 @@ directory
 | 
			
		||||
#define GRID_QCD_FERMION_OPERATOR_IMPL_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  namespace QCD {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////
 | 
			
		||||
    // Template parameter class constructs to package
 | 
			
		||||
    // externally control Fermion implementations
 | 
			
		||||
    // in orthogonal directions
 | 
			
		||||
    //
 | 
			
		||||
    // Ultimately need Impl to always define types where XXX is opaque
 | 
			
		||||
    //
 | 
			
		||||
    //    typedef typename XXX               Simd;
 | 
			
		||||
    //    typedef typename XXX     GaugeLinkField;	
 | 
			
		||||
    //    typedef typename XXX         GaugeField;
 | 
			
		||||
    //    typedef typename XXX      GaugeActField;
 | 
			
		||||
    //    typedef typename XXX       FermionField;
 | 
			
		||||
    //    typedef typename XXX  DoubledGaugeField;
 | 
			
		||||
    //    typedef typename XXX         SiteSpinor;
 | 
			
		||||
    //    typedef typename XXX     SiteHalfSpinor;	
 | 
			
		||||
    //    typedef typename XXX         Compressor;	
 | 
			
		||||
    //
 | 
			
		||||
    // and Methods:
 | 
			
		||||
    //    void ImportGauge(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
 | 
			
		||||
    //    void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
 | 
			
		||||
    //    void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St)
 | 
			
		||||
    //    void InsertForce4D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
 | 
			
		||||
    //    void InsertForce5D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
 | 
			
		||||
    //
 | 
			
		||||
    //
 | 
			
		||||
    // To acquire the typedefs from "Base" (either a base class or template param) use:
 | 
			
		||||
    //
 | 
			
		||||
    // INHERIT_GIMPL_TYPES(Base)
 | 
			
		||||
    // INHERIT_FIMPL_TYPES(Base)
 | 
			
		||||
    // INHERIT_IMPL_TYPES(Base)
 | 
			
		||||
    //
 | 
			
		||||
    // The Fermion operators will do the following:
 | 
			
		||||
    //
 | 
			
		||||
    // struct MyOpParams { 
 | 
			
		||||
    //   RealD mass;
 | 
			
		||||
    // };
 | 
			
		||||
    //
 | 
			
		||||
    //
 | 
			
		||||
    // template<class Impl>
 | 
			
		||||
    // class MyOp : public Impl { 
 | 
			
		||||
    // public:
 | 
			
		||||
    //
 | 
			
		||||
    //    INHERIT_ALL_IMPL_TYPES(Impl);
 | 
			
		||||
    //
 | 
			
		||||
    //    MyOp(MyOpParams Myparm, ImplParams &ImplParam) :  Impl(ImplParam)
 | 
			
		||||
    //    {
 | 
			
		||||
    //
 | 
			
		||||
    //    };
 | 
			
		||||
    //    
 | 
			
		||||
    //  }
 | 
			
		||||
    //////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Implementation dependent fermion types
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  //////////////////////////////////////////////
 | 
			
		||||
  // Template parameter class constructs to package
 | 
			
		||||
  // externally control Fermion implementations
 | 
			
		||||
  // in orthogonal directions
 | 
			
		||||
  //
 | 
			
		||||
  // Ultimately need Impl to always define types where XXX is opaque
 | 
			
		||||
  //
 | 
			
		||||
  //    typedef typename XXX               Simd;
 | 
			
		||||
  //    typedef typename XXX     GaugeLinkField;	
 | 
			
		||||
  //    typedef typename XXX         GaugeField;
 | 
			
		||||
  //    typedef typename XXX      GaugeActField;
 | 
			
		||||
  //    typedef typename XXX       FermionField;
 | 
			
		||||
  //    typedef typename XXX  DoubledGaugeField;
 | 
			
		||||
  //    typedef typename XXX         SiteSpinor;
 | 
			
		||||
  //    typedef typename XXX     SiteHalfSpinor;	
 | 
			
		||||
  //    typedef typename XXX         Compressor;	
 | 
			
		||||
  //
 | 
			
		||||
  // and Methods:
 | 
			
		||||
  //    void ImportGauge(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
 | 
			
		||||
  //    void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
 | 
			
		||||
  //    void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St)
 | 
			
		||||
  //    void InsertForce4D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
 | 
			
		||||
  //    void InsertForce5D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
 | 
			
		||||
  //
 | 
			
		||||
  //
 | 
			
		||||
  // To acquire the typedefs from "Base" (either a base class or template param) use:
 | 
			
		||||
  //
 | 
			
		||||
  // INHERIT_GIMPL_TYPES(Base)
 | 
			
		||||
  // INHERIT_FIMPL_TYPES(Base)
 | 
			
		||||
  // INHERIT_IMPL_TYPES(Base)
 | 
			
		||||
  //
 | 
			
		||||
  // The Fermion operators will do the following:
 | 
			
		||||
  //
 | 
			
		||||
  // struct MyOpParams { 
 | 
			
		||||
  //   RealD mass;
 | 
			
		||||
  // };
 | 
			
		||||
  //
 | 
			
		||||
  //
 | 
			
		||||
  // template<class Impl>
 | 
			
		||||
  // class MyOp : public Impl { 
 | 
			
		||||
  // public:
 | 
			
		||||
  //
 | 
			
		||||
  //    INHERIT_ALL_IMPL_TYPES(Impl);
 | 
			
		||||
  //
 | 
			
		||||
  //    MyOp(MyOpParams Myparm, ImplParams &ImplParam) :  Impl(ImplParam)
 | 
			
		||||
  //    {
 | 
			
		||||
  //
 | 
			
		||||
  //    };
 | 
			
		||||
  //    
 | 
			
		||||
  //  }
 | 
			
		||||
  //////////////////////////////////////////////
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Implementation dependent fermion types
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  
 | 
			
		||||
#define INHERIT_FIMPL_TYPES(Impl)\
 | 
			
		||||
    typedef typename Impl::FermionField           FermionField;		\
 | 
			
		||||
    typedef typename Impl::DoubledGaugeField DoubledGaugeField;		\
 | 
			
		||||
    typedef typename Impl::SiteSpinor               SiteSpinor;		\
 | 
			
		||||
    typedef typename Impl::SiteHalfSpinor       SiteHalfSpinor;		\
 | 
			
		||||
    typedef typename Impl::Compressor               Compressor;		\
 | 
			
		||||
    typedef typename Impl::StencilImpl             StencilImpl;		\
 | 
			
		||||
    typedef typename Impl::ImplParams ImplParams;			\
 | 
			
		||||
    typedef typename Impl::Coeff_t       Coeff_t;
 | 
			
		||||
 | 
			
		||||
  typedef typename Impl::FermionField           FermionField;		\
 | 
			
		||||
  typedef typename Impl::DoubledGaugeField DoubledGaugeField;		\
 | 
			
		||||
  typedef typename Impl::SiteSpinor               SiteSpinor;		\
 | 
			
		||||
  typedef typename Impl::SiteHalfSpinor       SiteHalfSpinor;		\
 | 
			
		||||
  typedef typename Impl::Compressor               Compressor;		\
 | 
			
		||||
  typedef typename Impl::StencilImpl             StencilImpl;		\
 | 
			
		||||
  typedef typename Impl::ImplParams ImplParams;				\
 | 
			
		||||
  typedef typename Impl::Coeff_t       Coeff_t;
 | 
			
		||||
  
 | 
			
		||||
#define INHERIT_IMPL_TYPES(Base) \
 | 
			
		||||
    INHERIT_GIMPL_TYPES(Base)	 \
 | 
			
		||||
    INHERIT_FIMPL_TYPES(Base)
 | 
			
		||||
  INHERIT_GIMPL_TYPES(Base)	 \
 | 
			
		||||
  INHERIT_FIMPL_TYPES(Base)
 | 
			
		||||
  
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Single flavour four spinors with colour index
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  template <class S, class Representation = FundamentalRepresentation,class _Coeff_t = RealD >
 | 
			
		||||
  class WilsonImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
 | 
			
		||||
 | 
			
		||||
    public:
 | 
			
		||||
 | 
			
		||||
    static const int Dimension = Representation::Dimension;
 | 
			
		||||
    typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
 | 
			
		||||
      
 | 
			
		||||
    //Necessary?
 | 
			
		||||
    constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
 | 
			
		||||
    
 | 
			
		||||
    ///////
 | 
			
		||||
    // Single flavour four spinors with colour index
 | 
			
		||||
    ///////
 | 
			
		||||
    template <class S, class Representation = FundamentalRepresentation,class _Coeff_t = RealD >
 | 
			
		||||
    class WilsonImpl
 | 
			
		||||
      : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
 | 
			
		||||
    public:
 | 
			
		||||
      static const int Dimension = Representation::Dimension;
 | 
			
		||||
      typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
 | 
			
		||||
      
 | 
			
		||||
      //Necessary?
 | 
			
		||||
      constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
 | 
			
		||||
    const bool LsVectorised=false;
 | 
			
		||||
    typedef _Coeff_t Coeff_t;
 | 
			
		||||
 | 
			
		||||
      const bool LsVectorised=false;
 | 
			
		||||
      typedef _Coeff_t Coeff_t;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      INHERIT_GIMPL_TYPES(Gimpl);
 | 
			
		||||
    INHERIT_GIMPL_TYPES(Gimpl);
 | 
			
		||||
      
 | 
			
		||||
      template <typename vtype> using iImplSpinor            = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
 | 
			
		||||
      template <typename vtype> using iImplHalfSpinor        = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
 | 
			
		||||
      template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
 | 
			
		||||
    template <typename vtype> using iImplSpinor            = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
 | 
			
		||||
    template <typename vtype> using iImplHalfSpinor        = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
 | 
			
		||||
    template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
 | 
			
		||||
    
 | 
			
		||||
    typedef iImplSpinor<Simd>            SiteSpinor;
 | 
			
		||||
    typedef iImplHalfSpinor<Simd>        SiteHalfSpinor;
 | 
			
		||||
    typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
 | 
			
		||||
    
 | 
			
		||||
    typedef Lattice<SiteSpinor>            FermionField;
 | 
			
		||||
    typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
 | 
			
		||||
    
 | 
			
		||||
    typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
 | 
			
		||||
    typedef WilsonImplParams ImplParams;
 | 
			
		||||
    typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
 | 
			
		||||
    
 | 
			
		||||
    ImplParams Params;
 | 
			
		||||
    
 | 
			
		||||
    WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
 | 
			
		||||
      
 | 
			
		||||
      typedef iImplSpinor<Simd>            SiteSpinor;
 | 
			
		||||
      typedef iImplHalfSpinor<Simd>        SiteHalfSpinor;
 | 
			
		||||
      typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
 | 
			
		||||
    bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
 | 
			
		||||
      
 | 
			
		||||
      typedef Lattice<SiteSpinor>            FermionField;
 | 
			
		||||
      typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
 | 
			
		||||
    inline void multLink(SiteHalfSpinor &phi,
 | 
			
		||||
			 const SiteDoubledGaugeField &U,
 | 
			
		||||
			 const SiteHalfSpinor &chi,
 | 
			
		||||
			 int mu,
 | 
			
		||||
			 StencilEntry *SE,
 | 
			
		||||
			 StencilImpl &St) {
 | 
			
		||||
      mult(&phi(), &U(mu), &chi());
 | 
			
		||||
    }
 | 
			
		||||
      
 | 
			
		||||
      typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
 | 
			
		||||
      typedef WilsonImplParams ImplParams;
 | 
			
		||||
      typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
 | 
			
		||||
    template <class ref>
 | 
			
		||||
    inline void loadLinkElement(Simd &reg, ref &memory) {
 | 
			
		||||
      reg = memory;
 | 
			
		||||
    }
 | 
			
		||||
      
 | 
			
		||||
      ImplParams Params;
 | 
			
		||||
      
 | 
			
		||||
      WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
 | 
			
		||||
      
 | 
			
		||||
      bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
 | 
			
		||||
      
 | 
			
		||||
      inline void multLink(SiteHalfSpinor &phi,
 | 
			
		||||
			   const SiteDoubledGaugeField &U,
 | 
			
		||||
			   const SiteHalfSpinor &chi,
 | 
			
		||||
			   int mu,
 | 
			
		||||
			   StencilEntry *SE,
 | 
			
		||||
			   StencilImpl &St) {
 | 
			
		||||
	mult(&phi(), &U(mu), &chi());
 | 
			
		||||
    inline void DoubleStore(GridBase *GaugeGrid,
 | 
			
		||||
			    DoubledGaugeField &Uds,
 | 
			
		||||
			    const GaugeField &Umu) {
 | 
			
		||||
      conformable(Uds._grid, GaugeGrid);
 | 
			
		||||
      conformable(Umu._grid, GaugeGrid);
 | 
			
		||||
      GaugeLinkField U(GaugeGrid);
 | 
			
		||||
      for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
	U = PeekIndex<LorentzIndex>(Umu, mu);
 | 
			
		||||
	PokeIndex<LorentzIndex>(Uds, U, mu);
 | 
			
		||||
	U = adj(Cshift(U, mu, -1));
 | 
			
		||||
	PokeIndex<LorentzIndex>(Uds, U, mu + 4);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
 | 
			
		||||
      GaugeLinkField link(mat._grid);
 | 
			
		||||
      link = TraceIndex<SpinIndex>(outerProduct(Btilde,A)); 
 | 
			
		||||
      PokeIndex<LorentzIndex>(mat,link,mu);
 | 
			
		||||
    }   
 | 
			
		||||
      
 | 
			
		||||
      template <class ref>
 | 
			
		||||
      inline void loadLinkElement(Simd &reg,
 | 
			
		||||
				  ref &memory) {
 | 
			
		||||
	reg = memory;
 | 
			
		||||
      }
 | 
			
		||||
    inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
 | 
			
		||||
      
 | 
			
		||||
      inline void DoubleStore(GridBase *GaugeGrid,
 | 
			
		||||
			      DoubledGaugeField &Uds,
 | 
			
		||||
			      const GaugeField &Umu) {
 | 
			
		||||
	conformable(Uds._grid, GaugeGrid);
 | 
			
		||||
	conformable(Umu._grid, GaugeGrid);
 | 
			
		||||
	GaugeLinkField U(GaugeGrid);
 | 
			
		||||
	for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
	  U = PeekIndex<LorentzIndex>(Umu, mu);
 | 
			
		||||
	  PokeIndex<LorentzIndex>(Uds, U, mu);
 | 
			
		||||
	  U = adj(Cshift(U, mu, -1));
 | 
			
		||||
	  PokeIndex<LorentzIndex>(Uds, U, mu + 4);
 | 
			
		||||
      int Ls=Btilde._grid->_fdimensions[0];
 | 
			
		||||
      GaugeLinkField tmp(mat._grid);
 | 
			
		||||
      tmp = zero;
 | 
			
		||||
      
 | 
			
		||||
      PARALLEL_FOR_LOOP
 | 
			
		||||
      for(int sss=0;sss<tmp._grid->oSites();sss++){
 | 
			
		||||
	int sU=sss;
 | 
			
		||||
	for(int s=0;s<Ls;s++){
 | 
			
		||||
	  int sF = s+Ls*sU;
 | 
			
		||||
	  tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
 | 
			
		||||
	GaugeLinkField link(mat._grid);
 | 
			
		||||
	link = TraceIndex<SpinIndex>(outerProduct(Btilde,A)); 
 | 
			
		||||
	PokeIndex<LorentzIndex>(mat,link,mu);
 | 
			
		||||
      }   
 | 
			
		||||
      PokeIndex<LorentzIndex>(mat,tmp,mu);
 | 
			
		||||
      
 | 
			
		||||
      inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
 | 
			
		||||
	
 | 
			
		||||
	int Ls=Btilde._grid->_fdimensions[0];
 | 
			
		||||
	GaugeLinkField tmp(mat._grid);
 | 
			
		||||
	tmp = zero;
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
        PARALLEL_FOR_LOOP
 | 
			
		||||
	  for(int sss=0;sss<tmp._grid->oSites();sss++){
 | 
			
		||||
	    int sU=sss;
 | 
			
		||||
	    for(int s=0;s<Ls;s++){
 | 
			
		||||
	      int sF = s+Ls*sU;
 | 
			
		||||
	      tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	PokeIndex<LorentzIndex>(mat,tmp,mu);
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Single flavour four spinors with colour index, 5d redblack
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD>
 | 
			
		||||
class DomainWallVec5dImpl :  public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > { 
 | 
			
		||||
  public:
 | 
			
		||||
      
 | 
			
		||||
  static const int Dimension = Nrepresentation;
 | 
			
		||||
  const bool LsVectorised=true;
 | 
			
		||||
  typedef _Coeff_t Coeff_t;      
 | 
			
		||||
  typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl;
 | 
			
		||||
  
 | 
			
		||||
  INHERIT_GIMPL_TYPES(Gimpl);
 | 
			
		||||
  
 | 
			
		||||
  template <typename vtype> using iImplSpinor            = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
 | 
			
		||||
  template <typename vtype> using iImplHalfSpinor        = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
 | 
			
		||||
  template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>;
 | 
			
		||||
  template <typename vtype> using iImplGaugeField        = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;
 | 
			
		||||
  template <typename vtype> using iImplGaugeLink         = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
 | 
			
		||||
  
 | 
			
		||||
  typedef iImplSpinor<Simd> SiteSpinor;
 | 
			
		||||
  typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
 | 
			
		||||
  typedef Lattice<SiteSpinor> FermionField;
 | 
			
		||||
  
 | 
			
		||||
  // Make the doubled gauge field a *scalar*
 | 
			
		||||
  typedef iImplDoubledGaugeField<typename Simd::scalar_type>  SiteDoubledGaugeField;  // This is a scalar
 | 
			
		||||
  typedef iImplGaugeField<typename Simd::scalar_type>         SiteScalarGaugeField;  // scalar
 | 
			
		||||
  typedef iImplGaugeLink<typename Simd::scalar_type>          SiteScalarGaugeLink;  // scalar
 | 
			
		||||
      
 | 
			
		||||
  typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
 | 
			
		||||
      
 | 
			
		||||
  typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
 | 
			
		||||
  typedef WilsonImplParams ImplParams;
 | 
			
		||||
  typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
 | 
			
		||||
  
 | 
			
		||||
  ImplParams Params;
 | 
			
		||||
  
 | 
			
		||||
  DomainWallVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){};
 | 
			
		||||
      
 | 
			
		||||
  bool overlapCommsCompute(void) { return false; };
 | 
			
		||||
      
 | 
			
		||||
  template <class ref>
 | 
			
		||||
  inline void loadLinkElement(Simd &reg, ref &memory) {
 | 
			
		||||
    vsplat(reg, memory);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
 | 
			
		||||
		       const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
 | 
			
		||||
		       StencilImpl &St) {
 | 
			
		||||
    SiteGaugeLink UU;
 | 
			
		||||
    for (int i = 0; i < Nrepresentation; i++) {
 | 
			
		||||
      for (int j = 0; j < Nrepresentation; j++) {
 | 
			
		||||
	vsplat(UU()()(i, j), U(mu)()(i, j));
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    ///////
 | 
			
		||||
    // Single flavour four spinors with colour index, 5d redblack
 | 
			
		||||
    ///////
 | 
			
		||||
    template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD>
 | 
			
		||||
    class DomainWallVec5dImpl :  public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > { 
 | 
			
		||||
    public:
 | 
			
		||||
    }
 | 
			
		||||
    mult(&phi(), &UU(), &chi());
 | 
			
		||||
  }
 | 
			
		||||
      
 | 
			
		||||
      static const int Dimension = Nrepresentation;
 | 
			
		||||
      const bool LsVectorised=true;
 | 
			
		||||
      typedef _Coeff_t Coeff_t;      
 | 
			
		||||
      typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl;
 | 
			
		||||
 | 
			
		||||
      INHERIT_GIMPL_TYPES(Gimpl);
 | 
			
		||||
  inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,const GaugeField &Umu) 
 | 
			
		||||
  {
 | 
			
		||||
    SiteScalarGaugeField ScalarUmu;
 | 
			
		||||
    SiteDoubledGaugeField ScalarUds;
 | 
			
		||||
    
 | 
			
		||||
    GaugeLinkField U(Umu._grid);
 | 
			
		||||
    GaugeField Uadj(Umu._grid);
 | 
			
		||||
    for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
      U = PeekIndex<LorentzIndex>(Umu, mu);
 | 
			
		||||
      U = adj(Cshift(U, mu, -1));
 | 
			
		||||
      PokeIndex<LorentzIndex>(Uadj, U, mu);
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
 | 
			
		||||
      std::vector<int> lcoor;
 | 
			
		||||
      GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
 | 
			
		||||
      
 | 
			
		||||
      template <typename vtype> using iImplSpinor            = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
 | 
			
		||||
      template <typename vtype> using iImplHalfSpinor        = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
 | 
			
		||||
      template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>;
 | 
			
		||||
      template <typename vtype> using iImplGaugeField        = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;
 | 
			
		||||
      template <typename vtype> using iImplGaugeLink         = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
 | 
			
		||||
      peekLocalSite(ScalarUmu, Umu, lcoor);
 | 
			
		||||
      for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu);
 | 
			
		||||
      
 | 
			
		||||
      typedef iImplSpinor<Simd> SiteSpinor;
 | 
			
		||||
      typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
 | 
			
		||||
      typedef Lattice<SiteSpinor> FermionField;
 | 
			
		||||
      peekLocalSite(ScalarUmu, Uadj, lcoor);
 | 
			
		||||
      for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu);
 | 
			
		||||
      
 | 
			
		||||
      // Make the doubled gauge field a *scalar*
 | 
			
		||||
      typedef iImplDoubledGaugeField<typename Simd::scalar_type>
 | 
			
		||||
      SiteDoubledGaugeField;  // This is a scalar
 | 
			
		||||
      typedef iImplGaugeField<typename Simd::scalar_type>
 | 
			
		||||
      SiteScalarGaugeField;  // scalar
 | 
			
		||||
      typedef iImplGaugeLink<typename Simd::scalar_type>
 | 
			
		||||
      SiteScalarGaugeLink;  // scalar
 | 
			
		||||
      pokeLocalSite(ScalarUds, Uds, lcoor);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
      
 | 
			
		||||
      typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
 | 
			
		||||
  inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,FermionField &A, int mu) 
 | 
			
		||||
  {
 | 
			
		||||
    assert(0);
 | 
			
		||||
  }
 | 
			
		||||
      
 | 
			
		||||
      typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
 | 
			
		||||
      typedef WilsonImplParams ImplParams;
 | 
			
		||||
      typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
 | 
			
		||||
      
 | 
			
		||||
      ImplParams Params;
 | 
			
		||||
      
 | 
			
		||||
      DomainWallVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){};
 | 
			
		||||
      
 | 
			
		||||
      bool overlapCommsCompute(void) { return false; };
 | 
			
		||||
      
 | 
			
		||||
      template <class ref>
 | 
			
		||||
      inline void loadLinkElement(Simd &reg, ref &memory) {
 | 
			
		||||
	vsplat(reg, memory);
 | 
			
		||||
      }
 | 
			
		||||
      inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
 | 
			
		||||
			   const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
 | 
			
		||||
			   StencilImpl &St) {
 | 
			
		||||
	SiteGaugeLink UU;
 | 
			
		||||
	for (int i = 0; i < Nrepresentation; i++) {
 | 
			
		||||
	  for (int j = 0; j < Nrepresentation; j++) {
 | 
			
		||||
	    vsplat(UU()()(i, j), U(mu)()(i, j));
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
	mult(&phi(), &UU(), &chi());
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,
 | 
			
		||||
			      const GaugeField &Umu) {
 | 
			
		||||
	SiteScalarGaugeField ScalarUmu;
 | 
			
		||||
	SiteDoubledGaugeField ScalarUds;
 | 
			
		||||
	
 | 
			
		||||
	GaugeLinkField U(Umu._grid);
 | 
			
		||||
	GaugeField Uadj(Umu._grid);
 | 
			
		||||
	for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
	  U = PeekIndex<LorentzIndex>(Umu, mu);
 | 
			
		||||
	  U = adj(Cshift(U, mu, -1));
 | 
			
		||||
	  PokeIndex<LorentzIndex>(Uadj, U, mu);
 | 
			
		||||
	}
 | 
			
		||||
	
 | 
			
		||||
	for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
 | 
			
		||||
	  std::vector<int> lcoor;
 | 
			
		||||
	  GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
 | 
			
		||||
	  
 | 
			
		||||
	  peekLocalSite(ScalarUmu, Umu, lcoor);
 | 
			
		||||
	  for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu);
 | 
			
		||||
	  
 | 
			
		||||
	  peekLocalSite(ScalarUmu, Uadj, lcoor);
 | 
			
		||||
	  for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu);
 | 
			
		||||
	  
 | 
			
		||||
	  pokeLocalSite(ScalarUds, Uds, lcoor);
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,
 | 
			
		||||
				FermionField &A, int mu) {
 | 
			
		||||
  inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,FermionField &Atilde, int mu) 
 | 
			
		||||
  {
 | 
			
		||||
	assert(0);
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,
 | 
			
		||||
				FermionField &Atilde, int mu) {
 | 
			
		||||
	assert(0);
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
    
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Flavour doubled spinors; is Gparity the only? what about C*?
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    
 | 
			
		||||
    template <class S, int Nrepresentation,class _Coeff_t = RealD>
 | 
			
		||||
    class GparityWilsonImpl
 | 
			
		||||
      : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > {
 | 
			
		||||
    public:
 | 
			
		||||
      static const int Dimension = Nrepresentation;
 | 
			
		||||
template <class S, int Nrepresentation,class _Coeff_t = RealD>
 | 
			
		||||
class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > {
 | 
			
		||||
 public:
 | 
			
		||||
 | 
			
		||||
      const bool LsVectorised=false;
 | 
			
		||||
 static const int Dimension = Nrepresentation;
 | 
			
		||||
 | 
			
		||||
      typedef _Coeff_t Coeff_t;
 | 
			
		||||
      typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
 | 
			
		||||
 const bool LsVectorised=false;
 | 
			
		||||
 | 
			
		||||
 typedef _Coeff_t Coeff_t;
 | 
			
		||||
 typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
 | 
			
		||||
 
 | 
			
		||||
 INHERIT_GIMPL_TYPES(Gimpl);
 | 
			
		||||
      
 | 
			
		||||
      INHERIT_GIMPL_TYPES(Gimpl);
 | 
			
		||||
 template <typename vtype> using iImplSpinor            = iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp>;
 | 
			
		||||
 template <typename vtype> using iImplHalfSpinor        = iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp>;
 | 
			
		||||
 template <typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>, Ngp>;
 | 
			
		||||
      
 | 
			
		||||
      template <typename vtype>
 | 
			
		||||
      using iImplSpinor =
 | 
			
		||||
      iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp>;
 | 
			
		||||
      template <typename vtype>
 | 
			
		||||
      using iImplHalfSpinor =
 | 
			
		||||
	iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp>;
 | 
			
		||||
      template <typename vtype>
 | 
			
		||||
      using iImplDoubledGaugeField =
 | 
			
		||||
	iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>, Ngp>;
 | 
			
		||||
 typedef iImplSpinor<Simd> SiteSpinor;
 | 
			
		||||
 typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
 | 
			
		||||
 typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
 | 
			
		||||
 
 | 
			
		||||
 typedef Lattice<SiteSpinor> FermionField;
 | 
			
		||||
 typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
 | 
			
		||||
 
 | 
			
		||||
 typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
 | 
			
		||||
 typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
 | 
			
		||||
 
 | 
			
		||||
 typedef GparityWilsonImplParams ImplParams;
 | 
			
		||||
      
 | 
			
		||||
      typedef iImplSpinor<Simd> SiteSpinor;
 | 
			
		||||
      typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
 | 
			
		||||
      typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
 | 
			
		||||
      
 | 
			
		||||
      typedef Lattice<SiteSpinor> FermionField;
 | 
			
		||||
      typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
 | 
			
		||||
      
 | 
			
		||||
      typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
 | 
			
		||||
      typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
 | 
			
		||||
 ImplParams Params;
 | 
			
		||||
 | 
			
		||||
      typedef GparityWilsonImplParams ImplParams;
 | 
			
		||||
      
 | 
			
		||||
      ImplParams Params;
 | 
			
		||||
 GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
 | 
			
		||||
 | 
			
		||||
 bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
 | 
			
		||||
 | 
			
		||||
      GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
 | 
			
		||||
 // provide the multiply by link that is differentiated between Gparity (with
 | 
			
		||||
 // flavour index) and non-Gparity
 | 
			
		||||
 inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
 | 
			
		||||
		      const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
 | 
			
		||||
		      StencilImpl &St) {
 | 
			
		||||
 | 
			
		||||
      bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
 | 
			
		||||
 | 
			
		||||
      // provide the multiply by link that is differentiated between Gparity (with
 | 
			
		||||
      // flavour index) and non-Gparity
 | 
			
		||||
      inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
 | 
			
		||||
			   const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
 | 
			
		||||
			   StencilImpl &St) {
 | 
			
		||||
	typedef SiteHalfSpinor vobj;
 | 
			
		||||
	typedef typename SiteHalfSpinor::scalar_object sobj;
 | 
			
		||||
  typedef SiteHalfSpinor vobj;
 | 
			
		||||
   typedef typename SiteHalfSpinor::scalar_object sobj;
 | 
			
		||||
	
 | 
			
		||||
	vobj vtmp;
 | 
			
		||||
	sobj stmp;
 | 
			
		||||
   vobj vtmp;
 | 
			
		||||
   sobj stmp;
 | 
			
		||||
	
 | 
			
		||||
	GridBase *grid = St._grid;
 | 
			
		||||
   GridBase *grid = St._grid;
 | 
			
		||||
	
 | 
			
		||||
	const int Nsimd = grid->Nsimd();
 | 
			
		||||
   const int Nsimd = grid->Nsimd();
 | 
			
		||||
	
 | 
			
		||||
	int direction = St._directions[mu];
 | 
			
		||||
	int distance = St._distances[mu];
 | 
			
		||||
	int ptype = St._permute_type[mu];
 | 
			
		||||
	int sl = St._grid->_simd_layout[direction];
 | 
			
		||||
   int direction = St._directions[mu];
 | 
			
		||||
   int distance = St._distances[mu];
 | 
			
		||||
   int ptype = St._permute_type[mu];
 | 
			
		||||
   int sl = St._grid->_simd_layout[direction];
 | 
			
		||||
   
 | 
			
		||||
   // Fixme X.Y.Z.T hardcode in stencil
 | 
			
		||||
   int mmu = mu % Nd;
 | 
			
		||||
	
 | 
			
		||||
	// Fixme X.Y.Z.T hardcode in stencil
 | 
			
		||||
	int mmu = mu % Nd;
 | 
			
		||||
   // assert our assumptions
 | 
			
		||||
   assert((distance == 1) || (distance == -1));  // nearest neighbour stencil hard code
 | 
			
		||||
   assert((sl == 1) || (sl == 2));
 | 
			
		||||
   
 | 
			
		||||
   std::vector<int> icoor;
 | 
			
		||||
	
 | 
			
		||||
	// assert our assumptions
 | 
			
		||||
	assert((distance == 1) || (distance == -1));  // nearest neighbour stencil hard code
 | 
			
		||||
	assert((sl == 1) || (sl == 2));
 | 
			
		||||
	
 | 
			
		||||
	std::vector<int> icoor;
 | 
			
		||||
	
 | 
			
		||||
	if ( SE->_around_the_world && Params.twists[mmu] ) {
 | 
			
		||||
   if ( SE->_around_the_world && Params.twists[mmu] ) {
 | 
			
		||||
 | 
			
		||||
	  if ( sl == 2 ) {
 | 
			
		||||
     if ( sl == 2 ) {
 | 
			
		||||
       
 | 
			
		||||
       std::vector<sobj> vals(Nsimd);
 | 
			
		||||
 | 
			
		||||
	    std::vector<sobj> vals(Nsimd);
 | 
			
		||||
       extract(chi,vals);
 | 
			
		||||
       for(int s=0;s<Nsimd;s++){
 | 
			
		||||
 | 
			
		||||
	    extract(chi,vals);
 | 
			
		||||
	    for(int s=0;s<Nsimd;s++){
 | 
			
		||||
 | 
			
		||||
	      grid->iCoorFromIindex(icoor,s);
 | 
			
		||||
	 grid->iCoorFromIindex(icoor,s);
 | 
			
		||||
	      
 | 
			
		||||
	      assert((icoor[direction]==0)||(icoor[direction]==1));
 | 
			
		||||
	 assert((icoor[direction]==0)||(icoor[direction]==1));
 | 
			
		||||
	      
 | 
			
		||||
	      int permute_lane;
 | 
			
		||||
	      if ( distance == 1) {
 | 
			
		||||
		permute_lane = icoor[direction]?1:0;
 | 
			
		||||
	      } else {
 | 
			
		||||
		permute_lane = icoor[direction]?0:1;
 | 
			
		||||
	 int permute_lane;
 | 
			
		||||
	 if ( distance == 1) {
 | 
			
		||||
	   permute_lane = icoor[direction]?1:0;
 | 
			
		||||
	 } else {
 | 
			
		||||
	   permute_lane = icoor[direction]?0:1;
 | 
			
		||||
	 }
 | 
			
		||||
	      
 | 
			
		||||
	 if ( permute_lane ) { 
 | 
			
		||||
	   stmp(0) = vals[s](1);
 | 
			
		||||
	   stmp(1) = vals[s](0);
 | 
			
		||||
	   vals[s] = stmp;
 | 
			
		||||
	      }
 | 
			
		||||
	      
 | 
			
		||||
	      if ( permute_lane ) { 
 | 
			
		||||
		stmp(0) = vals[s](1);
 | 
			
		||||
		stmp(1) = vals[s](0);
 | 
			
		||||
		vals[s] = stmp;
 | 
			
		||||
	      }
 | 
			
		||||
	    }
 | 
			
		||||
	    merge(vtmp,vals);
 | 
			
		||||
       }
 | 
			
		||||
       merge(vtmp,vals);
 | 
			
		||||
	    
 | 
			
		||||
     } else { 
 | 
			
		||||
       vtmp(0) = chi(1);
 | 
			
		||||
       vtmp(1) = chi(0);
 | 
			
		||||
     }
 | 
			
		||||
     mult(&phi(0),&U(0)(mu),&vtmp(0));
 | 
			
		||||
     mult(&phi(1),&U(1)(mu),&vtmp(1));
 | 
			
		||||
     
 | 
			
		||||
   } else { 
 | 
			
		||||
     mult(&phi(0),&U(0)(mu),&chi(0));
 | 
			
		||||
     mult(&phi(1),&U(1)(mu),&chi(1));
 | 
			
		||||
   }
 | 
			
		||||
   
 | 
			
		||||
 }
 | 
			
		||||
 | 
			
		||||
	  } else { 
 | 
			
		||||
	    vtmp(0) = chi(1);
 | 
			
		||||
	    vtmp(1) = chi(0);
 | 
			
		||||
	  }
 | 
			
		||||
	  mult(&phi(0),&U(0)(mu),&vtmp(0));
 | 
			
		||||
	  mult(&phi(1),&U(1)(mu),&vtmp(1));
 | 
			
		||||
	  
 | 
			
		||||
	} else { 
 | 
			
		||||
	  mult(&phi(0),&U(0)(mu),&chi(0));
 | 
			
		||||
	  mult(&phi(1),&U(1)(mu),&chi(1));
 | 
			
		||||
	}
 | 
			
		||||
 inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
 | 
			
		||||
 {
 | 
			
		||||
   conformable(Uds._grid,GaugeGrid);
 | 
			
		||||
   conformable(Umu._grid,GaugeGrid);
 | 
			
		||||
   
 | 
			
		||||
   GaugeLinkField Utmp (GaugeGrid);
 | 
			
		||||
   GaugeLinkField U    (GaugeGrid);
 | 
			
		||||
   GaugeLinkField Uconj(GaugeGrid);
 | 
			
		||||
   
 | 
			
		||||
   Lattice<iScalar<vInteger> > coor(GaugeGrid);
 | 
			
		||||
	
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
      inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
 | 
			
		||||
      {
 | 
			
		||||
	
 | 
			
		||||
	conformable(Uds._grid,GaugeGrid);
 | 
			
		||||
	conformable(Umu._grid,GaugeGrid);
 | 
			
		||||
	
 | 
			
		||||
	GaugeLinkField Utmp (GaugeGrid);
 | 
			
		||||
	GaugeLinkField U    (GaugeGrid);
 | 
			
		||||
	GaugeLinkField Uconj(GaugeGrid);
 | 
			
		||||
	
 | 
			
		||||
	Lattice<iScalar<vInteger> > coor(GaugeGrid);
 | 
			
		||||
	
 | 
			
		||||
	
 | 
			
		||||
	for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
   for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
	  
 | 
			
		||||
	  LatticeCoordinate(coor,mu);
 | 
			
		||||
     LatticeCoordinate(coor,mu);
 | 
			
		||||
	  
 | 
			
		||||
	  U     = PeekIndex<LorentzIndex>(Umu,mu);
 | 
			
		||||
	  Uconj = conjugate(U);
 | 
			
		||||
     U     = PeekIndex<LorentzIndex>(Umu,mu);
 | 
			
		||||
     Uconj = conjugate(U);
 | 
			
		||||
     
 | 
			
		||||
     // This phase could come from a simple bc 1,1,-1,1 ..
 | 
			
		||||
     int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
 | 
			
		||||
     if ( Params.twists[mu] ) { 
 | 
			
		||||
       Uconj = where(coor==neglink,-Uconj,Uconj);
 | 
			
		||||
     }
 | 
			
		||||
	  
 | 
			
		||||
	  // This phase could come from a simple bc 1,1,-1,1 ..
 | 
			
		||||
	  int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
 | 
			
		||||
	  if ( Params.twists[mu] ) { 
 | 
			
		||||
	    Uconj = where(coor==neglink,-Uconj,Uconj);
 | 
			
		||||
	  }
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
     for(auto ss=U.begin();ss<U.end();ss++){
 | 
			
		||||
       Uds[ss](0)(mu) = U[ss]();
 | 
			
		||||
       Uds[ss](1)(mu) = Uconj[ss]();
 | 
			
		||||
     }
 | 
			
		||||
	  
 | 
			
		||||
     U     = adj(Cshift(U    ,mu,-1));      // correct except for spanning the boundary
 | 
			
		||||
     Uconj = adj(Cshift(Uconj,mu,-1));
 | 
			
		||||
 
 | 
			
		||||
     Utmp = U;
 | 
			
		||||
     if ( Params.twists[mu] ) { 
 | 
			
		||||
       Utmp = where(coor==0,Uconj,Utmp);
 | 
			
		||||
     }
 | 
			
		||||
	  
 | 
			
		||||
	  PARALLEL_FOR_LOOP
 | 
			
		||||
	    for(auto ss=U.begin();ss<U.end();ss++){
 | 
			
		||||
	      Uds[ss](0)(mu) = U[ss]();
 | 
			
		||||
	      Uds[ss](1)(mu) = Uconj[ss]();
 | 
			
		||||
	    }
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
     for(auto ss=U.begin();ss<U.end();ss++){
 | 
			
		||||
       Uds[ss](0)(mu+4) = Utmp[ss]();
 | 
			
		||||
     }
 | 
			
		||||
	  
 | 
			
		||||
	  U     = adj(Cshift(U    ,mu,-1));      // correct except for spanning the boundary
 | 
			
		||||
	  Uconj = adj(Cshift(Uconj,mu,-1));
 | 
			
		||||
     Utmp = Uconj;
 | 
			
		||||
     if ( Params.twists[mu] ) { 
 | 
			
		||||
       Utmp = where(coor==0,U,Utmp);
 | 
			
		||||
     }
 | 
			
		||||
	  
 | 
			
		||||
	  Utmp = U;
 | 
			
		||||
	  if ( Params.twists[mu] ) { 
 | 
			
		||||
	    Utmp = where(coor==0,Uconj,Utmp);
 | 
			
		||||
	  }
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
     for(auto ss=U.begin();ss<U.end();ss++){
 | 
			
		||||
       Uds[ss](1)(mu+4) = Utmp[ss]();
 | 
			
		||||
     }
 | 
			
		||||
	  
 | 
			
		||||
	  PARALLEL_FOR_LOOP
 | 
			
		||||
	    for(auto ss=U.begin();ss<U.end();ss++){
 | 
			
		||||
	      Uds[ss](0)(mu+4) = Utmp[ss]();
 | 
			
		||||
	    }
 | 
			
		||||
	  
 | 
			
		||||
	  Utmp = Uconj;
 | 
			
		||||
	  if ( Params.twists[mu] ) { 
 | 
			
		||||
	    Utmp = where(coor==0,U,Utmp);
 | 
			
		||||
	  }
 | 
			
		||||
	  
 | 
			
		||||
	  PARALLEL_FOR_LOOP
 | 
			
		||||
	    for(auto ss=U.begin();ss<U.end();ss++){
 | 
			
		||||
	      Uds[ss](1)(mu+4) = Utmp[ss]();
 | 
			
		||||
	    }
 | 
			
		||||
	  
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
   }
 | 
			
		||||
 }
 | 
			
		||||
      
 | 
			
		||||
      
 | 
			
		||||
      inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,
 | 
			
		||||
				FermionField &A, int mu) {
 | 
			
		||||
	// DhopDir provides U or Uconj depending on coor/flavour.
 | 
			
		||||
	GaugeLinkField link(mat._grid);
 | 
			
		||||
	// use lorentz for flavour as hack.
 | 
			
		||||
	auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
 | 
			
		||||
	PARALLEL_FOR_LOOP
 | 
			
		||||
	  for (auto ss = tmp.begin(); ss < tmp.end(); ss++) {
 | 
			
		||||
	    link[ss]() = tmp[ss](0, 0) - conjugate(tmp[ss](1, 1));
 | 
			
		||||
	  }
 | 
			
		||||
	PokeIndex<LorentzIndex>(mat, link, mu);
 | 
			
		||||
	return;
 | 
			
		||||
      }
 | 
			
		||||
 inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) {
 | 
			
		||||
 | 
			
		||||
   // DhopDir provides U or Uconj depending on coor/flavour.
 | 
			
		||||
   GaugeLinkField link(mat._grid);
 | 
			
		||||
   // use lorentz for flavour as hack.
 | 
			
		||||
   auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
   for (auto ss = tmp.begin(); ss < tmp.end(); ss++) {
 | 
			
		||||
     link[ss]() = tmp[ss](0, 0) - conjugate(tmp[ss](1, 1));
 | 
			
		||||
   }
 | 
			
		||||
   PokeIndex<LorentzIndex>(mat, link, mu);
 | 
			
		||||
   return;
 | 
			
		||||
 }
 | 
			
		||||
      
 | 
			
		||||
      inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,
 | 
			
		||||
				FermionField &Atilde, int mu) {
 | 
			
		||||
	int Ls = Btilde._grid->_fdimensions[0];
 | 
			
		||||
 inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
 | 
			
		||||
 | 
			
		||||
   int Ls = Btilde._grid->_fdimensions[0];
 | 
			
		||||
	
 | 
			
		||||
	GaugeLinkField tmp(mat._grid);
 | 
			
		||||
	tmp = zero;
 | 
			
		||||
	PARALLEL_FOR_LOOP
 | 
			
		||||
	  for (int ss = 0; ss < tmp._grid->oSites(); ss++) {
 | 
			
		||||
	    for (int s = 0; s < Ls; s++) {
 | 
			
		||||
	      int sF = s + Ls * ss;
 | 
			
		||||
	      auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF]));
 | 
			
		||||
	      tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	PokeIndex<LorentzIndex>(mat, tmp, mu);
 | 
			
		||||
	return;
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
   GaugeLinkField tmp(mat._grid);
 | 
			
		||||
   tmp = zero;
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
   for (int ss = 0; ss < tmp._grid->oSites(); ss++) {
 | 
			
		||||
     for (int s = 0; s < Ls; s++) {
 | 
			
		||||
       int sF = s + Ls * ss;
 | 
			
		||||
       auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF]));
 | 
			
		||||
       tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
 | 
			
		||||
     }
 | 
			
		||||
   }
 | 
			
		||||
   PokeIndex<LorentzIndex>(mat, tmp, mu);
 | 
			
		||||
   return;
 | 
			
		||||
 }
 | 
			
		||||
 | 
			
		||||
    typedef WilsonImpl<vComplex,  FundamentalRepresentation > WilsonImplR;   // Real.. whichever prec
 | 
			
		||||
    typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF;  // Float
 | 
			
		||||
    typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD;  // Double
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 typedef WilsonImpl<vComplex,  FundamentalRepresentation > WilsonImplR;   // Real.. whichever prec
 | 
			
		||||
 typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF;  // Float
 | 
			
		||||
 typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD;  // Double
 | 
			
		||||
 | 
			
		||||
    typedef WilsonImpl<vComplex,  FundamentalRepresentation, ComplexD > ZWilsonImplR; // Real.. whichever prec
 | 
			
		||||
    typedef WilsonImpl<vComplexF, FundamentalRepresentation, ComplexD > ZWilsonImplF; // Float
 | 
			
		||||
    typedef WilsonImpl<vComplexD, FundamentalRepresentation, ComplexD > ZWilsonImplD; // Double
 | 
			
		||||
 typedef WilsonImpl<vComplex,  FundamentalRepresentation, ComplexD > ZWilsonImplR; // Real.. whichever prec
 | 
			
		||||
 typedef WilsonImpl<vComplexF, FundamentalRepresentation, ComplexD > ZWilsonImplF; // Float
 | 
			
		||||
 typedef WilsonImpl<vComplexD, FundamentalRepresentation, ComplexD > ZWilsonImplD; // Double
 | 
			
		||||
 
 | 
			
		||||
 typedef WilsonImpl<vComplex,  AdjointRepresentation > WilsonAdjImplR;   // Real.. whichever prec
 | 
			
		||||
 typedef WilsonImpl<vComplexF, AdjointRepresentation > WilsonAdjImplF;  // Float
 | 
			
		||||
 typedef WilsonImpl<vComplexD, AdjointRepresentation > WilsonAdjImplD;  // Double
 | 
			
		||||
 
 | 
			
		||||
 typedef WilsonImpl<vComplex,  TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplR;   // Real.. whichever prec
 | 
			
		||||
 typedef WilsonImpl<vComplexF, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplF;  // Float
 | 
			
		||||
 typedef WilsonImpl<vComplexD, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplD;  // Double
 | 
			
		||||
 
 | 
			
		||||
 typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
 | 
			
		||||
 typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
 | 
			
		||||
 typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double
 | 
			
		||||
 
 | 
			
		||||
 typedef DomainWallVec5dImpl<vComplex ,Nc,ComplexD> ZDomainWallVec5dImplR; // Real.. whichever prec
 | 
			
		||||
 typedef DomainWallVec5dImpl<vComplexF,Nc,ComplexD> ZDomainWallVec5dImplF; // Float
 | 
			
		||||
 typedef DomainWallVec5dImpl<vComplexD,Nc,ComplexD> ZDomainWallVec5dImplD; // Double
 | 
			
		||||
 
 | 
			
		||||
 typedef GparityWilsonImpl<vComplex , Nc> GparityWilsonImplR;  // Real.. whichever prec
 | 
			
		||||
 typedef GparityWilsonImpl<vComplexF, Nc> GparityWilsonImplF;  // Float
 | 
			
		||||
 typedef GparityWilsonImpl<vComplexD, Nc> GparityWilsonImplD;  // Double
 | 
			
		||||
 | 
			
		||||
    typedef WilsonImpl<vComplex,  AdjointRepresentation > WilsonAdjImplR;   // Real.. whichever prec
 | 
			
		||||
    typedef WilsonImpl<vComplexF, AdjointRepresentation > WilsonAdjImplF;  // Float
 | 
			
		||||
    typedef WilsonImpl<vComplexD, AdjointRepresentation > WilsonAdjImplD;  // Double
 | 
			
		||||
}}
 | 
			
		||||
 | 
			
		||||
    typedef WilsonImpl<vComplex,  TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplR;   // Real.. whichever prec
 | 
			
		||||
    typedef WilsonImpl<vComplexF, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplF;  // Float
 | 
			
		||||
    typedef WilsonImpl<vComplexD, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplD;  // Double
 | 
			
		||||
 | 
			
		||||
    typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
 | 
			
		||||
    typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
 | 
			
		||||
    typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double
 | 
			
		||||
    
 | 
			
		||||
    typedef DomainWallVec5dImpl<vComplex ,Nc,ComplexD> ZDomainWallVec5dImplR; // Real.. whichever prec
 | 
			
		||||
    typedef DomainWallVec5dImpl<vComplexF,Nc,ComplexD> ZDomainWallVec5dImplF; // Float
 | 
			
		||||
    typedef DomainWallVec5dImpl<vComplexD,Nc,ComplexD> ZDomainWallVec5dImplD; // Double
 | 
			
		||||
 | 
			
		||||
    typedef GparityWilsonImpl<vComplex, Nc>  GparityWilsonImplR;  // Real.. whichever prec
 | 
			
		||||
    typedef GparityWilsonImpl<vComplexF, Nc> GparityWilsonImplF;  // Float
 | 
			
		||||
    typedef GparityWilsonImpl<vComplexD, Nc> GparityWilsonImplD;  // Double
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -166,7 +166,7 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
 | 
			
		||||
    ////////////////////////
 | 
			
		||||
    PARALLEL_FOR_LOOP
 | 
			
		||||
    for (int sss = 0; sss < B._grid->oSites(); sss++) {
 | 
			
		||||
      Kernels::DiracOptDhopDir(st, U, st.comm_buf, sss, sss, B, Btilde, mu,
 | 
			
		||||
      Kernels::DiracOptDhopDir(st, U, st.CommBuf(), sss, sss, B, Btilde, mu,
 | 
			
		||||
                               gamma);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -277,7 +277,7 @@ void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,
 | 
			
		||||
 | 
			
		||||
  PARALLEL_FOR_LOOP
 | 
			
		||||
  for (int sss = 0; sss < in._grid->oSites(); sss++) {
 | 
			
		||||
    Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.comm_buf, sss, sss, in, out,
 | 
			
		||||
    Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out,
 | 
			
		||||
                             dirdisp, gamma);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
@@ -295,13 +295,13 @@ void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
 | 
			
		||||
  if (dag == DaggerYes) {
 | 
			
		||||
    PARALLEL_FOR_LOOP
 | 
			
		||||
    for (int sss = 0; sss < in._grid->oSites(); sss++) {
 | 
			
		||||
      Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sss, sss, 1, 1, in,
 | 
			
		||||
      Kernels::DiracOptDhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in,
 | 
			
		||||
                                   out);
 | 
			
		||||
    }
 | 
			
		||||
  } else {
 | 
			
		||||
    PARALLEL_FOR_LOOP
 | 
			
		||||
    for (int sss = 0; sss < in._grid->oSites(); sss++) {
 | 
			
		||||
      Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, sss, sss, 1, 1, in,
 | 
			
		||||
      Kernels::DiracOptDhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in,
 | 
			
		||||
                                out);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 
 | 
			
		||||
@@ -184,44 +184,37 @@ void WilsonFermion5D<Impl>::Report(void)
 | 
			
		||||
 | 
			
		||||
  if ( DhopCalls > 0 ) {
 | 
			
		||||
    std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D Number of Dhop Calls     : " << DhopCalls  << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " << DhopCommTime
 | 
			
		||||
              << " us" << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls           : "
 | 
			
		||||
              << DhopCommTime / DhopCalls << " us" << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D Total Compute time       : "
 | 
			
		||||
              << DhopComputeTime << " us" << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls        : "
 | 
			
		||||
              << DhopComputeTime / DhopCalls << " us" << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D Number of Dhop Calls     : " << DhopCalls   << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " << DhopCommTime<< " us" << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls           : " << DhopCommTime / DhopCalls << " us" << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D Total Compute time       : " << DhopComputeTime << " us" << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls        : " << DhopComputeTime / DhopCalls << " us" << std::endl;
 | 
			
		||||
 | 
			
		||||
    RealD mflops = 1344*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
 | 
			
		||||
    std::cout << GridLogMessage << "Average mflops/s per call                : " << mflops << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "Average mflops/s per call per node       : " << mflops/NP << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "Average mflops/s per call per rank       : " << mflops/NP << std::endl;
 | 
			
		||||
 | 
			
		||||
   }
 | 
			
		||||
 | 
			
		||||
  if ( DerivCalls > 0 ) {
 | 
			
		||||
  std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls    : " <<DerivCalls <<std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " <<DerivCommTime <<" us"<<std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls           : " <<DerivCommTime/DerivCalls<<" us" <<std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D Total Compute time       : " <<DerivComputeTime <<" us"<<std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls        : " <<DerivComputeTime/DerivCalls<<" us" <<std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D Total Dhop Compute time  : " <<DerivDhopComputeTime <<" us"<<std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D Dhop ComputeTime/Calls   : " <<DerivDhopComputeTime/DerivCalls<<" us" <<std::endl;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  RealD mflops = 144*volume*DerivCalls/DerivDhopComputeTime;
 | 
			
		||||
  std::cout << GridLogMessage << "Average mflops/s per call                : " << mflops << std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "Average mflops/s per call per node       : " << mflops/NP << std::endl;
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls    : " <<DerivCalls <<std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " <<DerivCommTime <<" us"<<std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls           : " <<DerivCommTime/DerivCalls<<" us" <<std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D Total Compute time       : " <<DerivComputeTime <<" us"<<std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls        : " <<DerivComputeTime/DerivCalls<<" us" <<std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D Total Dhop Compute time  : " <<DerivDhopComputeTime <<" us"<<std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D Dhop ComputeTime/Calls   : " <<DerivDhopComputeTime/DerivCalls<<" us" <<std::endl;
 | 
			
		||||
    
 | 
			
		||||
    RealD mflops = 144*volume*DerivCalls/DerivDhopComputeTime;
 | 
			
		||||
    std::cout << GridLogMessage << "Average mflops/s per call                : " << mflops << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "Average mflops/s per call per node       : " << mflops/NP << std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (DerivCalls > 0 || DhopCalls > 0){
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D Stencil"<<std::endl;  Stencil.Report();
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D StencilEven"<<std::endl;  StencilEven.Report();
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D StencilOdd"<<std::endl;  StencilOdd.Report();
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D Stencil"<<std::endl;  Stencil.Report();
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D StencilEven"<<std::endl;  StencilEven.Report();
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D StencilOdd"<<std::endl;  StencilOdd.Report();
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -275,7 +268,7 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      int sU=ss;
 | 
			
		||||
      int sF = s+Ls*sU; 
 | 
			
		||||
      Kernels::DiracOptDhopDir(Stencil,Umu,Stencil.comm_buf,sF,sU,in,out,dirdisp,gamma);
 | 
			
		||||
      Kernels::DiracOptDhopDir(Stencil,Umu,Stencil.CommBuf(),sF,sU,in,out,dirdisp,gamma);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
@@ -327,8 +320,7 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
 | 
			
		||||
        assert(sF < B._grid->oSites());
 | 
			
		||||
        assert(sU < U._grid->oSites());
 | 
			
		||||
 | 
			
		||||
        Kernels::DiracOptDhopDir(st, U, st.comm_buf, sF, sU, B, Btilde, mu,
 | 
			
		||||
                                 gamma);
 | 
			
		||||
        Kernels::DiracOptDhopDir(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma);
 | 
			
		||||
 | 
			
		||||
        ////////////////////////////
 | 
			
		||||
        // spin trace outer product
 | 
			
		||||
@@ -342,10 +334,10 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void WilsonFermion5D<Impl>::DhopDeriv(      GaugeField &mat,
 | 
			
		||||
              const FermionField &A,
 | 
			
		||||
              const FermionField &B,
 | 
			
		||||
              int dag)
 | 
			
		||||
void WilsonFermion5D<Impl>::DhopDeriv(GaugeField &mat,
 | 
			
		||||
				      const FermionField &A,
 | 
			
		||||
				      const FermionField &B,
 | 
			
		||||
				      int dag)
 | 
			
		||||
{
 | 
			
		||||
  conformable(A._grid,FermionGrid());  
 | 
			
		||||
  conformable(A._grid,B._grid);
 | 
			
		||||
@@ -358,9 +350,9 @@ void WilsonFermion5D<Impl>::DhopDeriv(      GaugeField &mat,
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
 | 
			
		||||
          const FermionField &A,
 | 
			
		||||
          const FermionField &B,
 | 
			
		||||
          int dag)
 | 
			
		||||
					const FermionField &A,
 | 
			
		||||
					const FermionField &B,
 | 
			
		||||
					int dag)
 | 
			
		||||
{
 | 
			
		||||
  conformable(A._grid,FermionRedBlackGrid());
 | 
			
		||||
  conformable(GaugeRedBlackGrid(),mat._grid);
 | 
			
		||||
@@ -376,9 +368,9 @@ void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
 | 
			
		||||
          const FermionField &A,
 | 
			
		||||
          const FermionField &B,
 | 
			
		||||
          int dag)
 | 
			
		||||
					const FermionField &A,
 | 
			
		||||
					const FermionField &B,
 | 
			
		||||
					int dag)
 | 
			
		||||
{
 | 
			
		||||
  conformable(A._grid,FermionRedBlackGrid());
 | 
			
		||||
  conformable(GaugeRedBlackGrid(),mat._grid);
 | 
			
		||||
@@ -393,8 +385,8 @@ void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
 | 
			
		||||
           DoubledGaugeField & U,
 | 
			
		||||
           const FermionField &in, FermionField &out,int dag)
 | 
			
		||||
					 DoubledGaugeField & U,
 | 
			
		||||
					 const FermionField &in, FermionField &out,int dag)
 | 
			
		||||
{
 | 
			
		||||
  //  assert((dag==DaggerNo) ||(dag==DaggerYes));
 | 
			
		||||
  Compressor compressor(dag);
 | 
			
		||||
@@ -412,27 +404,25 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
 | 
			
		||||
    for (int ss = 0; ss < U._grid->oSites(); ss++) {
 | 
			
		||||
      int sU = ss;
 | 
			
		||||
      int sF = LLs * sU;
 | 
			
		||||
      Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in,
 | 
			
		||||
                                   out);
 | 
			
		||||
      Kernels::DiracOptDhopSiteDag(st, lo, U, st.CommBuf(), sF, sU, LLs, 1, in, out);
 | 
			
		||||
    }
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
  } else if (stat.is_init() ) {
 | 
			
		||||
 | 
			
		||||
    int nthreads;
 | 
			
		||||
    stat.start();
 | 
			
		||||
    #pragma omp parallel
 | 
			
		||||
#pragma omp parallel
 | 
			
		||||
    {
 | 
			
		||||
    #pragma omp master
 | 
			
		||||
#pragma omp master
 | 
			
		||||
    nthreads = omp_get_num_threads();
 | 
			
		||||
    int mythread = omp_get_thread_num();
 | 
			
		||||
    stat.enter(mythread);
 | 
			
		||||
    #pragma omp for nowait
 | 
			
		||||
   for(int ss=0;ss<U._grid->oSites();ss++)
 | 
			
		||||
    {
 | 
			
		||||
       int sU=ss;
 | 
			
		||||
       int sF=LLs*sU;
 | 
			
		||||
       Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
 | 
			
		||||
     }
 | 
			
		||||
#pragma omp for nowait
 | 
			
		||||
    for(int ss=0;ss<U._grid->oSites();ss++) {
 | 
			
		||||
      int sU=ss;
 | 
			
		||||
      int sF=LLs*sU;
 | 
			
		||||
      Kernels::DiracOptDhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
 | 
			
		||||
    }
 | 
			
		||||
    stat.exit(mythread);
 | 
			
		||||
    }
 | 
			
		||||
    stat.accum(nthreads);
 | 
			
		||||
@@ -442,8 +432,7 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
 | 
			
		||||
    for (int ss = 0; ss < U._grid->oSites(); ss++) {
 | 
			
		||||
      int sU = ss;
 | 
			
		||||
      int sF = LLs * sU;
 | 
			
		||||
      Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in,
 | 
			
		||||
                                out);
 | 
			
		||||
      Kernels::DiracOptDhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  DhopComputeTime+=usecond();
 | 
			
		||||
 
 | 
			
		||||
@@ -34,155 +34,154 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#include <Grid/Stat.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  namespace QCD {
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // This is the 4d red black case appropriate to support
 | 
			
		||||
  //
 | 
			
		||||
  // parity = (x+y+z+t)|2;
 | 
			
		||||
  // generalised five dim fermions like mobius, zolotarev etc..	
 | 
			
		||||
  //
 | 
			
		||||
  // i.e. even even contains fifth dim hopping term.
 | 
			
		||||
  //
 | 
			
		||||
  // [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ]
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // This is the 4d red black case appropriate to support
 | 
			
		||||
    //
 | 
			
		||||
    // parity = (x+y+z+t)|2;
 | 
			
		||||
    // generalised five dim fermions like mobius, zolotarev etc..	
 | 
			
		||||
    //
 | 
			
		||||
    // i.e. even even contains fifth dim hopping term.
 | 
			
		||||
    //
 | 
			
		||||
    // [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ]
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
    class WilsonFermion5DStatic { 
 | 
			
		||||
    public:
 | 
			
		||||
      // S-direction is INNERMOST and takes no part in the parity.
 | 
			
		||||
      static const std::vector<int> directions;
 | 
			
		||||
      static const std::vector<int> displacements;
 | 
			
		||||
      const int npoint = 8;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    class WilsonFermion5D : public WilsonKernels<Impl>, public WilsonFermion5DStatic
 | 
			
		||||
    {
 | 
			
		||||
    public:
 | 
			
		||||
     INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
     typedef WilsonKernels<Impl> Kernels;
 | 
			
		||||
     PmuStat stat;
 | 
			
		||||
 | 
			
		||||
     void Report(void);
 | 
			
		||||
     void ZeroCounters(void);
 | 
			
		||||
     double DhopCalls;
 | 
			
		||||
     double DhopCommTime;
 | 
			
		||||
     double DhopComputeTime;
 | 
			
		||||
 | 
			
		||||
     double DerivCalls;
 | 
			
		||||
     double DerivCommTime;
 | 
			
		||||
     double DerivComputeTime;
 | 
			
		||||
     double DerivDhopComputeTime;
 | 
			
		||||
 | 
			
		||||
      ///////////////////////////////////////////////////////////////
 | 
			
		||||
      // Implement the abstract base
 | 
			
		||||
      ///////////////////////////////////////////////////////////////
 | 
			
		||||
      GridBase *GaugeGrid(void)              { return _FourDimGrid ;}
 | 
			
		||||
      GridBase *GaugeRedBlackGrid(void)      { return _FourDimRedBlackGrid ;}
 | 
			
		||||
      GridBase *FermionGrid(void)            { return _FiveDimGrid;}
 | 
			
		||||
      GridBase *FermionRedBlackGrid(void)    { return _FiveDimRedBlackGrid;}
 | 
			
		||||
 | 
			
		||||
      // full checkerboard operations; leave unimplemented as abstract for now
 | 
			
		||||
      virtual RealD  M    (const FermionField &in, FermionField &out){assert(0); return 0.0;};
 | 
			
		||||
      virtual RealD  Mdag (const FermionField &in, FermionField &out){assert(0); return 0.0;};
 | 
			
		||||
 | 
			
		||||
      // half checkerboard operations; leave unimplemented as abstract for now
 | 
			
		||||
      virtual void   Meooe       (const FermionField &in, FermionField &out){assert(0);};
 | 
			
		||||
      virtual void   Mooee       (const FermionField &in, FermionField &out){assert(0);};
 | 
			
		||||
      virtual void   MooeeInv    (const FermionField &in, FermionField &out){assert(0);};
 | 
			
		||||
 | 
			
		||||
      virtual void   MeooeDag    (const FermionField &in, FermionField &out){assert(0);};
 | 
			
		||||
      virtual void   MooeeDag    (const FermionField &in, FermionField &out){assert(0);};
 | 
			
		||||
      virtual void   MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
 | 
			
		||||
      virtual void   Mdir   (const FermionField &in, FermionField &out,int dir,int disp){assert(0);};   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
 | 
			
		||||
 | 
			
		||||
      // These can be overridden by fancy 5d chiral action
 | 
			
		||||
      virtual void DhopDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 | 
			
		||||
      virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 | 
			
		||||
      virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 | 
			
		||||
 | 
			
		||||
      // Implement hopping term non-hermitian hopping term; half cb or both
 | 
			
		||||
      // Implement s-diagonal DW
 | 
			
		||||
      void DW    (const FermionField &in, FermionField &out,int dag);
 | 
			
		||||
      void Dhop  (const FermionField &in, FermionField &out,int dag);
 | 
			
		||||
      void DhopOE(const FermionField &in, FermionField &out,int dag);
 | 
			
		||||
      void DhopEO(const FermionField &in, FermionField &out,int dag);
 | 
			
		||||
 | 
			
		||||
      // add a DhopComm
 | 
			
		||||
  class WilsonFermion5DStatic { 
 | 
			
		||||
  public:
 | 
			
		||||
    // S-direction is INNERMOST and takes no part in the parity.
 | 
			
		||||
    static const std::vector<int> directions;
 | 
			
		||||
    static const std::vector<int> displacements;
 | 
			
		||||
    const int npoint = 8;
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  class WilsonFermion5D : public WilsonKernels<Impl>, public WilsonFermion5DStatic
 | 
			
		||||
  {
 | 
			
		||||
  public:
 | 
			
		||||
    INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
    typedef WilsonKernels<Impl> Kernels;
 | 
			
		||||
    PmuStat stat;
 | 
			
		||||
    
 | 
			
		||||
    void Report(void);
 | 
			
		||||
    void ZeroCounters(void);
 | 
			
		||||
    double DhopCalls;
 | 
			
		||||
    double DhopCommTime;
 | 
			
		||||
    double DhopComputeTime;
 | 
			
		||||
    
 | 
			
		||||
    double DerivCalls;
 | 
			
		||||
    double DerivCommTime;
 | 
			
		||||
    double DerivComputeTime;
 | 
			
		||||
    double DerivDhopComputeTime;
 | 
			
		||||
    
 | 
			
		||||
    ///////////////////////////////////////////////////////////////
 | 
			
		||||
    // Implement the abstract base
 | 
			
		||||
    ///////////////////////////////////////////////////////////////
 | 
			
		||||
    GridBase *GaugeGrid(void)              { return _FourDimGrid ;}
 | 
			
		||||
    GridBase *GaugeRedBlackGrid(void)      { return _FourDimRedBlackGrid ;}
 | 
			
		||||
    GridBase *FermionGrid(void)            { return _FiveDimGrid;}
 | 
			
		||||
    GridBase *FermionRedBlackGrid(void)    { return _FiveDimRedBlackGrid;}
 | 
			
		||||
    
 | 
			
		||||
    // full checkerboard operations; leave unimplemented as abstract for now
 | 
			
		||||
    virtual RealD  M    (const FermionField &in, FermionField &out){assert(0); return 0.0;};
 | 
			
		||||
    virtual RealD  Mdag (const FermionField &in, FermionField &out){assert(0); return 0.0;};
 | 
			
		||||
    
 | 
			
		||||
    // half checkerboard operations; leave unimplemented as abstract for now
 | 
			
		||||
    virtual void   Meooe       (const FermionField &in, FermionField &out){assert(0);};
 | 
			
		||||
    virtual void   Mooee       (const FermionField &in, FermionField &out){assert(0);};
 | 
			
		||||
    virtual void   MooeeInv    (const FermionField &in, FermionField &out){assert(0);};
 | 
			
		||||
    
 | 
			
		||||
    virtual void   MeooeDag    (const FermionField &in, FermionField &out){assert(0);};
 | 
			
		||||
    virtual void   MooeeDag    (const FermionField &in, FermionField &out){assert(0);};
 | 
			
		||||
    virtual void   MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
 | 
			
		||||
    virtual void   Mdir   (const FermionField &in, FermionField &out,int dir,int disp){assert(0);};   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
 | 
			
		||||
    
 | 
			
		||||
    // These can be overridden by fancy 5d chiral action
 | 
			
		||||
    virtual void DhopDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 | 
			
		||||
    virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 | 
			
		||||
    virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 | 
			
		||||
    
 | 
			
		||||
    // Implement hopping term non-hermitian hopping term; half cb or both
 | 
			
		||||
    // Implement s-diagonal DW
 | 
			
		||||
    void DW    (const FermionField &in, FermionField &out,int dag);
 | 
			
		||||
    void Dhop  (const FermionField &in, FermionField &out,int dag);
 | 
			
		||||
    void DhopOE(const FermionField &in, FermionField &out,int dag);
 | 
			
		||||
    void DhopEO(const FermionField &in, FermionField &out,int dag);
 | 
			
		||||
    
 | 
			
		||||
    // add a DhopComm
 | 
			
		||||
      // -- suboptimal interface will presently trigger multiple comms.
 | 
			
		||||
      void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
 | 
			
		||||
 | 
			
		||||
      ///////////////////////////////////////////////////////////////
 | 
			
		||||
      // New methods added 
 | 
			
		||||
      ///////////////////////////////////////////////////////////////
 | 
			
		||||
      void DerivInternal(StencilImpl & st,
 | 
			
		||||
			 DoubledGaugeField & U,
 | 
			
		||||
			 GaugeField &mat,
 | 
			
		||||
			 const FermionField &A,
 | 
			
		||||
			 const FermionField &B,
 | 
			
		||||
			 int dag);
 | 
			
		||||
 | 
			
		||||
      void DhopInternal(StencilImpl & st,
 | 
			
		||||
			LebesgueOrder &lo,
 | 
			
		||||
			DoubledGaugeField &U,
 | 
			
		||||
			const FermionField &in, 
 | 
			
		||||
			FermionField &out,
 | 
			
		||||
			int dag);
 | 
			
		||||
 | 
			
		||||
      // Constructors
 | 
			
		||||
      WilsonFermion5D(GaugeField &_Umu,
 | 
			
		||||
		      GridCartesian         &FiveDimGrid,
 | 
			
		||||
		      GridRedBlackCartesian &FiveDimRedBlackGrid,
 | 
			
		||||
		      GridCartesian         &FourDimGrid,
 | 
			
		||||
		      GridRedBlackCartesian &FourDimRedBlackGrid,
 | 
			
		||||
		      double _M5,const ImplParams &p= ImplParams());
 | 
			
		||||
 | 
			
		||||
      // Constructors
 | 
			
		||||
      /*
 | 
			
		||||
    void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
 | 
			
		||||
    
 | 
			
		||||
    ///////////////////////////////////////////////////////////////
 | 
			
		||||
    // New methods added 
 | 
			
		||||
    ///////////////////////////////////////////////////////////////
 | 
			
		||||
    void DerivInternal(StencilImpl & st,
 | 
			
		||||
		       DoubledGaugeField & U,
 | 
			
		||||
		       GaugeField &mat,
 | 
			
		||||
		       const FermionField &A,
 | 
			
		||||
		       const FermionField &B,
 | 
			
		||||
		       int dag);
 | 
			
		||||
    
 | 
			
		||||
    void DhopInternal(StencilImpl & st,
 | 
			
		||||
		      LebesgueOrder &lo,
 | 
			
		||||
		      DoubledGaugeField &U,
 | 
			
		||||
		      const FermionField &in, 
 | 
			
		||||
		      FermionField &out,
 | 
			
		||||
		      int dag);
 | 
			
		||||
    
 | 
			
		||||
    // Constructors
 | 
			
		||||
    WilsonFermion5D(GaugeField &_Umu,
 | 
			
		||||
		    GridCartesian         &FiveDimGrid,
 | 
			
		||||
		    GridRedBlackCartesian &FiveDimRedBlackGrid,
 | 
			
		||||
		    GridCartesian         &FourDimGrid,
 | 
			
		||||
		    GridRedBlackCartesian &FourDimRedBlackGrid,
 | 
			
		||||
		    double _M5,const ImplParams &p= ImplParams());
 | 
			
		||||
    
 | 
			
		||||
    // Constructors
 | 
			
		||||
    /*
 | 
			
		||||
      WilsonFermion5D(int simd, 
 | 
			
		||||
		      GaugeField &_Umu,
 | 
			
		||||
		      GridCartesian         &FiveDimGrid,
 | 
			
		||||
		      GridRedBlackCartesian &FiveDimRedBlackGrid,
 | 
			
		||||
		      GridCartesian         &FourDimGrid,
 | 
			
		||||
		      double _M5,const ImplParams &p= ImplParams());
 | 
			
		||||
      */
 | 
			
		||||
      GaugeField &_Umu,
 | 
			
		||||
      GridCartesian         &FiveDimGrid,
 | 
			
		||||
      GridRedBlackCartesian &FiveDimRedBlackGrid,
 | 
			
		||||
      GridCartesian         &FourDimGrid,
 | 
			
		||||
      double _M5,const ImplParams &p= ImplParams());
 | 
			
		||||
    */
 | 
			
		||||
    
 | 
			
		||||
    // DoubleStore
 | 
			
		||||
    void ImportGauge(const GaugeField &_Umu);
 | 
			
		||||
    
 | 
			
		||||
    ///////////////////////////////////////////////////////////////
 | 
			
		||||
    // Data members require to support the functionality
 | 
			
		||||
    ///////////////////////////////////////////////////////////////
 | 
			
		||||
  public:
 | 
			
		||||
    
 | 
			
		||||
    // Add these to the support from Wilson
 | 
			
		||||
    GridBase *_FourDimGrid;
 | 
			
		||||
    GridBase *_FourDimRedBlackGrid;
 | 
			
		||||
    GridBase *_FiveDimGrid;
 | 
			
		||||
    GridBase *_FiveDimRedBlackGrid;
 | 
			
		||||
    
 | 
			
		||||
    double                        M5;
 | 
			
		||||
    int Ls;
 | 
			
		||||
    
 | 
			
		||||
    //Defines the stencils for even and odd
 | 
			
		||||
    StencilImpl Stencil; 
 | 
			
		||||
    StencilImpl StencilEven; 
 | 
			
		||||
    StencilImpl StencilOdd; 
 | 
			
		||||
    
 | 
			
		||||
    // Copy of the gauge field , with even and odd subsets
 | 
			
		||||
    DoubledGaugeField Umu;
 | 
			
		||||
    DoubledGaugeField UmuEven;
 | 
			
		||||
    DoubledGaugeField UmuOdd;
 | 
			
		||||
    
 | 
			
		||||
    LebesgueOrder Lebesgue;
 | 
			
		||||
    LebesgueOrder LebesgueEvenOdd;
 | 
			
		||||
    
 | 
			
		||||
    // Comms buffer
 | 
			
		||||
    std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  comm_buf;
 | 
			
		||||
    
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
      // DoubleStore
 | 
			
		||||
      void ImportGauge(const GaugeField &_Umu);
 | 
			
		||||
 | 
			
		||||
      ///////////////////////////////////////////////////////////////
 | 
			
		||||
      // Data members require to support the functionality
 | 
			
		||||
      ///////////////////////////////////////////////////////////////
 | 
			
		||||
    public:
 | 
			
		||||
 | 
			
		||||
      // Add these to the support from Wilson
 | 
			
		||||
      GridBase *_FourDimGrid;
 | 
			
		||||
      GridBase *_FourDimRedBlackGrid;
 | 
			
		||||
      GridBase *_FiveDimGrid;
 | 
			
		||||
      GridBase *_FiveDimRedBlackGrid;
 | 
			
		||||
 | 
			
		||||
      double                        M5;
 | 
			
		||||
      int Ls;
 | 
			
		||||
 | 
			
		||||
      //Defines the stencils for even and odd
 | 
			
		||||
      StencilImpl Stencil; 
 | 
			
		||||
      StencilImpl StencilEven; 
 | 
			
		||||
      StencilImpl StencilOdd; 
 | 
			
		||||
 | 
			
		||||
      // Copy of the gauge field , with even and odd subsets
 | 
			
		||||
      DoubledGaugeField Umu;
 | 
			
		||||
      DoubledGaugeField UmuEven;
 | 
			
		||||
      DoubledGaugeField UmuOdd;
 | 
			
		||||
 | 
			
		||||
      LebesgueOrder Lebesgue;
 | 
			
		||||
      LebesgueOrder LebesgueEvenOdd;
 | 
			
		||||
 | 
			
		||||
      // Comms buffer
 | 
			
		||||
      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  comm_buf;
 | 
			
		||||
      
 | 
			
		||||
    };
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
}}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -43,10 +43,9 @@ WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(
 | 
			
		||||
    StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
    commVector<SiteHalfSpinor> &buf, int sF,
 | 
			
		||||
    int sU, const FermionField &in, FermionField &out) {
 | 
			
		||||
void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
						     SiteHalfSpinor *buf, int sF,
 | 
			
		||||
						     int sU, const FermionField &in, FermionField &out) {
 | 
			
		||||
  SiteHalfSpinor tmp;
 | 
			
		||||
  SiteHalfSpinor chi;
 | 
			
		||||
  SiteHalfSpinor *chi_p;
 | 
			
		||||
@@ -220,10 +219,9 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(
 | 
			
		||||
 | 
			
		||||
// Need controls to do interior, exterior, or both
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void WilsonKernels<Impl>::DiracOptGenericDhopSite(
 | 
			
		||||
    StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
    commVector<SiteHalfSpinor> &buf, int sF,
 | 
			
		||||
    int sU, const FermionField &in, FermionField &out) {
 | 
			
		||||
void WilsonKernels<Impl>::DiracOptGenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
						  SiteHalfSpinor *buf, int sF,
 | 
			
		||||
						  int sU, const FermionField &in, FermionField &out) {
 | 
			
		||||
  SiteHalfSpinor tmp;
 | 
			
		||||
  SiteHalfSpinor chi;
 | 
			
		||||
  SiteHalfSpinor *chi_p;
 | 
			
		||||
@@ -396,10 +394,9 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSite(
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void WilsonKernels<Impl>::DiracOptDhopDir(
 | 
			
		||||
    StencilImpl &st, DoubledGaugeField &U,
 | 
			
		||||
    commVector<SiteHalfSpinor> &buf, int sF,
 | 
			
		||||
    int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
 | 
			
		||||
void WilsonKernels<Impl>::DiracOptDhopDir( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int sF,
 | 
			
		||||
					   int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
 | 
			
		||||
 | 
			
		||||
  SiteHalfSpinor tmp;
 | 
			
		||||
  SiteHalfSpinor chi;
 | 
			
		||||
  SiteSpinor result;
 | 
			
		||||
 
 | 
			
		||||
@@ -32,175 +32,132 @@ directory
 | 
			
		||||
#define GRID_QCD_DHOP_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  namespace QCD {
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Helper routines that implement Wilson stencil for a single site.
 | 
			
		||||
    // Common to both the WilsonFermion and WilsonFermion5D
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    class WilsonKernelsStatic { 
 | 
			
		||||
    public:
 | 
			
		||||
      // S-direction is INNERMOST and takes no part in the parity.
 | 
			
		||||
      static int AsmOpt;  // these are a temporary hack
 | 
			
		||||
      static int HandOpt; // these are a temporary hack
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic { 
 | 
			
		||||
    public:
 | 
			
		||||
 | 
			
		||||
      INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
      typedef FermionOperator<Impl> Base;
 | 
			
		||||
     
 | 
			
		||||
    public:
 | 
			
		||||
 | 
			
		||||
      template <bool EnableBool = true>
 | 
			
		||||
      typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
 | 
			
		||||
	DiracOptDhopSite(
 | 
			
		||||
			 StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
			 commVector<SiteHalfSpinor> &buf,
 | 
			
		||||
			 int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
			 FermionField &out) {
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Helper routines that implement Wilson stencil for a single site.
 | 
			
		||||
  // Common to both the WilsonFermion and WilsonFermion5D
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
class WilsonKernelsStatic { 
 | 
			
		||||
 public:
 | 
			
		||||
  // S-direction is INNERMOST and takes no part in the parity.
 | 
			
		||||
  static int AsmOpt;  // these are a temporary hack
 | 
			
		||||
  static int HandOpt; // these are a temporary hack
 | 
			
		||||
};
 | 
			
		||||
 
 | 
			
		||||
template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic { 
 | 
			
		||||
 public:
 | 
			
		||||
   
 | 
			
		||||
  INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
  typedef FermionOperator<Impl> Base;
 | 
			
		||||
   
 | 
			
		||||
public:
 | 
			
		||||
   
 | 
			
		||||
  template <bool EnableBool = true>
 | 
			
		||||
  typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
 | 
			
		||||
  DiracOptDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
 | 
			
		||||
		   int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
	if (AsmOpt) {
 | 
			
		||||
	  WilsonKernels<Impl>::DiracOptAsmDhopSite(st, lo, U, buf, sF, sU, Ls, Ns,
 | 
			
		||||
						   in, out);
 | 
			
		||||
 | 
			
		||||
	} else {
 | 
			
		||||
    if (AsmOpt) {
 | 
			
		||||
      WilsonKernels<Impl>::DiracOptAsmDhopSite(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
 | 
			
		||||
    } else {
 | 
			
		||||
#else
 | 
			
		||||
	  {
 | 
			
		||||
    {
 | 
			
		||||
#endif
 | 
			
		||||
	    for (int site = 0; site < Ns; site++) {
 | 
			
		||||
	      for (int s = 0; s < Ls; s++) {
 | 
			
		||||
		if (HandOpt)
 | 
			
		||||
		  WilsonKernels<Impl>::DiracOptHandDhopSite(st, lo, U, buf, sF, sU,
 | 
			
		||||
							    in, out);
 | 
			
		||||
		else
 | 
			
		||||
		  WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU,
 | 
			
		||||
							       in, out);
 | 
			
		||||
		sF++;
 | 
			
		||||
	      }
 | 
			
		||||
	      sU++;
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
      for (int site = 0; site < Ns; site++) {
 | 
			
		||||
	for (int s = 0; s < Ls; s++) {
 | 
			
		||||
	  if (HandOpt)
 | 
			
		||||
	    WilsonKernels<Impl>::DiracOptHandDhopSite(st,lo,U,buf,sF,sU,in,out);
 | 
			
		||||
	  else
 | 
			
		||||
	    WilsonKernels<Impl>::DiracOptGenericDhopSite(st,lo,U,buf,sF,sU,in,out);
 | 
			
		||||
	  sF++;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	template <bool EnableBool = true>
 | 
			
		||||
	  typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
 | 
			
		||||
	  DiracOptDhopSite(
 | 
			
		||||
			   StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
			   commVector<SiteHalfSpinor> &buf,
 | 
			
		||||
			   int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
			   FermionField &out) {
 | 
			
		||||
	  for (int site = 0; site < Ns; site++) {
 | 
			
		||||
	    for (int s = 0; s < Ls; s++) {
 | 
			
		||||
	      WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in,
 | 
			
		||||
							   out);
 | 
			
		||||
	      sF++;
 | 
			
		||||
	    }
 | 
			
		||||
	    sU++;
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	template <bool EnableBool = true>
 | 
			
		||||
	  typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,
 | 
			
		||||
				  void>::type
 | 
			
		||||
	  DiracOptDhopSiteDag(
 | 
			
		||||
			      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
			      commVector<SiteHalfSpinor> &buf,
 | 
			
		||||
			      int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
			      FermionField &out) {
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
				    if (AsmOpt) {
 | 
			
		||||
				      WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st, lo, U, buf, sF, sU, Ls,
 | 
			
		||||
										  Ns, in, out);
 | 
			
		||||
				    } else {
 | 
			
		||||
#else
 | 
			
		||||
				      {
 | 
			
		||||
#endif
 | 
			
		||||
					for (int site = 0; site < Ns; site++) {
 | 
			
		||||
					  for (int s = 0; s < Ls; s++) {
 | 
			
		||||
					    if (HandOpt)
 | 
			
		||||
					      WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st, lo, U, buf, sF, sU,
 | 
			
		||||
											   in, out);
 | 
			
		||||
					    else
 | 
			
		||||
					      WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF,
 | 
			
		||||
											      sU, in, out);
 | 
			
		||||
					    sF++;
 | 
			
		||||
					  }
 | 
			
		||||
					  sU++;
 | 
			
		||||
					}
 | 
			
		||||
				      }
 | 
			
		||||
				    }
 | 
			
		||||
 | 
			
		||||
				    template <bool EnableBool = true>
 | 
			
		||||
				      typename std::enable_if<
 | 
			
		||||
				      (Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,
 | 
			
		||||
				      void>::type
 | 
			
		||||
				      DiracOptDhopSiteDag(
 | 
			
		||||
							  StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
							  commVector<SiteHalfSpinor> &buf,
 | 
			
		||||
							  int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
							  FermionField &out) {
 | 
			
		||||
					for (int site = 0; site < Ns; site++) {
 | 
			
		||||
					  for (int s = 0; s < Ls; s++) {
 | 
			
		||||
					    WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, sU,
 | 
			
		||||
											    in, out);
 | 
			
		||||
					    sF++;
 | 
			
		||||
					  }
 | 
			
		||||
					  sU++;
 | 
			
		||||
					}
 | 
			
		||||
				      }
 | 
			
		||||
 | 
			
		||||
				    void DiracOptDhopDir(
 | 
			
		||||
							 StencilImpl &st, DoubledGaugeField &U,
 | 
			
		||||
							 commVector<SiteHalfSpinor> &buf,
 | 
			
		||||
							 int sF, int sU, const FermionField &in, FermionField &out, int dirdisp,
 | 
			
		||||
							 int gamma);
 | 
			
		||||
 | 
			
		||||
	private:
 | 
			
		||||
				    // Specialised variants
 | 
			
		||||
				    void DiracOptGenericDhopSite(
 | 
			
		||||
								 StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
								 commVector<SiteHalfSpinor> &buf,
 | 
			
		||||
								 int sF, int sU, const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
				    void DiracOptGenericDhopSiteDag(
 | 
			
		||||
								    StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
								    commVector<SiteHalfSpinor> &buf,
 | 
			
		||||
								    int sF, int sU, const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
				    void DiracOptAsmDhopSite(
 | 
			
		||||
							     StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
							     commVector<SiteHalfSpinor> &buf,
 | 
			
		||||
							     int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
							     FermionField &out);
 | 
			
		||||
 | 
			
		||||
				    void DiracOptAsmDhopSiteDag(
 | 
			
		||||
								StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
								commVector<SiteHalfSpinor> &buf,
 | 
			
		||||
								int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
								FermionField &out);
 | 
			
		||||
 | 
			
		||||
				    void DiracOptHandDhopSite(
 | 
			
		||||
							      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
							      commVector<SiteHalfSpinor> &buf,
 | 
			
		||||
							      int sF, int sU, const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
				    void DiracOptHandDhopSiteDag(
 | 
			
		||||
								 StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
								 commVector<SiteHalfSpinor> &buf,
 | 
			
		||||
								 int sF, int sU, const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
	public:
 | 
			
		||||
				    WilsonKernels(const ImplParams &p = ImplParams());
 | 
			
		||||
				  };
 | 
			
		||||
    
 | 
			
		||||
	sU++;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
     
 | 
			
		||||
  template <bool EnableBool = true>
 | 
			
		||||
  typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
 | 
			
		||||
  DiracOptDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
 | 
			
		||||
		   int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
 | 
			
		||||
     
 | 
			
		||||
    for (int site = 0; site < Ns; site++) {
 | 
			
		||||
      for (int s = 0; s < Ls; s++) {
 | 
			
		||||
	WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in, out);
 | 
			
		||||
	sF++;
 | 
			
		||||
      }
 | 
			
		||||
      sU++;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
     
 | 
			
		||||
  template <bool EnableBool = true>
 | 
			
		||||
  typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,void>::type
 | 
			
		||||
  DiracOptDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
 | 
			
		||||
		      int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
    if (AsmOpt) {
 | 
			
		||||
      WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
 | 
			
		||||
    } else {
 | 
			
		||||
#else
 | 
			
		||||
    {
 | 
			
		||||
#endif
 | 
			
		||||
      for (int site = 0; site < Ns; site++) {
 | 
			
		||||
	for (int s = 0; s < Ls; s++) {
 | 
			
		||||
	  if (HandOpt)
 | 
			
		||||
	    WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
 | 
			
		||||
	  else
 | 
			
		||||
	    WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
 | 
			
		||||
	  sF++;
 | 
			
		||||
	}
 | 
			
		||||
	sU++;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <bool EnableBool = true>
 | 
			
		||||
  typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type
 | 
			
		||||
  DiracOptDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf,
 | 
			
		||||
		      int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
 | 
			
		||||
 | 
			
		||||
    for (int site = 0; site < Ns; site++) {
 | 
			
		||||
      for (int s = 0; s < Ls; s++) {
 | 
			
		||||
	WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
 | 
			
		||||
	sF++;
 | 
			
		||||
      }
 | 
			
		||||
      sU++;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void DiracOptDhopDir(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
 | 
			
		||||
		       int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma);
 | 
			
		||||
      
 | 
			
		||||
private:
 | 
			
		||||
     // Specialised variants
 | 
			
		||||
  void DiracOptGenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
 | 
			
		||||
			       int sF, int sU, const FermionField &in, FermionField &out);
 | 
			
		||||
      
 | 
			
		||||
  void DiracOptGenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
 | 
			
		||||
				  int sF, int sU, const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
  void DiracOptAsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
 | 
			
		||||
			   int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
 | 
			
		||||
 | 
			
		||||
  void DiracOptAsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
 | 
			
		||||
			      int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
  void DiracOptHandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
 | 
			
		||||
			    int sF, int sU, const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
  void DiracOptHandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
 | 
			
		||||
			       int sF, int sU, const FermionField &in, FermionField &out);
 | 
			
		||||
      
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
  WilsonKernels(const ImplParams &p = ImplParams());
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
    
 | 
			
		||||
}}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -33,31 +33,27 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
  namespace QCD {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
    
 | 
			
		||||
    ///////////////////////////////////////////////////////////
 | 
			
		||||
    // Default to no assembler implementation
 | 
			
		||||
    ///////////////////////////////////////////////////////////
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
      void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
                             commVector<SiteHalfSpinor>  &buf,
 | 
			
		||||
                             int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
    {
 | 
			
		||||
      assert(0);
 | 
			
		||||
    }
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
      void WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
                                commVector<SiteHalfSpinor>  &buf,
 | 
			
		||||
                                int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
    {
 | 
			
		||||
      assert(0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////
 | 
			
		||||
// Default to no assembler implementation
 | 
			
		||||
///////////////////////////////////////////////////////////
 | 
			
		||||
template<class Impl> void 
 | 
			
		||||
WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
			
		||||
					  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl> void 
 | 
			
		||||
WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
			
		||||
					     int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#if defined(AVX512) 
 | 
			
		||||
    
 | 
			
		||||
    
 | 
			
		||||
    ///////////////////////////////////////////////////////////
 | 
			
		||||
    // If we are AVX512 specialise the single precision routine
 | 
			
		||||
    ///////////////////////////////////////////////////////////
 | 
			
		||||
@@ -65,16 +61,16 @@ namespace Grid {
 | 
			
		||||
#include <simd/Intel512wilson.h>
 | 
			
		||||
#include <simd/Intel512single.h>
 | 
			
		||||
    
 | 
			
		||||
    static Vector<vComplexF> signs;
 | 
			
		||||
static Vector<vComplexF> signs;
 | 
			
		||||
    
 | 
			
		||||
    int setupSigns(void ){
 | 
			
		||||
      Vector<vComplexF> bother(2);
 | 
			
		||||
      signs = bother;
 | 
			
		||||
      vrsign(signs[0]);
 | 
			
		||||
      visign(signs[1]);
 | 
			
		||||
      return 1;
 | 
			
		||||
    }
 | 
			
		||||
    static int signInit = setupSigns();
 | 
			
		||||
  int setupSigns(void ){
 | 
			
		||||
    Vector<vComplexF> bother(2);
 | 
			
		||||
    signs = bother;
 | 
			
		||||
    vrsign(signs[0]);
 | 
			
		||||
    visign(signs[1]);
 | 
			
		||||
    return 1;
 | 
			
		||||
  }
 | 
			
		||||
  static int signInit = setupSigns();
 | 
			
		||||
  
 | 
			
		||||
#define label(A)  ilabel(A)
 | 
			
		||||
#define ilabel(A) ".globl\n"  #A ":\n" 
 | 
			
		||||
@@ -84,17 +80,15 @@ namespace Grid {
 | 
			
		||||
#define FX(A) WILSONASM_ ##A
 | 
			
		||||
  
 | 
			
		||||
#undef KERNEL_DAG
 | 
			
		||||
    template<>
 | 
			
		||||
    void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
							 commVector<SiteHalfSpinor>  &buf,
 | 
			
		||||
							 int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
template<> void 
 | 
			
		||||
WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
 | 
			
		||||
						int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
			
		||||
      
 | 
			
		||||
#define KERNEL_DAG
 | 
			
		||||
    template<>
 | 
			
		||||
    void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
							    commVector<SiteHalfSpinor>  &buf,
 | 
			
		||||
							    int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
template<> void 
 | 
			
		||||
WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
			
		||||
						   int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
			
		||||
				    
 | 
			
		||||
#undef VMOVIDUP
 | 
			
		||||
@@ -109,31 +103,26 @@ namespace Grid {
 | 
			
		||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
 | 
			
		||||
				    
 | 
			
		||||
#undef KERNEL_DAG
 | 
			
		||||
    template<>
 | 
			
		||||
    void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
								  commVector<SiteHalfSpinor>  &buf,
 | 
			
		||||
								  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
template<> void 
 | 
			
		||||
WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
 | 
			
		||||
							 int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
			
		||||
				    
 | 
			
		||||
#define KERNEL_DAG
 | 
			
		||||
    template<>
 | 
			
		||||
    void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
								     commVector<SiteHalfSpinor>  &buf,
 | 
			
		||||
								     int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
template<> void 
 | 
			
		||||
WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
			
		||||
							    int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
			
		||||
				    
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define INSTANTIATE_ASM(A)\
 | 
			
		||||
template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
 | 
			
		||||
                                   commVector<SiteHalfSpinor>  &buf,\
 | 
			
		||||
template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
 | 
			
		||||
                                  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
 | 
			
		||||
template void WilsonKernels<A>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
 | 
			
		||||
                                   commVector<SiteHalfSpinor>  &buf,\
 | 
			
		||||
 \
 | 
			
		||||
template void WilsonKernels<A>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
 | 
			
		||||
                                  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
INSTANTIATE_ASM(WilsonImplF);
 | 
			
		||||
INSTANTIATE_ASM(WilsonImplD);
 | 
			
		||||
INSTANTIATE_ASM(ZWilsonImplF);
 | 
			
		||||
@@ -144,6 +133,6 @@ INSTANTIATE_ASM(DomainWallVec5dImplF);
 | 
			
		||||
INSTANTIATE_ASM(DomainWallVec5dImplD);
 | 
			
		||||
INSTANTIATE_ASM(ZDomainWallVec5dImplF);
 | 
			
		||||
INSTANTIATE_ASM(ZDomainWallVec5dImplD);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -311,10 +311,9 @@ namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void WilsonKernels<Impl>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
					       commVector<SiteHalfSpinor>  &buf,
 | 
			
		||||
					       int ss,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
template<class Impl> void 
 | 
			
		||||
WilsonKernels<Impl>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor  *buf,
 | 
			
		||||
					  int ss,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename Simd::scalar_type S;
 | 
			
		||||
  typedef typename Simd::vector_type V;
 | 
			
		||||
@@ -554,10 +553,9 @@ namespace QCD {
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void WilsonKernels<Impl>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
					       commVector<SiteHalfSpinor>  &buf,
 | 
			
		||||
					       int ss,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void WilsonKernels<Impl>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
			
		||||
						  int ss,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  //  std::cout << "Hand op Dhop "<<std::endl;
 | 
			
		||||
  typedef typename Simd::scalar_type S;
 | 
			
		||||
@@ -798,38 +796,35 @@ namespace QCD {
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////
 | 
			
		||||
  // Specialise Gparity to simple implementation
 | 
			
		||||
  ////////////////////////////////////////////////
 | 
			
		||||
template<>
 | 
			
		||||
void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
							     commVector<SiteHalfSpinor>  &buf,
 | 
			
		||||
							     int sF,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
template<> void 
 | 
			
		||||
WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
							SiteHalfSpinor *buf,
 | 
			
		||||
							int sF,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<>
 | 
			
		||||
void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
								commVector<SiteHalfSpinor>  &buf,
 | 
			
		||||
								int sF,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
template<> void 
 | 
			
		||||
WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
							   SiteHalfSpinor *buf,
 | 
			
		||||
							   int sF,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<>
 | 
			
		||||
void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
							     commVector<SiteHalfSpinor>  &buf,
 | 
			
		||||
							     int sF,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
template<> void 
 | 
			
		||||
WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
			
		||||
							int sF,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<>
 | 
			
		||||
void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
								commVector<SiteHalfSpinor>  &buf,
 | 
			
		||||
								int sF,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
template<> void 
 | 
			
		||||
WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
			
		||||
							   int sF,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
@@ -840,12 +835,10 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,
 | 
			
		||||
// Need Nc=3 though //
 | 
			
		||||
 | 
			
		||||
#define INSTANTIATE_THEM(A) \
 | 
			
		||||
template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
 | 
			
		||||
							       commVector<SiteHalfSpinor>  &buf,\
 | 
			
		||||
							       int ss,int sU,const FermionField &in, FermionField &out);\
 | 
			
		||||
template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
 | 
			
		||||
								  commVector<SiteHalfSpinor>  &buf,\
 | 
			
		||||
								  int ss,int sU,const FermionField &in, FermionField &out);
 | 
			
		||||
template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
 | 
			
		||||
						     int ss,int sU,const FermionField &in, FermionField &out); \
 | 
			
		||||
template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
 | 
			
		||||
							int ss,int sU,const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
INSTANTIATE_THEM(WilsonImplF);
 | 
			
		||||
INSTANTIATE_THEM(WilsonImplD);
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user