mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	Added single threaded version of the derivative for the Ls vectorised DWF
This commit is contained in:
		@@ -99,7 +99,7 @@ public:
 | 
			
		||||
    virtual int oIndex(std::vector<int> &coor)
 | 
			
		||||
    {
 | 
			
		||||
        int idx=0;
 | 
			
		||||
	// Works with either global or local coordinates
 | 
			
		||||
        // Works with either global or local coordinates
 | 
			
		||||
        for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
 | 
			
		||||
        return idx;
 | 
			
		||||
    }
 | 
			
		||||
@@ -146,15 +146,15 @@ public:
 | 
			
		||||
      // Distance should be either 0,1,2..
 | 
			
		||||
      //
 | 
			
		||||
      if ( _simd_layout[dimension] > 2 ) { 
 | 
			
		||||
	for(int d=0;d<_ndimension;d++){
 | 
			
		||||
	  if ( d != dimension ) assert ( (_simd_layout[d]==1)  );
 | 
			
		||||
	}
 | 
			
		||||
	permute_type = RotateBit; // How to specify distance; this is not just direction.
 | 
			
		||||
	return permute_type;
 | 
			
		||||
        for(int d=0;d<_ndimension;d++){
 | 
			
		||||
          if ( d != dimension ) assert ( (_simd_layout[d]==1)  );
 | 
			
		||||
        }
 | 
			
		||||
        permute_type = RotateBit; // How to specify distance; this is not just direction.
 | 
			
		||||
        return permute_type;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      for(int d=_ndimension-1;d>dimension;d--){
 | 
			
		||||
	if (_simd_layout[d]>1 ) permute_type++;
 | 
			
		||||
        if (_simd_layout[d]>1 ) permute_type++;
 | 
			
		||||
      }
 | 
			
		||||
      return permute_type;
 | 
			
		||||
    }
 | 
			
		||||
@@ -174,6 +174,22 @@ public:
 | 
			
		||||
    inline const std::vector<int> &LocalDimensions(void)        { return _ldimensions;};
 | 
			
		||||
    inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;};
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Print decomposition
 | 
			
		||||
    ////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
   	void show_decomposition(){
 | 
			
		||||
   		std::cout << GridLogMessage << "Full Dimensions    : " << _fdimensions << std::endl;
 | 
			
		||||
   		std::cout << GridLogMessage << "Global Dimensions  : " << _gdimensions << std::endl;
 | 
			
		||||
   	        std::cout << GridLogMessage << "Local Dimensions   : " << _ldimensions << std::endl;
 | 
			
		||||
                std::cout << GridLogMessage << "Reduced Dimensions : " << _rdimensions << std::endl;
 | 
			
		||||
   		std::cout << GridLogMessage << "iSites             : " << _isites << std::endl;
 | 
			
		||||
   		std::cout << GridLogMessage << "oSites             : " << _osites << std::endl;
 | 
			
		||||
   		std::cout << GridLogMessage << "lSites             : " << lSites() << std::endl;	
 | 
			
		||||
   		std::cout << GridLogMessage << "gSites             : " << gSites() << std::endl;
 | 
			
		||||
   		std::cout << GridLogMessage << "Nd                 : " << _ndimension << std::endl;		
 | 
			
		||||
   	} 
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Global addressing
 | 
			
		||||
    ////////////////////////////////////////////////////////////////
 | 
			
		||||
@@ -187,8 +203,8 @@ public:
 | 
			
		||||
      gidx=0;
 | 
			
		||||
      int mult=1;
 | 
			
		||||
      for(int mu=0;mu<_ndimension;mu++) {
 | 
			
		||||
	gidx+=mult*gcoor[mu];
 | 
			
		||||
	mult*=_gdimensions[mu];
 | 
			
		||||
        gidx+=mult*gcoor[mu];
 | 
			
		||||
        mult*=_gdimensions[mu];
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    void GlobalCoorToProcessorCoorLocalCoor(std::vector<int> &pcoor,std::vector<int> &lcoor,const std::vector<int> &gcoor)
 | 
			
		||||
@@ -196,9 +212,9 @@ public:
 | 
			
		||||
      pcoor.resize(_ndimension);
 | 
			
		||||
      lcoor.resize(_ndimension);
 | 
			
		||||
      for(int mu=0;mu<_ndimension;mu++){
 | 
			
		||||
	int _fld  = _fdimensions[mu]/_processors[mu];
 | 
			
		||||
	pcoor[mu] = gcoor[mu]/_fld;
 | 
			
		||||
	lcoor[mu] = gcoor[mu]%_fld;
 | 
			
		||||
        int _fld  = _fdimensions[mu]/_processors[mu];
 | 
			
		||||
        pcoor[mu] = gcoor[mu]/_fld;
 | 
			
		||||
        lcoor[mu] = gcoor[mu]%_fld;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector<int> &gcoor)
 | 
			
		||||
@@ -210,9 +226,9 @@ public:
 | 
			
		||||
 | 
			
		||||
      std::vector<int> cblcoor(lcoor);
 | 
			
		||||
      for(int d=0;d<cblcoor.size();d++){
 | 
			
		||||
	if( this->CheckerBoarded(d) ) {
 | 
			
		||||
	  cblcoor[d] = lcoor[d]/2;
 | 
			
		||||
	}
 | 
			
		||||
        if( this->CheckerBoarded(d) ) {
 | 
			
		||||
          cblcoor[d] = lcoor[d]/2;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      i_idx= iIndex(cblcoor);// this does not imply divide by 2 on checker dim
 | 
			
		||||
@@ -238,7 +254,7 @@ public:
 | 
			
		||||
    {
 | 
			
		||||
      RankIndexToGlobalCoor(rank,o_idx,i_idx ,fcoor);
 | 
			
		||||
      if(CheckerBoarded(0)){
 | 
			
		||||
	fcoor[0] = fcoor[0]*2+cb;
 | 
			
		||||
        fcoor[0] = fcoor[0]*2+cb;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    void ProcessorCoorLocalCoorToGlobalCoor(std::vector<int> &Pcoor,std::vector<int> &Lcoor,std::vector<int> &gcoor)
 | 
			
		||||
 
 | 
			
		||||
@@ -6,8 +6,8 @@
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
    Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
    Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
@@ -67,24 +67,42 @@ namespace Grid {
 | 
			
		||||
    return multiplicity;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    inline int RNGfillable_general(GridBase *coarse,GridBase *fine)
 | 
			
		||||
  {
 | 
			
		||||
    int rngdims = coarse->_ndimension;
 | 
			
		||||
 | 
			
		||||
    // trivially extended in higher dims, with locality guaranteeing RNG state is local to node
 | 
			
		||||
    int lowerdims   = fine->_ndimension - coarse->_ndimension;  assert(lowerdims >= 0);
 | 
			
		||||
    // assumes that the higher dimensions are not using more processors
 | 
			
		||||
    // all further divisions are local
 | 
			
		||||
    for(int d=0;d<lowerdims;d++) assert(fine->_processors[d]==1);
 | 
			
		||||
    for(int d=0;d<rngdims;d++) assert(coarse->_processors[d] == fine->_processors[d+lowerdims]);
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
    // then divide the number of local sites
 | 
			
		||||
    // check that the total number of sims agree, meanse the iSites are the same
 | 
			
		||||
    assert(fine->Nsimd() == coarse->Nsimd());
 | 
			
		||||
 | 
			
		||||
    // check that the two grids divide cleanly
 | 
			
		||||
    assert( (fine->lSites() / coarse->lSites() ) * coarse->lSites() == fine->lSites() );
 | 
			
		||||
 | 
			
		||||
    return fine->lSites() / coarse->lSites();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Wrap seed_seq to give common interface with random_device
 | 
			
		||||
  class fixedSeed {
 | 
			
		||||
  public:
 | 
			
		||||
 | 
			
		||||
    typedef std::seed_seq::result_type result_type;
 | 
			
		||||
 | 
			
		||||
    std::seed_seq src;
 | 
			
		||||
    
 | 
			
		||||
    fixedSeed(const std::vector<int> &seeds) : src(seeds.begin(),seeds.end()) {};
 | 
			
		||||
 | 
			
		||||
    result_type operator () (void){
 | 
			
		||||
 | 
			
		||||
      std::vector<result_type> list(1);
 | 
			
		||||
 | 
			
		||||
      src.generate(list.begin(),list.end());
 | 
			
		||||
 | 
			
		||||
      return list[0];
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
@@ -252,24 +270,30 @@ namespace Grid {
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  class GridParallelRNG : public GridRNGbase {
 | 
			
		||||
 | 
			
		||||
    double _time_counter;
 | 
			
		||||
 | 
			
		||||
  public:
 | 
			
		||||
 | 
			
		||||
    GridBase *_grid;
 | 
			
		||||
    int _vol;
 | 
			
		||||
    unsigned int _vol;
 | 
			
		||||
 | 
			
		||||
    int generator_idx(int os,int is){
 | 
			
		||||
      return is*_grid->oSites()+os;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    GridParallelRNG(GridBase *grid) : GridRNGbase() {
 | 
			
		||||
      _grid=grid;
 | 
			
		||||
      _vol =_grid->iSites()*_grid->oSites();
 | 
			
		||||
      _grid = grid;
 | 
			
		||||
      _vol  =_grid->iSites()*_grid->oSites();
 | 
			
		||||
 | 
			
		||||
      _generators.resize(_vol);
 | 
			
		||||
      _uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1});
 | 
			
		||||
      _gaussian.resize(_vol,std::normal_distribution<RealD>(0.0,1.0) );
 | 
			
		||||
      _bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1});
 | 
			
		||||
      _seeded=0;
 | 
			
		||||
      _seeded = 0;
 | 
			
		||||
 | 
			
		||||
      _time_counter = 0.0;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -325,37 +349,36 @@ namespace Grid {
 | 
			
		||||
      typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
      typedef typename vobj::vector_type vector_type;
 | 
			
		||||
 | 
			
		||||
      int multiplicity = RNGfillable(_grid, l._grid);
 | 
			
		||||
      double inner_time_counter = usecond();
 | 
			
		||||
 | 
			
		||||
      int Nsimd = _grid->Nsimd();
 | 
			
		||||
      int osites = _grid->oSites();
 | 
			
		||||
      int multiplicity = RNGfillable_general(_grid, l._grid); // l has finer or same grid
 | 
			
		||||
 | 
			
		||||
      int Nsimd = _grid->Nsimd();// guaranteed to be the same for l._grid too
 | 
			
		||||
      int osites = _grid->oSites();// guaranteed to be <= l._grid->oSites() by a factor multiplicity
 | 
			
		||||
      int words = sizeof(scalar_object) / sizeof(scalar_type);
 | 
			
		||||
 | 
			
		||||
      PARALLEL_FOR_LOOP
 | 
			
		||||
      for (int ss = 0; ss < osites; ss++) {
 | 
			
		||||
 | 
			
		||||
        std::vector<scalar_object> buf(Nsimd);
 | 
			
		||||
        for (int m = 0; m < multiplicity;
 | 
			
		||||
             m++) {  // Draw from same generator multiplicity times
 | 
			
		||||
        for (int m = 0; m < multiplicity; m++) {  // Draw from same generator multiplicity times
 | 
			
		||||
 | 
			
		||||
          int sm = multiplicity * ss +
 | 
			
		||||
                   m;  // Maps the generator site to the fine site
 | 
			
		||||
          int sm = multiplicity * ss + m;  // Maps the generator site to the fine site
 | 
			
		||||
 | 
			
		||||
          for (int si = 0; si < Nsimd; si++) {
 | 
			
		||||
            
 | 
			
		||||
            int gdx = generator_idx(ss, si);  // index of generator state
 | 
			
		||||
            scalar_type *pointer = (scalar_type *)&buf[si];
 | 
			
		||||
            dist[gdx].reset();
 | 
			
		||||
            for (int idx = 0; idx < words; idx++) {
 | 
			
		||||
              
 | 
			
		||||
            for (int idx = 0; idx < words; idx++) 
 | 
			
		||||
              fillScalar(pointer[idx], dist[gdx], _generators[gdx]);
 | 
			
		||||
            }
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          // merge into SIMD lanes
 | 
			
		||||
          merge(l._odata[sm], buf);
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      _time_counter += usecond()- inner_time_counter;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    void SeedRandomDevice(void) {
 | 
			
		||||
@@ -366,6 +389,12 @@ namespace Grid {
 | 
			
		||||
      fixedSeed src(seeds);
 | 
			
		||||
      Seed(src);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void Report(){
 | 
			
		||||
      std::cout << GridLogMessage << "Time spent in the fill() routine by GridParallelRNG: "<< _time_counter/1e3 << " ms" << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l){
 | 
			
		||||
 
 | 
			
		||||
@@ -94,17 +94,17 @@ namespace QCD {
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  
 | 
			
		||||
#define INHERIT_FIMPL_TYPES(Impl)\
 | 
			
		||||
  typedef typename Impl::FermionField           FermionField;		\
 | 
			
		||||
  typedef typename Impl::DoubledGaugeField DoubledGaugeField;		\
 | 
			
		||||
  typedef typename Impl::SiteSpinor               SiteSpinor;		\
 | 
			
		||||
  typedef typename Impl::SiteHalfSpinor       SiteHalfSpinor;		\
 | 
			
		||||
  typedef typename Impl::Compressor               Compressor;		\
 | 
			
		||||
  typedef typename Impl::StencilImpl             StencilImpl;		\
 | 
			
		||||
  typedef typename Impl::ImplParams ImplParams;				\
 | 
			
		||||
  typedef typename Impl::FermionField           FermionField;           \
 | 
			
		||||
  typedef typename Impl::DoubledGaugeField DoubledGaugeField;           \
 | 
			
		||||
  typedef typename Impl::SiteSpinor               SiteSpinor;           \
 | 
			
		||||
  typedef typename Impl::SiteHalfSpinor       SiteHalfSpinor;           \
 | 
			
		||||
  typedef typename Impl::Compressor               Compressor;           \
 | 
			
		||||
  typedef typename Impl::StencilImpl             StencilImpl;           \
 | 
			
		||||
  typedef typename Impl::ImplParams ImplParams;                         \
 | 
			
		||||
  typedef typename Impl::Coeff_t       Coeff_t;
 | 
			
		||||
  
 | 
			
		||||
#define INHERIT_IMPL_TYPES(Base) \
 | 
			
		||||
  INHERIT_GIMPL_TYPES(Base)	 \
 | 
			
		||||
  INHERIT_GIMPL_TYPES(Base)      \
 | 
			
		||||
  INHERIT_FIMPL_TYPES(Base)
 | 
			
		||||
  
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
@@ -148,11 +148,11 @@ namespace QCD {
 | 
			
		||||
    bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
 | 
			
		||||
      
 | 
			
		||||
    inline void multLink(SiteHalfSpinor &phi,
 | 
			
		||||
			 const SiteDoubledGaugeField &U,
 | 
			
		||||
			 const SiteHalfSpinor &chi,
 | 
			
		||||
			 int mu,
 | 
			
		||||
			 StencilEntry *SE,
 | 
			
		||||
			 StencilImpl &St) {
 | 
			
		||||
                         const SiteDoubledGaugeField &U,
 | 
			
		||||
                         const SiteHalfSpinor &chi,
 | 
			
		||||
                         int mu,
 | 
			
		||||
                         StencilEntry *SE,
 | 
			
		||||
                         StencilImpl &St) {
 | 
			
		||||
      mult(&phi(), &U(mu), &chi());
 | 
			
		||||
    }
 | 
			
		||||
      
 | 
			
		||||
@@ -162,16 +162,16 @@ namespace QCD {
 | 
			
		||||
    }
 | 
			
		||||
      
 | 
			
		||||
    inline void DoubleStore(GridBase *GaugeGrid,
 | 
			
		||||
			    DoubledGaugeField &Uds,
 | 
			
		||||
			    const GaugeField &Umu) {
 | 
			
		||||
                            DoubledGaugeField &Uds,
 | 
			
		||||
                            const GaugeField &Umu) {
 | 
			
		||||
      conformable(Uds._grid, GaugeGrid);
 | 
			
		||||
      conformable(Umu._grid, GaugeGrid);
 | 
			
		||||
      GaugeLinkField U(GaugeGrid);
 | 
			
		||||
      for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
	U = PeekIndex<LorentzIndex>(Umu, mu);
 | 
			
		||||
	PokeIndex<LorentzIndex>(Uds, U, mu);
 | 
			
		||||
	U = adj(Cshift(U, mu, -1));
 | 
			
		||||
	PokeIndex<LorentzIndex>(Uds, U, mu + 4);
 | 
			
		||||
        U = PeekIndex<LorentzIndex>(Umu, mu);
 | 
			
		||||
        PokeIndex<LorentzIndex>(Uds, U, mu);
 | 
			
		||||
        U = adj(Cshift(U, mu, -1));
 | 
			
		||||
        PokeIndex<LorentzIndex>(Uds, U, mu + 4);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -189,11 +189,11 @@ namespace QCD {
 | 
			
		||||
      
 | 
			
		||||
      PARALLEL_FOR_LOOP
 | 
			
		||||
      for(int sss=0;sss<tmp._grid->oSites();sss++){
 | 
			
		||||
	int sU=sss;
 | 
			
		||||
	for(int s=0;s<Ls;s++){
 | 
			
		||||
	  int sF = s+Ls*sU;
 | 
			
		||||
	  tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
 | 
			
		||||
	}
 | 
			
		||||
        int sU=sss;
 | 
			
		||||
        for(int s=0;s<Ls;s++){
 | 
			
		||||
          int sF = s+Ls*sU;
 | 
			
		||||
          tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      PokeIndex<LorentzIndex>(mat,tmp,mu);
 | 
			
		||||
      
 | 
			
		||||
@@ -248,12 +248,12 @@ class DomainWallVec5dImpl :  public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
 | 
			
		||||
		       const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
 | 
			
		||||
		       StencilImpl &St) {
 | 
			
		||||
                       const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
 | 
			
		||||
                       StencilImpl &St) {
 | 
			
		||||
    SiteGaugeLink UU;
 | 
			
		||||
    for (int i = 0; i < Nrepresentation; i++) {
 | 
			
		||||
      for (int j = 0; j < Nrepresentation; j++) {
 | 
			
		||||
	vsplat(UU()()(i, j), U(mu)()(i, j));
 | 
			
		||||
        vsplat(UU()()(i, j), U(mu)()(i, j));
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    mult(&phi(), &UU(), &chi());
 | 
			
		||||
@@ -291,9 +291,39 @@ class DomainWallVec5dImpl :  public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
 | 
			
		||||
    assert(0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,FermionField Ã, int mu) 
 | 
			
		||||
  {
 | 
			
		||||
	assert(0);
 | 
			
		||||
  inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) {
 | 
			
		||||
    int LLs = Btilde._grid->_rdimensions[0];
 | 
			
		||||
    conformable(Atilde._grid,Btilde._grid);
 | 
			
		||||
    GaugeLinkField tmp(mat._grid); 
 | 
			
		||||
    tmp = zero;
 | 
			
		||||
    typedef decltype(traceIndex<SpinIndex>(outerProduct(Btilde[0], Atilde[0]))) result_type;
 | 
			
		||||
    std::vector<typename result_type::scalar_object> v_scalar_object(Btilde._grid->Nsimd());
 | 
			
		||||
 | 
			
		||||
    PARALLEL_FOR_LOOP
 | 
			
		||||
    for (int sss = 0; sss < tmp._grid->oSites(); sss++) {
 | 
			
		||||
      std::vector<int> ocoor;
 | 
			
		||||
      tmp._grid->oCoorFromOindex(ocoor,sss); 
 | 
			
		||||
      for (int si = 0; si < tmp._grid->iSites(); si++){
 | 
			
		||||
        typename result_type::scalar_object scalar_object;
 | 
			
		||||
        scalar_object = zero;
 | 
			
		||||
        std::vector<int> local_coor(tmp._grid->Nd());      
 | 
			
		||||
        std::vector<int> icoor;  
 | 
			
		||||
        tmp._grid->iCoorFromIindex(icoor,si);
 | 
			
		||||
        for (int i = 0; i < tmp._grid->Nd(); i++) local_coor[i] = ocoor[i] + tmp._grid->_rdimensions[i]*icoor[i];
 | 
			
		||||
        
 | 
			
		||||
        for (int s = 0; s < LLs; s++) {
 | 
			
		||||
          std::vector<int> slocal_coor(Btilde._grid->Nd());
 | 
			
		||||
          slocal_coor[0] = s;
 | 
			
		||||
          for (int s4d = 1; s4d< Btilde._grid->Nd(); s4d++) slocal_coor[s4d] = local_coor[s4d-1];
 | 
			
		||||
          int sF = Btilde._grid->oIndexReduced(slocal_coor);  
 | 
			
		||||
          assert(sF < Btilde._grid->oSites());
 | 
			
		||||
          extract(traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF])), v_scalar_object); 
 | 
			
		||||
          for (int sv = 0; sv < v_scalar_object.size(); sv++) scalar_object += v_scalar_object[sv];  // sum across the 5d dimension
 | 
			
		||||
        }
 | 
			
		||||
        tmp._odata[sss].putlane(scalar_object, si);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    PokeIndex<LorentzIndex>(mat, tmp, mu);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
    
 | 
			
		||||
@@ -339,8 +369,8 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
 | 
			
		||||
 // provide the multiply by link that is differentiated between Gparity (with
 | 
			
		||||
 // flavour index) and non-Gparity
 | 
			
		||||
 inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
 | 
			
		||||
		      const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
 | 
			
		||||
		      StencilImpl &St) {
 | 
			
		||||
                      const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
 | 
			
		||||
                      StencilImpl &St) {
 | 
			
		||||
 | 
			
		||||
  typedef SiteHalfSpinor vobj;
 | 
			
		||||
   typedef typename SiteHalfSpinor::scalar_object sobj;
 | 
			
		||||
@@ -375,22 +405,22 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
 | 
			
		||||
       extract(chi,vals);
 | 
			
		||||
       for(int s=0;s<Nsimd;s++){
 | 
			
		||||
 | 
			
		||||
	 grid->iCoorFromIindex(icoor,s);
 | 
			
		||||
         grid->iCoorFromIindex(icoor,s);
 | 
			
		||||
              
 | 
			
		||||
	 assert((icoor[direction]==0)||(icoor[direction]==1));
 | 
			
		||||
         assert((icoor[direction]==0)||(icoor[direction]==1));
 | 
			
		||||
              
 | 
			
		||||
	 int permute_lane;
 | 
			
		||||
	 if ( distance == 1) {
 | 
			
		||||
	   permute_lane = icoor[direction]?1:0;
 | 
			
		||||
	 } else {
 | 
			
		||||
	   permute_lane = icoor[direction]?0:1;
 | 
			
		||||
	 }
 | 
			
		||||
         int permute_lane;
 | 
			
		||||
         if ( distance == 1) {
 | 
			
		||||
           permute_lane = icoor[direction]?1:0;
 | 
			
		||||
         } else {
 | 
			
		||||
           permute_lane = icoor[direction]?0:1;
 | 
			
		||||
         }
 | 
			
		||||
              
 | 
			
		||||
	 if ( permute_lane ) { 
 | 
			
		||||
	   stmp(0) = vals[s](1);
 | 
			
		||||
	   stmp(1) = vals[s](0);
 | 
			
		||||
	   vals[s] = stmp;
 | 
			
		||||
	      }
 | 
			
		||||
         if ( permute_lane ) { 
 | 
			
		||||
           stmp(0) = vals[s](1);
 | 
			
		||||
           stmp(1) = vals[s](0);
 | 
			
		||||
           vals[s] = stmp;
 | 
			
		||||
              }
 | 
			
		||||
       }
 | 
			
		||||
       merge(vtmp,vals);
 | 
			
		||||
            
 | 
			
		||||
 
 | 
			
		||||
@@ -271,11 +271,14 @@ void WilsonFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,in
 | 
			
		||||
  assert(dirdisp<=7);
 | 
			
		||||
  assert(dirdisp>=0);
 | 
			
		||||
 | 
			
		||||
  int LLs = out._grid->_rdimensions[0];
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int ss=0;ss<Umu._grid->oSites();ss++){
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      int sU=ss;
 | 
			
		||||
      int sF = s+Ls*sU; 
 | 
			
		||||
    int sU=ss;
 | 
			
		||||
    for(int s=0;s<LLs;s++){
 | 
			
		||||
      int sF = s+LLs*sU; 
 | 
			
		||||
      assert(sF < out._grid->oSites());
 | 
			
		||||
      Kernels::DiracOptDhopDir(Stencil,Umu,Stencil.CommBuf(),sF,sU,in,out,dirdisp,gamma);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
@@ -305,6 +308,8 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
 | 
			
		||||
  DerivCommTime+=usecond();
 | 
			
		||||
 | 
			
		||||
  Atilde=A;
 | 
			
		||||
  int LLs = B._grid->_rdimensions[0];
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  DerivComputeTime-=usecond();
 | 
			
		||||
  for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
@@ -321,20 +326,18 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
 | 
			
		||||
    DerivDhopComputeTime -= usecond();
 | 
			
		||||
    PARALLEL_FOR_LOOP
 | 
			
		||||
    for (int sss = 0; sss < U._grid->oSites(); sss++) {
 | 
			
		||||
      for (int s = 0; s < Ls; s++) {
 | 
			
		||||
        int sU = sss;
 | 
			
		||||
        int sF = s + Ls * sU;
 | 
			
		||||
 | 
			
		||||
      int sU = sss;
 | 
			
		||||
      for (int s = 0; s < LLs; s++) {
 | 
			
		||||
        int sF = s + LLs * sU;
 | 
			
		||||
        assert(sF < B._grid->oSites());
 | 
			
		||||
        assert(sU < U._grid->oSites());
 | 
			
		||||
 | 
			
		||||
        Kernels::DiracOptDhopDir(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma);
 | 
			
		||||
 | 
			
		||||
        ////////////////////////////
 | 
			
		||||
        // spin trace outer product
 | 
			
		||||
        ////////////////////////////
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    ////////////////////////////
 | 
			
		||||
    // spin trace outer product
 | 
			
		||||
    ////////////////////////////
 | 
			
		||||
    DerivDhopComputeTime += usecond();
 | 
			
		||||
    Impl::InsertForce5D(mat, Btilde, Atilde, mu);
 | 
			
		||||
  }
 | 
			
		||||
@@ -349,7 +352,7 @@ void WilsonFermion5D<Impl>::DhopDeriv(GaugeField &mat,
 | 
			
		||||
{
 | 
			
		||||
  conformable(A._grid,FermionGrid());  
 | 
			
		||||
  conformable(A._grid,B._grid);
 | 
			
		||||
  conformable(GaugeGrid(),mat._grid);
 | 
			
		||||
  //conformable(GaugeGrid(),mat._grid);
 | 
			
		||||
 | 
			
		||||
  mat.checkerboard = A.checkerboard;
 | 
			
		||||
 | 
			
		||||
@@ -363,7 +366,7 @@ void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
 | 
			
		||||
					int dag)
 | 
			
		||||
{
 | 
			
		||||
  conformable(A._grid,FermionRedBlackGrid());
 | 
			
		||||
  conformable(GaugeRedBlackGrid(),mat._grid);
 | 
			
		||||
  //conformable(GaugeRedBlackGrid(),mat._grid);
 | 
			
		||||
  conformable(A._grid,B._grid);
 | 
			
		||||
 | 
			
		||||
  assert(B.checkerboard==Odd);
 | 
			
		||||
@@ -381,7 +384,7 @@ void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
 | 
			
		||||
					int dag)
 | 
			
		||||
{
 | 
			
		||||
  conformable(A._grid,FermionRedBlackGrid());
 | 
			
		||||
  conformable(GaugeRedBlackGrid(),mat._grid);
 | 
			
		||||
  //conformable(GaugeRedBlackGrid(),mat._grid);
 | 
			
		||||
  conformable(A._grid,B._grid);
 | 
			
		||||
 | 
			
		||||
  assert(B.checkerboard==Even);
 | 
			
		||||
 
 | 
			
		||||
@@ -68,8 +68,14 @@ namespace Grid{
 | 
			
		||||
	  assert(U.checkerboard==Odd);
 | 
			
		||||
	  assert(V.checkerboard==U.checkerboard);
 | 
			
		||||
 | 
			
		||||
	  GaugeField ForceO(ucbgrid);
 | 
			
		||||
	  GaugeField ForceE(ucbgrid);
 | 
			
		||||
          // NOTE Guido: WE DO NOT WANT TO USE THIS GRID FOR THE FORCE
 | 
			
		||||
          // INHERIT FROM THE Force field
 | 
			
		||||
          //GaugeField ForceO(ucbgrid);
 | 
			
		||||
          //GaugeField ForceE(ucbgrid);
 | 
			
		||||
          GridRedBlackCartesian* forcecb = new GridRedBlackCartesian(Force._grid);
 | 
			
		||||
          GaugeField ForceO(forcecb);
 | 
			
		||||
          GaugeField ForceE(forcecb);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	  //  X^dag Der_oe MeeInv Meo Y
 | 
			
		||||
	  // Use Mooee as nontrivial but gauge field indept
 | 
			
		||||
@@ -110,8 +116,14 @@ namespace Grid{
 | 
			
		||||
	  assert(V.checkerboard==Odd);
 | 
			
		||||
	  assert(V.checkerboard==V.checkerboard);
 | 
			
		||||
 | 
			
		||||
	  GaugeField ForceO(ucbgrid);
 | 
			
		||||
	  GaugeField ForceE(ucbgrid);
 | 
			
		||||
          // NOTE Guido: WE DO NOT WANT TO USE THIS GRID FOR THE FORCE
 | 
			
		||||
          // INHERIT FROM THE Force field
 | 
			
		||||
 | 
			
		||||
	  //GaugeField ForceO(ucbgrid);
 | 
			
		||||
	  //GaugeField ForceE(ucbgrid);
 | 
			
		||||
          GridRedBlackCartesian* forcecb = new GridRedBlackCartesian(Force._grid);
 | 
			
		||||
          GaugeField ForceO(forcecb);
 | 
			
		||||
          GaugeField ForceE(forcecb);
 | 
			
		||||
 | 
			
		||||
	  //  X^dag Der_oe MeeInv Meo Y
 | 
			
		||||
	  // Use Mooee as nontrivial but gauge field indept
 | 
			
		||||
@@ -130,6 +142,8 @@ namespace Grid{
 | 
			
		||||
	  setCheckerboard(Force,ForceE); 
 | 
			
		||||
	  setCheckerboard(Force,ForceO);
 | 
			
		||||
	  Force=-Force;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
    };
 | 
			
		||||
 
 | 
			
		||||
@@ -166,7 +166,9 @@ namespace Grid{
 | 
			
		||||
	FermionField  X(NumOp.FermionRedBlackGrid());
 | 
			
		||||
	FermionField  Y(NumOp.FermionRedBlackGrid());
 | 
			
		||||
 | 
			
		||||
	GaugeField   force(NumOp.GaugeGrid());	
 | 
			
		||||
	//GaugeField   force(NumOp.GaugeGrid());
 | 
			
		||||
        GaugeField   force(dSdU._grid);
 | 
			
		||||
        conformable(force._grid, dSdU._grid);
 | 
			
		||||
 | 
			
		||||
	//Y=Vdag phi
 | 
			
		||||
	//X = (Mdag M)^-1 V^dag phi
 | 
			
		||||
 
 | 
			
		||||
@@ -114,6 +114,7 @@ class Integrator {
 | 
			
		||||
    // Fundamental updates, include smearing
 | 
			
		||||
    for (int a = 0; a < as[level].actions.size(); ++a) {
 | 
			
		||||
      Field force(U._grid);
 | 
			
		||||
      conformable(U._grid, Mom._grid);
 | 
			
		||||
      Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared);
 | 
			
		||||
      as[level].actions.at(a)->deriv(Us, force);  // deriv should NOT include Ta
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -386,6 +386,19 @@ class Grid_simd {
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////
 | 
			
		||||
  // Getting single lanes
 | 
			
		||||
  ///////////////////////////////
 | 
			
		||||
  inline Scalar_type getlane(int lane) {
 | 
			
		||||
    return ((Scalar_type*)&v)[lane];
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void putlane(const Scalar_type &S, int lane){
 | 
			
		||||
    ((Scalar_type*)&v)[lane] = S;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
};  // end of Grid_simd class definition
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -88,6 +88,21 @@ class iScalar {
 | 
			
		||||
    zeroit(*this);
 | 
			
		||||
    return *this;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  // managing the internal vector structure
 | 
			
		||||
  strong_inline scalar_object getlane(int lane){
 | 
			
		||||
    scalar_object ret;
 | 
			
		||||
    ret._internal = _internal.getlane(lane);
 | 
			
		||||
    return ret;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  strong_inline void putlane(scalar_object &s, int lane){
 | 
			
		||||
    _internal.putlane(s._internal,lane);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
  friend strong_inline void vstream(iScalar<vtype> &out,
 | 
			
		||||
                                    const iScalar<vtype> &in) {
 | 
			
		||||
    vstream(out._internal, in._internal);
 | 
			
		||||
@@ -226,6 +241,20 @@ class iVector {
 | 
			
		||||
    zeroit(*this);
 | 
			
		||||
    return *this;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  strong_inline scalar_object getlane(int lane){
 | 
			
		||||
    scalar_object ret;
 | 
			
		||||
    for (int i = 0; i < N; i++) ret._internal[i] = _internal[i].getlane(lane);
 | 
			
		||||
    return ret;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  strong_inline void putlane(scalar_object &s, int lane){
 | 
			
		||||
    for (int i = 0; i < N; i++) _internal[i].putlane(s._internal[i],lane);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
  friend strong_inline void zeroit(iVector<vtype, N> &that) {
 | 
			
		||||
    for (int i = 0; i < N; i++) {
 | 
			
		||||
      zeroit(that._internal[i]);
 | 
			
		||||
@@ -349,6 +378,25 @@ class iMatrix {
 | 
			
		||||
    return *this;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  strong_inline scalar_object getlane(int lane){
 | 
			
		||||
    scalar_object ret;
 | 
			
		||||
    for (int i = 0; i < N; i++) {
 | 
			
		||||
      for (int j = 0; j < N; j++) {
 | 
			
		||||
	ret._internal[i][j] = _internal[i][j].getlane(lane);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    return ret;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  strong_inline void putlane(scalar_object &s, int lane){
 | 
			
		||||
    for (int i = 0; i < N; i++)
 | 
			
		||||
      for (int j = 0; j < N; j++) _internal[i][j].putlane(s._internal[i][j],lane);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
  friend strong_inline void zeroit(iMatrix<vtype,N> &that){
 | 
			
		||||
    for(int i=0;i<N;i++){
 | 
			
		||||
      for(int j=0;j<N;j++){
 | 
			
		||||
 
 | 
			
		||||
@@ -62,31 +62,45 @@ public:
 | 
			
		||||
  void BuildTheAction(int argc, char **argv)
 | 
			
		||||
 | 
			
		||||
  {
 | 
			
		||||
    typedef WilsonImplR ImplPolicy;
 | 
			
		||||
    typedef ScaledShamirFermionR FermionAction;
 | 
			
		||||
    //typedef WilsonImplR ImplPolicy;
 | 
			
		||||
    typedef DomainWallVec5dImplR ImplPolicy;
 | 
			
		||||
    typedef ScaledShamirFermion<ImplPolicy> FermionAction;
 | 
			
		||||
    typedef typename FermionAction::FermionField FermionField;
 | 
			
		||||
 | 
			
		||||
    const int Ls = 12;
 | 
			
		||||
    const int Ls = 8;
 | 
			
		||||
 | 
			
		||||
    UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
 | 
			
		||||
    UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    GridCartesian* sUGrid   = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
 | 
			
		||||
    GridRedBlackCartesian* sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    FGrid   = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
 | 
			
		||||
    FrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
 | 
			
		||||
    
 | 
			
		||||
    /*
 | 
			
		||||
    FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 | 
			
		||||
    FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
 | 
			
		||||
    */
 | 
			
		||||
 | 
			
		||||
    // temporarily need a gauge field
 | 
			
		||||
    LatticeGaugeField  U(UGrid);
 | 
			
		||||
 | 
			
		||||
    // Gauge action
 | 
			
		||||
    double beta = 4.0;
 | 
			
		||||
    IwasakiGaugeActionR Iaction(beta);
 | 
			
		||||
    WilsonGaugeActionR Iaction(beta);
 | 
			
		||||
 | 
			
		||||
    Real mass = 0.04;
 | 
			
		||||
    Real pv   = 1.0;
 | 
			
		||||
    RealD M5  = 1.5;
 | 
			
		||||
    RealD scale = 2.0;
 | 
			
		||||
    FermionAction DenOp(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,scale);
 | 
			
		||||
    FermionAction NumOp(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,pv,M5,scale);
 | 
			
		||||
    FermionAction DenOp(U,*FGrid,*FrbGrid,*sUGrid,*sUrbGrid,mass,M5,scale);
 | 
			
		||||
    FermionAction NumOp(U,*FGrid,*FrbGrid,*sUGrid,*sUrbGrid,pv,M5,scale);
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "Frb Osites: " << FrbGrid->oSites() << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "sUGrid Osites: " << sUGrid->oSites() << std::endl;
 | 
			
		||||
 | 
			
		||||
    double StoppingCondition = 1.0e-8;
 | 
			
		||||
    double MaxCGIterations = 10000;
 | 
			
		||||
@@ -94,7 +108,7 @@ public:
 | 
			
		||||
    TwoFlavourEvenOddRatioPseudoFermionAction<ImplPolicy> Nf2(NumOp, DenOp,CG,CG);
 | 
			
		||||
 | 
			
		||||
    // Set smearing (true/false), default: false
 | 
			
		||||
    Nf2.is_smeared = true;
 | 
			
		||||
    Nf2.is_smeared = false;
 | 
			
		||||
 | 
			
		||||
    // Collect actions
 | 
			
		||||
    // here an example of 2 level integration
 | 
			
		||||
@@ -154,7 +168,7 @@ int main(int argc, char **argv) {
 | 
			
		||||
 | 
			
		||||
  // Seeds for the random number generators
 | 
			
		||||
  std::vector<int> SerSeed({1, 2, 3, 4, 5});
 | 
			
		||||
  std::vector<int> ParSeed({6, 7, 8, 9, 10});
 | 
			
		||||
  std::vector<int> ParSeed({6, 7, 8, 9, 5});
 | 
			
		||||
  TheHMC.RNGSeeds(SerSeed, ParSeed);
 | 
			
		||||
 | 
			
		||||
  TheHMC.MDparameters.set(20, 1.0);// MDsteps, traj length
 | 
			
		||||
 
 | 
			
		||||
@@ -156,4 +156,6 @@ int main(int argc, char **argv) {
 | 
			
		||||
  TheHMC.MDparameters.set(10, 1.0);// MDsteps, traj length
 | 
			
		||||
 | 
			
		||||
  TheHMC.BuildTheAction(argc, argv);
 | 
			
		||||
 | 
			
		||||
  Grid_finalize();
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user