mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Compare commits
	
		
			36 Commits
		
	
	
		
			3c67d626ba
			...
			feature/se
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| a269a3d919 | |||
| 
						 | 
					0c4f585496 | ||
| 
						 | 
					33d2df46a0 | ||
| 
						 | 
					2df308f649 | ||
| 
						 | 
					92def28bd3 | ||
| ca10bfa1c7 | |||
| 298a6ec51e | |||
| 
						 | 
					e5dbe488a6 | ||
| 
						 | 
					0e27e3847d | ||
| 
						 | 
					393727b93b | ||
| 
						 | 
					2b1fcd78c3 | ||
| 
						 | 
					0a4e0b49a0 | ||
| 
						 | 
					76af169f05 | ||
| 
						 | 
					7b89232251 | ||
| 
						 | 
					ef0ddd5d04 | ||
| 
						 | 
					9b73dacf50 | ||
| 
						 | 
					244b4aa07f | ||
| 
						 | 
					8cfc7342cd | ||
| 
						 | 
					15ae317858 | ||
| 
						 | 
					834f536b5f | ||
| 
						 | 
					c332d9f08b | ||
| cf2923d5dd | |||
| 
						 | 
					0e4413ddde | ||
| 009ccd581e | |||
| 
						 | 
					8cd4263974 | ||
| 
						 | 
					d45c868656 | ||
| 
						 | 
					955a8113de | ||
| 
						 | 
					dbe210dd53 | ||
| 
						 | 
					86e11743ca | ||
| 
						 | 
					980e721f6e | ||
| 
						 | 
					e2a0142d87 | ||
| 895244ecc3 | |||
| addeb621a7 | |||
| 
						 | 
					a7fb25adf6 | ||
| 
						 | 
					e947992957 | ||
| 
						 | 
					bb89a82a07 | 
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -88,6 +88,7 @@ Thumbs.db
 | 
			
		||||
# build directory #
 | 
			
		||||
###################
 | 
			
		||||
build*/*
 | 
			
		||||
Documentation/_build
 | 
			
		||||
 | 
			
		||||
# IDE related files #
 | 
			
		||||
#####################
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										56
									
								
								.travis.yml
									
									
									
									
									
								
							
							
						
						
									
										56
									
								
								.travis.yml
									
									
									
									
									
								
							@@ -1,56 +0,0 @@
 | 
			
		||||
language: cpp
 | 
			
		||||
 | 
			
		||||
cache:
 | 
			
		||||
  directories:
 | 
			
		||||
    - clang
 | 
			
		||||
 | 
			
		||||
matrix:
 | 
			
		||||
  include:
 | 
			
		||||
    - os:        osx
 | 
			
		||||
      osx_image: xcode8.3
 | 
			
		||||
      compiler: clang
 | 
			
		||||
      
 | 
			
		||||
before_install:
 | 
			
		||||
    - export GRIDDIR=`pwd`
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]] && [ ! -e clang/bin ]; then wget $CLANG_LINK; tar -xf `basename $CLANG_LINK`; mkdir clang; mv clang+*/* clang/; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc openssl; fi
 | 
			
		||||
    
 | 
			
		||||
install:
 | 
			
		||||
    - export CWD=`pwd`
 | 
			
		||||
    - echo $CWD
 | 
			
		||||
    - export CC=$CC$VERSION
 | 
			
		||||
    - export CXX=$CXX$VERSION
 | 
			
		||||
    - echo $PATH
 | 
			
		||||
    - which autoconf
 | 
			
		||||
    - autoconf  --version
 | 
			
		||||
    - which automake
 | 
			
		||||
    - automake  --version
 | 
			
		||||
    - which $CC
 | 
			
		||||
    - $CC  --version
 | 
			
		||||
    - which $CXX
 | 
			
		||||
    - $CXX --version
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export EXTRACONF='--with-openssl=/usr/local/opt/openssl'; fi
 | 
			
		||||
    
 | 
			
		||||
script:
 | 
			
		||||
    - ./bootstrap.sh
 | 
			
		||||
    - mkdir build
 | 
			
		||||
    - cd build
 | 
			
		||||
    - mkdir lime
 | 
			
		||||
    - cd lime
 | 
			
		||||
    - mkdir build
 | 
			
		||||
    - cd build
 | 
			
		||||
    - wget http://usqcd-software.github.io/downloads/c-lime/lime-1.3.2.tar.gz
 | 
			
		||||
    - tar xf lime-1.3.2.tar.gz
 | 
			
		||||
    - cd lime-1.3.2
 | 
			
		||||
    - ./configure --prefix=$CWD/build/lime/install
 | 
			
		||||
    - make -j4
 | 
			
		||||
    - make install
 | 
			
		||||
    - cd $CWD/build
 | 
			
		||||
    - ../configure --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF}
 | 
			
		||||
    - make -j4 
 | 
			
		||||
    - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
 | 
			
		||||
    - make check
 | 
			
		||||
@@ -122,8 +122,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
 | 
			
		||||
  assert(shift<fd);
 | 
			
		||||
  
 | 
			
		||||
  int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension];
 | 
			
		||||
  cshiftVector<vobj> send_buf(buffer_size);
 | 
			
		||||
  cshiftVector<vobj> recv_buf(buffer_size);
 | 
			
		||||
  static cshiftVector<vobj> send_buf; send_buf.resize(buffer_size);
 | 
			
		||||
  static cshiftVector<vobj> recv_buf; recv_buf.resize(buffer_size);
 | 
			
		||||
    
 | 
			
		||||
  int cb= (cbmask==0x2)? Odd : Even;
 | 
			
		||||
  int sshift= rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
 | 
			
		||||
@@ -198,8 +198,8 @@ template<class vobj> void  Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
 | 
			
		||||
  int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
 | 
			
		||||
  //  int words = sizeof(vobj)/sizeof(vector_type);
 | 
			
		||||
 | 
			
		||||
  std::vector<cshiftVector<scalar_object> >  send_buf_extract(Nsimd);
 | 
			
		||||
  std::vector<cshiftVector<scalar_object> >  recv_buf_extract(Nsimd);
 | 
			
		||||
  static std::vector<cshiftVector<scalar_object> >  send_buf_extract; send_buf_extract.resize(Nsimd);
 | 
			
		||||
  static std::vector<cshiftVector<scalar_object> >  recv_buf_extract; recv_buf_extract.resize(Nsimd);
 | 
			
		||||
  scalar_object *  recv_buf_extract_mpi;
 | 
			
		||||
  scalar_object *  send_buf_extract_mpi;
 | 
			
		||||
 
 | 
			
		||||
@@ -294,8 +294,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
 | 
			
		||||
  assert(shift<fd);
 | 
			
		||||
  
 | 
			
		||||
  int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension];
 | 
			
		||||
  cshiftVector<vobj> send_buf_v(buffer_size);
 | 
			
		||||
  cshiftVector<vobj> recv_buf_v(buffer_size);
 | 
			
		||||
  static cshiftVector<vobj> send_buf_v; send_buf_v.resize(buffer_size);
 | 
			
		||||
  static cshiftVector<vobj> recv_buf_v; recv_buf_v.resize(buffer_size);
 | 
			
		||||
  vobj *send_buf;
 | 
			
		||||
  vobj *recv_buf;
 | 
			
		||||
  {
 | 
			
		||||
@@ -381,8 +381,8 @@ template<class vobj> void  Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
 | 
			
		||||
  int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
 | 
			
		||||
  //  int words = sizeof(vobj)/sizeof(vector_type);
 | 
			
		||||
 | 
			
		||||
  std::vector<cshiftVector<scalar_object> >  send_buf_extract(Nsimd);
 | 
			
		||||
  std::vector<cshiftVector<scalar_object> >  recv_buf_extract(Nsimd);
 | 
			
		||||
  static std::vector<cshiftVector<scalar_object> >  send_buf_extract; send_buf_extract.resize(Nsimd);
 | 
			
		||||
  static std::vector<cshiftVector<scalar_object> >  recv_buf_extract; recv_buf_extract.resize(Nsimd);
 | 
			
		||||
  scalar_object *  recv_buf_extract_mpi;
 | 
			
		||||
  scalar_object *  send_buf_extract_mpi;
 | 
			
		||||
  {
 | 
			
		||||
 
 | 
			
		||||
@@ -128,7 +128,7 @@ inline void MachineCharacteristics(FieldMetaData &header)
 | 
			
		||||
  std::time_t t = std::time(nullptr);
 | 
			
		||||
  std::tm tm_ = *std::localtime(&t);
 | 
			
		||||
  std::ostringstream oss; 
 | 
			
		||||
  //      oss << std::put_time(&tm_, "%c %Z");
 | 
			
		||||
  oss << std::put_time(&tm_, "%c %Z");
 | 
			
		||||
  header.creation_date = oss.str();
 | 
			
		||||
  header.archive_date  = header.creation_date;
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -205,11 +205,20 @@ public:
 | 
			
		||||
    std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Preferred interface
 | 
			
		||||
  template<class GaugeStats=PeriodicGaugeStatistics>
 | 
			
		||||
  static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu,
 | 
			
		||||
					std::string file, 
 | 
			
		||||
					std::string ens_label = std::string("DWF"))
 | 
			
		||||
  {
 | 
			
		||||
    writeConfiguration(Umu,file,0,1,ens_label);
 | 
			
		||||
  }
 | 
			
		||||
  template<class GaugeStats=PeriodicGaugeStatistics>
 | 
			
		||||
  static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu,
 | 
			
		||||
					std::string file, 
 | 
			
		||||
					int two_row,
 | 
			
		||||
					int bits32)
 | 
			
		||||
					int bits32,
 | 
			
		||||
					std::string ens_label = std::string("DWF"))
 | 
			
		||||
  {
 | 
			
		||||
    typedef vLorentzColourMatrixD vobj;
 | 
			
		||||
    typedef typename vobj::scalar_object sobj;
 | 
			
		||||
@@ -219,8 +228,8 @@ public:
 | 
			
		||||
    // Following should become arguments
 | 
			
		||||
    ///////////////////////////////////////////
 | 
			
		||||
    header.sequence_number = 1;
 | 
			
		||||
    header.ensemble_id     = "UKQCD";
 | 
			
		||||
    header.ensemble_label  = "DWF";
 | 
			
		||||
    header.ensemble_id     = std::string("UKQCD");
 | 
			
		||||
    header.ensemble_label  = ens_label;
 | 
			
		||||
 | 
			
		||||
    typedef LorentzColourMatrixD fobj3D;
 | 
			
		||||
    typedef LorentzColour2x3D    fobj2D;
 | 
			
		||||
@@ -232,7 +241,7 @@ public:
 | 
			
		||||
    GaugeStats Stats; Stats(Umu,header);
 | 
			
		||||
    MachineCharacteristics(header);
 | 
			
		||||
 | 
			
		||||
	uint64_t offset;
 | 
			
		||||
    uint64_t offset;
 | 
			
		||||
 | 
			
		||||
    // Sod it -- always write 3x3 double
 | 
			
		||||
    header.floating_point = std::string("IEEE64BIG");
 | 
			
		||||
 
 | 
			
		||||
@@ -291,12 +291,6 @@ typedef ImprovedStaggeredFermion5D<StaggeredImplR> ImprovedStaggeredFermion5DR;
 | 
			
		||||
typedef ImprovedStaggeredFermion5D<StaggeredImplF> ImprovedStaggeredFermion5DF;
 | 
			
		||||
typedef ImprovedStaggeredFermion5D<StaggeredImplD> ImprovedStaggeredFermion5DD;
 | 
			
		||||
 | 
			
		||||
#ifndef GRID_CUDA
 | 
			
		||||
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplR> ImprovedStaggeredFermionVec5dR;
 | 
			
		||||
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplF> ImprovedStaggeredFermionVec5dF;
 | 
			
		||||
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplD> ImprovedStaggeredFermionVec5dD;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
NAMESPACE_END(Grid);
 | 
			
		||||
 | 
			
		||||
////////////////////
 | 
			
		||||
 
 | 
			
		||||
@@ -183,7 +183,8 @@ NAMESPACE_CHECK(ImplStaggered);
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Single flavour one component spinors with colour index. 5d vec
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
#include <Grid/qcd/action/fermion/StaggeredVec5dImpl.h> 
 | 
			
		||||
NAMESPACE_CHECK(ImplStaggered5dVec);  
 | 
			
		||||
// Deprecate Vec5d
 | 
			
		||||
//#include <Grid/qcd/action/fermion/StaggeredVec5dImpl.h> 
 | 
			
		||||
//NAMESPACE_CHECK(ImplStaggered5dVec);  
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -72,19 +72,23 @@ public:
 | 
			
		||||
    
 | 
			
		||||
  StaggeredImpl(const ImplParams &p = ImplParams()) : Params(p){};
 | 
			
		||||
      
 | 
			
		||||
  static accelerator_inline void multLink(SiteSpinor &phi,
 | 
			
		||||
  template<class _Spinor>
 | 
			
		||||
  static accelerator_inline void multLink(_Spinor &phi,
 | 
			
		||||
		       const SiteDoubledGaugeField &U,
 | 
			
		||||
		       const SiteSpinor &chi,
 | 
			
		||||
		       const _Spinor &chi,
 | 
			
		||||
		       int mu)
 | 
			
		||||
  {
 | 
			
		||||
    mult(&phi(), &U(mu), &chi());
 | 
			
		||||
    auto UU = coalescedRead(U(mu));
 | 
			
		||||
    mult(&phi(), &UU, &chi());
 | 
			
		||||
  }
 | 
			
		||||
  static accelerator_inline void multLinkAdd(SiteSpinor &phi,
 | 
			
		||||
  template<class _Spinor>
 | 
			
		||||
  static accelerator_inline void multLinkAdd(_Spinor &phi,
 | 
			
		||||
			  const SiteDoubledGaugeField &U,
 | 
			
		||||
			  const SiteSpinor &chi,
 | 
			
		||||
			  const _Spinor &chi,
 | 
			
		||||
			  int mu)
 | 
			
		||||
  {
 | 
			
		||||
    mac(&phi(), &U(mu), &chi());
 | 
			
		||||
    auto UU = coalescedRead(U(mu));
 | 
			
		||||
    mac(&phi(), &UU, &chi());
 | 
			
		||||
  }
 | 
			
		||||
      
 | 
			
		||||
  template <class ref>
 | 
			
		||||
 
 | 
			
		||||
@@ -184,18 +184,22 @@ public:
 | 
			
		||||
      mat = TraceIndex<SpinIndex>(P); 
 | 
			
		||||
    }
 | 
			
		||||
      
 | 
			
		||||
    inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds){
 | 
			
		||||
    inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds)
 | 
			
		||||
    {
 | 
			
		||||
      for (int mu = 0; mu < Nd; mu++)
 | 
			
		||||
      mat[mu] = PeekIndex<LorentzIndex>(Uds, mu);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){
 | 
			
		||||
      
 | 
			
		||||
  inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu)
 | 
			
		||||
  {
 | 
			
		||||
#undef USE_OLD_INSERT_FORCE    
 | 
			
		||||
    int Ls=Btilde.Grid()->_fdimensions[0];
 | 
			
		||||
    autoView( mat_v , mat, AcceleratorWrite);
 | 
			
		||||
#ifdef USE_OLD_INSERT_FORCE    
 | 
			
		||||
    GaugeLinkField tmp(mat.Grid());
 | 
			
		||||
    tmp = Zero();
 | 
			
		||||
    {
 | 
			
		||||
      const int Nsimd = SiteSpinor::Nsimd();
 | 
			
		||||
      autoView( tmp_v , tmp, AcceleratorWrite);
 | 
			
		||||
      autoView( Btilde_v , Btilde, AcceleratorRead);
 | 
			
		||||
      autoView( Atilde_v , Atilde, AcceleratorRead);
 | 
			
		||||
@@ -208,6 +212,29 @@ public:
 | 
			
		||||
	});
 | 
			
		||||
    }
 | 
			
		||||
    PokeIndex<LorentzIndex>(mat,tmp,mu);
 | 
			
		||||
#else
 | 
			
		||||
    {
 | 
			
		||||
      const int Nsimd = SiteSpinor::Nsimd();
 | 
			
		||||
      autoView( Btilde_v , Btilde, AcceleratorRead);
 | 
			
		||||
      autoView( Atilde_v , Atilde, AcceleratorRead);
 | 
			
		||||
      accelerator_for(sss,mat.Grid()->oSites(),Nsimd,{
 | 
			
		||||
	  int sU=sss;
 | 
			
		||||
  	  typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType;
 | 
			
		||||
  	  ColorMatrixType sum;
 | 
			
		||||
	  zeroit(sum);  
 | 
			
		||||
	  for(int s=0;s<Ls;s++){
 | 
			
		||||
	    int sF = s+Ls*sU;
 | 
			
		||||
  	    for(int spn=0;spn<Ns;spn++){ //sum over spin
 | 
			
		||||
  	      auto bb = coalescedRead(Btilde_v[sF]()(spn) ); //color vector
 | 
			
		||||
  	      auto aa = coalescedRead(Atilde_v[sF]()(spn) );
 | 
			
		||||
	      auto op = outerProduct(bb,aa);
 | 
			
		||||
  	      sum = sum + op;
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
  	  coalescedWrite(mat_v[sU](mu)(), sum);
 | 
			
		||||
      });
 | 
			
		||||
    }
 | 
			
		||||
#endif    
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -880,11 +880,23 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::vector<RealD> G_s(Ls,1.0);
 | 
			
		||||
  Integer sign = 1; // sign flip for vector/tadpole
 | 
			
		||||
  if ( curr_type == Current::Axial ) {
 | 
			
		||||
    for(int s=0;s<Ls/2;s++){
 | 
			
		||||
      G_s[s] = -1.0;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  else if ( curr_type == Current::Tadpole ) {
 | 
			
		||||
    auto b=this->_b;
 | 
			
		||||
    auto c=this->_c;
 | 
			
		||||
    if ( b == 1 && c == 0 ) {
 | 
			
		||||
      sign = -1;    
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
      std::cerr << "Error: Tadpole implementation currently unavailable for non-Shamir actions." << std::endl;
 | 
			
		||||
      assert(b==1 && c==0);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  for(int s=0;s<Ls;s++){
 | 
			
		||||
 | 
			
		||||
@@ -907,7 +919,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
 | 
			
		||||
 | 
			
		||||
    tmp    = Cshift(tmp,mu,1);
 | 
			
		||||
    Impl::multLinkField(Utmp,this->Umu,tmp,mu);
 | 
			
		||||
    tmp    = G_s[s]*( Utmp*ph - gmu*Utmp*ph ); // Forward hop
 | 
			
		||||
    tmp    = sign*G_s[s]*( Utmp*ph - gmu*Utmp*ph ); // Forward hop
 | 
			
		||||
    tmp    = where((lcoor>=tmin),tmp,zz); // Mask the time 
 | 
			
		||||
    L_Q    = where((lcoor<=tmax),tmp,zz); // Position of current complicated
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -680,7 +680,8 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilView &st,
 | 
			
		||||
  gauge2 =(uint64_t)&UU[sU]( Z );				\
 | 
			
		||||
  gauge3 =(uint64_t)&UU[sU]( T ); 
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
#undef STAG_VEC5D
 | 
			
		||||
#ifdef STAG_VEC5D
 | 
			
		||||
  // This is the single precision 5th direction vectorised kernel
 | 
			
		||||
#include <Grid/simd/Intel512single.h>
 | 
			
		||||
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilView &st,
 | 
			
		||||
@@ -790,7 +791,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilView
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
   
 | 
			
		||||
   
 | 
			
		||||
#endif   
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define PERMUTE_DIR3 __asm__ (	\
 | 
			
		||||
 
 | 
			
		||||
@@ -32,25 +32,50 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
NAMESPACE_BEGIN(Grid);
 | 
			
		||||
 | 
			
		||||
#define LOAD_CHI(b)		\
 | 
			
		||||
#ifdef GRID_SIMT
 | 
			
		||||
 | 
			
		||||
#define LOAD_CHI(ptype,b)			\
 | 
			
		||||
  const SiteSpinor & ref (b[offset]);				\
 | 
			
		||||
  Chi_0=coalescedReadPermute<ptype>(ref()()(0),perm,lane);	\
 | 
			
		||||
  Chi_1=coalescedReadPermute<ptype>(ref()()(1),perm,lane);	\
 | 
			
		||||
  Chi_2=coalescedReadPermute<ptype>(ref()()(2),perm,lane);
 | 
			
		||||
 | 
			
		||||
#define LOAD_CHI_COMMS(b)		\
 | 
			
		||||
  const SiteSpinor & ref (b[offset]);	\
 | 
			
		||||
    Chi_0=ref()()(0);\
 | 
			
		||||
    Chi_1=ref()()(1);\
 | 
			
		||||
    Chi_2=ref()()(2);
 | 
			
		||||
  Chi_0=coalescedRead(ref()()(0),lane);	\
 | 
			
		||||
  Chi_1=coalescedRead(ref()()(1),lane);	\
 | 
			
		||||
  Chi_2=coalescedRead(ref()()(2),lane);
 | 
			
		||||
 | 
			
		||||
#define PERMUTE_DIR(dir)	;
 | 
			
		||||
#else
 | 
			
		||||
#define LOAD_CHI(ptype,b)      LOAD_CHI_COMMS(b)
 | 
			
		||||
 | 
			
		||||
#define LOAD_CHI_COMMS(b)		\
 | 
			
		||||
  const SiteSpinor & ref (b[offset]);	\
 | 
			
		||||
  Chi_0=ref()()(0);			\
 | 
			
		||||
  Chi_1=ref()()(1);			\
 | 
			
		||||
  Chi_2=ref()()(2);
 | 
			
		||||
 | 
			
		||||
#define PERMUTE_DIR(dir)			\
 | 
			
		||||
  permute##dir(Chi_0,Chi_0);			\
 | 
			
		||||
  permute##dir(Chi_1,Chi_1);			\
 | 
			
		||||
  permute##dir(Chi_2,Chi_2);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// To splat or not to splat depends on the implementation
 | 
			
		||||
#define MULT(A,UChi)				\
 | 
			
		||||
  auto & ref(U[sU](A));			\
 | 
			
		||||
   Impl::loadLinkElement(U_00,ref()(0,0));      \
 | 
			
		||||
   Impl::loadLinkElement(U_10,ref()(1,0));      \
 | 
			
		||||
   Impl::loadLinkElement(U_20,ref()(2,0));      \
 | 
			
		||||
   Impl::loadLinkElement(U_01,ref()(0,1));      \
 | 
			
		||||
   Impl::loadLinkElement(U_11,ref()(1,1));      \
 | 
			
		||||
   Impl::loadLinkElement(U_21,ref()(2,1));      \
 | 
			
		||||
   Impl::loadLinkElement(U_02,ref()(0,2));     \
 | 
			
		||||
   Impl::loadLinkElement(U_12,ref()(1,2));     \
 | 
			
		||||
   Impl::loadLinkElement(U_22,ref()(2,2));     \
 | 
			
		||||
    U_00=coalescedRead(ref()(0,0),lane);				\
 | 
			
		||||
    U_10=coalescedRead(ref()(1,0),lane);				\
 | 
			
		||||
    U_20=coalescedRead(ref()(2,0),lane);				\
 | 
			
		||||
    U_01=coalescedRead(ref()(0,1),lane);				\
 | 
			
		||||
    U_11=coalescedRead(ref()(1,1),lane);				\
 | 
			
		||||
    U_21=coalescedRead(ref()(2,1),lane);				\
 | 
			
		||||
    U_02=coalescedRead(ref()(0,2),lane);				\
 | 
			
		||||
    U_12=coalescedRead(ref()(1,2),lane);				\
 | 
			
		||||
    U_22=coalescedRead(ref()(2,2),lane);				\
 | 
			
		||||
    UChi ## _0  = U_00*Chi_0;	       \
 | 
			
		||||
    UChi ## _1  = U_10*Chi_0;\
 | 
			
		||||
    UChi ## _2  = U_20*Chi_0;\
 | 
			
		||||
@@ -63,15 +88,15 @@ NAMESPACE_BEGIN(Grid);
 | 
			
		||||
 | 
			
		||||
#define MULT_ADD(U,A,UChi)			\
 | 
			
		||||
  auto & ref(U[sU](A));			\
 | 
			
		||||
   Impl::loadLinkElement(U_00,ref()(0,0));      \
 | 
			
		||||
   Impl::loadLinkElement(U_10,ref()(1,0));      \
 | 
			
		||||
   Impl::loadLinkElement(U_20,ref()(2,0));      \
 | 
			
		||||
   Impl::loadLinkElement(U_01,ref()(0,1));      \
 | 
			
		||||
   Impl::loadLinkElement(U_11,ref()(1,1));      \
 | 
			
		||||
   Impl::loadLinkElement(U_21,ref()(2,1));      \
 | 
			
		||||
   Impl::loadLinkElement(U_02,ref()(0,2));     \
 | 
			
		||||
   Impl::loadLinkElement(U_12,ref()(1,2));     \
 | 
			
		||||
   Impl::loadLinkElement(U_22,ref()(2,2));     \
 | 
			
		||||
    U_00=coalescedRead(ref()(0,0),lane);				\
 | 
			
		||||
    U_10=coalescedRead(ref()(1,0),lane);				\
 | 
			
		||||
    U_20=coalescedRead(ref()(2,0),lane);				\
 | 
			
		||||
    U_01=coalescedRead(ref()(0,1),lane);				\
 | 
			
		||||
    U_11=coalescedRead(ref()(1,1),lane);				\
 | 
			
		||||
    U_21=coalescedRead(ref()(2,1),lane);				\
 | 
			
		||||
    U_02=coalescedRead(ref()(0,2),lane);				\
 | 
			
		||||
    U_12=coalescedRead(ref()(1,2),lane);				\
 | 
			
		||||
    U_22=coalescedRead(ref()(2,2),lane);				\
 | 
			
		||||
    UChi ## _0 += U_00*Chi_0;	       \
 | 
			
		||||
    UChi ## _1 += U_10*Chi_0;\
 | 
			
		||||
    UChi ## _2 += U_20*Chi_0;\
 | 
			
		||||
@@ -83,24 +108,18 @@ NAMESPACE_BEGIN(Grid);
 | 
			
		||||
    UChi ## _2 += U_22*Chi_2;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define PERMUTE_DIR(dir)			\
 | 
			
		||||
  permute##dir(Chi_0,Chi_0);			\
 | 
			
		||||
  permute##dir(Chi_1,Chi_1);			\
 | 
			
		||||
  permute##dir(Chi_2,Chi_2);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define HAND_STENCIL_LEG_BASE(Dir,Perm,skew)	\
 | 
			
		||||
  SE=st.GetEntry(ptype,Dir+skew,sF);	\
 | 
			
		||||
  offset = SE->_offset;			\
 | 
			
		||||
  local  = SE->_is_local;		\
 | 
			
		||||
  perm   = SE->_permute;		\
 | 
			
		||||
  if ( local ) {						\
 | 
			
		||||
    LOAD_CHI(in);					\
 | 
			
		||||
    LOAD_CHI(Perm,in);						\
 | 
			
		||||
    if ( perm) {						\
 | 
			
		||||
      PERMUTE_DIR(Perm);					\
 | 
			
		||||
    }								\
 | 
			
		||||
  } else {							\
 | 
			
		||||
    LOAD_CHI(buf);						\
 | 
			
		||||
    LOAD_CHI_COMMS(buf);					\
 | 
			
		||||
  }								
 | 
			
		||||
 | 
			
		||||
#define HAND_STENCIL_LEG_BEGIN(Dir,Perm,skew,even)		\
 | 
			
		||||
@@ -116,19 +135,18 @@ NAMESPACE_BEGIN(Grid);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define HAND_STENCIL_LEG_INT(U,Dir,Perm,skew,even)	\
 | 
			
		||||
  SE=st.GetEntry(ptype,Dir+skew,sF);			\
 | 
			
		||||
  offset = SE->_offset;					\
 | 
			
		||||
  local  = SE->_is_local;				\
 | 
			
		||||
  perm   = SE->_permute;				\
 | 
			
		||||
  if ( local ) {					\
 | 
			
		||||
    LOAD_CHI(in);				\
 | 
			
		||||
    LOAD_CHI(Perm,in);					\
 | 
			
		||||
    if ( perm) {					\
 | 
			
		||||
      PERMUTE_DIR(Perm);				\
 | 
			
		||||
    }							\
 | 
			
		||||
  } else if ( st.same_node[Dir] ) {			\
 | 
			
		||||
    LOAD_CHI(buf);					\
 | 
			
		||||
    LOAD_CHI_COMMS(buf);				\
 | 
			
		||||
  }							\
 | 
			
		||||
  if (local || st.same_node[Dir] ) {		\
 | 
			
		||||
    MULT_ADD(U,Dir,even);				\
 | 
			
		||||
@@ -140,10 +158,32 @@ NAMESPACE_BEGIN(Grid);
 | 
			
		||||
  local  = SE->_is_local;				\
 | 
			
		||||
  if ((!local) && (!st.same_node[Dir]) ) {		\
 | 
			
		||||
    nmu++;							\
 | 
			
		||||
    { LOAD_CHI(buf);	  }					\
 | 
			
		||||
    { LOAD_CHI_COMMS(buf);	  }				\
 | 
			
		||||
    { MULT_ADD(U,Dir,even); }					\
 | 
			
		||||
  }								
 | 
			
		||||
 | 
			
		||||
#define HAND_DECLARATIONS(Simd) \
 | 
			
		||||
  Simd even_0;			\
 | 
			
		||||
  Simd even_1;			\
 | 
			
		||||
  Simd even_2;			\
 | 
			
		||||
  Simd odd_0;			\
 | 
			
		||||
  Simd odd_1;			\
 | 
			
		||||
  Simd odd_2;		        \
 | 
			
		||||
		      		\
 | 
			
		||||
  Simd Chi_0;			\
 | 
			
		||||
  Simd Chi_1;			\
 | 
			
		||||
  Simd Chi_2;			\
 | 
			
		||||
				\
 | 
			
		||||
  Simd U_00;			\
 | 
			
		||||
  Simd U_10;			\
 | 
			
		||||
  Simd U_20;			\
 | 
			
		||||
  Simd U_01;			\
 | 
			
		||||
  Simd U_11;			\
 | 
			
		||||
  Simd U_21;			\
 | 
			
		||||
  Simd U_02;			\
 | 
			
		||||
  Simd U_12;			\
 | 
			
		||||
  Simd U_22;			
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
template <int Naik> accelerator_inline
 | 
			
		||||
@@ -155,28 +195,14 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilView &st,
 | 
			
		||||
  typedef typename Simd::scalar_type S;
 | 
			
		||||
  typedef typename Simd::vector_type V;
 | 
			
		||||
 | 
			
		||||
  Simd even_0; // 12 regs on knc
 | 
			
		||||
  Simd even_1;
 | 
			
		||||
  Simd even_2;
 | 
			
		||||
  Simd odd_0; // 12 regs on knc
 | 
			
		||||
  Simd odd_1;
 | 
			
		||||
  Simd odd_2;
 | 
			
		||||
 | 
			
		||||
  Simd Chi_0;    // two spinor; 6 regs
 | 
			
		||||
  Simd Chi_1;
 | 
			
		||||
  Simd Chi_2;
 | 
			
		||||
  
 | 
			
		||||
  Simd U_00;  // two rows of U matrix
 | 
			
		||||
  Simd U_10;
 | 
			
		||||
  Simd U_20;  
 | 
			
		||||
  Simd U_01;
 | 
			
		||||
  Simd U_11;
 | 
			
		||||
  Simd U_21;  // 2 reg left.
 | 
			
		||||
  Simd U_02;
 | 
			
		||||
  Simd U_12;
 | 
			
		||||
  Simd U_22; 
 | 
			
		||||
  const int Nsimd = SiteHalfSpinor::Nsimd();
 | 
			
		||||
  const int lane=acceleratorSIMTlane(Nsimd);
 | 
			
		||||
  typedef decltype( coalescedRead( in[0]()()(0) )) Simt;
 | 
			
		||||
  HAND_DECLARATIONS(Simt);
 | 
			
		||||
 | 
			
		||||
  SiteSpinor result;
 | 
			
		||||
  typedef decltype( coalescedRead( in[0] )) calcSiteSpinor;
 | 
			
		||||
  calcSiteSpinor result;
 | 
			
		||||
  int offset,local,perm, ptype;
 | 
			
		||||
 | 
			
		||||
  StencilEntry *SE;
 | 
			
		||||
@@ -215,7 +241,7 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilView &st,
 | 
			
		||||
      result()()(1) = even_1 + odd_1;
 | 
			
		||||
      result()()(2) = even_2 + odd_2;
 | 
			
		||||
    }
 | 
			
		||||
    vstream(out[sF],result);
 | 
			
		||||
    coalescedWrite(out[sF],result);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -230,28 +256,13 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilView &st,
 | 
			
		||||
  typedef typename Simd::scalar_type S;
 | 
			
		||||
  typedef typename Simd::vector_type V;
 | 
			
		||||
 | 
			
		||||
  Simd even_0; // 12 regs on knc
 | 
			
		||||
  Simd even_1;
 | 
			
		||||
  Simd even_2;
 | 
			
		||||
  Simd odd_0; // 12 regs on knc
 | 
			
		||||
  Simd odd_1;
 | 
			
		||||
  Simd odd_2;
 | 
			
		||||
  const int Nsimd = SiteHalfSpinor::Nsimd();
 | 
			
		||||
  const int lane=acceleratorSIMTlane(Nsimd);
 | 
			
		||||
  typedef decltype( coalescedRead( in[0]()()(0) )) Simt;
 | 
			
		||||
  HAND_DECLARATIONS(Simt);
 | 
			
		||||
 | 
			
		||||
  Simd Chi_0;    // two spinor; 6 regs
 | 
			
		||||
  Simd Chi_1;
 | 
			
		||||
  Simd Chi_2;
 | 
			
		||||
  
 | 
			
		||||
  Simd U_00;  // two rows of U matrix
 | 
			
		||||
  Simd U_10;
 | 
			
		||||
  Simd U_20;  
 | 
			
		||||
  Simd U_01;
 | 
			
		||||
  Simd U_11;
 | 
			
		||||
  Simd U_21;  // 2 reg left.
 | 
			
		||||
  Simd U_02;
 | 
			
		||||
  Simd U_12;
 | 
			
		||||
  Simd U_22; 
 | 
			
		||||
 | 
			
		||||
  SiteSpinor result;
 | 
			
		||||
  typedef decltype( coalescedRead( in[0] )) calcSiteSpinor;
 | 
			
		||||
  calcSiteSpinor result;
 | 
			
		||||
  int offset, ptype, local, perm;
 | 
			
		||||
 | 
			
		||||
  StencilEntry *SE;
 | 
			
		||||
@@ -261,8 +272,8 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilView &st,
 | 
			
		||||
  //    int sF=s+LLs*sU;
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
    even_0 = Zero();    even_1 = Zero();    even_2 = Zero();
 | 
			
		||||
     odd_0 = Zero();     odd_1 = Zero();     odd_2 = Zero();
 | 
			
		||||
    zeroit(even_0);    zeroit(even_1);    zeroit(even_2);
 | 
			
		||||
    zeroit(odd_0);    zeroit(odd_1);    zeroit(odd_2);
 | 
			
		||||
 | 
			
		||||
    skew = 0;
 | 
			
		||||
    HAND_STENCIL_LEG_INT(U,Xp,3,skew,even);  
 | 
			
		||||
@@ -294,7 +305,7 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilView &st,
 | 
			
		||||
      result()()(1) = even_1 + odd_1;
 | 
			
		||||
      result()()(2) = even_2 + odd_2;
 | 
			
		||||
    }
 | 
			
		||||
    vstream(out[sF],result);
 | 
			
		||||
    coalescedWrite(out[sF],result);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -309,28 +320,13 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st,
 | 
			
		||||
  typedef typename Simd::scalar_type S;
 | 
			
		||||
  typedef typename Simd::vector_type V;
 | 
			
		||||
 | 
			
		||||
  Simd even_0; // 12 regs on knc
 | 
			
		||||
  Simd even_1;
 | 
			
		||||
  Simd even_2;
 | 
			
		||||
  Simd odd_0; // 12 regs on knc
 | 
			
		||||
  Simd odd_1;
 | 
			
		||||
  Simd odd_2;
 | 
			
		||||
  const int Nsimd = SiteHalfSpinor::Nsimd();
 | 
			
		||||
  const int lane=acceleratorSIMTlane(Nsimd);
 | 
			
		||||
  typedef decltype( coalescedRead( in[0]()()(0) )) Simt;
 | 
			
		||||
  HAND_DECLARATIONS(Simt);
 | 
			
		||||
 | 
			
		||||
  Simd Chi_0;    // two spinor; 6 regs
 | 
			
		||||
  Simd Chi_1;
 | 
			
		||||
  Simd Chi_2;
 | 
			
		||||
  
 | 
			
		||||
  Simd U_00;  // two rows of U matrix
 | 
			
		||||
  Simd U_10;
 | 
			
		||||
  Simd U_20;  
 | 
			
		||||
  Simd U_01;
 | 
			
		||||
  Simd U_11;
 | 
			
		||||
  Simd U_21;  // 2 reg left.
 | 
			
		||||
  Simd U_02;
 | 
			
		||||
  Simd U_12;
 | 
			
		||||
  Simd U_22; 
 | 
			
		||||
 | 
			
		||||
  SiteSpinor result;
 | 
			
		||||
  typedef decltype( coalescedRead( in[0] )) calcSiteSpinor;
 | 
			
		||||
  calcSiteSpinor result;
 | 
			
		||||
  int offset, ptype, local;
 | 
			
		||||
 | 
			
		||||
  StencilEntry *SE;
 | 
			
		||||
@@ -340,8 +336,8 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st,
 | 
			
		||||
  //    int sF=s+LLs*sU;
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
    even_0 = Zero();    even_1 = Zero();    even_2 = Zero();
 | 
			
		||||
     odd_0 = Zero();     odd_1 = Zero();     odd_2 = Zero();
 | 
			
		||||
    zeroit(even_0);    zeroit(even_1);    zeroit(even_2);
 | 
			
		||||
    zeroit(odd_0);    zeroit(odd_1);    zeroit(odd_2);
 | 
			
		||||
    int nmu=0;
 | 
			
		||||
    skew = 0;
 | 
			
		||||
    HAND_STENCIL_LEG_EXT(U,Xp,3,skew,even);  
 | 
			
		||||
@@ -374,7 +370,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st,
 | 
			
		||||
	result()()(1) = even_1 + odd_1;
 | 
			
		||||
	result()()(2) = even_2 + odd_2;
 | 
			
		||||
      }
 | 
			
		||||
      out[sF] = out[sF] + result;
 | 
			
		||||
      coalescedWrite(out[sF] , out(sF)+ result);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
@@ -397,6 +393,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st,
 | 
			
		||||
						     const FermionFieldView &in, FermionFieldView &out, int dag); \
 | 
			
		||||
*/
 | 
			
		||||
#undef LOAD_CHI
 | 
			
		||||
#undef HAND_DECLARATIONS
 | 
			
		||||
 | 
			
		||||
NAMESPACE_END(Grid);
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -35,39 +35,32 @@ NAMESPACE_BEGIN(Grid);
 | 
			
		||||
#define GENERIC_STENCIL_LEG(U,Dir,skew,multLink)		\
 | 
			
		||||
  SE = st.GetEntry(ptype, Dir+skew, sF);			\
 | 
			
		||||
  if (SE->_is_local ) {						\
 | 
			
		||||
    if (SE->_permute) {						\
 | 
			
		||||
      chi_p = χ						\
 | 
			
		||||
      permute(chi,  in[SE->_offset], ptype);			\
 | 
			
		||||
    } else {							\
 | 
			
		||||
      chi_p = &in[SE->_offset];					\
 | 
			
		||||
    }								\
 | 
			
		||||
    int perm= SE->_permute;						\
 | 
			
		||||
    chi = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);\
 | 
			
		||||
  } else {							\
 | 
			
		||||
    chi_p = &buf[SE->_offset];					\
 | 
			
		||||
    chi = coalescedRead(buf[SE->_offset],lane);			\
 | 
			
		||||
  }								\
 | 
			
		||||
  multLink(Uchi, U[sU], *chi_p, Dir);			
 | 
			
		||||
  acceleratorSynchronise();					\
 | 
			
		||||
  multLink(Uchi, U[sU], chi, Dir);			
 | 
			
		||||
 | 
			
		||||
#define GENERIC_STENCIL_LEG_INT(U,Dir,skew,multLink)		\
 | 
			
		||||
  SE = st.GetEntry(ptype, Dir+skew, sF);			\
 | 
			
		||||
  if (SE->_is_local ) {						\
 | 
			
		||||
    if (SE->_permute) {						\
 | 
			
		||||
      chi_p = χ						\
 | 
			
		||||
      permute(chi,  in[SE->_offset], ptype);			\
 | 
			
		||||
    } else {							\
 | 
			
		||||
      chi_p = &in[SE->_offset];					\
 | 
			
		||||
    }								\
 | 
			
		||||
    int perm= SE->_permute;						\
 | 
			
		||||
    chi = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);\
 | 
			
		||||
  } else if ( st.same_node[Dir] ) {				\
 | 
			
		||||
    chi_p = &buf[SE->_offset];					\
 | 
			
		||||
    chi = coalescedRead(buf[SE->_offset],lane);                 \
 | 
			
		||||
  }								\
 | 
			
		||||
  if (SE->_is_local || st.same_node[Dir] ) {			\
 | 
			
		||||
    multLink(Uchi, U[sU], *chi_p, Dir);				\
 | 
			
		||||
    multLink(Uchi, U[sU], chi, Dir);				\
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink)		\
 | 
			
		||||
  SE = st.GetEntry(ptype, Dir+skew, sF);			\
 | 
			
		||||
  if ((!SE->_is_local) && (!st.same_node[Dir]) ) {		\
 | 
			
		||||
    nmu++;							\
 | 
			
		||||
    chi_p = &buf[SE->_offset];					\
 | 
			
		||||
    multLink(Uchi, U[sU], *chi_p, Dir);				\
 | 
			
		||||
    chi = coalescedRead(buf[SE->_offset],lane);			\
 | 
			
		||||
    multLink(Uchi, U[sU], chi, Dir);				\
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
@@ -84,12 +77,14 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilView &st,
 | 
			
		||||
					     SiteSpinor *buf, int sF, int sU, 
 | 
			
		||||
					     const FermionFieldView &in, FermionFieldView &out, int dag) 
 | 
			
		||||
{
 | 
			
		||||
  const SiteSpinor *chi_p;
 | 
			
		||||
  SiteSpinor chi;
 | 
			
		||||
  SiteSpinor Uchi;
 | 
			
		||||
  typedef decltype(coalescedRead(in[0])) calcSpinor;
 | 
			
		||||
  calcSpinor chi;
 | 
			
		||||
  calcSpinor Uchi;
 | 
			
		||||
  StencilEntry *SE;
 | 
			
		||||
  int ptype;
 | 
			
		||||
  int skew;
 | 
			
		||||
  const int Nsimd = SiteHalfSpinor::Nsimd();
 | 
			
		||||
  const int lane=acceleratorSIMTlane(Nsimd);
 | 
			
		||||
 | 
			
		||||
  //  for(int s=0;s<LLs;s++){
 | 
			
		||||
  //
 | 
			
		||||
@@ -118,7 +113,7 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilView &st,
 | 
			
		||||
    if ( dag ) { 
 | 
			
		||||
      Uchi = - Uchi;
 | 
			
		||||
    } 
 | 
			
		||||
    vstream(out[sF], Uchi);
 | 
			
		||||
    coalescedWrite(out[sF], Uchi,lane);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@@ -130,13 +125,16 @@ template <int Naik> accelerator_inline
 | 
			
		||||
void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st, 
 | 
			
		||||
						DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
 | 
			
		||||
						SiteSpinor *buf, int sF, int sU, 
 | 
			
		||||
						const FermionFieldView &in, FermionFieldView &out,int dag) {
 | 
			
		||||
  const SiteSpinor *chi_p;
 | 
			
		||||
  SiteSpinor chi;
 | 
			
		||||
  SiteSpinor Uchi;
 | 
			
		||||
						const FermionFieldView &in, FermionFieldView &out,int dag)
 | 
			
		||||
{
 | 
			
		||||
  typedef decltype(coalescedRead(in[0])) calcSpinor;
 | 
			
		||||
  calcSpinor chi;
 | 
			
		||||
  calcSpinor Uchi;
 | 
			
		||||
  StencilEntry *SE;
 | 
			
		||||
  int ptype;
 | 
			
		||||
  int skew ;
 | 
			
		||||
  const int Nsimd = SiteHalfSpinor::Nsimd();
 | 
			
		||||
  const int lane=acceleratorSIMTlane(Nsimd);
 | 
			
		||||
 | 
			
		||||
  //  for(int s=0;s<LLs;s++){
 | 
			
		||||
  //    int sF=LLs*sU+s;
 | 
			
		||||
@@ -165,7 +163,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st,
 | 
			
		||||
    if ( dag ) {
 | 
			
		||||
      Uchi = - Uchi;
 | 
			
		||||
    }
 | 
			
		||||
    vstream(out[sF], Uchi);
 | 
			
		||||
    coalescedWrite(out[sF], Uchi,lane);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@@ -178,14 +176,17 @@ template <int Naik> accelerator_inline
 | 
			
		||||
void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st, 
 | 
			
		||||
						DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
 | 
			
		||||
						SiteSpinor *buf, int sF, int sU,
 | 
			
		||||
						const FermionFieldView &in, FermionFieldView &out,int dag) {
 | 
			
		||||
  const SiteSpinor *chi_p;
 | 
			
		||||
  //  SiteSpinor chi;
 | 
			
		||||
  SiteSpinor Uchi;
 | 
			
		||||
						const FermionFieldView &in, FermionFieldView &out,int dag)
 | 
			
		||||
{
 | 
			
		||||
  typedef decltype(coalescedRead(in[0])) calcSpinor;
 | 
			
		||||
  calcSpinor chi;
 | 
			
		||||
  calcSpinor Uchi;
 | 
			
		||||
  StencilEntry *SE;
 | 
			
		||||
  int ptype;
 | 
			
		||||
  int nmu=0;
 | 
			
		||||
  int skew ;
 | 
			
		||||
  const int Nsimd = SiteHalfSpinor::Nsimd();
 | 
			
		||||
  const int lane=acceleratorSIMTlane(Nsimd);
 | 
			
		||||
 | 
			
		||||
  //  for(int s=0;s<LLs;s++){
 | 
			
		||||
  //    int sF=LLs*sU+s;
 | 
			
		||||
@@ -211,11 +212,12 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st,
 | 
			
		||||
    GENERIC_STENCIL_LEG_EXT(UUU,Zm,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd);
 | 
			
		||||
    }
 | 
			
		||||
    if ( nmu ) { 
 | 
			
		||||
      if ( dag ) { 
 | 
			
		||||
	out[sF] = out[sF] - Uchi;
 | 
			
		||||
    if ( nmu ) {
 | 
			
		||||
      auto _out = coalescedRead(out[sF],lane);
 | 
			
		||||
      if ( dag ) {
 | 
			
		||||
	coalescedWrite(out[sF], _out-Uchi,lane);
 | 
			
		||||
      } else { 
 | 
			
		||||
	out[sF] = out[sF] + Uchi;
 | 
			
		||||
	coalescedWrite(out[sF], _out+Uchi,lane);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
@@ -261,6 +263,8 @@ void StaggeredKernels<Impl>::DhopImproved(StencilImpl &st, LebesgueOrder &lo,
 | 
			
		||||
  GridBase *FGrid=in.Grid();  
 | 
			
		||||
  GridBase *UGrid=U.Grid();  
 | 
			
		||||
  typedef StaggeredKernels<Impl> ThisKernel;
 | 
			
		||||
  const int Nsimd = SiteHalfSpinor::Nsimd();
 | 
			
		||||
  const int lane=acceleratorSIMTlane(Nsimd);
 | 
			
		||||
  autoView( UUU_v , UUU, AcceleratorRead);
 | 
			
		||||
  autoView( U_v   ,   U, AcceleratorRead);
 | 
			
		||||
  autoView( in_v  ,  in, AcceleratorRead);
 | 
			
		||||
@@ -301,6 +305,8 @@ void StaggeredKernels<Impl>::DhopNaive(StencilImpl &st, LebesgueOrder &lo,
 | 
			
		||||
  GridBase *FGrid=in.Grid();  
 | 
			
		||||
  GridBase *UGrid=U.Grid();  
 | 
			
		||||
  typedef StaggeredKernels<Impl> ThisKernel;
 | 
			
		||||
  const int Nsimd = SiteHalfSpinor::Nsimd();
 | 
			
		||||
  const int lane=acceleratorSIMTlane(Nsimd);
 | 
			
		||||
  autoView( UUU_v ,   U, AcceleratorRead);
 | 
			
		||||
  autoView( U_v   ,   U, AcceleratorRead);
 | 
			
		||||
  autoView( in_v  ,  in, AcceleratorRead);
 | 
			
		||||
 
 | 
			
		||||
@@ -85,21 +85,18 @@ public:
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogDebug << "Stout smearing started\n";
 | 
			
		||||
 | 
			
		||||
    // Smear the configurations
 | 
			
		||||
    // C contains the staples multiplied by some rho
 | 
			
		||||
    u_smr = U ; // set the smeared field to the current gauge field
 | 
			
		||||
    SmearBase->smear(C, U);
 | 
			
		||||
 | 
			
		||||
    for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
      if( mu == OrthogDim )
 | 
			
		||||
        tmp = 1.0;  // Don't smear in the orthogonal direction
 | 
			
		||||
      else {
 | 
			
		||||
        tmp = peekLorentz(C, mu);
 | 
			
		||||
        Umu = peekLorentz(U, mu);
 | 
			
		||||
        iq_mu = Ta(
 | 
			
		||||
                   tmp *
 | 
			
		||||
                   adj(Umu));  // iq_mu = Ta(Omega_mu) to match the signs with the paper
 | 
			
		||||
        exponentiate_iQ(tmp, iq_mu);
 | 
			
		||||
      }
 | 
			
		||||
      pokeLorentz(u_smr, tmp * Umu, mu);  // u_smr = exp(iQ_mu)*U_mu
 | 
			
		||||
      if( mu == OrthogDim ) continue ;
 | 
			
		||||
      // u_smr = exp(iQ_mu)*U_mu apart from Orthogdim
 | 
			
		||||
      Umu = peekLorentz(U, mu);
 | 
			
		||||
      tmp = peekLorentz(C, mu);
 | 
			
		||||
      iq_mu = Ta( tmp * adj(Umu));  
 | 
			
		||||
      exponentiate_iQ(tmp, iq_mu);
 | 
			
		||||
      pokeLorentz(u_smr, tmp * Umu, mu);
 | 
			
		||||
    }
 | 
			
		||||
    std::cout << GridLogDebug << "Stout smearing completed\n";
 | 
			
		||||
  };
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										35
									
								
								Grid/serialisation/BaseIO.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								Grid/serialisation/BaseIO.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,35 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/serialisation/BaseIO.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Michael Marshall <michael.marshall@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/GridCore.h>
 | 
			
		||||
 | 
			
		||||
NAMESPACE_BEGIN(Grid)
 | 
			
		||||
 | 
			
		||||
std::uint64_t EigenIO::EigenResizeCounter(0);
 | 
			
		||||
 | 
			
		||||
NAMESPACE_END(Grid)
 | 
			
		||||
@@ -9,6 +9,7 @@
 | 
			
		||||
Author: Antonin Portelli <antonin.portelli@me.com>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
Author: Michael Marshall <michael.marshall@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
@@ -30,6 +31,7 @@ Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
#ifndef GRID_SERIALISATION_ABSTRACT_READER_H
 | 
			
		||||
#define GRID_SERIALISATION_ABSTRACT_READER_H
 | 
			
		||||
 | 
			
		||||
#include <atomic>
 | 
			
		||||
#include <type_traits>
 | 
			
		||||
#include <Grid/tensors/Tensors.h>
 | 
			
		||||
#include <Grid/serialisation/VectorUtils.h>
 | 
			
		||||
@@ -110,6 +112,10 @@ namespace Grid {
 | 
			
		||||
    template <typename ET>
 | 
			
		||||
    inline typename std::enable_if<is_tensor_of_container<ET>::value, typename Traits<ET>::scalar_type *>::type
 | 
			
		||||
    getFirstScalar(ET &eigenTensor) { return eigenTensor.data()->begin(); }
 | 
			
		||||
 | 
			
		||||
    // Counter for resized EigenTensors (poor man's substitute for allocator)
 | 
			
		||||
    // Defined in BinaryIO.cc
 | 
			
		||||
    extern std::uint64_t EigenResizeCounter;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Abstract writer/reader classes ////////////////////////////////////////////
 | 
			
		||||
@@ -497,8 +503,14 @@ namespace Grid {
 | 
			
		||||
  typename std::enable_if<EigenIO::is_tensor_variable<ETensor>::value, void>::type
 | 
			
		||||
  Reader<T>::Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims )
 | 
			
		||||
  {
 | 
			
		||||
#ifdef GRID_OMP
 | 
			
		||||
    // The memory counter is the reason this must be done from the primary thread
 | 
			
		||||
    assert(omp_in_parallel()==0 && "Deserialisation which resizes Eigen tensor must happen from primary thread");
 | 
			
		||||
#endif
 | 
			
		||||
    EigenIO::EigenResizeCounter -= static_cast<uint64_t>(t.size()) * sizeof(typename ETensor::Scalar);
 | 
			
		||||
    //t.reshape( dims );
 | 
			
		||||
    t.resize( dims );
 | 
			
		||||
    EigenIO::EigenResizeCounter += static_cast<uint64_t>(t.size()) * sizeof(typename ETensor::Scalar);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename T>
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,34 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 
 | 
			
		||||
 Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 
 | 
			
		||||
 Source file: ./Grid/serialisation/VectorUtils.h
 | 
			
		||||
 
 | 
			
		||||
 Copyright (C) 2015
 | 
			
		||||
 
 | 
			
		||||
 Author: Antonin Portelli <antonin.portelli@me.com>
 | 
			
		||||
 Author: Peter Boyle <paboyle@ed.ac.uk>
 | 
			
		||||
 Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 Author: Michael Marshall <michael.marshall@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
 This program is free software; you can redistribute it and/or modify
 | 
			
		||||
 it under the terms of the GNU General Public License as published by
 | 
			
		||||
 the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
 (at your option) any later version.
 | 
			
		||||
 
 | 
			
		||||
 This program is distributed in the hope that it will be useful,
 | 
			
		||||
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
 GNU General Public License for more details.
 | 
			
		||||
 
 | 
			
		||||
 You should have received a copy of the GNU General Public License along
 | 
			
		||||
 with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 
 | 
			
		||||
 See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
 *************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,34 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 
 | 
			
		||||
 Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 
 | 
			
		||||
 Source file: ./Grid/serialisation/VectorUtils.h
 | 
			
		||||
 
 | 
			
		||||
 Copyright (C) 2015
 | 
			
		||||
 
 | 
			
		||||
 Author: Peter Boyle <paboyle@ed.ac.uk>
 | 
			
		||||
 Author: Antonin Portelli <antonin.portelli@me.com>
 | 
			
		||||
 Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 Author: Michael Marshall <michael.marshall@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
 This program is free software; you can redistribute it and/or modify
 | 
			
		||||
 it under the terms of the GNU General Public License as published by
 | 
			
		||||
 the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
 (at your option) any later version.
 | 
			
		||||
 
 | 
			
		||||
 This program is distributed in the hope that it will be useful,
 | 
			
		||||
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
 GNU General Public License for more details.
 | 
			
		||||
 
 | 
			
		||||
 You should have received a copy of the GNU General Public License along
 | 
			
		||||
 with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 
 | 
			
		||||
 See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
 *************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef GRID_SERIALISATION_HDF5_H
 | 
			
		||||
#define GRID_SERIALISATION_HDF5_H
 | 
			
		||||
 | 
			
		||||
@@ -34,11 +65,13 @@ namespace Grid
 | 
			
		||||
    template <typename U>
 | 
			
		||||
    void writeDefault(const std::string &s, const U &x);
 | 
			
		||||
    template <typename U>
 | 
			
		||||
    typename std::enable_if<element<std::vector<U>>::is_number, void>::type
 | 
			
		||||
    void writeRagged(const std::string &s, const std::vector<U> &x);
 | 
			
		||||
    template <typename U>
 | 
			
		||||
    typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
 | 
			
		||||
    writeDefault(const std::string &s, const std::vector<U> &x);
 | 
			
		||||
    template <typename U>
 | 
			
		||||
    typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
 | 
			
		||||
    writeDefault(const std::string &s, const std::vector<U> &x);
 | 
			
		||||
    typename std::enable_if<!is_flattenable<std::vector<U>>::value>::type
 | 
			
		||||
    writeDefault(const std::string &s, const std::vector<U> &x) { writeRagged(s, x); }
 | 
			
		||||
    template <typename U>
 | 
			
		||||
    void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements);
 | 
			
		||||
    H5NS::Group & getGroup(void);
 | 
			
		||||
@@ -64,11 +97,13 @@ namespace Grid
 | 
			
		||||
    template <typename U>
 | 
			
		||||
    void readDefault(const std::string &s, U &output);
 | 
			
		||||
    template <typename U>
 | 
			
		||||
    typename std::enable_if<element<std::vector<U>>::is_number, void>::type
 | 
			
		||||
    void readRagged(const std::string &s, std::vector<U> &x);
 | 
			
		||||
    template <typename U>
 | 
			
		||||
    typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
 | 
			
		||||
    readDefault(const std::string &s, std::vector<U> &x);
 | 
			
		||||
    template <typename U>
 | 
			
		||||
    typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
 | 
			
		||||
    readDefault(const std::string &s, std::vector<U> &x);
 | 
			
		||||
    typename std::enable_if<!is_flattenable<std::vector<U>>::value>::type
 | 
			
		||||
    readDefault(const std::string &s, std::vector<U> &x) { readRagged(s, x); }
 | 
			
		||||
    template <typename U>
 | 
			
		||||
    void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim);
 | 
			
		||||
    H5NS::Group & getGroup(void);
 | 
			
		||||
@@ -176,24 +211,30 @@ namespace Grid
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename U>
 | 
			
		||||
  typename std::enable_if<element<std::vector<U>>::is_number, void>::type
 | 
			
		||||
  typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
 | 
			
		||||
  Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x)
 | 
			
		||||
  {
 | 
			
		||||
    // alias to element type
 | 
			
		||||
    typedef typename element<std::vector<U>>::type Element;
 | 
			
		||||
    
 | 
			
		||||
    // flatten the vector and getting dimensions
 | 
			
		||||
    Flatten<std::vector<U>> flat(x);
 | 
			
		||||
    std::vector<size_t> dim;
 | 
			
		||||
    const auto           &flatx = flat.getFlatVector();
 | 
			
		||||
    for (auto &d: flat.getDim())
 | 
			
		||||
      dim.push_back(d);
 | 
			
		||||
    writeMultiDim<Element>(s, dim, &flatx[0], flatx.size());
 | 
			
		||||
    if (isRegularShape(x))
 | 
			
		||||
    {
 | 
			
		||||
      // alias to element type
 | 
			
		||||
      using Scalar = typename is_flattenable<std::vector<U>>::type;
 | 
			
		||||
      
 | 
			
		||||
      // flatten the vector and getting dimensions
 | 
			
		||||
      Flatten<std::vector<U>> flat(x);
 | 
			
		||||
      std::vector<size_t> dim;
 | 
			
		||||
      const auto           &flatx = flat.getFlatVector();
 | 
			
		||||
      for (auto &d: flat.getDim())
 | 
			
		||||
        dim.push_back(d);
 | 
			
		||||
      writeMultiDim<Scalar>(s, dim, &flatx[0], flatx.size());
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
    {
 | 
			
		||||
      writeRagged(s, x);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename U>
 | 
			
		||||
  typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
 | 
			
		||||
  Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x)
 | 
			
		||||
  void Hdf5Writer::writeRagged(const std::string &s, const std::vector<U> &x)
 | 
			
		||||
  {
 | 
			
		||||
    push(s);
 | 
			
		||||
    writeSingleAttribute(x.size(), HDF5_GRID_GUARD "vector_size",
 | 
			
		||||
@@ -229,7 +270,7 @@ namespace Grid
 | 
			
		||||
  void Hdf5Reader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim)
 | 
			
		||||
  {
 | 
			
		||||
    // alias to element type
 | 
			
		||||
    typedef typename element<std::vector<U>>::type Element;
 | 
			
		||||
    using Scalar = typename is_flattenable<std::vector<U>>::type;
 | 
			
		||||
    
 | 
			
		||||
    // read the dimensions
 | 
			
		||||
    H5NS::DataSpace       dataSpace;
 | 
			
		||||
@@ -260,37 +301,44 @@ namespace Grid
 | 
			
		||||
      H5NS::DataSet dataSet;
 | 
			
		||||
      
 | 
			
		||||
      dataSet = group_.openDataSet(s);
 | 
			
		||||
      dataSet.read(buf.data(), Hdf5Type<Element>::type());
 | 
			
		||||
      dataSet.read(buf.data(), Hdf5Type<Scalar>::type());
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
    {
 | 
			
		||||
      H5NS::Attribute attribute;
 | 
			
		||||
      
 | 
			
		||||
      attribute = group_.openAttribute(s);
 | 
			
		||||
      attribute.read(Hdf5Type<Element>::type(), buf.data());
 | 
			
		||||
      attribute.read(Hdf5Type<Scalar>::type(), buf.data());
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename U>
 | 
			
		||||
  typename std::enable_if<element<std::vector<U>>::is_number, void>::type
 | 
			
		||||
  typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
 | 
			
		||||
  Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x)
 | 
			
		||||
  {
 | 
			
		||||
    // alias to element type
 | 
			
		||||
    typedef typename element<std::vector<U>>::type Element;
 | 
			
		||||
    if (H5Lexists        (group_.getId(), s.c_str(), H5P_DEFAULT) > 0
 | 
			
		||||
     && H5Aexists_by_name(group_.getId(), s.c_str(), HDF5_GRID_GUARD "vector_size", H5P_DEFAULT ) > 0)
 | 
			
		||||
    {
 | 
			
		||||
      readRagged(s, x);
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
    {
 | 
			
		||||
      // alias to element type
 | 
			
		||||
      using Scalar = typename is_flattenable<std::vector<U>>::type;
 | 
			
		||||
 | 
			
		||||
    std::vector<size_t>   dim;
 | 
			
		||||
    std::vector<Element>  buf;
 | 
			
		||||
    readMultiDim( s, buf, dim );
 | 
			
		||||
      std::vector<size_t>   dim;
 | 
			
		||||
      std::vector<Scalar>   buf;
 | 
			
		||||
      readMultiDim( s, buf, dim );
 | 
			
		||||
 | 
			
		||||
    // reconstruct the multidimensional vector
 | 
			
		||||
    Reconstruct<std::vector<U>> r(buf, dim);
 | 
			
		||||
    
 | 
			
		||||
    x = r.getVector();
 | 
			
		||||
      // reconstruct the multidimensional vector
 | 
			
		||||
      Reconstruct<std::vector<U>> r(buf, dim);
 | 
			
		||||
 | 
			
		||||
      x = r.getVector();
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename U>
 | 
			
		||||
  typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
 | 
			
		||||
  Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x)
 | 
			
		||||
  void Hdf5Reader::readRagged(const std::string &s, std::vector<U> &x)
 | 
			
		||||
  {
 | 
			
		||||
    uint64_t size;
 | 
			
		||||
    
 | 
			
		||||
 
 | 
			
		||||
@@ -118,13 +118,13 @@ static inline std::string SerialisableClassName(void) {return std::string(#cname
 | 
			
		||||
static constexpr bool isEnum = false; \
 | 
			
		||||
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__))\
 | 
			
		||||
template <typename T>\
 | 
			
		||||
static inline void write(Writer<T> &WR,const std::string &s, const cname &obj){ \
 | 
			
		||||
static inline void write(::Grid::Writer<T> &WR,const std::string &s, const cname &obj){ \
 | 
			
		||||
  push(WR,s);\
 | 
			
		||||
  GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_WRITE_MEMBER,__VA_ARGS__))	\
 | 
			
		||||
  pop(WR);\
 | 
			
		||||
}\
 | 
			
		||||
template <typename T>\
 | 
			
		||||
static inline void read(Reader<T> &RD,const std::string &s, cname &obj){	\
 | 
			
		||||
static inline void read(::Grid::Reader<T> &RD,const std::string &s, cname &obj){	\
 | 
			
		||||
  if (!push(RD,s))\
 | 
			
		||||
  {\
 | 
			
		||||
    std::cout << ::Grid::GridLogWarning << "IO: Cannot open node '" << s << "'" << std::endl; \
 | 
			
		||||
 
 | 
			
		||||
@@ -9,7 +9,8 @@
 | 
			
		||||
 Author: Antonin Portelli <antonin.portelli@me.com>
 | 
			
		||||
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 
 | 
			
		||||
 Author: Michael Marshall <michael.marshall@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
 This program is free software; you can redistribute it and/or modify
 | 
			
		||||
 it under the terms of the GNU General Public License as published by
 | 
			
		||||
 the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
@@ -236,21 +237,36 @@ namespace Grid {
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Vector element trait //////////////////////////////////////////////////////  
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  struct element
 | 
			
		||||
  // is_flattenable<T>::value is true if T is a std::vector<> which can be flattened //////////////////////
 | 
			
		||||
  template <typename T, typename V = void>
 | 
			
		||||
  struct is_flattenable : std::false_type
 | 
			
		||||
  {
 | 
			
		||||
    typedef T type;
 | 
			
		||||
    static constexpr bool is_number = false;
 | 
			
		||||
    using type      = T;
 | 
			
		||||
    using grid_type = T;
 | 
			
		||||
    static constexpr int vecRank = 0;
 | 
			
		||||
    static constexpr bool isGridTensor = false;
 | 
			
		||||
    static constexpr bool children_flattenable = std::is_arithmetic<T>::value or is_complex<T>::value;
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  struct element<std::vector<T>>
 | 
			
		||||
  struct is_flattenable<T, typename std::enable_if<isGridTensor<T>::value>::type> : std::false_type
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename element<T>::type type;
 | 
			
		||||
    static constexpr bool is_number = std::is_arithmetic<T>::value
 | 
			
		||||
                                      or is_complex<T>::value
 | 
			
		||||
                                      or element<T>::is_number;
 | 
			
		||||
    using type      = typename GridTypeMapper<T>::scalar_type;
 | 
			
		||||
    using grid_type = T;
 | 
			
		||||
    static constexpr int vecRank = 0;
 | 
			
		||||
    static constexpr bool isGridTensor = true;
 | 
			
		||||
    static constexpr bool children_flattenable = true;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  struct is_flattenable<std::vector<T>, typename std::enable_if<is_flattenable<T>::children_flattenable>::type>
 | 
			
		||||
  : std::true_type
 | 
			
		||||
  {
 | 
			
		||||
    using type      = typename is_flattenable<T>::type;
 | 
			
		||||
    using grid_type = typename is_flattenable<T>::grid_type;
 | 
			
		||||
    static constexpr bool isGridTensor = is_flattenable<T>::isGridTensor;
 | 
			
		||||
    static constexpr int vecRank = is_flattenable<T>::vecRank + 1;
 | 
			
		||||
    static constexpr bool children_flattenable = true;
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  // Vector flattening utility class ////////////////////////////////////////////
 | 
			
		||||
@@ -259,23 +275,30 @@ namespace Grid {
 | 
			
		||||
  class Flatten
 | 
			
		||||
  {
 | 
			
		||||
  public:
 | 
			
		||||
    typedef typename element<V>::type Element;
 | 
			
		||||
    using Scalar  = typename is_flattenable<V>::type;
 | 
			
		||||
    static constexpr bool isGridTensor = is_flattenable<V>::isGridTensor;
 | 
			
		||||
  public:
 | 
			
		||||
    explicit                     Flatten(const V &vector);
 | 
			
		||||
    const V &                    getVector(void);
 | 
			
		||||
    const std::vector<Element> & getFlatVector(void);
 | 
			
		||||
    const std::vector<size_t>  & getDim(void);
 | 
			
		||||
    explicit                    Flatten(const V &vector);
 | 
			
		||||
    const V &                   getVector(void)     const { return vector_; }
 | 
			
		||||
    const std::vector<Scalar> & getFlatVector(void) const { return flatVector_; }
 | 
			
		||||
    const std::vector<size_t> & getDim(void)        const { return dim_; }
 | 
			
		||||
  private:
 | 
			
		||||
    void accumulate(const Element &e);
 | 
			
		||||
    template <typename W>
 | 
			
		||||
    void accumulate(const W &v);
 | 
			
		||||
    void accumulateDim(const Element &e);
 | 
			
		||||
    template <typename W>
 | 
			
		||||
    void accumulateDim(const W &v);
 | 
			
		||||
    template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
 | 
			
		||||
    accumulate(const W &e);
 | 
			
		||||
    template <typename W> typename std::enable_if<!is_flattenable<W>::value &&  is_flattenable<W>::isGridTensor>::type
 | 
			
		||||
    accumulate(const W &e);
 | 
			
		||||
    template <typename W> typename std::enable_if< is_flattenable<W>::value>::type
 | 
			
		||||
    accumulate(const W &v);
 | 
			
		||||
    template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
 | 
			
		||||
    accumulateDim(const W &e) {} // Innermost is a scalar - do nothing
 | 
			
		||||
    template <typename W> typename std::enable_if<!is_flattenable<W>::value &&  is_flattenable<W>::isGridTensor>::type
 | 
			
		||||
    accumulateDim(const W &e);
 | 
			
		||||
    template <typename W> typename std::enable_if< is_flattenable<W>::value>::type
 | 
			
		||||
    accumulateDim(const W &v);
 | 
			
		||||
  private:
 | 
			
		||||
    const V              &vector_;
 | 
			
		||||
    std::vector<Element> flatVector_;
 | 
			
		||||
    std::vector<size_t>  dim_;
 | 
			
		||||
    const V             &vector_;
 | 
			
		||||
    std::vector<Scalar> flatVector_;
 | 
			
		||||
    std::vector<size_t> dim_;
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  // Class to reconstruct a multidimensional std::vector
 | 
			
		||||
@@ -283,38 +306,57 @@ namespace Grid {
 | 
			
		||||
  class Reconstruct
 | 
			
		||||
  {
 | 
			
		||||
  public:
 | 
			
		||||
    typedef typename element<V>::type Element;
 | 
			
		||||
    using Scalar  = typename is_flattenable<V>::type;
 | 
			
		||||
    static constexpr bool isGridTensor = is_flattenable<V>::isGridTensor;
 | 
			
		||||
  public:
 | 
			
		||||
    Reconstruct(const std::vector<Element> &flatVector,
 | 
			
		||||
    Reconstruct(const std::vector<Scalar> &flatVector,
 | 
			
		||||
                const std::vector<size_t> &dim);
 | 
			
		||||
    const V &                    getVector(void);
 | 
			
		||||
    const std::vector<Element> & getFlatVector(void);
 | 
			
		||||
    const std::vector<size_t>  & getDim(void);
 | 
			
		||||
    const V &                   getVector(void)     const { return vector_; }
 | 
			
		||||
    const std::vector<Scalar> & getFlatVector(void) const { return flatVector_; }
 | 
			
		||||
    const std::vector<size_t> & getDim(void)        const { return dim_; }
 | 
			
		||||
  private:
 | 
			
		||||
    void fill(std::vector<Element> &v);
 | 
			
		||||
    template <typename W>
 | 
			
		||||
    void fill(W &v);
 | 
			
		||||
    void resize(std::vector<Element> &v, const unsigned int dim);
 | 
			
		||||
    template <typename W>
 | 
			
		||||
    void resize(W &v, const unsigned int dim);
 | 
			
		||||
    template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
 | 
			
		||||
    fill(W &v);
 | 
			
		||||
    template <typename W> typename std::enable_if<!is_flattenable<W>::value &&  is_flattenable<W>::isGridTensor>::type
 | 
			
		||||
    fill(W &v);
 | 
			
		||||
    template <typename W> typename std::enable_if< is_flattenable<W>::value>::type
 | 
			
		||||
    fill(W &v);
 | 
			
		||||
    template <typename W> typename std::enable_if< is_flattenable<W>::value &&  is_flattenable<W>::vecRank==1>::type
 | 
			
		||||
    resize(W &v, const unsigned int dim);
 | 
			
		||||
    template <typename W> typename std::enable_if< is_flattenable<W>::value && (is_flattenable<W>::vecRank>1)>::type
 | 
			
		||||
    resize(W &v, const unsigned int dim);
 | 
			
		||||
    template <typename W> typename std::enable_if<!is_flattenable<W>::isGridTensor>::type
 | 
			
		||||
    checkInnermost(const W &e) {} // Innermost is a scalar - do nothing
 | 
			
		||||
    template <typename W> typename std::enable_if< is_flattenable<W>::isGridTensor>::type
 | 
			
		||||
    checkInnermost(const W &e);
 | 
			
		||||
  private:
 | 
			
		||||
    V                          vector_;
 | 
			
		||||
    const std::vector<Element> &flatVector_;
 | 
			
		||||
    std::vector<size_t>        dim_;
 | 
			
		||||
    size_t                     ind_{0};
 | 
			
		||||
    unsigned int               dimInd_{0};
 | 
			
		||||
    V                         vector_;
 | 
			
		||||
    const std::vector<Scalar> &flatVector_;
 | 
			
		||||
    std::vector<size_t>       dim_;
 | 
			
		||||
    size_t                    ind_{0};
 | 
			
		||||
    unsigned int              dimInd_{0};
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // Flatten class template implementation
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  void Flatten<V>::accumulate(const Element &e)
 | 
			
		||||
  template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
 | 
			
		||||
  Flatten<V>::accumulate(const W &e)
 | 
			
		||||
  {
 | 
			
		||||
    flatVector_.push_back(e);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  template <typename W>
 | 
			
		||||
  void Flatten<V>::accumulate(const W &v)
 | 
			
		||||
  template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
 | 
			
		||||
  Flatten<V>::accumulate(const W &e)
 | 
			
		||||
  {
 | 
			
		||||
    for (const Scalar &x: e) {
 | 
			
		||||
      flatVector_.push_back(x);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  template <typename W> typename std::enable_if<is_flattenable<W>::value>::type
 | 
			
		||||
  Flatten<V>::accumulate(const W &v)
 | 
			
		||||
  {
 | 
			
		||||
    for (auto &e: v)
 | 
			
		||||
    {
 | 
			
		||||
@@ -323,11 +365,17 @@ namespace Grid {
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  void Flatten<V>::accumulateDim(const Element &e) {};
 | 
			
		||||
  template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
 | 
			
		||||
  Flatten<V>::accumulateDim(const W &e)
 | 
			
		||||
  {
 | 
			
		||||
    using Traits = GridTypeMapper<typename is_flattenable<W>::grid_type>;
 | 
			
		||||
    for (int rank=0; rank < Traits::Rank; ++rank)
 | 
			
		||||
      dim_.push_back(Traits::Dimension(rank));
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  template <typename W>
 | 
			
		||||
  void Flatten<V>::accumulateDim(const W &v)
 | 
			
		||||
  template <typename W> typename std::enable_if<is_flattenable<W>::value>::type
 | 
			
		||||
  Flatten<V>::accumulateDim(const W &v)
 | 
			
		||||
  {
 | 
			
		||||
    dim_.push_back(v.size());
 | 
			
		||||
    accumulateDim(v[0]);
 | 
			
		||||
@@ -337,42 +385,36 @@ namespace Grid {
 | 
			
		||||
  Flatten<V>::Flatten(const V &vector)
 | 
			
		||||
  : vector_(vector)
 | 
			
		||||
  {
 | 
			
		||||
    accumulate(vector_);
 | 
			
		||||
    accumulateDim(vector_);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  const V & Flatten<V>::getVector(void)
 | 
			
		||||
  {
 | 
			
		||||
    return vector_;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  const std::vector<typename Flatten<V>::Element> &
 | 
			
		||||
  Flatten<V>::getFlatVector(void)
 | 
			
		||||
  {
 | 
			
		||||
    return flatVector_;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  const std::vector<size_t> & Flatten<V>::getDim(void)
 | 
			
		||||
  {
 | 
			
		||||
    return dim_;
 | 
			
		||||
    std::size_t TotalSize{ dim_[0] };
 | 
			
		||||
    for (int i = 1; i < dim_.size(); ++i) {
 | 
			
		||||
      TotalSize *= dim_[i];
 | 
			
		||||
    }
 | 
			
		||||
    flatVector_.reserve(TotalSize);
 | 
			
		||||
    accumulate(vector_);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  // Reconstruct class template implementation
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  void Reconstruct<V>::fill(std::vector<Element> &v)
 | 
			
		||||
  template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
 | 
			
		||||
  Reconstruct<V>::fill(W &v)
 | 
			
		||||
  {
 | 
			
		||||
    v = flatVector_[ind_++];
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  template <typename W> typename std::enable_if<!is_flattenable<W>::value &&  is_flattenable<W>::isGridTensor>::type
 | 
			
		||||
  Reconstruct<V>::fill(W &v)
 | 
			
		||||
  {
 | 
			
		||||
    for (auto &e: v)
 | 
			
		||||
    {
 | 
			
		||||
      e = flatVector_[ind_++];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  template <typename W>
 | 
			
		||||
  void Reconstruct<V>::fill(W &v)
 | 
			
		||||
  template <typename W> typename std::enable_if<is_flattenable<W>::value>::type
 | 
			
		||||
  Reconstruct<V>::fill(W &v)
 | 
			
		||||
  {
 | 
			
		||||
    for (auto &e: v)
 | 
			
		||||
    {
 | 
			
		||||
@@ -381,14 +423,15 @@ namespace Grid {
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  void Reconstruct<V>::resize(std::vector<Element> &v, const unsigned int dim)
 | 
			
		||||
  template <typename W> typename std::enable_if<is_flattenable<W>::value && is_flattenable<W>::vecRank==1>::type
 | 
			
		||||
  Reconstruct<V>::resize(W &v, const unsigned int dim)
 | 
			
		||||
  {
 | 
			
		||||
    v.resize(dim_[dim]);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  template <typename W>
 | 
			
		||||
  void Reconstruct<V>::resize(W &v, const unsigned int dim)
 | 
			
		||||
  template <typename W> typename std::enable_if<is_flattenable<W>::value && (is_flattenable<W>::vecRank>1)>::type
 | 
			
		||||
  Reconstruct<V>::resize(W &v, const unsigned int dim)
 | 
			
		||||
  {
 | 
			
		||||
    v.resize(dim_[dim]);
 | 
			
		||||
    for (auto &e: v)
 | 
			
		||||
@@ -398,34 +441,31 @@ namespace Grid {
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  Reconstruct<V>::Reconstruct(const std::vector<Element> &flatVector,
 | 
			
		||||
  template <typename W> typename std::enable_if<is_flattenable<W>::isGridTensor>::type
 | 
			
		||||
  Reconstruct<V>::checkInnermost(const W &)
 | 
			
		||||
  {
 | 
			
		||||
    using Traits = GridTypeMapper<typename is_flattenable<W>::grid_type>;
 | 
			
		||||
    const int gridRank{Traits::Rank};
 | 
			
		||||
    const int dimRank{static_cast<int>(dim_.size())};
 | 
			
		||||
    assert(dimRank >= gridRank && "Tensor rank too low for Grid tensor");
 | 
			
		||||
    for (int i=0; i<gridRank; ++i) {
 | 
			
		||||
      assert(dim_[dimRank - gridRank + i] == Traits::Dimension(i) && "Tensor dimension doesn't match Grid tensor");
 | 
			
		||||
    }
 | 
			
		||||
    dim_.resize(dimRank - gridRank);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  Reconstruct<V>::Reconstruct(const std::vector<Scalar> &flatVector,
 | 
			
		||||
                              const std::vector<size_t> &dim)
 | 
			
		||||
  : flatVector_(flatVector)
 | 
			
		||||
  , dim_(dim)
 | 
			
		||||
  {
 | 
			
		||||
    checkInnermost(vector_);
 | 
			
		||||
    assert(dim_.size() == is_flattenable<V>::vecRank && "Tensor rank doesn't match nested std::vector rank");
 | 
			
		||||
    resize(vector_, 0);
 | 
			
		||||
    fill(vector_);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  const V & Reconstruct<V>::getVector(void)
 | 
			
		||||
  {
 | 
			
		||||
    return vector_;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  const std::vector<typename Reconstruct<V>::Element> &
 | 
			
		||||
  Reconstruct<V>::getFlatVector(void)
 | 
			
		||||
  {
 | 
			
		||||
    return flatVector_;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  const std::vector<size_t> & Reconstruct<V>::getDim(void)
 | 
			
		||||
  {
 | 
			
		||||
    return dim_;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Vector IO utilities ///////////////////////////////////////////////////////
 | 
			
		||||
  // helper function to read space-separated values
 | 
			
		||||
  template <typename T>
 | 
			
		||||
@@ -459,6 +499,64 @@ namespace Grid {
 | 
			
		||||
    
 | 
			
		||||
    return os;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // In general, scalar types are considered "flattenable" (regularly shaped)
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  bool isRegularShapeHelper(const std::vector<T> &, std::vector<std::size_t> &, int, bool)
 | 
			
		||||
  {
 | 
			
		||||
    return true;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  bool isRegularShapeHelper(const std::vector<std::vector<T>> &v, std::vector<std::size_t> &Dims, int Depth, bool bFirst)
 | 
			
		||||
  {
 | 
			
		||||
    if( bFirst)
 | 
			
		||||
    {
 | 
			
		||||
      assert( Dims.size() == Depth     && "Bug: Delete this message after testing" );
 | 
			
		||||
      Dims.push_back(v[0].size());
 | 
			
		||||
      if (!Dims[Depth])
 | 
			
		||||
        return false;
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
    {
 | 
			
		||||
      assert( Dims.size() >= Depth + 1 && "Bug: Delete this message after testing" );
 | 
			
		||||
    }
 | 
			
		||||
    for (std::size_t i = 0; i < v.size(); ++i)
 | 
			
		||||
    {
 | 
			
		||||
      if (v[i].size() != Dims[Depth] || !isRegularShapeHelper(v[i], Dims, Depth + 1, bFirst && i==0))
 | 
			
		||||
      {
 | 
			
		||||
        return false;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    return true;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  bool isRegularShape(const T &t) { return true; }
 | 
			
		||||
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  bool isRegularShape(const std::vector<T> &v) { return !v.empty(); }
 | 
			
		||||
 | 
			
		||||
  // Return non-zero if all dimensions of this std::vector<std::vector<T>> are regularly shaped
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  bool isRegularShape(const std::vector<std::vector<T>> &v)
 | 
			
		||||
  {
 | 
			
		||||
    if (v.empty() || v[0].empty())
 | 
			
		||||
      return false;
 | 
			
		||||
    // Make sure all of my rows are the same size
 | 
			
		||||
    std::vector<std::size_t> Dims;
 | 
			
		||||
    Dims.reserve(is_flattenable<T>::vecRank);
 | 
			
		||||
    Dims.push_back(v.size());
 | 
			
		||||
    Dims.push_back(v[0].size());
 | 
			
		||||
    for (std::size_t i = 0; i < Dims[0]; ++i)
 | 
			
		||||
    {
 | 
			
		||||
      if (v[i].size() != Dims[1] || !isRegularShapeHelper(v[i], Dims, 2, i==0))
 | 
			
		||||
      {
 | 
			
		||||
        return false;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    return true;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// helper function to read space-separated values
 | 
			
		||||
 
 | 
			
		||||
@@ -65,7 +65,8 @@ void coalescedWriteNonTemporal(vobj & __restrict__ vec,const vobj & __restrict__
 | 
			
		||||
#else
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifndef GRID_SYCL
 | 
			
		||||
//#ifndef GRID_SYCL
 | 
			
		||||
#if 1
 | 
			
		||||
// Use the scalar as our own complex on GPU ... thrust::complex or std::complex
 | 
			
		||||
template<class vsimd,IfSimd<vsimd> = 0> accelerator_inline
 | 
			
		||||
typename vsimd::scalar_type
 | 
			
		||||
 
 | 
			
		||||
@@ -417,7 +417,7 @@ public:
 | 
			
		||||
      stream << "{";
 | 
			
		||||
      for (int j = 0; j < N; j++) {
 | 
			
		||||
	stream << o._internal[i][j];
 | 
			
		||||
	if (i < N - 1) stream << ",";
 | 
			
		||||
	if (j < N - 1) stream << ",";
 | 
			
		||||
      }
 | 
			
		||||
      stream << "}";
 | 
			
		||||
      if (i != N - 1) stream << "\n\t\t";
 | 
			
		||||
 
 | 
			
		||||
@@ -34,6 +34,16 @@ NAMESPACE_BEGIN(Grid);
 | 
			
		||||
// outerProduct Scalar x Scalar -> Scalar
 | 
			
		||||
//              Vector x Vector -> Matrix
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
template<class CC,IfComplex<CC> = 0>
 | 
			
		||||
accelerator_inline CC outerProduct(const CC &l, const CC& r)
 | 
			
		||||
{
 | 
			
		||||
  return l*conj(r);
 | 
			
		||||
}
 | 
			
		||||
template<class RR,IfReal<RR> = 0>
 | 
			
		||||
accelerator_inline RR outerProduct(const RR &l, const RR& r)
 | 
			
		||||
{
 | 
			
		||||
  return l*r;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class l,class r,int N> accelerator_inline
 | 
			
		||||
auto outerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iMatrix<decltype(outerProduct(lhs._internal[0],rhs._internal[0])),N>
 | 
			
		||||
@@ -57,17 +67,6 @@ auto outerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<declt
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class CC,IfComplex<CC> = 0>
 | 
			
		||||
accelerator_inline CC outerProduct(const CC &l, const CC& r)
 | 
			
		||||
{
 | 
			
		||||
  return l*conj(r);
 | 
			
		||||
}
 | 
			
		||||
template<class RR,IfReal<RR> = 0>
 | 
			
		||||
accelerator_inline RR outerProduct(const RR &l, const RR& r)
 | 
			
		||||
{
 | 
			
		||||
  return l*r;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
NAMESPACE_END(Grid);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -457,7 +457,7 @@ accelerator_inline void acceleratorSynchronise(void)
 | 
			
		||||
  __syncwarp();
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef GRID_SYCL
 | 
			
		||||
  cl::sycl::detail::workGroupBarrier();
 | 
			
		||||
  //cl::sycl::detail::workGroupBarrier();
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef GRID_HIP
 | 
			
		||||
  __syncthreads();
 | 
			
		||||
 
 | 
			
		||||
@@ -1,4 +1,4 @@
 | 
			
		||||
# Grid [),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=GridBasedSoftware_Grid&tab=projectOverview) [](https://travis-ci.org/paboyle/Grid)
 | 
			
		||||
# Grid [),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=GridBasedSoftware_Grid&tab=projectOverview) 
 | 
			
		||||
 | 
			
		||||
**Data parallel C++ mathematical object library.**
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
										
											Binary file not shown.
										
									
								
							@@ -1787,7 +1787,7 @@ Hdf5Writer     Hdf5Reader       HDF5
 | 
			
		||||
 | 
			
		||||
Write interfaces, similar to the XML facilities in QDP++ are presented. However,
 | 
			
		||||
the serialisation routines are automatically generated by the macro, and a virtual
 | 
			
		||||
reader adn writer interface enables writing to any of a number of formats.
 | 
			
		||||
reader and writer interface enables writing to any of a number of formats.
 | 
			
		||||
 | 
			
		||||
**Example**::
 | 
			
		||||
 | 
			
		||||
@@ -1814,6 +1814,91 @@ reader adn writer interface enables writing to any of a number of formats.
 | 
			
		||||
   }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Eigen tensor support -- added 2019H1
 | 
			
		||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 | 
			
		||||
 | 
			
		||||
The Serialisation library was expanded in 2019 to support de/serialisation of
 | 
			
		||||
Eigen tensors. De/serialisation of existing types was not changed. Data files
 | 
			
		||||
without Eigen tensors remain compatible with earlier versions of Grid and other readers.
 | 
			
		||||
Conversely, data files containing serialised Eigen tensors is a breaking change.
 | 
			
		||||
 | 
			
		||||
Eigen tensor serialisation support was added to BaseIO, which was modified to provide a Traits class
 | 
			
		||||
to recognise Eigen tensors with elements that are either: primitive scalars (arithmetic and complex types);
 | 
			
		||||
or Grid tensors.
 | 
			
		||||
 | 
			
		||||
**Traits determining de/serialisable scalars**::
 | 
			
		||||
 | 
			
		||||
  // Is this an Eigen tensor
 | 
			
		||||
  template<typename T> struct is_tensor : std::integral_constant<bool,
 | 
			
		||||
    std::is_base_of<Eigen::TensorBase<T, Eigen::ReadOnlyAccessors>, T>::value> {};
 | 
			
		||||
  // Is this an Eigen tensor of a supported scalar
 | 
			
		||||
  template<typename T, typename V = void> struct is_tensor_of_scalar : public std::false_type {};
 | 
			
		||||
  template<typename T> struct is_tensor_of_scalar<T, typename std::enable_if<is_tensor<T>::value && is_scalar<typename T::Scalar>::value>::type> : public std::true_type {};
 | 
			
		||||
  // Is this an Eigen tensor of a supported container
 | 
			
		||||
  template<typename T, typename V = void> struct is_tensor_of_container : public std::false_type {};
 | 
			
		||||
  template<typename T> struct is_tensor_of_container<T, typename std::enable_if<is_tensor<T>::value && isGridTensor<typename T::Scalar>::value>::type> : public std::true_type {};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Eigen tensors are regular, multidimensional objects, and each Reader/Writer
 | 
			
		||||
was extended to support this new datatype. Where the Eigen tensor contains
 | 
			
		||||
a Grid tensor, the dimensions of the data written are the dimensions of the
 | 
			
		||||
Eigen tensor plus the dimensions of the underlying Grid scalar. Dimensions
 | 
			
		||||
of size 1 are preserved.
 | 
			
		||||
 | 
			
		||||
**New Reader/Writer methods for multi-dimensional data**::
 | 
			
		||||
 | 
			
		||||
  template <typename U>
 | 
			
		||||
  void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim);
 | 
			
		||||
  template <typename U>
 | 
			
		||||
  void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
On readback, the Eigen tensor rank must match the data being read, but the tensor
 | 
			
		||||
dimensions will be resized if necessary. Resizing is not possible for Eigen::TensorMap<T>
 | 
			
		||||
because these tensors use a buffer provided at construction, and this buffer cannot be changed.
 | 
			
		||||
Deserialisation failures cause Grid to assert.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
HDF5 Optimisations -- added June 2021
 | 
			
		||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 | 
			
		||||
 | 
			
		||||
Grid serialisation is intended to be light, deterministic and provide a layer of abstraction over
 | 
			
		||||
multiple file formats. HDF5 excels at handling multi-dimensional data, and the Grid HDF5Reader/HDF5Writer exploits this.
 | 
			
		||||
When serialising nested ``std::vector<T>``, where ``T`` is an arithmetic or complex type,
 | 
			
		||||
the Hdf5Writer writes the data as an Hdf5 DataSet object.
 | 
			
		||||
 | 
			
		||||
However, nested ``std::vector<std::vector<...T>>`` might be "ragged", i.e. not necessarily regular. E.g. a 3d nested
 | 
			
		||||
``std::vector`` might contain 2 rows, the first being a 2x2 block and the second row being a 1 x 2 block.
 | 
			
		||||
A bug existed whereby this was not checked on write, so nested, ragged vectors
 | 
			
		||||
were written as a regular dataset, with a buffer under/overrun and jumbled contents.
 | 
			
		||||
 | 
			
		||||
Clearly this was not used in production, as the bug went undetected until now. Fixing this bug
 | 
			
		||||
is an opportunity to further optimise the HDF5 file format.
 | 
			
		||||
 | 
			
		||||
The goals of this change are to:
 | 
			
		||||
 | 
			
		||||
* Make changes to the Hdf5 file format only -- i.e. do not impact other file formats
 | 
			
		||||
 | 
			
		||||
* Implement file format changes in such a way that they are transparent to the Grid reader
 | 
			
		||||
 | 
			
		||||
* Correct the bug for ragged vectors of numeric / complex types
 | 
			
		||||
 | 
			
		||||
* Extend the support of nested std::vector<T> to arbitrarily nested Grid tensors
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
The trait class ``element`` has been redefined to ``is_flattenable``, which is a trait class for
 | 
			
		||||
potentially "flattenable" objects. These are (possibly nested) ``std::vector<T>`` where ``T`` is
 | 
			
		||||
an arithmetic, complex or Grid tensor type. Flattenable objects are tested on write
 | 
			
		||||
(with the function ``isRegularShape``) to see whether they actually are regular.
 | 
			
		||||
 | 
			
		||||
Flattenable, regular objects are written to a multidimensional HDF5 DataSet.
 | 
			
		||||
Otherwise, an Hdf5 sub group is created with the object "name", and each element of the outer dimension is
 | 
			
		||||
recursively written to as object "name_n", where n is a 0-indexed number.
 | 
			
		||||
 | 
			
		||||
On readback (by Grid)), the presence of a subgroup containing the attribute ``Grid_vector_size`` triggers a
 | 
			
		||||
"ragged read", otherwise a read from a DataSet is attempted.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Data parallel field IO
 | 
			
		||||
-----------------------
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -48,7 +48,9 @@ public:
 | 
			
		||||
                          std::vector<double>, array,
 | 
			
		||||
                          std::vector<std::vector<double> >, twodimarray,
 | 
			
		||||
			  std::vector<std::vector<std::vector<std::complex<double>> > >, cmplx3darray,
 | 
			
		||||
			  SpinColourMatrix, scm
 | 
			
		||||
			  SpinColourMatrix, scm,
 | 
			
		||||
                          std::vector<std::vector<std::vector<int> > >, ragged,
 | 
			
		||||
                          std::vector<std::vector<SpinColourMatrix> >, vscm
 | 
			
		||||
                          );
 | 
			
		||||
  myclass() {}
 | 
			
		||||
  myclass(int i)
 | 
			
		||||
@@ -56,6 +58,10 @@ public:
 | 
			
		||||
  , twodimarray(3,std::vector<double>(5, 1.23456))
 | 
			
		||||
  , cmplx3darray(3,std::vector<std::vector<std::complex<double>>>(5, std::vector<std::complex<double>>(7, std::complex<double>(1.2, 3.4))))
 | 
			
		||||
  , ve(2, myenum::blue)
 | 
			
		||||
  , ragged( {{{i+1},{i+2,i+3}}, // ragged
 | 
			
		||||
            {{i+4,i+5,i+6,i+7},{i+8,i+9,i+10,i+11},{i+12,i+13,i+14,i+15}}, // block
 | 
			
		||||
            {{i+16,i+17},{i+18,i+19,i+20}}} ) //ragged
 | 
			
		||||
  , vscm(3, std::vector<SpinColourMatrix>(5))
 | 
			
		||||
  {
 | 
			
		||||
    e=myenum::red;
 | 
			
		||||
    x=i;
 | 
			
		||||
@@ -68,6 +74,13 @@ public:
 | 
			
		||||
    scm()(0, 2)(1, 1) = 6.336;
 | 
			
		||||
    scm()(2, 1)(2, 2) = 7.344;
 | 
			
		||||
    scm()(1, 1)(2, 0) = 8.3534;
 | 
			
		||||
    int Counter = i;
 | 
			
		||||
    for( auto & v : vscm ) {
 | 
			
		||||
      for( auto & j : v ) {
 | 
			
		||||
        j = std::complex<double>(Counter, -Counter);
 | 
			
		||||
        Counter++;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -66,7 +66,9 @@ int main(int argc, char** argv)
 | 
			
		||||
  // Set up RNGs
 | 
			
		||||
  std::vector<int> seeds4({1, 2, 3, 4});
 | 
			
		||||
  std::vector<int> seeds5({5, 6, 7, 8});
 | 
			
		||||
  GridSerialRNG sRNG;
 | 
			
		||||
  GridParallelRNG RNG5(FGrid);
 | 
			
		||||
  sRNG.SeedFixedIntegers(seeds5);
 | 
			
		||||
  RNG5.SeedFixedIntegers(seeds5);
 | 
			
		||||
  GridParallelRNG RNG4(UGrid);
 | 
			
		||||
  RNG4.SeedFixedIntegers(seeds4);
 | 
			
		||||
@@ -84,7 +86,7 @@ int main(int argc, char** argv)
 | 
			
		||||
    ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
 | 
			
		||||
    ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, false);
 | 
			
		||||
 | 
			
		||||
    Meofa.refresh(Umu, RNG5);
 | 
			
		||||
    Meofa.refresh(Umu,sRNG, RNG5);
 | 
			
		||||
    printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
@@ -94,7 +96,7 @@ int main(int argc, char** argv)
 | 
			
		||||
    ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
 | 
			
		||||
    ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, true);
 | 
			
		||||
 | 
			
		||||
    Meofa.refresh(Umu, RNG5);
 | 
			
		||||
    Meofa.refresh(Umu,sRNG, RNG5);
 | 
			
		||||
    printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -74,6 +74,9 @@ int main(int argc, char** argv)
 | 
			
		||||
  RNG5.SeedFixedIntegers(seeds5);
 | 
			
		||||
  GridParallelRNG RNG4(UGrid);
 | 
			
		||||
  RNG4.SeedFixedIntegers(seeds4);
 | 
			
		||||
  GridSerialRNG sRNG;
 | 
			
		||||
  RNG4.SeedFixedIntegers(seeds4);
 | 
			
		||||
  sRNG.SeedFixedIntegers(seeds5);
 | 
			
		||||
 | 
			
		||||
  // Random gauge field
 | 
			
		||||
  LatticeGaugeField Umu(UGrid);
 | 
			
		||||
@@ -90,7 +93,7 @@ int main(int argc, char** argv)
 | 
			
		||||
    ConjugateGradient<FermionField> CG(1.0e-12, 5000);
 | 
			
		||||
    ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, false);
 | 
			
		||||
 | 
			
		||||
    Meofa.refresh(Umu, RNG5);
 | 
			
		||||
    Meofa.refresh(Umu,sRNG, RNG5);
 | 
			
		||||
    printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
@@ -100,7 +103,7 @@ int main(int argc, char** argv)
 | 
			
		||||
    ConjugateGradient<FermionField> CG(1.0e-12, 5000);
 | 
			
		||||
    ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, true);
 | 
			
		||||
 | 
			
		||||
    Meofa.refresh(Umu, RNG5);
 | 
			
		||||
    Meofa.refresh(Umu,sRNG, RNG5);
 | 
			
		||||
    printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -68,8 +68,10 @@ int main(int argc, char** argv)
 | 
			
		||||
  // Set up RNGs
 | 
			
		||||
  std::vector<int> seeds4({1, 2, 3, 4});
 | 
			
		||||
  std::vector<int> seeds5({5, 6, 7, 8});
 | 
			
		||||
  GridSerialRNG sRNG;
 | 
			
		||||
  GridParallelRNG RNG5(FGrid);
 | 
			
		||||
  RNG5.SeedFixedIntegers(seeds5);
 | 
			
		||||
  sRNG.SeedFixedIntegers(seeds5);
 | 
			
		||||
  GridParallelRNG RNG4(UGrid);
 | 
			
		||||
  RNG4.SeedFixedIntegers(seeds4);
 | 
			
		||||
 | 
			
		||||
@@ -86,7 +88,7 @@ int main(int argc, char** argv)
 | 
			
		||||
    ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
 | 
			
		||||
    ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, false);
 | 
			
		||||
 | 
			
		||||
    Meofa.refresh(Umu, RNG5);
 | 
			
		||||
    Meofa.refresh(Umu, sRNG,RNG5);
 | 
			
		||||
    printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
@@ -96,7 +98,7 @@ int main(int argc, char** argv)
 | 
			
		||||
    ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
 | 
			
		||||
    ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, true);
 | 
			
		||||
 | 
			
		||||
    Meofa.refresh(Umu, RNG5);
 | 
			
		||||
    Meofa.refresh(Umu, sRNG,RNG5);
 | 
			
		||||
    printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -73,7 +73,9 @@ int main(int argc, char** argv)
 | 
			
		||||
  std::vector<int> seeds4({1, 2, 3, 4});
 | 
			
		||||
  std::vector<int> seeds5({5, 6, 7, 8});
 | 
			
		||||
  GridParallelRNG RNG5(FGrid);
 | 
			
		||||
  GridSerialRNG   sRNG;
 | 
			
		||||
  RNG5.SeedFixedIntegers(seeds5);
 | 
			
		||||
  sRNG.SeedFixedIntegers(seeds5);
 | 
			
		||||
  GridParallelRNG RNG4(UGrid);
 | 
			
		||||
  RNG4.SeedFixedIntegers(seeds4);
 | 
			
		||||
 | 
			
		||||
@@ -91,7 +93,7 @@ int main(int argc, char** argv)
 | 
			
		||||
    ConjugateGradient<FermionField> CG(1.0e-12, 5000);
 | 
			
		||||
    ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, false);
 | 
			
		||||
 | 
			
		||||
    Meofa.refresh(Umu, RNG5);
 | 
			
		||||
    Meofa.refresh(Umu, sRNG, RNG5);
 | 
			
		||||
    printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
@@ -101,7 +103,7 @@ int main(int argc, char** argv)
 | 
			
		||||
    ConjugateGradient<FermionField> CG(1.0e-12, 5000);
 | 
			
		||||
    ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, true);
 | 
			
		||||
 | 
			
		||||
    Meofa.refresh(Umu, RNG5);
 | 
			
		||||
    Meofa.refresh(Umu, sRNG, RNG5);
 | 
			
		||||
    printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -29,7 +29,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
 ;
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
 | 
			
		||||
@@ -59,6 +58,10 @@ int main (int argc, char ** argv)
 | 
			
		||||
  double beta = 1.0;
 | 
			
		||||
  double c1   = 0.331;
 | 
			
		||||
 | 
			
		||||
  const int nu = 1;
 | 
			
		||||
  std::vector<int> twists(Nd,0);
 | 
			
		||||
  twists[nu] = 1;
 | 
			
		||||
  ConjugateGimplD::setDirections(twists);
 | 
			
		||||
  ConjugatePlaqPlusRectangleActionR Action(beta,c1);
 | 
			
		||||
  //ConjugateWilsonGaugeActionR Action(beta);
 | 
			
		||||
  //WilsonGaugeActionR Action(beta);
 | 
			
		||||
 
 | 
			
		||||
@@ -61,7 +61,9 @@ int main (int argc, char ** argv)
 | 
			
		||||
  std::vector<int> seeds({1,2,3,4});
 | 
			
		||||
 | 
			
		||||
  GridParallelRNG          pRNG(&Grid);
 | 
			
		||||
  GridSerialRNG            sRNG;
 | 
			
		||||
  pRNG.SeedFixedIntegers(seeds);
 | 
			
		||||
  sRNG.SeedFixedIntegers(seeds);
 | 
			
		||||
 | 
			
		||||
  typedef PeriodicGimplR Gimpl;
 | 
			
		||||
  typedef WilsonGaugeAction<Gimpl> GaugeAction;
 | 
			
		||||
@@ -115,7 +117,7 @@ int main (int argc, char ** argv)
 | 
			
		||||
  
 | 
			
		||||
  integrator.setMomentumFilter(filter);
 | 
			
		||||
 | 
			
		||||
  integrator.refresh(U, pRNG); //doesn't actually change the gauge field
 | 
			
		||||
  integrator.refresh(U, sRNG, pRNG); //doesn't actually change the gauge field
 | 
			
		||||
 | 
			
		||||
  //Check the momentum is zero on the boundary
 | 
			
		||||
  const auto &P = integrator.getMomentum();
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user