mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Merge branch 'develop' into feature/hirep
This commit is contained in:
		@@ -4,7 +4,7 @@ EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2'
 | 
			
		||||
FFTW_URL=http://www.fftw.org/fftw-3.3.4.tar.gz
 | 
			
		||||
 | 
			
		||||
echo "-- deploying Eigen source..."
 | 
			
		||||
wget ${EIGEN_URL}
 | 
			
		||||
wget ${EIGEN_URL} --no-check-certificate
 | 
			
		||||
./scripts/update_eigen.sh `basename ${EIGEN_URL}`
 | 
			
		||||
rm `basename ${EIGEN_URL}`
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -194,22 +194,22 @@ class BinaryIO {
 | 
			
		||||
 | 
			
		||||
      std::vector<int> site({x,y,z,t});
 | 
			
		||||
 | 
			
		||||
      if ( grid->IsBoss() ) {
 | 
			
		||||
	fin.read((char *)&file_object,sizeof(file_object));
 | 
			
		||||
	bytes += sizeof(file_object);
 | 
			
		||||
	if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	if(ieee32)    le32toh_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	if(ieee64)    le64toh_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
      if (grid->IsBoss()) {
 | 
			
		||||
        fin.read((char *)&file_object, sizeof(file_object));
 | 
			
		||||
        bytes += sizeof(file_object);
 | 
			
		||||
        if (ieee32big) be32toh_v((void *)&file_object, sizeof(file_object));
 | 
			
		||||
        if (ieee32) le32toh_v((void *)&file_object, sizeof(file_object));
 | 
			
		||||
        if (ieee64big) be64toh_v((void *)&file_object, sizeof(file_object));
 | 
			
		||||
        if (ieee64) le64toh_v((void *)&file_object, sizeof(file_object));
 | 
			
		||||
 | 
			
		||||
	munge(file_object,munged,csum);
 | 
			
		||||
        munge(file_object, munged, csum);
 | 
			
		||||
      }
 | 
			
		||||
      // The boss who read the file has their value poked
 | 
			
		||||
      pokeSite(munged,Umu,site);
 | 
			
		||||
    }}}}
 | 
			
		||||
    timer.Stop();
 | 
			
		||||
    std::cout<<GridLogPerformance<<"readObjectSerial: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
 | 
			
		||||
	     << (double)bytes/ (double)timer.useconds() <<" MB/s "  <<std::endl;
 | 
			
		||||
       << (double)bytes/ (double)timer.useconds() <<" MB/s "  <<std::endl;
 | 
			
		||||
 | 
			
		||||
    return csum;
 | 
			
		||||
  }
 | 
			
		||||
@@ -254,20 +254,20 @@ class BinaryIO {
 | 
			
		||||
 | 
			
		||||
      
 | 
			
		||||
      if ( grid->IsBoss() ) {
 | 
			
		||||
	
 | 
			
		||||
	if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	if(ieee32)    htole32_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	if(ieee64)    htole64_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
  
 | 
			
		||||
  if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
  if(ieee32)    htole32_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
  if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
  if(ieee64)    htole64_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
 | 
			
		||||
	// NB could gather an xstrip as an optimisation.
 | 
			
		||||
	fout.write((char *)&file_object,sizeof(file_object));
 | 
			
		||||
	bytes+=sizeof(file_object);
 | 
			
		||||
  // NB could gather an xstrip as an optimisation.
 | 
			
		||||
  fout.write((char *)&file_object,sizeof(file_object));
 | 
			
		||||
  bytes+=sizeof(file_object);
 | 
			
		||||
      }
 | 
			
		||||
    }}}}
 | 
			
		||||
    timer.Stop();
 | 
			
		||||
    std::cout<<GridLogPerformance<<"writeObjectSerial: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
 | 
			
		||||
	     << (double)bytes/timer.useconds() <<" MB/s "  <<std::endl;
 | 
			
		||||
       << (double)bytes/timer.useconds() <<" MB/s "  <<std::endl;
 | 
			
		||||
 | 
			
		||||
    return csum;
 | 
			
		||||
  }
 | 
			
		||||
@@ -305,15 +305,15 @@ class BinaryIO {
 | 
			
		||||
      int l_idx=parallel.generator_idx(o_idx,i_idx);
 | 
			
		||||
 | 
			
		||||
      if( rank == grid->ThisRank() ){
 | 
			
		||||
	//	std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl;
 | 
			
		||||
	parallel.GetState(saved,l_idx);
 | 
			
		||||
  //  std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl;
 | 
			
		||||
  parallel.GetState(saved,l_idx);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      grid->Broadcast(rank,(void *)&saved[0],bytes);
 | 
			
		||||
 | 
			
		||||
      if ( grid->IsBoss() ) {
 | 
			
		||||
	Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
 | 
			
		||||
	fout.write((char *)&saved[0],bytes);
 | 
			
		||||
  Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
 | 
			
		||||
  fout.write((char *)&saved[0],bytes);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
@@ -355,14 +355,14 @@ class BinaryIO {
 | 
			
		||||
      int l_idx=parallel.generator_idx(o_idx,i_idx);
 | 
			
		||||
 | 
			
		||||
      if ( grid->IsBoss() ) {
 | 
			
		||||
	fin.read((char *)&saved[0],bytes);
 | 
			
		||||
	Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
 | 
			
		||||
  fin.read((char *)&saved[0],bytes);
 | 
			
		||||
  Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      grid->Broadcast(0,(void *)&saved[0],bytes);
 | 
			
		||||
 | 
			
		||||
      if( rank == grid->ThisRank() ){
 | 
			
		||||
	parallel.SetState(saved,l_idx);
 | 
			
		||||
  parallel.SetState(saved,l_idx);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
@@ -415,15 +415,15 @@ class BinaryIO {
 | 
			
		||||
 | 
			
		||||
      if ( d == 0 ) parallel[d] = 0;
 | 
			
		||||
      if (parallel[d]) {
 | 
			
		||||
	range[d] = grid->_ldimensions[d];
 | 
			
		||||
	start[d] = grid->_processor_coor[d]*range[d];
 | 
			
		||||
	ioproc[d]= grid->_processor_coor[d];
 | 
			
		||||
  range[d] = grid->_ldimensions[d];
 | 
			
		||||
  start[d] = grid->_processor_coor[d]*range[d];
 | 
			
		||||
  ioproc[d]= grid->_processor_coor[d];
 | 
			
		||||
      } else {
 | 
			
		||||
	range[d] = grid->_gdimensions[d];
 | 
			
		||||
	start[d] = 0;
 | 
			
		||||
	ioproc[d]= 0;
 | 
			
		||||
  range[d] = grid->_gdimensions[d];
 | 
			
		||||
  start[d] = 0;
 | 
			
		||||
  ioproc[d]= 0;
 | 
			
		||||
 | 
			
		||||
	if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
 | 
			
		||||
  if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
 | 
			
		||||
      }
 | 
			
		||||
      slice_vol = slice_vol * range[d];
 | 
			
		||||
    }
 | 
			
		||||
@@ -434,9 +434,9 @@ class BinaryIO {
 | 
			
		||||
      std::cout<< std::dec ;
 | 
			
		||||
      std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice ";
 | 
			
		||||
      for(int d=0;d<grid->_ndimension;d++){
 | 
			
		||||
	std::cout<< range[d];
 | 
			
		||||
	if( d< grid->_ndimension-1 ) 
 | 
			
		||||
	  std::cout<< " x ";
 | 
			
		||||
  std::cout<< range[d];
 | 
			
		||||
  if( d< grid->_ndimension-1 ) 
 | 
			
		||||
    std::cout<< " x ";
 | 
			
		||||
      }
 | 
			
		||||
      std::cout << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
@@ -463,7 +463,7 @@ class BinaryIO {
 | 
			
		||||
 | 
			
		||||
      // need to implement these loops in Nd independent way with a lexico conversion
 | 
			
		||||
    for(int tlex=0;tlex<slice_vol;tlex++){
 | 
			
		||||
	
 | 
			
		||||
  
 | 
			
		||||
      std::vector<int> tsite(nd); // temporary mixed up site
 | 
			
		||||
      std::vector<int> gsite(nd);
 | 
			
		||||
      std::vector<int> lsite(nd);
 | 
			
		||||
@@ -472,8 +472,8 @@ class BinaryIO {
 | 
			
		||||
      Lexicographic::CoorFromIndex(tsite,tlex,range);
 | 
			
		||||
 | 
			
		||||
      for(int d=0;d<nd;d++){
 | 
			
		||||
	lsite[d] = tsite[d]%grid->_ldimensions[d];  // local site
 | 
			
		||||
	gsite[d] = tsite[d]+start[d];               // global site
 | 
			
		||||
  lsite[d] = tsite[d]%grid->_ldimensions[d];  // local site
 | 
			
		||||
  gsite[d] = tsite[d]+start[d];               // global site
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      /////////////////////////
 | 
			
		||||
@@ -487,29 +487,29 @@ class BinaryIO {
 | 
			
		||||
      // iorank reads from the seek
 | 
			
		||||
      ////////////////////////////////
 | 
			
		||||
      if (myrank == iorank) {
 | 
			
		||||
	
 | 
			
		||||
	fin.seekg(offset+g_idx*sizeof(fileObj));
 | 
			
		||||
	fin.read((char *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	bytes+=sizeof(fileObj);
 | 
			
		||||
	
 | 
			
		||||
	if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	if(ieee32)    le32toh_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	if(ieee64)    le64toh_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	
 | 
			
		||||
	munge(fileObj,siteObj,csum);
 | 
			
		||||
  
 | 
			
		||||
  fin.seekg(offset+g_idx*sizeof(fileObj));
 | 
			
		||||
  fin.read((char *)&fileObj,sizeof(fileObj));
 | 
			
		||||
  bytes+=sizeof(fileObj);
 | 
			
		||||
  
 | 
			
		||||
  if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
  if(ieee32)    le32toh_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
  if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
  if(ieee64)    le64toh_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
  
 | 
			
		||||
  munge(fileObj,siteObj,csum);
 | 
			
		||||
 | 
			
		||||
      }	
 | 
			
		||||
      } 
 | 
			
		||||
 | 
			
		||||
      // Possibly do transport through pt2pt 
 | 
			
		||||
      if ( rank != iorank ) { 
 | 
			
		||||
	if ( (myrank == rank) || (myrank==iorank) ) {
 | 
			
		||||
	  grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj));
 | 
			
		||||
	}
 | 
			
		||||
  if ( (myrank == rank) || (myrank==iorank) ) {
 | 
			
		||||
    grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj));
 | 
			
		||||
  }
 | 
			
		||||
      }
 | 
			
		||||
      // Poke at destination
 | 
			
		||||
      if ( myrank == rank ) {
 | 
			
		||||
	  pokeLocalSite(siteObj,Umu,lsite);
 | 
			
		||||
    pokeLocalSite(siteObj,Umu,lsite);
 | 
			
		||||
      }
 | 
			
		||||
      grid->Barrier(); // necessary?
 | 
			
		||||
    }
 | 
			
		||||
@@ -520,7 +520,7 @@ class BinaryIO {
 | 
			
		||||
 | 
			
		||||
    timer.Stop();
 | 
			
		||||
    std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
 | 
			
		||||
	     << (double)bytes/timer.useconds() <<" MB/s "  <<std::endl;
 | 
			
		||||
       << (double)bytes/timer.useconds() <<" MB/s "  <<std::endl;
 | 
			
		||||
    
 | 
			
		||||
    return csum;
 | 
			
		||||
  }
 | 
			
		||||
@@ -558,15 +558,15 @@ class BinaryIO {
 | 
			
		||||
      if ( d!= grid->_ndimension-1 ) parallel[d] = 0;
 | 
			
		||||
 | 
			
		||||
      if (parallel[d]) {
 | 
			
		||||
	range[d] = grid->_ldimensions[d];
 | 
			
		||||
	start[d] = grid->_processor_coor[d]*range[d];
 | 
			
		||||
	ioproc[d]= grid->_processor_coor[d];
 | 
			
		||||
  range[d] = grid->_ldimensions[d];
 | 
			
		||||
  start[d] = grid->_processor_coor[d]*range[d];
 | 
			
		||||
  ioproc[d]= grid->_processor_coor[d];
 | 
			
		||||
      } else {
 | 
			
		||||
	range[d] = grid->_gdimensions[d];
 | 
			
		||||
	start[d] = 0;
 | 
			
		||||
	ioproc[d]= 0;
 | 
			
		||||
  range[d] = grid->_gdimensions[d];
 | 
			
		||||
  start[d] = 0;
 | 
			
		||||
  ioproc[d]= 0;
 | 
			
		||||
 | 
			
		||||
	if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
 | 
			
		||||
  if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      slice_vol = slice_vol * range[d];
 | 
			
		||||
@@ -577,9 +577,9 @@ class BinaryIO {
 | 
			
		||||
      grid->GlobalSum(tmp);
 | 
			
		||||
      std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice ";
 | 
			
		||||
      for(int d=0;d<grid->_ndimension;d++){
 | 
			
		||||
	std::cout<< range[d];
 | 
			
		||||
	if( d< grid->_ndimension-1 ) 
 | 
			
		||||
	  std::cout<< " x ";
 | 
			
		||||
  std::cout<< range[d];
 | 
			
		||||
  if( d< grid->_ndimension-1 ) 
 | 
			
		||||
    std::cout<< " x ";
 | 
			
		||||
      }
 | 
			
		||||
      std::cout << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
@@ -610,7 +610,7 @@ class BinaryIO {
 | 
			
		||||
    // should aggregate a whole chunk and then write.
 | 
			
		||||
    // need to implement these loops in Nd independent way with a lexico conversion
 | 
			
		||||
    for(int tlex=0;tlex<slice_vol;tlex++){
 | 
			
		||||
	
 | 
			
		||||
  
 | 
			
		||||
      std::vector<int> tsite(nd); // temporary mixed up site
 | 
			
		||||
      std::vector<int> gsite(nd);
 | 
			
		||||
      std::vector<int> lsite(nd);
 | 
			
		||||
@@ -619,8 +619,8 @@ class BinaryIO {
 | 
			
		||||
      Lexicographic::CoorFromIndex(tsite,tlex,range);
 | 
			
		||||
 | 
			
		||||
      for(int d=0;d<nd;d++){
 | 
			
		||||
	lsite[d] = tsite[d]%grid->_ldimensions[d];  // local site
 | 
			
		||||
	gsite[d] = tsite[d]+start[d];               // global site
 | 
			
		||||
  lsite[d] = tsite[d]%grid->_ldimensions[d];  // local site
 | 
			
		||||
  gsite[d] = tsite[d]+start[d];               // global site
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -640,26 +640,26 @@ class BinaryIO {
 | 
			
		||||
 | 
			
		||||
      // Pair of nodes may need to do pt2pt send
 | 
			
		||||
      if ( rank != iorank ) { // comms is necessary
 | 
			
		||||
	if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it
 | 
			
		||||
	  // Send to IOrank 
 | 
			
		||||
	  grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj));
 | 
			
		||||
	}
 | 
			
		||||
  if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it
 | 
			
		||||
    // Send to IOrank 
 | 
			
		||||
    grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj));
 | 
			
		||||
  }
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      grid->Barrier(); // necessary?
 | 
			
		||||
 | 
			
		||||
      if (myrank == iorank) {
 | 
			
		||||
	
 | 
			
		||||
	munge(siteObj,fileObj,csum);
 | 
			
		||||
  
 | 
			
		||||
  munge(siteObj,fileObj,csum);
 | 
			
		||||
 | 
			
		||||
	if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	if(ieee32)    htole32_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	if(ieee64)    htole64_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	
 | 
			
		||||
	fout.seekp(offset+g_idx*sizeof(fileObj));
 | 
			
		||||
	fout.write((char *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	bytes+=sizeof(fileObj);
 | 
			
		||||
  if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
  if(ieee32)    htole32_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
  if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
  if(ieee64)    htole64_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
  
 | 
			
		||||
  fout.seekp(offset+g_idx*sizeof(fileObj));
 | 
			
		||||
  fout.write((char *)&fileObj,sizeof(fileObj));
 | 
			
		||||
  bytes+=sizeof(fileObj);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -668,7 +668,7 @@ class BinaryIO {
 | 
			
		||||
 | 
			
		||||
    timer.Stop();
 | 
			
		||||
    std::cout<<GridLogPerformance<<"writeObjectParallel: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
 | 
			
		||||
	     << (double)bytes/timer.useconds() <<" MB/s "  <<std::endl;
 | 
			
		||||
       << (double)bytes/timer.useconds() <<" MB/s "  <<std::endl;
 | 
			
		||||
 | 
			
		||||
    return csum;
 | 
			
		||||
  }
 | 
			
		||||
 
 | 
			
		||||
@@ -55,11 +55,14 @@ namespace QCD {
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // QCD iMatrix types
 | 
			
		||||
    // Index conventions:                            Lorentz x Spin x Colour
 | 
			
		||||
    // note: static const int or constexpr will work for type deductions
 | 
			
		||||
    //       with the intel compiler (up to version 17)
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    static const int ColourIndex = 2;
 | 
			
		||||
    static const int SpinIndex   = 1;
 | 
			
		||||
    static const int LorentzIndex= 0;
 | 
			
		||||
    #define ColourIndex  2
 | 
			
		||||
    #define SpinIndex    1
 | 
			
		||||
    #define LorentzIndex 0
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
    // Also should make these a named enum type
 | 
			
		||||
    static const int DaggerNo=0;
 | 
			
		||||
    static const int DaggerYes=1;
 | 
			
		||||
 
 | 
			
		||||
@@ -49,154 +49,171 @@ namespace Grid {
 | 
			
		||||
    template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic { 
 | 
			
		||||
    public:
 | 
			
		||||
 | 
			
		||||
     INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
     typedef FermionOperator<Impl> Base;
 | 
			
		||||
      INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
      typedef FermionOperator<Impl> Base;
 | 
			
		||||
     
 | 
			
		||||
    public:
 | 
			
		||||
 | 
			
		||||
  template <bool EnableBool = true>
 | 
			
		||||
  typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
 | 
			
		||||
  DiracOptDhopSite(
 | 
			
		||||
      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
      int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
      FermionField &out) {
 | 
			
		||||
      template <bool EnableBool = true>
 | 
			
		||||
      typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
 | 
			
		||||
	DiracOptDhopSite(
 | 
			
		||||
			 StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
			 std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
			 int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
			 FermionField &out) {
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
    if (AsmOpt) {
 | 
			
		||||
      WilsonKernels<Impl>::DiracOptAsmDhopSite(st, lo, U, buf, sF, sU, Ls, Ns,
 | 
			
		||||
                                               in, out);
 | 
			
		||||
	if (AsmOpt) {
 | 
			
		||||
	  WilsonKernels<Impl>::DiracOptAsmDhopSite(st, lo, U, buf, sF, sU, Ls, Ns,
 | 
			
		||||
						   in, out);
 | 
			
		||||
 | 
			
		||||
    } else {
 | 
			
		||||
	} else {
 | 
			
		||||
#else
 | 
			
		||||
    {
 | 
			
		||||
	  {
 | 
			
		||||
#endif
 | 
			
		||||
      for (int site = 0; site < Ns; site++) {
 | 
			
		||||
        for (int s = 0; s < Ls; s++) {
 | 
			
		||||
          if (HandOpt)
 | 
			
		||||
            WilsonKernels<Impl>::DiracOptHandDhopSite(st, lo, U, buf, sF, sU,
 | 
			
		||||
                                                      in, out);
 | 
			
		||||
          else
 | 
			
		||||
            WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU,
 | 
			
		||||
                                                         in, out);
 | 
			
		||||
          sF++;
 | 
			
		||||
        }
 | 
			
		||||
        sU++;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
	    for (int site = 0; site < Ns; site++) {
 | 
			
		||||
	      for (int s = 0; s < Ls; s++) {
 | 
			
		||||
		if (HandOpt)
 | 
			
		||||
		  WilsonKernels<Impl>::DiracOptHandDhopSite(st, lo, U, buf, sF, sU,
 | 
			
		||||
							    in, out);
 | 
			
		||||
		else
 | 
			
		||||
		  WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU,
 | 
			
		||||
							       in, out);
 | 
			
		||||
		sF++;
 | 
			
		||||
	      }
 | 
			
		||||
	      sU++;
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
  template <bool EnableBool = true>
 | 
			
		||||
    typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
 | 
			
		||||
  DiracOptDhopSite(
 | 
			
		||||
      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
      int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
      FermionField &out) {
 | 
			
		||||
    for (int site = 0; site < Ns; site++) {
 | 
			
		||||
      for (int s = 0; s < Ls; s++) {
 | 
			
		||||
        WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in,
 | 
			
		||||
                                                     out);
 | 
			
		||||
        sF++;
 | 
			
		||||
      }
 | 
			
		||||
      sU++;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
	template <bool EnableBool = true>
 | 
			
		||||
	  typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
 | 
			
		||||
	  DiracOptDhopSite(
 | 
			
		||||
			   StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
			   std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
			   int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
			   FermionField &out) {
 | 
			
		||||
	  for (int site = 0; site < Ns; site++) {
 | 
			
		||||
	    for (int s = 0; s < Ls; s++) {
 | 
			
		||||
	      WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in,
 | 
			
		||||
							   out);
 | 
			
		||||
	      sF++;
 | 
			
		||||
	    }
 | 
			
		||||
	    sU++;
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
  template <bool EnableBool = true>
 | 
			
		||||
  typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,
 | 
			
		||||
                          void>::type
 | 
			
		||||
  DiracOptDhopSiteDag(
 | 
			
		||||
      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
      int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
      FermionField &out) {
 | 
			
		||||
	template <bool EnableBool = true>
 | 
			
		||||
	  typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,
 | 
			
		||||
				  void>::type
 | 
			
		||||
	  DiracOptDhopSiteDag(
 | 
			
		||||
			      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
			      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
			      int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
			      FermionField &out) {
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
    if (AsmOpt) {
 | 
			
		||||
      WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st, lo, U, buf, sF, sU, Ls,
 | 
			
		||||
                                                  Ns, in, out);
 | 
			
		||||
    } else {
 | 
			
		||||
				    if (AsmOpt) {
 | 
			
		||||
				      WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st, lo, U, buf, sF, sU, Ls,
 | 
			
		||||
										  Ns, in, out);
 | 
			
		||||
				    } else {
 | 
			
		||||
#else
 | 
			
		||||
    {
 | 
			
		||||
				      {
 | 
			
		||||
#endif
 | 
			
		||||
      for (int site = 0; site < Ns; site++) {
 | 
			
		||||
        for (int s = 0; s < Ls; s++) {
 | 
			
		||||
          if (HandOpt)
 | 
			
		||||
            WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st, lo, U, buf, sF, sU,
 | 
			
		||||
                                                         in, out);
 | 
			
		||||
          else
 | 
			
		||||
            WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF,
 | 
			
		||||
                                                            sU, in, out);
 | 
			
		||||
          sF++;
 | 
			
		||||
        }
 | 
			
		||||
        sU++;
 | 
			
		||||
					for (int site = 0; site < Ns; site++) {
 | 
			
		||||
					  for (int s = 0; s < Ls; s++) {
 | 
			
		||||
					    if (HandOpt)
 | 
			
		||||
					      WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st, lo, U, buf, sF, sU,
 | 
			
		||||
											   in, out);
 | 
			
		||||
					    else
 | 
			
		||||
					      WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF,
 | 
			
		||||
											      sU, in, out);
 | 
			
		||||
					    sF++;
 | 
			
		||||
					  }
 | 
			
		||||
					  sU++;
 | 
			
		||||
					}
 | 
			
		||||
				      }
 | 
			
		||||
				    }
 | 
			
		||||
 | 
			
		||||
				    template <bool EnableBool = true>
 | 
			
		||||
				      typename std::enable_if<
 | 
			
		||||
				      (Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,
 | 
			
		||||
				      void>::type
 | 
			
		||||
				      DiracOptDhopSiteDag(
 | 
			
		||||
							  StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
							  std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
							  int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
							  FermionField &out) {
 | 
			
		||||
					for (int site = 0; site < Ns; site++) {
 | 
			
		||||
					  for (int s = 0; s < Ls; s++) {
 | 
			
		||||
					    WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, sU,
 | 
			
		||||
											    in, out);
 | 
			
		||||
					    sF++;
 | 
			
		||||
					  }
 | 
			
		||||
					  sU++;
 | 
			
		||||
					}
 | 
			
		||||
				      }
 | 
			
		||||
 | 
			
		||||
				    void DiracOptDhopDir(
 | 
			
		||||
							 StencilImpl &st, DoubledGaugeField &U,
 | 
			
		||||
							 std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
							 int sF, int sU, const FermionField &in, FermionField &out, int dirdisp,
 | 
			
		||||
							 int gamma);
 | 
			
		||||
 | 
			
		||||
	private:
 | 
			
		||||
				    // Specialised variants
 | 
			
		||||
				    void DiracOptGenericDhopSite(
 | 
			
		||||
								 StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
								 std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
								 int sF, int sU, const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
				    void DiracOptGenericDhopSiteDag(
 | 
			
		||||
								    StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
								    std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
								    int sF, int sU, const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
				    void DiracOptAsmDhopSite(
 | 
			
		||||
							     StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
							     std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
							     int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
							     FermionField &out);
 | 
			
		||||
 | 
			
		||||
				    void DiracOptAsmDhopSiteDag(
 | 
			
		||||
								StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
								std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
								int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
								FermionField &out);
 | 
			
		||||
 | 
			
		||||
				    void DiracOptHandDhopSite(
 | 
			
		||||
							      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
							      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
							      int sF, int sU, const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
				    void DiracOptHandDhopSiteDag(
 | 
			
		||||
								 StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
								 std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
								 int sF, int sU, const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
	public:
 | 
			
		||||
				    WilsonKernels(const ImplParams &p = ImplParams());
 | 
			
		||||
				  };
 | 
			
		||||
    
 | 
			
		||||
	///////////////////////////////////////////////////////////
 | 
			
		||||
	// Default to no assembler implementation
 | 
			
		||||
	///////////////////////////////////////////////////////////
 | 
			
		||||
	template<class Impl>
 | 
			
		||||
	  void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
							 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
							 int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
	{
 | 
			
		||||
	  assert(0);
 | 
			
		||||
	}
 | 
			
		||||
	template<class Impl>
 | 
			
		||||
	  void WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
							    std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
							    int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
	{
 | 
			
		||||
	  assert(0);
 | 
			
		||||
	}
 | 
			
		||||
  
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <bool EnableBool = true>
 | 
			
		||||
  typename std::enable_if<
 | 
			
		||||
      (Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,
 | 
			
		||||
      void>::type
 | 
			
		||||
  DiracOptDhopSiteDag(
 | 
			
		||||
      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
      int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
      FermionField &out) {
 | 
			
		||||
    for (int site = 0; site < Ns; site++) {
 | 
			
		||||
      for (int s = 0; s < Ls; s++) {
 | 
			
		||||
        WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, sU,
 | 
			
		||||
                                                        in, out);
 | 
			
		||||
        sF++;
 | 
			
		||||
      }
 | 
			
		||||
      sU++;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void DiracOptDhopDir(
 | 
			
		||||
      StencilImpl &st, DoubledGaugeField &U,
 | 
			
		||||
      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
      int sF, int sU, const FermionField &in, FermionField &out, int dirdisp,
 | 
			
		||||
      int gamma);
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  // Specialised variants
 | 
			
		||||
  void DiracOptGenericDhopSite(
 | 
			
		||||
      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
      int sF, int sU, const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
  void DiracOptGenericDhopSiteDag(
 | 
			
		||||
      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
      int sF, int sU, const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
  void DiracOptAsmDhopSite(
 | 
			
		||||
      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
      int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
      FermionField &out);
 | 
			
		||||
 | 
			
		||||
  void DiracOptAsmDhopSiteDag(
 | 
			
		||||
      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
      int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
      FermionField &out);
 | 
			
		||||
 | 
			
		||||
  void DiracOptHandDhopSite(
 | 
			
		||||
      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
      int sF, int sU, const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
  void DiracOptHandDhopSiteDag(
 | 
			
		||||
      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
      int sF, int sU, const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  WilsonKernels(const ImplParams &p = ImplParams());
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -1,4 +1,4 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
@@ -26,68 +26,56 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////////
 | 
			
		||||
  // Default to no assembler implementation
 | 
			
		||||
  ///////////////////////////////////////////////////////////
 | 
			
		||||
  // Generic (unspecialised) definition of DiracOptAsmDhopSite for an arbitrary Impl.
  // The whole body is assert(0): none of the arguments are read, and reaching this
  // function in a build with assertions enabled aborts. Explicit specialisations for
  // WilsonImplF and DomainWallVec5dImplF appear further down inside the AVX512 section.
  template<class Impl>
 | 
			
		||||
  void WilsonKernels<Impl>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
					       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
					       int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  // Unconditional failure: there is no assembler kernel for a generic Impl.
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
  namespace QCD {
 | 
			
		||||
    
 | 
			
		||||
#if defined(AVX512) 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////////
 | 
			
		||||
  // If we are AVX512 specialise the single precision routine
 | 
			
		||||
  ///////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    
 | 
			
		||||
    ///////////////////////////////////////////////////////////
 | 
			
		||||
    // If we are AVX512 specialise the single precision routine
 | 
			
		||||
    ///////////////////////////////////////////////////////////
 | 
			
		||||
    
 | 
			
		||||
#include <simd/Intel512wilson.h>
 | 
			
		||||
#include <simd/Intel512single.h>
 | 
			
		||||
 | 
			
		||||
static Vector<vComplexF> signs;
 | 
			
		||||
 | 
			
		||||
// Populates the file-scope `signs` table.
// Replaces `signs` with a freshly constructed 2-element Vector<vComplexF>, then passes
// element 0 to vrsign() and element 1 to visign() (both defined elsewhere; the values
// they store are not visible here).
// Always returns 1. The return value exists so the call can be used as the initialiser
// of `signInit` right after this definition, which is what triggers the setup.
int setupSigns(void ){
 | 
			
		||||
  Vector<vComplexF> bother(2);
 | 
			
		||||
  // Assigning a 2-element vector gives `signs` its two slots before they are filled in.
  signs = bother;
 | 
			
		||||
  vrsign(signs[0]);
 | 
			
		||||
  visign(signs[1]);
 | 
			
		||||
  return 1;
 | 
			
		||||
}
 | 
			
		||||
static int signInit = setupSigns();
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    static Vector<vComplexF> signs;
 | 
			
		||||
    
 | 
			
		||||
    // Populates the file-scope `signs` table.
    // Replaces `signs` with a freshly constructed 2-element Vector<vComplexF>, then passes
    // element 0 to vrsign() and element 1 to visign() (both defined elsewhere; the values
    // they store are not visible here).
    // Always returns 1. The return value exists so the call can be used as the initialiser
    // of `signInit` right after this definition, which is what triggers the setup.
    int setupSigns(void ){
 | 
			
		||||
      Vector<vComplexF> bother(2);
 | 
			
		||||
      // Assigning a 2-element vector gives `signs` its two slots before they are filled in.
      signs = bother;
 | 
			
		||||
      vrsign(signs[0]);
 | 
			
		||||
      visign(signs[1]);
 | 
			
		||||
      return 1;
 | 
			
		||||
    }
 | 
			
		||||
    static int signInit = setupSigns();
 | 
			
		||||
  
 | 
			
		||||
// Indirection layer: forwarding to ilabel() lets the argument be macro-expanded
// before ilabel() stringifies it with #A.
#define label(A)  ilabel(A)
 | 
			
		||||
// Builds the string literal ".globl\n" followed by "<A>:\n", i.e. a label line named after A.
// NOTE(review): the "\n" right after .globl means the directive is emitted without a
// symbol operand -- confirm whether ".globl " #A "\n" #A ":\n" was intended.
#define ilabel(A) ".globl\n"  #A ":\n" 
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
// Runs statement A only when `perm` evaluates to true.
// Expands to a bare if-block, so avoid using it directly before an `else`.
#define MAYBEPERM(A,perm) if (perm) { A ; }
 | 
			
		||||
// Maps MULT_2SPIN onto MULT_ADDSUB_2SPIN for the WilsonImplF kernels that follow.
// It is defined again as MULT_ADDSUB_2SPIN_LS further down, ahead of the
// DomainWallVec5dImplF kernels. Presumably consumed by the included
// WilsonKernelsAsmBody.h -- TODO confirm.
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
 | 
			
		||||
// Token-pastes the prefix WILSONASM_ onto A, producing the identifier WILSONASM_<A>.
#define FX(A) WILSONASM_ ##A
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
#undef KERNEL_DAG
 | 
			
		||||
template<>
 | 
			
		||||
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
						     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
						     int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
    template<>
 | 
			
		||||
    void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
							 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
							 int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
			
		||||
 | 
			
		||||
      
 | 
			
		||||
#define KERNEL_DAG
 | 
			
		||||
template<>
 | 
			
		||||
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
						     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
						     int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
    template<>
 | 
			
		||||
    void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
							    std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
							    int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
			
		||||
 | 
			
		||||
				    
 | 
			
		||||
#undef VMOVIDUP
 | 
			
		||||
#undef VMOVRDUP
 | 
			
		||||
#undef MAYBEPERM
 | 
			
		||||
@@ -98,43 +86,22 @@ void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,Lebesgue
 | 
			
		||||
#define VMOVIDUP(A,B,C)                                  VBCASTIDUPf(A,B,C)
 | 
			
		||||
#define VMOVRDUP(A,B,C)                                  VBCASTRDUPf(A,B,C)
 | 
			
		||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
 | 
			
		||||
 | 
			
		||||
				    
 | 
			
		||||
#undef KERNEL_DAG
 | 
			
		||||
template<>
 | 
			
		||||
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
								   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
								   int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
    template<>
 | 
			
		||||
    void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
								  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
								  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
			
		||||
 | 
			
		||||
				    
 | 
			
		||||
#define KERNEL_DAG
 | 
			
		||||
template<>
 | 
			
		||||
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
								   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
								   int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
    template<>
 | 
			
		||||
    void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
								     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
								     int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
			
		||||
 | 
			
		||||
				    
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
							       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
							      int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);		
 | 
			
		||||
 | 
			
		||||
template void WilsonKernels<WilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, 
 | 
			
		||||
							       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
							       int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);		
 | 
			
		||||
template void WilsonKernels<GparityWilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, 
 | 
			
		||||
							       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
							       int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);		
 | 
			
		||||
template void WilsonKernels<GparityWilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, 
 | 
			
		||||
							       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
							       int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);		
 | 
			
		||||
template void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, 
 | 
			
		||||
							       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
							       int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);		
 | 
			
		||||
template void WilsonKernels<DomainWallVec5dImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, 
 | 
			
		||||
							       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
							       int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);		
 | 
			
		||||
}}
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user