mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Fix a regression failure on Mobius; chroma regression added
This commit is contained in:
		@@ -68,11 +68,11 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
    for(int mu=0;mu<Nd;mu++){
 | 
					    for(int mu=0;mu<Nd;mu++){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      tmp = U[mu]*Cshift(src,mu+1,1);
 | 
					      tmp = U[mu]*Cshift(src,mu+1,1);
 | 
				
			||||||
      ref=ref + tmp + Gamma(Gmu[mu])*tmp;
 | 
					      ref=ref + tmp - Gamma(Gmu[mu])*tmp;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      tmp =adj(U[mu])*src;
 | 
					      tmp =adj(U[mu])*src;
 | 
				
			||||||
      tmp =Cshift(tmp,mu+1,-1);
 | 
					      tmp =Cshift(tmp,mu+1,-1);
 | 
				
			||||||
      ref=ref + tmp - Gamma(Gmu[mu])*tmp;
 | 
					      ref=ref + tmp + Gamma(Gmu[mu])*tmp;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    ref = -0.5*ref;
 | 
					    ref = -0.5*ref;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
@@ -111,13 +111,13 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
      //    ref =  src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
 | 
					      //    ref =  src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
 | 
				
			||||||
      tmp = U[mu]*Cshift(src,mu+1,1);
 | 
					      tmp = U[mu]*Cshift(src,mu+1,1);
 | 
				
			||||||
      for(int i=0;i<ref._odata.size();i++){
 | 
					      for(int i=0;i<ref._odata.size();i++){
 | 
				
			||||||
	ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
 | 
						ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      tmp =adj(U[mu])*src;
 | 
					      tmp =adj(U[mu])*src;
 | 
				
			||||||
      tmp =Cshift(tmp,mu+1,-1);
 | 
					      tmp =Cshift(tmp,mu+1,-1);
 | 
				
			||||||
      for(int i=0;i<ref._odata.size();i++){
 | 
					      for(int i=0;i<ref._odata.size();i++){
 | 
				
			||||||
	ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
 | 
						ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    ref = -0.5*ref;
 | 
					    ref = -0.5*ref;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -75,13 +75,13 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
      //    ref =  src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
 | 
					      //    ref =  src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
 | 
				
			||||||
      tmp = U[mu]*Cshift(src,mu,1);
 | 
					      tmp = U[mu]*Cshift(src,mu,1);
 | 
				
			||||||
      for(int i=0;i<ref._odata.size();i++){
 | 
					      for(int i=0;i<ref._odata.size();i++){
 | 
				
			||||||
	ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
 | 
						ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      tmp =adj(U[mu])*src;
 | 
					      tmp =adj(U[mu])*src;
 | 
				
			||||||
      tmp =Cshift(tmp,mu,-1);
 | 
					      tmp =Cshift(tmp,mu,-1);
 | 
				
			||||||
      for(int i=0;i<ref._odata.size();i++){
 | 
					      for(int i=0;i<ref._odata.size();i++){
 | 
				
			||||||
	ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
 | 
						ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
@@ -124,13 +124,13 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
      //    ref =  src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
 | 
					      //    ref =  src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
 | 
				
			||||||
      tmp = U[mu]*Cshift(src,mu,1);
 | 
					      tmp = U[mu]*Cshift(src,mu,1);
 | 
				
			||||||
      for(int i=0;i<ref._odata.size();i++){
 | 
					      for(int i=0;i<ref._odata.size();i++){
 | 
				
			||||||
	ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
 | 
						ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      tmp =adj(U[mu])*src;
 | 
					      tmp =adj(U[mu])*src;
 | 
				
			||||||
      tmp =Cshift(tmp,mu,-1);
 | 
					      tmp =Cshift(tmp,mu,-1);
 | 
				
			||||||
      for(int i=0;i<ref._odata.size();i++){
 | 
					      for(int i=0;i<ref._odata.size();i++){
 | 
				
			||||||
	ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
 | 
						ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										16
									
								
								configure.ac
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								configure.ac
									
									
									
									
									
								
							@@ -164,6 +164,21 @@ esac
 | 
				
			|||||||
AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" ])
 | 
					AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" ])
 | 
				
			||||||
AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])
 | 
					AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# The third argument runs for --enable-chroma, --enable-chroma=VALUE and
					# --disable-chroma alike, so take the requested value from ${enableval}
					# instead of forcing "yes"; the case statement below validates it.
					AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],[Expect chroma compiled under c++11 ])],[ac_CHROMA=${enableval}],[ac_CHROMA=no])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					case ${ac_CHROMA} in
 | 
				
			||||||
 | 
					     yes)
 | 
				
			||||||
 | 
					       echo Enabling tests regressing to Chroma
 | 
				
			||||||
 | 
					     ;;
 | 
				
			||||||
 | 
					     no)
 | 
				
			||||||
 | 
					       echo Disabling tests regressing to Chroma
 | 
				
			||||||
 | 
					     ;;
 | 
				
			||||||
 | 
					     *)
 | 
				
			||||||
 | 
					     AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]); 
 | 
				
			||||||
 | 
					     ;;
 | 
				
			||||||
 | 
					esac
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" == "XyesX" ])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
###################################################################
 | 
					###################################################################
 | 
				
			||||||
# Checks for doxygen support
 | 
					# Checks for doxygen support
 | 
				
			||||||
@@ -184,6 +199,7 @@ echo :::::::::::::::::::::::::::::::::::::::::::
 | 
				
			|||||||
AC_CONFIG_FILES(Makefile)
 | 
					AC_CONFIG_FILES(Makefile)
 | 
				
			||||||
AC_CONFIG_FILES(lib/Makefile)
 | 
					AC_CONFIG_FILES(lib/Makefile)
 | 
				
			||||||
AC_CONFIG_FILES(tests/Makefile)
 | 
					AC_CONFIG_FILES(tests/Makefile)
 | 
				
			||||||
 | 
					AC_CONFIG_FILES(tests/qdpxx/Makefile)
 | 
				
			||||||
AC_CONFIG_FILES(benchmarks/Makefile)
 | 
					AC_CONFIG_FILES(benchmarks/Makefile)
 | 
				
			||||||
AC_OUTPUT
 | 
					AC_OUTPUT
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -137,6 +137,9 @@
 | 
				
			|||||||
/* Define to the one symbol short name of this package. */
 | 
					/* Define to the one symbol short name of this package. */
 | 
				
			||||||
#undef PACKAGE_TARNAME
 | 
					#undef PACKAGE_TARNAME
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Define to the home page for this package. */
 | 
				
			||||||
 | 
					#undef PACKAGE_URL
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Define to the version of this package. */
 | 
					/* Define to the version of this package. */
 | 
				
			||||||
#undef PACKAGE_VERSION
 | 
					#undef PACKAGE_VERSION
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,4 +1,4 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/AdefGeneric.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/ImplicitlyRestartedLanczos.h ./algorithms/iterative/MatrixUtils.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/PrecConjugateResidual.h ./algorithms/iterative/PrecGeneralisedConjugateResidual.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/Preconditioner.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Config.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./Init.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./Log.h ./Old/Tensor_peek.h ./Old/Tensor_poke.h ./parallelIO/BinaryIO.h ./parallelIO/NerscIO.h ./pugixml/pugixml.h ./qcd/action/ActionBase.h ./qcd/action/ActionParams.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h 
./qcd/action/fermion/FermionOperatorImpl.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/action/pseudofermion/EvenOddSchurDifferentiable.h ./qcd/action/pseudofermion/OneFlavourEvenOddRational.h ./qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h ./qcd/action/pseudofermion/OneFlavourRational.h ./qcd/action/pseudofermion/OneFlavourRationalRatio.h ./qcd/action/pseudofermion/TwoFlavour.h ./qcd/action/pseudofermion/TwoFlavourEvenOdd.h ./qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h ./qcd/action/pseudofermion/TwoFlavourRatio.h ./qcd/hmc/HMC.h ./qcd/hmc/integrators/Integrator.h ./qcd/hmc/integrators/Integrator_algorithm.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/SUn.h ./qcd/utils/WilsonLoops.h ./serialisation/BaseIO.h ./serialisation/BinaryIO.h ./serialisation/MacroMagic.h ./serialisation/Serialisation.h ./serialisation/TextIO.h ./serialisation/XmlIO.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_empty.h ./simd/Grid_imci.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h 
./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_determinant.h ./tensors/Tensor_exp.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_index.h ./tensors/Tensor_inner.h ./tensors/Tensor_logical.h ./tensors/Tensor_outer.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h ./Timer.h
 | 
					HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/AdefGeneric.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/DenseMatrix.h ./algorithms/iterative/EigenSort.h ./algorithms/iterative/Francis.h ./algorithms/iterative/Householder.h ./algorithms/iterative/ImplicitlyRestartedLanczos.h ./algorithms/iterative/Matrix.h ./algorithms/iterative/MatrixUtils.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/PrecConjugateResidual.h ./algorithms/iterative/PrecGeneralisedConjugateResidual.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/Preconditioner.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./Init.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./Log.h ./Old/Tensor_peek.h ./Old/Tensor_poke.h ./parallelIO/BinaryIO.h ./parallelIO/NerscIO.h ./PerfCount.h ./pugixml/pugixml.h ./qcd/action/ActionBase.h ./qcd/action/ActionParams.h ./qcd/action/Actions.h 
./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/FermionOperatorImpl.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/action/pseudofermion/EvenOddSchurDifferentiable.h ./qcd/action/pseudofermion/OneFlavourEvenOddRational.h ./qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h ./qcd/action/pseudofermion/OneFlavourRational.h ./qcd/action/pseudofermion/OneFlavourRationalRatio.h ./qcd/action/pseudofermion/TwoFlavour.h ./qcd/action/pseudofermion/TwoFlavourEvenOdd.h ./qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h ./qcd/action/pseudofermion/TwoFlavourRatio.h ./qcd/hmc/HMC.h ./qcd/hmc/integrators/Integrator.h ./qcd/hmc/integrators/Integrator_algorithm.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/SUn.h ./qcd/utils/WilsonLoops.h ./serialisation/BaseIO.h ./serialisation/BinaryIO.h ./serialisation/MacroMagic.h ./serialisation/Serialisation.h ./serialisation/TextIO.h ./serialisation/XmlIO.h ./simd/Avx512Asm.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_empty.h 
./simd/Grid_imci.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_determinant.h ./tensors/Tensor_exp.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_index.h ./tensors/Tensor_inner.h ./tensors/Tensor_logical.h ./tensors/Tensor_outer.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h ./Timer.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CCFILES=./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./Init.cc ./Log.cc ./pugixml/pugixml.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/hmc/HMC.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./serialisation/BinaryIO.cc ./serialisation/TextIO.cc ./serialisation/XmlIO.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
 | 
					CCFILES=./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./Init.cc ./Log.cc ./PerfCount.cc ./pugixml/pugixml.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsAsm.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/hmc/HMC.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./serialisation/BinaryIO.cc ./serialisation/TextIO.cc ./serialisation/XmlIO.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -3,8 +3,11 @@
 | 
				
			|||||||
#include <PerfCount.h>
 | 
					#include <PerfCount.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Grid {
 | 
					namespace Grid {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16))
 | 
					#define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::PerformanceCounterConfigs [] = {
 | 
					const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::PerformanceCounterConfigs [] = {
 | 
				
			||||||
 | 
					#ifdef __linux__
 | 
				
			||||||
  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES          ,  "CPUCYCLES.........." },
 | 
					  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES          ,  "CPUCYCLES.........." },
 | 
				
			||||||
  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS        ,  "INSTRUCTIONS......." },
 | 
					  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS        ,  "INSTRUCTIONS......." },
 | 
				
			||||||
  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES    ,  "CACHE_REFERENCES..." },
 | 
					  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES    ,  "CACHE_REFERENCES..." },
 | 
				
			||||||
@@ -23,6 +26,7 @@ const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::Performan
 | 
				
			|||||||
  { PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,ACCESS)  ,  "LL_PREFETCH_ACCESS."},
 | 
					  { PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,ACCESS)  ,  "LL_PREFETCH_ACCESS."},
 | 
				
			||||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,MISS)       ,  "L1I_READ_MISS......"},
 | 
					  { PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,MISS)       ,  "L1I_READ_MISS......"},
 | 
				
			||||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,ACCESS)     ,  "L1I_READ_ACCESS...."}
 | 
					  { PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,ACCESS)     ,  "L1I_READ_ACCESS...."}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
  //  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, "STALL_CYCLES" },
 | 
					  //  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, "STALL_CYCLES" },
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -30,6 +30,7 @@ static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class PerformanceCounter {
 | 
					class PerformanceCounter {
 | 
				
			||||||
private:
 | 
					private:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  typedef struct { 
 | 
					  typedef struct { 
 | 
				
			||||||
  public:
 | 
					  public:
 | 
				
			||||||
    uint32_t type;
 | 
					    uint32_t type;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -11,7 +11,6 @@
 | 
				
			|||||||
// Vector types are arch dependent
 | 
					// Vector types are arch dependent
 | 
				
			||||||
////////////////////////////////////////////////////////////////////////
 | 
					////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
typedef uint32_t Integer;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define _MM_SELECT_FOUR_FOUR(A,B,C,D) ((A<<6)|(B<<4)|(C<<2)|(D))
 | 
					#define _MM_SELECT_FOUR_FOUR(A,B,C,D) ((A<<6)|(B<<4)|(C<<2)|(D))
 | 
				
			||||||
// Combine eight selector bits (each expected to be 0 or 1) into one immediate:
// A lands in bit 7, B in bit 6, ... G in bit 1 and H in bit 0.
// Arguments are parenthesised so that expression arguments shift as a whole.
#define _MM_SELECT_EIGHT_TWO(A,B,C,D,E,F,G,H) (((A)<<7)|((B)<<6)|((C)<<5)|((D)<<4)|((E)<<3)|((F)<<2)|((G)<<1)|(H))
 | 
					// Combine eight selector bits (each expected to be 0 or 1) into one immediate:
					// A lands in bit 7, B in bit 6, ... G in bit 1 and H in bit 0.
					// Arguments are parenthesised so that expression arguments shift as a whole.
					#define _MM_SELECT_EIGHT_TWO(A,B,C,D,E,F,G,H) (((A)<<7)|((B)<<6)|((C)<<5)|((D)<<4)|((E)<<3)|((F)<<2)|((G)<<1)|(H))
 | 
				
			||||||
@@ -20,6 +19,8 @@ typedef uint32_t Integer;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
namespace Grid {
 | 
					namespace Grid {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  typedef uint32_t Integer;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  typedef  float  RealF;
 | 
					  typedef  float  RealF;
 | 
				
			||||||
  typedef  double RealD;
 | 
					  typedef  double RealD;
 | 
				
			||||||
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
 | 
					#ifdef GRID_DEFAULT_PRECISION_DOUBLE
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -114,7 +114,7 @@ namespace QCD {
 | 
				
			|||||||
    // Apply Dw
 | 
					    // Apply Dw
 | 
				
			||||||
    this->DW(psi,Din,DaggerYes); 
 | 
					    this->DW(psi,Din,DaggerYes); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Meooe5D(Din,chi);
 | 
					    MeooeDag5D(Din,chi);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    int Ls=this->Ls;
 | 
					    int Ls=this->Ls;
 | 
				
			||||||
    for(int s=0;s<Ls;s++){
 | 
					    for(int s=0;s<Ls;s++){
 | 
				
			||||||
@@ -163,7 +163,6 @@ namespace QCD {
 | 
				
			|||||||
    FermionField tmp(psi._grid);
 | 
					    FermionField tmp(psi._grid);
 | 
				
			||||||
    // Assemble the 5d matrix
 | 
					    // Assemble the 5d matrix
 | 
				
			||||||
    Meooe5D(psi,tmp); 
 | 
					    Meooe5D(psi,tmp); 
 | 
				
			||||||
 | 
					 | 
				
			||||||
#if 0
 | 
					#if 0
 | 
				
			||||||
    std::cout << "Meooe Test replacement norm2 tmp = " <<norm2(tmp)<<std::endl;
 | 
					    std::cout << "Meooe Test replacement norm2 tmp = " <<norm2(tmp)<<std::endl;
 | 
				
			||||||
    for(int s=0;s<Ls;s++){
 | 
					    for(int s=0;s<Ls;s++){
 | 
				
			||||||
@@ -202,7 +201,7 @@ namespace QCD {
 | 
				
			|||||||
      this->DhopOE(psi,tmp,DaggerYes);
 | 
					      this->DhopOE(psi,tmp,DaggerYes);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Meooe5D(tmp,chi); 
 | 
					    MeooeDag5D(tmp,chi); 
 | 
				
			||||||
#if 0
 | 
					#if 0
 | 
				
			||||||
    std::cout << "Meooe Test replacement norm2 chi new = " <<norm2(chi)<<std::endl;
 | 
					    std::cout << "Meooe Test replacement norm2 chi new = " <<norm2(chi)<<std::endl;
 | 
				
			||||||
    // Assemble the 5d matrix
 | 
					    // Assemble the 5d matrix
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -3,9 +3,12 @@ namespace Grid {
 | 
				
			|||||||
namespace QCD {
 | 
					namespace QCD {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class Impl> 
 | 
					template<class Impl> 
 | 
				
			||||||
void WilsonKernels<Impl>::DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
					WilsonKernels<Impl>::WilsonKernels(const ImplParams &p): Base(p) {};
 | 
				
			||||||
						  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
					
 | 
				
			||||||
						  int sF,int sU,const FermionField &in, FermionField &out)
 | 
					template<class Impl> 
 | 
				
			||||||
 | 
					void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
 | 
										   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
 | 
										   int sF,int sU,const FermionField &in, FermionField &out)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  SiteHalfSpinor  tmp;    
 | 
					  SiteHalfSpinor  tmp;    
 | 
				
			||||||
  SiteHalfSpinor  chi;    
 | 
					  SiteHalfSpinor  chi;    
 | 
				
			||||||
@@ -122,7 +125,7 @@ void WilsonKernels<Impl>::DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			|||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class Impl> 
 | 
					template<class Impl> 
 | 
				
			||||||
void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
					void WilsonKernels<Impl>::DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
					      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
										      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
					      int sF,int sU,const FermionField &in, FermionField &out)
 | 
										      int sF,int sU,const FermionField &in, FermionField &out)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -369,6 +372,16 @@ void WilsonKernels<Impl>::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			|||||||
  vstream(out._odata[sF],result*(-0.5));
 | 
					  vstream(out._odata[sF],result*(-0.5));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if ( ! defined(AVX512) ) && ( ! defined(IMCI) )
 | 
				
			||||||
 | 
					template<class Impl> 
 | 
				
			||||||
 | 
					void WilsonKernels<Impl>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
 | 
										      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
 | 
										      int sF,int sU,const FermionField &in, FermionField &out,uint64_t *p)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  FermOpTemplateInstantiate(WilsonKernels);
 | 
					  FermOpTemplateInstantiate(WilsonKernels);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}}
 | 
					}}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -28,19 +28,12 @@ namespace Grid {
 | 
				
			|||||||
     void DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
 | 
					     void DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
			  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
								  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
			  int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma);
 | 
								  int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma);
 | 
				
			||||||
#if defined(AVX512) || defined(IMCI)
 | 
					
 | 
				
			||||||
     void DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
					     void DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
			      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
								      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
			      int sF,int sU,const FermionField &in, FermionField &out,uint64_t *);
 | 
								      int sF,int sU,const FermionField &in, FermionField &out,uint64_t *);
 | 
				
			||||||
#else
 | 
					
 | 
				
			||||||
     void DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
					 | 
				
			||||||
			      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
					 | 
				
			||||||
			      int sF,int sU,const FermionField &in, FermionField &out,uint64_t *p){
 | 
					 | 
				
			||||||
       DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 | 
					 | 
				
			||||||
     }
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
#define HANDOPT
 | 
					#define HANDOPT
 | 
				
			||||||
#ifdef HANDOPT
 | 
					 | 
				
			||||||
     void DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
					     void DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
			       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
								       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
			       int sF,int sU,const FermionField &in, FermionField &out);
 | 
								       int sF,int sU,const FermionField &in, FermionField &out);
 | 
				
			||||||
@@ -48,25 +41,9 @@ namespace Grid {
 | 
				
			|||||||
     void DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
					     void DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
				  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
									  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
				  int sF,int sU,const FermionField &in, FermionField &out);
 | 
									  int sF,int sU,const FermionField &in, FermionField &out);
 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
     void DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
					 | 
				
			||||||
			       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
					 | 
				
			||||||
			       int sF,int sU,const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
     {
 | 
					 | 
				
			||||||
       DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 | 
					 | 
				
			||||||
     }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
     void DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
					 | 
				
			||||||
				  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
					 | 
				
			||||||
				  int sF,int sU,const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
     {
 | 
					 | 
				
			||||||
       DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 | 
					 | 
				
			||||||
     }
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
     WilsonKernels(const ImplParams &p= ImplParams()) : Base(p) {};
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					     WilsonKernels(const ImplParams &p= ImplParams());
 | 
				
			||||||
 | 
					     
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -282,7 +282,7 @@ namespace QCD {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#ifdef HANDOPT
 | 
					#ifdef HANDOPT
 | 
				
			||||||
template<class Impl>
 | 
					template<class Impl>
 | 
				
			||||||
void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
					void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
					       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
										       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
					       int ss,int sU,const FermionField &in, FermionField &out)
 | 
										       int ss,int sU,const FermionField &in, FermionField &out)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -526,7 +526,7 @@ void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeFiel
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class Impl>
 | 
					template<class Impl>
 | 
				
			||||||
void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
					void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
						   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
											   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
						   int ss,int sU,const FermionField &in, FermionField &out)
 | 
											   int ss,int sU,const FermionField &in, FermionField &out)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -767,6 +767,36 @@ void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeF
 | 
				
			|||||||
    vstream(ref()(3)(2),result_32*(-0.5));
 | 
					    vstream(ref()(3)(2),result_32*(-0.5));
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
  FermOpTemplateInstantiate(WilsonKernels);
 | 
					#else 
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
 | 
										       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
 | 
										       int ss,int sU,const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
 | 
											   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
 | 
											   int ss,int sU,const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
 | 
												  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
 | 
												  int ss,int sU,const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
 | 
												  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
 | 
												  int ss,int sU,const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
 | 
												  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
 | 
												  int ss,int sU,const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
 | 
												  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
 | 
												  int ss,int sU,const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}}
 | 
					}}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -16,6 +16,7 @@
 | 
				
			|||||||
#define _mm256_set_m128i(hi,lo) _mm256_insertf128_si256(_mm256_castsi128_si256(lo),(hi),1)
 | 
					#define _mm256_set_m128i(hi,lo) _mm256_insertf128_si256(_mm256_castsi128_si256(lo),(hi),1)
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace Grid {
 | 
				
			||||||
namespace Optimization {
 | 
					namespace Optimization {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  template<class vtype>
 | 
					  template<class vtype>
 | 
				
			||||||
@@ -467,7 +468,7 @@ namespace Optimization {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
//////////////////////////////////////////////////////////////////////////////////////
 | 
					//////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
// Here assign types 
 | 
					// Here assign types 
 | 
				
			||||||
namespace Grid {
 | 
					
 | 
				
			||||||
  typedef __m256  SIMD_Ftype; // Single precision type
 | 
					  typedef __m256  SIMD_Ftype; // Single precision type
 | 
				
			||||||
  typedef __m256d SIMD_Dtype; // Double precision type
 | 
					  typedef __m256d SIMD_Dtype; // Double precision type
 | 
				
			||||||
  typedef __m256i SIMD_Itype; // Integer type
 | 
					  typedef __m256i SIMD_Itype; // Integer type
 | 
				
			||||||
@@ -488,8 +489,8 @@ namespace Grid {
 | 
				
			|||||||
  typedef Optimization::Vstore   VstoreSIMD;
 | 
					  typedef Optimization::Vstore   VstoreSIMD;
 | 
				
			||||||
  typedef Optimization::Vset     VsetSIMD;
 | 
					  typedef Optimization::Vset     VsetSIMD;
 | 
				
			||||||
  typedef Optimization::Vstream  VstreamSIMD;
 | 
					  typedef Optimization::Vstream  VstreamSIMD;
 | 
				
			||||||
  template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Arithmetic operations
 | 
					  // Arithmetic operations
 | 
				
			||||||
  typedef Optimization::Sum         SumSIMD;
 | 
					  typedef Optimization::Sum         SumSIMD;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -11,8 +11,6 @@ echo CCFILES=$CCFILES >> Make.inc
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
cd ..
 | 
					cd ..
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
cd tests
 | 
					cd tests
 | 
				
			||||||
 | 
					
 | 
				
			||||||
echo> Make.inc
 | 
					echo> Make.inc
 | 
				
			||||||
@@ -32,6 +30,26 @@ echo ${BNAME}_LDADD=-lGrid>> Make.inc
 | 
				
			|||||||
echo >> Make.inc
 | 
					echo >> Make.inc
 | 
				
			||||||
done
 | 
					done
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cd qdpxx
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					echo> Make.inc
 | 
				
			||||||
 | 
					TESTS=`ls T*.cc`
 | 
				
			||||||
 | 
					TESTLIST=`echo ${TESTS} | sed s/.cc//g `
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					echo > Make.inc
 | 
				
			||||||
 | 
					echo bin_PROGRAMS = ${TESTLIST} >> Make.inc
 | 
				
			||||||
 | 
					echo >> Make.inc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					for f in $TESTS
 | 
				
			||||||
 | 
					do
 | 
				
			||||||
 | 
					BNAME=`basename $f .cc`
 | 
				
			||||||
 | 
					echo >> Make.inc
 | 
				
			||||||
 | 
					echo ${BNAME}_SOURCES=$f  >> Make.inc
 | 
				
			||||||
 | 
					echo ${BNAME}_LDADD=-lGrid>> Make.inc
 | 
				
			||||||
 | 
					echo >> Make.inc
 | 
				
			||||||
 | 
					done
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cd ..
 | 
				
			||||||
cd ..
 | 
					cd ..
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,5 +1,5 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
bin_PROGRAMS = Test_GaugeAction Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_cheby Test_contfrac_cg Test_contfrac_even_odd Test_contfrac_force Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_dwf_force Test_dwf_fpgcr Test_dwf_hdcr Test_gamma Test_hmc_EODWFRatio Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_partfrac_force Test_quenched_update Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio Test_rng Test_rng_fixed Test_serialisation Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi Test_synthetic_lanczos
 | 
					bin_PROGRAMS = Test_GaugeAction Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_cheby Test_contfrac_cg Test_contfrac_even_odd Test_contfrac_force Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_dwf_force Test_dwf_fpgcr Test_dwf_hdcr Test_dwf_lanczos Test_gamma Test_hmc_EODWFRatio Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_partfrac_force Test_quenched_update Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio Test_rng Test_rng_fixed Test_serialisation Test_simd Test_stencil Test_synthetic_lanczos Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Test_GaugeAction_SOURCES=Test_GaugeAction.cc
 | 
					Test_GaugeAction_SOURCES=Test_GaugeAction.cc
 | 
				
			||||||
@@ -98,8 +98,8 @@ Test_gparity_SOURCES=Test_gparity.cc
 | 
				
			|||||||
Test_gparity_LDADD=-lGrid
 | 
					Test_gparity_LDADD=-lGrid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#Test_gpwilson_even_odd_SOURCES=Test_gpwilson_even_odd.cc
 | 
					Test_gpwilson_even_odd_SOURCES=Test_gpwilson_even_odd.cc
 | 
				
			||||||
#Test_gpwilson_even_odd_LDADD=-lGrid
 | 
					Test_gpwilson_even_odd_LDADD=-lGrid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Test_hmc_EODWFRatio_SOURCES=Test_hmc_EODWFRatio.cc
 | 
					Test_hmc_EODWFRatio_SOURCES=Test_hmc_EODWFRatio.cc
 | 
				
			||||||
@@ -225,3 +225,7 @@ Test_wilson_force_phiMdagMphi_LDADD=-lGrid
 | 
				
			|||||||
Test_wilson_force_phiMphi_SOURCES=Test_wilson_force_phiMphi.cc
 | 
					Test_wilson_force_phiMphi_SOURCES=Test_wilson_force_phiMphi.cc
 | 
				
			||||||
Test_wilson_force_phiMphi_LDADD=-lGrid
 | 
					Test_wilson_force_phiMphi_LDADD=-lGrid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Test_zmm_SOURCES=Test_zmm.cc
 | 
				
			||||||
 | 
					Test_zmm_LDADD=-lGrid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,5 +1,12 @@
 | 
				
			|||||||
# additional include paths necessary to compile the C++ library
 | 
					# additional include paths necessary to compile the C++ library
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SUBDIRS = 
 | 
				
			||||||
 | 
					if BUILD_CHROMA_REGRESSION
 | 
				
			||||||
 | 
					  SUBDIRS+= qdpxx
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
AM_CXXFLAGS = -I$(top_srcdir)/lib
 | 
					AM_CXXFLAGS = -I$(top_srcdir)/lib
 | 
				
			||||||
AM_LDFLAGS = -L$(top_builddir)/lib
 | 
					AM_LDFLAGS = -L$(top_builddir)/lib
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
include Make.inc
 | 
					include Make.inc
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -66,7 +66,8 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
 | 
					  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
 | 
				
			||||||
  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
 | 
					  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
 | 
					  LatticeGaugeField Umu(UGrid);
 | 
				
			||||||
 | 
					  SU3::HotConfiguration(RNG4,Umu);
 | 
				
			||||||
  std::vector<LatticeColourMatrix> U(4,UGrid);
 | 
					  std::vector<LatticeColourMatrix> U(4,UGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  RealD mass=0.1;
 | 
					  RealD mass=0.1;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -27,8 +27,8 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  const int Ls=8;
 | 
					  const int Ls=8;
 | 
				
			||||||
  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
 | 
					  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
 | 
				
			||||||
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
					 | 
				
			||||||
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 | 
					  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 | 
				
			||||||
 | 
					  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
				
			||||||
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
 | 
					  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  std::vector<int> seeds4({1,2,3,4});
 | 
					  std::vector<int> seeds4({1,2,3,4});
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user