diff --git a/configure.ac b/configure.ac index 827a15a9..b7205b4a 100644 --- a/configure.ac +++ b/configure.ac @@ -117,7 +117,7 @@ CXXFLAGS=$CXXFLAGS_CPY LDFLAGS=$LDFLAGS_CPY ############### SIMD instruction selection -AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVX2|AVX512|AVX512MIC|IMCI|KNL|KNC],\ +AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVXFMA|AVX2|AVX512|AVX512MIC|IMCI|KNL|KNC],\ [Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\ [ac_SIMD=${enable_simd}],[ac_SIMD=GEN]) @@ -133,6 +133,9 @@ case ${ax_cv_cxx_compiler_vendor} in AVXFMA4) AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) SIMD_FLAGS='-mavx -mfma4';; + AVXFMA) + AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3]) + SIMD_FLAGS='-mavx -mfma';; AVX2) AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) SIMD_FLAGS='-mavx2 -mfma';; @@ -162,6 +165,9 @@ case ${ax_cv_cxx_compiler_vendor} in AVXFMA4) AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) SIMD_FLAGS='-mavx -mfma';; + AVXFMA) + AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3]) + SIMD_FLAGS='-mavx -mfma';; AVX2) AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) SIMD_FLAGS='-march=core-avx2 -xcore-avx2';; diff --git a/lib/qcd/action/fermion/FermionOperatorImpl.h b/lib/qcd/action/fermion/FermionOperatorImpl.h index 910bf488..4248ec4c 100644 --- a/lib/qcd/action/fermion/FermionOperatorImpl.h +++ b/lib/qcd/action/fermion/FermionOperatorImpl.h @@ -186,10 +186,10 @@ namespace Grid { inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){ int Ls=Btilde._grid->_fdimensions[0]; - GaugeLinkField tmp(mat._grid); tmp = zero; - PARALLEL_FOR_LOOP + + PARALLEL_FOR_LOOP for(int sss=0;sssoSites();sss++){ int sU=sss; for(int s=0;s(mat,tmp,mu); - + } }; diff --git a/tests/hmc/Test_hmc_EODWFRatio.cc b/tests/hmc/Test_hmc_EODWFRatio.cc index 20ef7db6..02b312b8 100644 --- a/tests/hmc/Test_hmc_EODWFRatio.cc +++ b/tests/hmc/Test_hmc_EODWFRatio.cc @@ -75,8 +75,10 @@ public: 
Level1.push_back(&Waction); TheAction.push_back(Level1); + NumOp.ZeroCounters(); + DenOp.ZeroCounters(); Run(argc,argv); - + std::cout << GridLogMessage << "Numerator report, Pauli-Villars term : " << std::endl; NumOp.Report(); std::cout << GridLogMessage << "Denominator report, Dw(m) term (includes CG) : " << std::endl;