1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 19:25:56 +01:00

Fix for AVXFMA

This commit is contained in:
Guido Cossu 2016-10-10 14:43:37 +01:00
parent 70c32fa49b
commit b56c9ffa52
3 changed files with 13 additions and 5 deletions

View File

@ -117,7 +117,7 @@ CXXFLAGS=$CXXFLAGS_CPY
LDFLAGS=$LDFLAGS_CPY LDFLAGS=$LDFLAGS_CPY
############### SIMD instruction selection ############### SIMD instruction selection
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVX2|AVX512|AVX512MIC|IMCI|KNL|KNC],\ AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVXFMA|AVX2|AVX512|AVX512MIC|IMCI|KNL|KNC],\
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\ [Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\
[ac_SIMD=${enable_simd}],[ac_SIMD=GEN]) [ac_SIMD=${enable_simd}],[ac_SIMD=GEN])
@ -133,6 +133,9 @@ case ${ax_cv_cxx_compiler_vendor} in
AVXFMA4) AVXFMA4)
AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
SIMD_FLAGS='-mavx -mfma4';; SIMD_FLAGS='-mavx -mfma4';;
AVXFMA)
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3])
SIMD_FLAGS='-mavx -mfma';;
AVX2) AVX2)
AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
SIMD_FLAGS='-mavx2 -mfma';; SIMD_FLAGS='-mavx2 -mfma';;
@ -162,6 +165,9 @@ case ${ax_cv_cxx_compiler_vendor} in
AVXFMA4) AVXFMA4)
AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
SIMD_FLAGS='-mavx -mfma';; SIMD_FLAGS='-mavx -mfma';;
AVXFMA)
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA4])
SIMD_FLAGS='-mavx -mfma';;
AVX2) AVX2)
AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
SIMD_FLAGS='-march=core-avx2 -xcore-avx2';; SIMD_FLAGS='-march=core-avx2 -xcore-avx2';;

View File

@ -186,10 +186,10 @@ namespace Grid {
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){ inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
int Ls=Btilde._grid->_fdimensions[0]; int Ls=Btilde._grid->_fdimensions[0];
GaugeLinkField tmp(mat._grid); GaugeLinkField tmp(mat._grid);
tmp = zero; tmp = zero;
PARALLEL_FOR_LOOP
PARALLEL_FOR_LOOP
for(int sss=0;sss<tmp._grid->oSites();sss++){ for(int sss=0;sss<tmp._grid->oSites();sss++){
int sU=sss; int sU=sss;
for(int s=0;s<Ls;s++){ for(int s=0;s<Ls;s++){
@ -198,7 +198,7 @@ namespace Grid {
} }
} }
PokeIndex<LorentzIndex>(mat,tmp,mu); PokeIndex<LorentzIndex>(mat,tmp,mu);
} }
}; };

View File

@ -75,8 +75,10 @@ public:
Level1.push_back(&Waction); Level1.push_back(&Waction);
TheAction.push_back(Level1); TheAction.push_back(Level1);
NumOp.ZeroCounters();
DenOp.ZeroCounters();
Run(argc,argv); Run(argc,argv);
std::cout << GridLogMessage << "Numerator report, Pauli-Villars term : " << std::endl; std::cout << GridLogMessage << "Numerator report, Pauli-Villars term : " << std::endl;
NumOp.Report(); NumOp.Report();
std::cout << GridLogMessage << "Denominator report, Dw(m) term (includes CG) : " << std::endl; std::cout << GridLogMessage << "Denominator report, Dw(m) term (includes CG) : " << std::endl;