diff --git a/configure.ac b/configure.ac index 4da6e3d9..88d85c99 100644 --- a/configure.ac +++ b/configure.ac @@ -55,15 +55,6 @@ echo ::::::::::::::::::::::::::::::::::::::::::: AC_CHECK_FUNCS([gettimeofday]) -#AC_CHECK_LIB([gmp],[__gmpf_init],, -# [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system. -#Please install or provide the correct path to your installation -#Info at: http://www.gmplib.org)]) - -#AC_CHECK_LIB([mpfr],[mpfr_init],, -# [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system. -#Please install or provide the correct path to your installation -#Info at: http://www.mpfr.org/)]) # # SIMD instructions selection @@ -124,7 +115,7 @@ case ${ac_SIMD} in echo Configuring for IMCI AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] ) supported="cross compilation" - ac_ZMM=yes; + ac_ZMM=no; ;; NEONv8) echo Configuring for experimental ARMv8a support diff --git a/lib/algorithms/approx/Remez.h b/lib/algorithms/approx/Remez.h index 6e3cf05b..4a56d5d2 100644 --- a/lib/algorithms/approx/Remez.h +++ b/lib/algorithms/approx/Remez.h @@ -16,9 +16,13 @@ #define INCLUDED_ALG_REMEZ_H #include +#include -//#include +#ifdef HAVE_GMP_H +#include +#else #include +#endif #define JMAX 10000 //Maximum number of iterations of Newton's approximation #define SUM_MAX 10 // Maximum number of terms in exponential diff --git a/lib/qcd/action/fermion/WilsonKernelsAsm.cc b/lib/qcd/action/fermion/WilsonKernelsAsm.cc index 859d1a20..980a2b17 100644 --- a/lib/qcd/action/fermion/WilsonKernelsAsm.cc +++ b/lib/qcd/action/fermion/WilsonKernelsAsm.cc @@ -27,7 +27,7 @@ Author: paboyle *************************************************************************************/ /* END LEGAL */ #include -#if defined(AVX512) || defined (IMCI) +#if defined(AVX512) //#if defined (IMCI) #include diff --git a/lib/simd/Grid_imci.h b/lib/simd/Grid_imci.h index 044c5cb4..119a31a3 100644 --- a/lib/simd/Grid_imci.h +++ b/lib/simd/Grid_imci.h @@ -36,7 +36,9 @@ Author: paboyle //---------------------------------------------------------------------- #include +#include +namespace Grid{ namespace Optimization { struct Vsplat{ @@ -316,6 +318,54 @@ namespace Optimization { }; + struct Rotate{ + + static inline __m512 rotate(__m512 in,int n){ + switch(n){ + case 0: return tRotate<0>(in);break; + case 1: return tRotate<1>(in);break; + case 2: return tRotate<2>(in);break; + case 3: return tRotate<3>(in);break; + case 4: return tRotate<4>(in);break; + case 5: return tRotate<5>(in);break; + case 6: return tRotate<6>(in);break; + case 7: return tRotate<7>(in);break; + + case 8 : return tRotate<8>(in);break; + case 9 : return tRotate<9>(in);break; + case 10: return tRotate<10>(in);break; + case 11: return tRotate<11>(in);break; + case 12: return tRotate<12>(in);break; + case 13: return tRotate<13>(in);break; + case 14: return tRotate<14>(in);break; + case 15: return tRotate<15>(in);break; + default: assert(0); + } + } + static inline __m512d rotate(__m512d in,int n){ + switch(n){ + case 0: return tRotate<0>(in);break; + case 1: return tRotate<1>(in);break; + case 2: return tRotate<2>(in);break; + case 3: return tRotate<3>(in);break; + case 4: return tRotate<4>(in);break; + case 5: return tRotate<5>(in);break; + case 6: return tRotate<6>(in);break; + case 7: return tRotate<7>(in);break; + default: assert(0); + } + } + + template static inline __m512 tRotate(__m512 in){ + return (__m512)_mm512_alignr_epi32((__m512i)in,(__m512i)in,n); + }; + + template static inline __m512d tRotate(__m512d in){ + return (__m512d)_mm512_alignr_epi32((__m512i)in,(__m512i)in,2*n); + }; + + }; + ////////////////////////////////////////////// @@ -358,7 +408,7 @@ namespace Optimization { ////////////////////////////////////////////////////////////////////////////////////// // Here assign types -namespace Grid { + typedef __m512 SIMD_Ftype; // Single precision type typedef __m512d SIMD_Dtype; // Double precision type typedef __m512i SIMD_Itype; // Integer type