1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Fixing IMCI

This commit is contained in:
paboyle 2016-04-22 21:52:54 -07:00
parent e3f141f82f
commit c79ea0dcef
4 changed files with 58 additions and 13 deletions

View File

@ -55,15 +55,6 @@ echo :::::::::::::::::::::::::::::::::::::::::::
AC_CHECK_FUNCS([gettimeofday])
#AC_CHECK_LIB([gmp],[__gmpf_init],,
# [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
#Please install or provide the correct path to your installation
#Info at: http://www.gmplib.org)])
#AC_CHECK_LIB([mpfr],[mpfr_init],,
# [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
#Please install or provide the correct path to your installation
#Info at: http://www.mpfr.org/)])
#
# SIMD instructions selection
@ -124,7 +115,7 @@ case ${ac_SIMD} in
echo Configuring for IMCI
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] )
supported="cross compilation"
ac_ZMM=yes;
ac_ZMM=no;
;;
NEONv8)
echo Configuring for experimental ARMv8a support

View File

@ -16,9 +16,13 @@
#define INCLUDED_ALG_REMEZ_H
#include <stddef.h>
#include <Config.h>
//#include <algorithms/approx/bigfloat.h>
#ifdef HAVE_GMP_H
#include <algorithms/approx/bigfloat.h>
#else
#include <algorithms/approx/bigfloat_double.h>
#endif
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
#define SUM_MAX 10 // Maximum number of terms in exponential

View File

@ -27,7 +27,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#if defined(AVX512) || defined (IMCI)
#if defined(AVX512)
//#if defined (IMCI)
#include <simd/Intel512wilson.h>

View File

@ -36,7 +36,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
//----------------------------------------------------------------------
#include <immintrin.h>
#include <zmmintrin.h>
namespace Grid{
namespace Optimization {
struct Vsplat{
@ -316,6 +318,54 @@ namespace Optimization {
};
// Lane rotation of 512-bit vectors by a run-time amount.
//
// _mm512_alignr_epi32 takes its shift count as an immediate, so the run-time
// count `n` is dispatched through a switch onto the compile-time tRotate<n>
// helpers below.
struct Rotate{

  // Rotate a single-precision vector by n 32-bit lanes.
  //   in : vector to rotate
  //   n  : rotation count, must be in [0,15]
  // An out-of-range n trips the assert; if asserts are compiled out (NDEBUG)
  // the input is returned unrotated.
  static inline __m512 rotate(__m512 in,int n){
    switch(n){
    case 0:  return tRotate<0>(in);
    case 1:  return tRotate<1>(in);
    case 2:  return tRotate<2>(in);
    case 3:  return tRotate<3>(in);
    case 4:  return tRotate<4>(in);
    case 5:  return tRotate<5>(in);
    case 6:  return tRotate<6>(in);
    case 7:  return tRotate<7>(in);
    case 8:  return tRotate<8>(in);
    case 9:  return tRotate<9>(in);
    case 10: return tRotate<10>(in);
    case 11: return tRotate<11>(in);
    case 12: return tRotate<12>(in);
    case 13: return tRotate<13>(in);
    case 14: return tRotate<14>(in);
    case 15: return tRotate<15>(in);
    default: assert(0);
    }
    // Only reached when assert() is disabled: flowing off the end of a
    // value-returning function is undefined behaviour, so return a defined value.
    return in;
  }

  // Rotate a double-precision vector by n 64-bit lanes.
  //   in : vector to rotate
  //   n  : rotation count, must be in [0,7]
  // An out-of-range n trips the assert; if asserts are compiled out (NDEBUG)
  // the input is returned unrotated.
  static inline __m512d rotate(__m512d in,int n){
    switch(n){
    case 0: return tRotate<0>(in);
    case 1: return tRotate<1>(in);
    case 2: return tRotate<2>(in);
    case 3: return tRotate<3>(in);
    case 4: return tRotate<4>(in);
    case 5: return tRotate<5>(in);
    case 6: return tRotate<6>(in);
    case 7: return tRotate<7>(in);
    default: assert(0);
    }
    // Only reached when assert() is disabled; see the single-precision overload.
    return in;
  }

  // Compile-time rotation by n 32-bit lanes: the vector is passed as both
  // operands of _mm512_alignr_epi32 with shift count n.
  template<int n> static inline __m512 tRotate(__m512 in){
    return (__m512)_mm512_alignr_epi32((__m512i)in,(__m512i)in,n);
  }

  // Compile-time rotation by n 64-bit lanes: each double spans two 32-bit
  // lanes, hence the shift count of 2*n.
  template<int n> static inline __m512d tRotate(__m512d in){
    return (__m512d)_mm512_alignr_epi32((__m512i)in,(__m512i)in,2*n);
  }

};
//////////////////////////////////////////////
@ -358,7 +408,7 @@ namespace Optimization {
//////////////////////////////////////////////////////////////////////////////////////
// Here assign types
namespace Grid {
typedef __m512 SIMD_Ftype; // Single precision type
typedef __m512d SIMD_Dtype; // Double precision type
typedef __m512i SIMD_Itype; // Integer type