mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
Fixing IMCI
This commit is contained in:
parent
e3f141f82f
commit
c79ea0dcef
11
configure.ac
11
configure.ac
@ -55,15 +55,6 @@ echo :::::::::::::::::::::::::::::::::::::::::::
|
|||||||
|
|
||||||
AC_CHECK_FUNCS([gettimeofday])
|
AC_CHECK_FUNCS([gettimeofday])
|
||||||
|
|
||||||
#AC_CHECK_LIB([gmp],[__gmpf_init],,
|
|
||||||
# [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
|
|
||||||
#Please install or provide the correct path to your installation
|
|
||||||
#Info at: http://www.gmplib.org)])
|
|
||||||
|
|
||||||
#AC_CHECK_LIB([mpfr],[mpfr_init],,
|
|
||||||
# [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
|
|
||||||
#Please install or provide the correct path to your installation
|
|
||||||
#Info at: http://www.mpfr.org/)])
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# SIMD instructions selection
|
# SIMD instructions selection
|
||||||
@ -124,7 +115,7 @@ case ${ac_SIMD} in
|
|||||||
echo Configuring for IMCI
|
echo Configuring for IMCI
|
||||||
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] )
|
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] )
|
||||||
supported="cross compilation"
|
supported="cross compilation"
|
||||||
ac_ZMM=yes;
|
ac_ZMM=no;
|
||||||
;;
|
;;
|
||||||
NEONv8)
|
NEONv8)
|
||||||
echo Configuring for experimental ARMv8a support
|
echo Configuring for experimental ARMv8a support
|
||||||
|
@ -16,9 +16,13 @@
|
|||||||
#define INCLUDED_ALG_REMEZ_H
|
#define INCLUDED_ALG_REMEZ_H
|
||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
#include <Config.h>
|
||||||
|
|
||||||
//#include <algorithms/approx/bigfloat.h>
|
#ifdef HAVE_GMP_H
|
||||||
|
#include <algorithms/approx/bigfloat.h>
|
||||||
|
#else
|
||||||
#include <algorithms/approx/bigfloat_double.h>
|
#include <algorithms/approx/bigfloat_double.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
|
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
|
||||||
#define SUM_MAX 10 // Maximum number of terms in exponential
|
#define SUM_MAX 10 // Maximum number of terms in exponential
|
||||||
|
@ -27,7 +27,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
#if defined(AVX512) || defined (IMCI)
|
#if defined(AVX512)
|
||||||
//#if defined (IMCI)
|
//#if defined (IMCI)
|
||||||
|
|
||||||
#include <simd/Intel512wilson.h>
|
#include <simd/Intel512wilson.h>
|
||||||
|
@ -36,7 +36,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
//----------------------------------------------------------------------
|
//----------------------------------------------------------------------
|
||||||
|
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
|
#include <zmmintrin.h>
|
||||||
|
|
||||||
|
namespace Grid{
|
||||||
namespace Optimization {
|
namespace Optimization {
|
||||||
|
|
||||||
struct Vsplat{
|
struct Vsplat{
|
||||||
@ -316,6 +318,54 @@ namespace Optimization {
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct Rotate{
|
||||||
|
|
||||||
|
static inline __m512 rotate(__m512 in,int n){
|
||||||
|
switch(n){
|
||||||
|
case 0: return tRotate<0>(in);break;
|
||||||
|
case 1: return tRotate<1>(in);break;
|
||||||
|
case 2: return tRotate<2>(in);break;
|
||||||
|
case 3: return tRotate<3>(in);break;
|
||||||
|
case 4: return tRotate<4>(in);break;
|
||||||
|
case 5: return tRotate<5>(in);break;
|
||||||
|
case 6: return tRotate<6>(in);break;
|
||||||
|
case 7: return tRotate<7>(in);break;
|
||||||
|
|
||||||
|
case 8 : return tRotate<8>(in);break;
|
||||||
|
case 9 : return tRotate<9>(in);break;
|
||||||
|
case 10: return tRotate<10>(in);break;
|
||||||
|
case 11: return tRotate<11>(in);break;
|
||||||
|
case 12: return tRotate<12>(in);break;
|
||||||
|
case 13: return tRotate<13>(in);break;
|
||||||
|
case 14: return tRotate<14>(in);break;
|
||||||
|
case 15: return tRotate<15>(in);break;
|
||||||
|
default: assert(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
static inline __m512d rotate(__m512d in,int n){
|
||||||
|
switch(n){
|
||||||
|
case 0: return tRotate<0>(in);break;
|
||||||
|
case 1: return tRotate<1>(in);break;
|
||||||
|
case 2: return tRotate<2>(in);break;
|
||||||
|
case 3: return tRotate<3>(in);break;
|
||||||
|
case 4: return tRotate<4>(in);break;
|
||||||
|
case 5: return tRotate<5>(in);break;
|
||||||
|
case 6: return tRotate<6>(in);break;
|
||||||
|
case 7: return tRotate<7>(in);break;
|
||||||
|
default: assert(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int n> static inline __m512 tRotate(__m512 in){
|
||||||
|
return (__m512)_mm512_alignr_epi32((__m512i)in,(__m512i)in,n);
|
||||||
|
};
|
||||||
|
|
||||||
|
template<int n> static inline __m512d tRotate(__m512d in){
|
||||||
|
return (__m512d)_mm512_alignr_epi32((__m512i)in,(__m512i)in,2*n);
|
||||||
|
};
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
@ -358,7 +408,7 @@ namespace Optimization {
|
|||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Here assign types
|
// Here assign types
|
||||||
namespace Grid {
|
|
||||||
typedef __m512 SIMD_Ftype; // Single precision type
|
typedef __m512 SIMD_Ftype; // Single precision type
|
||||||
typedef __m512d SIMD_Dtype; // Double precision type
|
typedef __m512d SIMD_Dtype; // Double precision type
|
||||||
typedef __m512i SIMD_Itype; // Integer type
|
typedef __m512i SIMD_Itype; // Integer type
|
||||||
|
Loading…
x
Reference in New Issue
Block a user