mirror of
https://github.com/paboyle/Grid.git
synced 2024-12-23 19:35:26 +00:00
Fixing IMCI
This commit is contained in:
parent
e3f141f82f
commit
c79ea0dcef
11
configure.ac
11
configure.ac
@ -55,15 +55,6 @@ echo :::::::::::::::::::::::::::::::::::::::::::
|
||||
|
||||
AC_CHECK_FUNCS([gettimeofday])
|
||||
|
||||
#AC_CHECK_LIB([gmp],[__gmpf_init],,
|
||||
# [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
|
||||
#Please install or provide the correct path to your installation
|
||||
#Info at: http://www.gmplib.org)])
|
||||
|
||||
#AC_CHECK_LIB([mpfr],[mpfr_init],,
|
||||
# [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
|
||||
#Please install or provide the correct path to your installation
|
||||
#Info at: http://www.mpfr.org/)])
|
||||
|
||||
#
|
||||
# SIMD instructions selection
|
||||
@ -124,7 +115,7 @@ case ${ac_SIMD} in
|
||||
echo Configuring for IMCI
|
||||
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] )
|
||||
supported="cross compilation"
|
||||
ac_ZMM=yes;
|
||||
ac_ZMM=no;
|
||||
;;
|
||||
NEONv8)
|
||||
echo Configuring for experimental ARMv8a support
|
||||
|
@ -16,9 +16,13 @@
|
||||
#define INCLUDED_ALG_REMEZ_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <Config.h>
|
||||
|
||||
//#include <algorithms/approx/bigfloat.h>
|
||||
#ifdef HAVE_GMP_H
|
||||
#include <algorithms/approx/bigfloat.h>
|
||||
#else
|
||||
#include <algorithms/approx/bigfloat_double.h>
|
||||
#endif
|
||||
|
||||
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
|
||||
#define SUM_MAX 10 // Maximum number of terms in exponential
|
||||
|
@ -27,7 +27,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid.h>
|
||||
#if defined(AVX512) || defined (IMCI)
|
||||
#if defined(AVX512)
|
||||
//#if defined (IMCI)
|
||||
|
||||
#include <simd/Intel512wilson.h>
|
||||
|
@ -36,7 +36,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <zmmintrin.h>
|
||||
|
||||
namespace Grid{
|
||||
namespace Optimization {
|
||||
|
||||
struct Vsplat{
|
||||
@ -316,6 +318,54 @@ namespace Optimization {
|
||||
|
||||
};
|
||||
|
||||
struct Rotate{
|
||||
|
||||
static inline __m512 rotate(__m512 in,int n){
|
||||
switch(n){
|
||||
case 0: return tRotate<0>(in);break;
|
||||
case 1: return tRotate<1>(in);break;
|
||||
case 2: return tRotate<2>(in);break;
|
||||
case 3: return tRotate<3>(in);break;
|
||||
case 4: return tRotate<4>(in);break;
|
||||
case 5: return tRotate<5>(in);break;
|
||||
case 6: return tRotate<6>(in);break;
|
||||
case 7: return tRotate<7>(in);break;
|
||||
|
||||
case 8 : return tRotate<8>(in);break;
|
||||
case 9 : return tRotate<9>(in);break;
|
||||
case 10: return tRotate<10>(in);break;
|
||||
case 11: return tRotate<11>(in);break;
|
||||
case 12: return tRotate<12>(in);break;
|
||||
case 13: return tRotate<13>(in);break;
|
||||
case 14: return tRotate<14>(in);break;
|
||||
case 15: return tRotate<15>(in);break;
|
||||
default: assert(0);
|
||||
}
|
||||
}
|
||||
static inline __m512d rotate(__m512d in,int n){
|
||||
switch(n){
|
||||
case 0: return tRotate<0>(in);break;
|
||||
case 1: return tRotate<1>(in);break;
|
||||
case 2: return tRotate<2>(in);break;
|
||||
case 3: return tRotate<3>(in);break;
|
||||
case 4: return tRotate<4>(in);break;
|
||||
case 5: return tRotate<5>(in);break;
|
||||
case 6: return tRotate<6>(in);break;
|
||||
case 7: return tRotate<7>(in);break;
|
||||
default: assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
template<int n> static inline __m512 tRotate(__m512 in){
|
||||
return (__m512)_mm512_alignr_epi32((__m512i)in,(__m512i)in,n);
|
||||
};
|
||||
|
||||
template<int n> static inline __m512d tRotate(__m512d in){
|
||||
return (__m512d)_mm512_alignr_epi32((__m512i)in,(__m512i)in,2*n);
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////////////
|
||||
@ -358,7 +408,7 @@ namespace Optimization {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Here assign types
|
||||
namespace Grid {
|
||||
|
||||
typedef __m512 SIMD_Ftype; // Single precision type
|
||||
typedef __m512d SIMD_Dtype; // Double precision type
|
||||
typedef __m512i SIMD_Itype; // Integer type
|
||||
|
Loading…
Reference in New Issue
Block a user