mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Fixing IMCI
This commit is contained in:
		
							
								
								
									
										11
									
								
								configure.ac
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								configure.ac
									
									
									
									
									
								
							@@ -55,15 +55,6 @@ echo :::::::::::::::::::::::::::::::::::::::::::
 | 
			
		||||
 | 
			
		||||
AC_CHECK_FUNCS([gettimeofday])
 | 
			
		||||
 | 
			
		||||
#AC_CHECK_LIB([gmp],[__gmpf_init],,
 | 
			
		||||
#        [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
 | 
			
		||||
#Please install or provide the correct path to your installation
 | 
			
		||||
#Info at: http://www.gmplib.org)])
 | 
			
		||||
 | 
			
		||||
#AC_CHECK_LIB([mpfr],[mpfr_init],,
 | 
			
		||||
#        [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
 | 
			
		||||
#Please install or provide the correct path to your installation
 | 
			
		||||
#Info at: http://www.mpfr.org/)])
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# SIMD instructions selection
 | 
			
		||||
@@ -124,7 +115,7 @@ case ${ac_SIMD} in
 | 
			
		||||
       echo Configuring for IMCI
 | 
			
		||||
       AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] )
 | 
			
		||||
       supported="cross compilation"
 | 
			
		||||
       ac_ZMM=yes;
 | 
			
		||||
       ac_ZMM=no;
 | 
			
		||||
     ;;
 | 
			
		||||
     NEONv8)
 | 
			
		||||
       echo Configuring for experimental ARMv8a support 
 | 
			
		||||
 
 | 
			
		||||
@@ -16,9 +16,13 @@
 | 
			
		||||
#define INCLUDED_ALG_REMEZ_H
 | 
			
		||||
 | 
			
		||||
#include <stddef.h>
 | 
			
		||||
#include <Config.h>
 | 
			
		||||
 | 
			
		||||
//#include <algorithms/approx/bigfloat.h>
 | 
			
		||||
#ifdef HAVE_GMP_H
 | 
			
		||||
#include <algorithms/approx/bigfloat.h>
 | 
			
		||||
#else
 | 
			
		||||
#include <algorithms/approx/bigfloat_double.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
 | 
			
		||||
#define SUM_MAX 10 // Maximum number of terms in exponential
 | 
			
		||||
 
 | 
			
		||||
@@ -27,7 +27,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
#if defined(AVX512) || defined (IMCI)
 | 
			
		||||
#if defined(AVX512) 
 | 
			
		||||
//#if defined (IMCI)
 | 
			
		||||
 | 
			
		||||
#include <simd/Intel512wilson.h>
 | 
			
		||||
 
 | 
			
		||||
@@ -36,7 +36,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
//----------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
#include <immintrin.h>
 | 
			
		||||
#include <zmmintrin.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid{
 | 
			
		||||
namespace Optimization {
 | 
			
		||||
  
 | 
			
		||||
  struct Vsplat{
 | 
			
		||||
@@ -316,6 +318,54 @@ namespace Optimization {
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 
 | 
			
		||||
  struct Rotate{
 | 
			
		||||
 | 
			
		||||
    static inline __m512 rotate(__m512 in,int n){ 
 | 
			
		||||
      switch(n){
 | 
			
		||||
      case 0: return tRotate<0>(in);break;
 | 
			
		||||
      case 1: return tRotate<1>(in);break;
 | 
			
		||||
      case 2: return tRotate<2>(in);break;
 | 
			
		||||
      case 3: return tRotate<3>(in);break;
 | 
			
		||||
      case 4: return tRotate<4>(in);break;
 | 
			
		||||
      case 5: return tRotate<5>(in);break;
 | 
			
		||||
      case 6: return tRotate<6>(in);break;
 | 
			
		||||
      case 7: return tRotate<7>(in);break;
 | 
			
		||||
 | 
			
		||||
      case 8 : return tRotate<8>(in);break;
 | 
			
		||||
      case 9 : return tRotate<9>(in);break;
 | 
			
		||||
      case 10: return tRotate<10>(in);break;
 | 
			
		||||
      case 11: return tRotate<11>(in);break;
 | 
			
		||||
      case 12: return tRotate<12>(in);break;
 | 
			
		||||
      case 13: return tRotate<13>(in);break;
 | 
			
		||||
      case 14: return tRotate<14>(in);break;
 | 
			
		||||
      case 15: return tRotate<15>(in);break;
 | 
			
		||||
      default: assert(0);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    static inline __m512d rotate(__m512d in,int n){ 
 | 
			
		||||
      switch(n){
 | 
			
		||||
      case 0: return tRotate<0>(in);break;
 | 
			
		||||
      case 1: return tRotate<1>(in);break;
 | 
			
		||||
      case 2: return tRotate<2>(in);break;
 | 
			
		||||
      case 3: return tRotate<3>(in);break;
 | 
			
		||||
      case 4: return tRotate<4>(in);break;
 | 
			
		||||
      case 5: return tRotate<5>(in);break;
 | 
			
		||||
      case 6: return tRotate<6>(in);break;
 | 
			
		||||
      case 7: return tRotate<7>(in);break;
 | 
			
		||||
      default: assert(0);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<int n> static inline __m512 tRotate(__m512 in){ 
 | 
			
		||||
      return (__m512)_mm512_alignr_epi32((__m512i)in,(__m512i)in,n);          
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template<int n> static inline __m512d tRotate(__m512d in){ 
 | 
			
		||||
      return (__m512d)_mm512_alignr_epi32((__m512i)in,(__m512i)in,2*n);          
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////
 | 
			
		||||
@@ -358,7 +408,7 @@ namespace Optimization {
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Here assign types 
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  typedef __m512 SIMD_Ftype;  // Single precision type
 | 
			
		||||
  typedef __m512d SIMD_Dtype; // Double precision type
 | 
			
		||||
  typedef __m512i SIMD_Itype; // Integer type
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user