mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	More NEON functionalities
This commit is contained in:
		
							
								
								
									
										6
									
								
								configure
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								configure
									
									
									
									
										vendored
									
									
								
							@@ -6712,10 +6712,10 @@ $as_echo "#define AVX512 1" >>confdefs.h
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
       supported="cross compilation"
 | 
					       supported="cross compilation"
 | 
				
			||||||
     ;;
 | 
					     ;;
 | 
				
			||||||
     NEONv7)
 | 
					     NEONv8)
 | 
				
			||||||
       echo Configuring for experimental ARMv7 support
 | 
					       echo Configuring for experimental ARMv8a support
 | 
				
			||||||
 | 
					
 | 
				
			||||||
$as_echo "#define NEONv7 1" >>confdefs.h
 | 
					$as_echo "#define NEONv8 1" >>confdefs.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
       supported="cross compilation"
 | 
					       supported="cross compilation"
 | 
				
			||||||
     ;;
 | 
					     ;;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -3,7 +3,7 @@
 | 
				
			|||||||
#
 | 
					#
 | 
				
			||||||
# Project Grid package  
 | 
					# Project Grid package  
 | 
				
			||||||
# 
 | 
					# 
 | 
				
			||||||
# Time-stamp: <2015-06-09 15:26:39 neo>
 | 
					# Time-stamp: <2015-07-10 17:46:21 neo>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
AC_PREREQ([2.63])
 | 
					AC_PREREQ([2.63])
 | 
				
			||||||
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
 | 
					AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
 | 
				
			||||||
@@ -106,9 +106,9 @@ case ${ac_SIMD} in
 | 
				
			|||||||
       AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Corner] )
 | 
					       AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Corner] )
 | 
				
			||||||
       supported="cross compilation"
 | 
					       supported="cross compilation"
 | 
				
			||||||
     ;;
 | 
					     ;;
 | 
				
			||||||
     NEONv7)
 | 
					     NEONv8)
 | 
				
			||||||
       echo Configuring for experimental ARMv7 support 
 | 
					       echo Configuring for experimental ARMv8a support 
 | 
				
			||||||
       AC_DEFINE([NEONv7],[1],[NEON ARMv7 Experimental support ] )
 | 
					       AC_DEFINE([NEONv8],[1],[NEON ARMv8 Experimental support ] )
 | 
				
			||||||
       supported="cross compilation"
 | 
					       supported="cross compilation"
 | 
				
			||||||
     ;;
 | 
					     ;;
 | 
				
			||||||
     DEBUG)
 | 
					     DEBUG)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -119,8 +119,8 @@
 | 
				
			|||||||
/* Define to 1 if you have the <unistd.h> header file. */
 | 
					/* Define to 1 if you have the <unistd.h> header file. */
 | 
				
			||||||
#undef HAVE_UNISTD_H
 | 
					#undef HAVE_UNISTD_H
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* NEON ARMv7 Experimental support */
 | 
					/* NEON ARMv8 Experimental support */
 | 
				
			||||||
#undef NEONv7
 | 
					#undef NEONv8
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Name of package */
 | 
					/* Name of package */
 | 
				
			||||||
#undef PACKAGE
 | 
					#undef PACKAGE
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -7,7 +7,7 @@ namespace Grid{
 | 
				
			|||||||
    ////////////////////////////////////////////////////////////////////////
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
    // Wilson Gauge Action .. should I template the Nc etc..
 | 
					    // Wilson Gauge Action .. should I template the Nc etc..
 | 
				
			||||||
    ////////////////////////////////////////////////////////////////////////
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
    template<class GaugeField,class MatrixField>
 | 
					    template<class GaugeField, class MatrixField>
 | 
				
			||||||
      class WilsonGaugeAction : public Action<GaugeField> {
 | 
					      class WilsonGaugeAction : public Action<GaugeField> {
 | 
				
			||||||
    private:
 | 
					    private:
 | 
				
			||||||
      RealD beta;
 | 
					      RealD beta;
 | 
				
			||||||
@@ -23,7 +23,6 @@ namespace Grid{
 | 
				
			|||||||
	return beta*(1.0 -plaq)*(Nd*(Nd-1.0))*vol*0.5;
 | 
						return beta*(1.0 -plaq)*(Nd*(Nd-1.0))*vol*0.5;
 | 
				
			||||||
      };
 | 
					      };
 | 
				
			||||||
      virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
 | 
					      virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
 | 
				
			||||||
 | 
					 | 
				
			||||||
	//not optimal implementation FIXME
 | 
						//not optimal implementation FIXME
 | 
				
			||||||
	//extend Ta to include Lorentz indexes
 | 
						//extend Ta to include Lorentz indexes
 | 
				
			||||||
	RealD factor = 0.5*beta/RealD(Nc);
 | 
						RealD factor = 0.5*beta/RealD(Nc);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -7,7 +7,6 @@ namespace QCD {
 | 
				
			|||||||
template<class GaugeMat,class GaugeLorentz>
 | 
					template<class GaugeMat,class GaugeLorentz>
 | 
				
			||||||
class WilsonLoops {
 | 
					class WilsonLoops {
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
 | 
					 | 
				
			||||||
  //////////////////////////////////////////////////
 | 
					  //////////////////////////////////////////////////
 | 
				
			||||||
  // directed plaquette oriented in mu,nu plane
 | 
					  // directed plaquette oriented in mu,nu plane
 | 
				
			||||||
  //////////////////////////////////////////////////
 | 
					  //////////////////////////////////////////////////
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,14 +1,16 @@
 | 
				
			|||||||
//----------------------------------------------------------------------
 | 
					//----------------------------------------------------------------------
 | 
				
			||||||
/*! @file Grid_sse4.h
 | 
					/*! @file Grid_sse4.h
 | 
				
			||||||
  @brief Optimization libraries for NEON (ARM) instructions set ARMv7
 | 
					  @brief Optimization libraries for NEON (ARM) instructions set ARMv8
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Experimental - Using intrinsics - DEVELOPING! 
 | 
					  Experimental - Using intrinsics - DEVELOPING! 
 | 
				
			||||||
*/
 | 
					*/
 | 
				
			||||||
// Time-stamp: <2015-06-09 15:25:40 neo>
 | 
					// Time-stamp: <2015-07-10 17:45:09 neo>
 | 
				
			||||||
//----------------------------------------------------------------------
 | 
					//----------------------------------------------------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <arm_neon.h>
 | 
					#include <arm_neon.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// ARMv8 supports double precision
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Optimization {
 | 
					namespace Optimization {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  template<class vtype>
 | 
					  template<class vtype>
 | 
				
			||||||
@@ -22,50 +24,47 @@ namespace Optimization {
 | 
				
			|||||||
    float f[4];
 | 
					    float f[4];
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
  union u128d {
 | 
					  union u128d {
 | 
				
			||||||
    float32x4_t v;
 | 
					    float64x2_t v;
 | 
				
			||||||
    float f[4];
 | 
					    double f[4];
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  struct Vsplat{
 | 
					  struct Vsplat{
 | 
				
			||||||
    //Complex float
 | 
					    //Complex float
 | 
				
			||||||
    inline float32x4_t operator()(float a, float b){
 | 
					    inline float32x4_t operator()(float a, float b){
 | 
				
			||||||
      float32x4_t foo;
 | 
					      float tmp[4]={a,b,a,b};
 | 
				
			||||||
      return foo;
 | 
					      return vld1q_f32(tmp);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    // Real float
 | 
					    // Real float
 | 
				
			||||||
    inline float32x4_t operator()(float a){
 | 
					    inline float32x4_t operator()(float a){
 | 
				
			||||||
      float32x4_t foo;
 | 
					      return vld1q_dup_f32(&a);
 | 
				
			||||||
      return foo;
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    //Complex double
 | 
					    //Complex double
 | 
				
			||||||
    inline float32x4_t operator()(double a, double b){
 | 
					    inline float32x4_t operator()(double a, double b){
 | 
				
			||||||
      float32x4_t foo;
 | 
					      float tmp[4]={(float)a,(float)b,(float)a,(float)b};
 | 
				
			||||||
      return foo;
 | 
					      return vld1q_f32(tmp);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    //Real double
 | 
					    //Real double
 | 
				
			||||||
    inline float32x4_t operator()(double a){
 | 
					    inline float32x4_t operator()(double a){
 | 
				
			||||||
      float32x4_t foo;
 | 
					      return vld1q_dup_f32(&a);
 | 
				
			||||||
      return foo;
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    //Integer
 | 
					    //Integer
 | 
				
			||||||
    inline uint32x4_t operator()(Integer a){
 | 
					    inline uint32x4_t operator()(Integer a){
 | 
				
			||||||
      uint32x4_t foo;
 | 
					      return vld1q_dup_u32(&a);
 | 
				
			||||||
      return foo;
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  struct Vstore{
 | 
					  struct Vstore{
 | 
				
			||||||
    //Float 
 | 
					    //Float 
 | 
				
			||||||
    inline void operator()(float32x4_t a, float* F){
 | 
					    inline void operator()(float32x4_t a, float* F){
 | 
				
			||||||
      
 | 
					      vst1q_f32(F, a);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    //Double
 | 
					    //Double
 | 
				
			||||||
    inline void operator()(float32x4_t a, double* D){
 | 
					    inline void operator()(float32x4_t a, double* D){
 | 
				
			||||||
      
 | 
					      vst1q_f32((float*)D, a);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    //Integer
 | 
					    //Integer
 | 
				
			||||||
    inline void operator()(uint32x4_t a, Integer* I){
 | 
					    inline void operator()(uint32x4_t a, Integer* I){
 | 
				
			||||||
     
 | 
					      vst1q_u32(I, a);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
@@ -130,36 +129,30 @@ namespace Optimization {
 | 
				
			|||||||
  struct Sum{
 | 
					  struct Sum{
 | 
				
			||||||
    //Complex/Real float
 | 
					    //Complex/Real float
 | 
				
			||||||
    inline float32x4_t operator()(float32x4_t a, float32x4_t b){
 | 
					    inline float32x4_t operator()(float32x4_t a, float32x4_t b){
 | 
				
			||||||
      float32x4_t foo;
 | 
					      return vaddq_f32(a,b);
 | 
				
			||||||
      return foo;
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    //Complex/Real double
 | 
					    //Complex/Real double
 | 
				
			||||||
    //inline float32x4_t operator()(float32x4_t a, float32x4_t b){
 | 
					    inline float64x2_t operator()(float64x2_t a, float64x2_t b){
 | 
				
			||||||
    //  float32x4_t foo;
 | 
					      return vaddq_f64(a,b);
 | 
				
			||||||
    //  return foo;
 | 
					    }
 | 
				
			||||||
    //}
 | 
					 | 
				
			||||||
    //Integer
 | 
					    //Integer
 | 
				
			||||||
    inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){
 | 
					    inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){
 | 
				
			||||||
      uint32x4_t foo;
 | 
					      return vaddq_u32(a,b);
 | 
				
			||||||
      return foo;
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  struct Sub{
 | 
					  struct Sub{
 | 
				
			||||||
    //Complex/Real float
 | 
					    //Complex/Real float
 | 
				
			||||||
    inline float32x4_t operator()(float32x4_t a, float32x4_t b){
 | 
					    inline float32x4_t operator()(float32x4_t a, float32x4_t b){
 | 
				
			||||||
      float32x4_t foo;
 | 
					      return vsubq_f32(a,b);
 | 
				
			||||||
      return foo;
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    //Complex/Real double
 | 
					    //Complex/Real double
 | 
				
			||||||
    //inline float32x4_t operator()(float32x4_t a, float32x4_t b){
 | 
					    inline float64x2_t operator()(float64x2_t a, float64x2_t b){
 | 
				
			||||||
    //  float32x4_t foo;
 | 
					      return vsubq_f64(a,b);
 | 
				
			||||||
    //  return foo;
 | 
					    }
 | 
				
			||||||
    //}
 | 
					 | 
				
			||||||
    //Integer
 | 
					    //Integer
 | 
				
			||||||
    inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){
 | 
					    inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){
 | 
				
			||||||
      uint32x4_t foo;
 | 
					      return vsubq_u32(a,b);
 | 
				
			||||||
      return foo;
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -170,24 +163,24 @@ namespace Optimization {
 | 
				
			|||||||
      return foo;
 | 
					      return foo;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    // Complex double
 | 
					    // Complex double
 | 
				
			||||||
    //inline float32x4_t operator()(float32x4_t a, float32x4_t b){
 | 
					    inline float64x2_t operator()(float64x2_t a, float64x2_t b){
 | 
				
			||||||
    //  float32x4_t foo;
 | 
					      float32x4_t foo;
 | 
				
			||||||
    //  return foo;
 | 
					      return foo;
 | 
				
			||||||
    //}
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  struct Mult{
 | 
					  struct Mult{
 | 
				
			||||||
    // Real float
 | 
					    // Real float
 | 
				
			||||||
    inline float32x4_t operator()(float32x4_t a, float32x4_t b){
 | 
					    inline float32x4_t operator()(float32x4_t a, float32x4_t b){
 | 
				
			||||||
      return a;
 | 
					      return vmulq_f32(a,b);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    // Real double
 | 
					    // Real double
 | 
				
			||||||
    //inline float32x4_t operator()(float32x4_t a, float32x4_t b){
 | 
					    inline float64x2_t operator()(float64x2_t a, float64x2_t b){
 | 
				
			||||||
    //  return 0;
 | 
					      return vmulq_f64(a,b);
 | 
				
			||||||
    //}
 | 
					    }
 | 
				
			||||||
    // Integer
 | 
					    // Integer
 | 
				
			||||||
    inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){
 | 
					    inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){
 | 
				
			||||||
      return a;
 | 
					      return vmulq_u32(a,b);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -219,6 +212,7 @@ namespace Optimization {
 | 
				
			|||||||
  struct TimesI{
 | 
					  struct TimesI{
 | 
				
			||||||
    //Complex single
 | 
					    //Complex single
 | 
				
			||||||
    inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
 | 
					    inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
 | 
				
			||||||
 | 
					      //need shuffle
 | 
				
			||||||
      return in;
 | 
					      return in;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    //Complex double
 | 
					    //Complex double
 | 
				
			||||||
@@ -242,20 +236,25 @@ namespace Optimization {
 | 
				
			|||||||
  //Real float Reduce
 | 
					  //Real float Reduce
 | 
				
			||||||
  template<>
 | 
					  template<>
 | 
				
			||||||
  inline Grid::RealF Reduce<Grid::RealF, float32x4_t>::operator()(float32x4_t in){
 | 
					  inline Grid::RealF Reduce<Grid::RealF, float32x4_t>::operator()(float32x4_t in){
 | 
				
			||||||
    return 0;
 | 
					    float32x2_t high = vget_high_f32(in);
 | 
				
			||||||
 | 
					    float32x2_t low = vget_low_f32(in);
 | 
				
			||||||
 | 
					    float32x2_t tmp = vadd_f32(low, high);
 | 
				
			||||||
 | 
					    float32x2_t sum = vpadd_f32(tmp, tmp);
 | 
				
			||||||
 | 
					    return vget_lane_f32(sum,0);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  //Complex double Reduce
 | 
					  //Complex double Reduce
 | 
				
			||||||
  template<>
 | 
					  template<>
 | 
				
			||||||
  inline Grid::ComplexD Reduce<Grid::ComplexD, float32x4_t>::operator()(float32x4_t in){
 | 
					  inline Grid::ComplexD Reduce<Grid::ComplexD, float64x2_t>::operator()(float64x2_t in){
 | 
				
			||||||
    return 0;
 | 
					    return 0;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  //Real double Reduce
 | 
					  //Real double Reduce
 | 
				
			||||||
  template<>
 | 
					  template<>
 | 
				
			||||||
  inline Grid::RealD Reduce<Grid::RealD, float32x4_t>::operator()(float32x4_t in){
 | 
					  inline Grid::RealD Reduce<Grid::RealD, float64x2_t>::operator()(float64x2_t in){
 | 
				
			||||||
    return 0;
 | 
					    float64x2_t sum = vpaddq_f64(in, in);
 | 
				
			||||||
 | 
					    return vgetq_lane_f64(sum,0);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  //Integer Reduce
 | 
					  //Integer Reduce
 | 
				
			||||||
@@ -272,7 +271,7 @@ namespace Optimization {
 | 
				
			|||||||
namespace Grid {
 | 
					namespace Grid {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  typedef float32x4_t  SIMD_Ftype; // Single precision type
 | 
					  typedef float32x4_t  SIMD_Ftype; // Single precision type
 | 
				
			||||||
  typedef float32x4_t  SIMD_Dtype; // Double precision type - no double on ARMv7
 | 
					  typedef float64x2_t  SIMD_Dtype; // Double precision type
 | 
				
			||||||
  typedef uint32x4_t   SIMD_Itype; // Integer type
 | 
					  typedef uint32x4_t   SIMD_Itype; // Integer type
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  inline void v_prefetch0(int size, const char *ptr){};  // prefetch utilities
 | 
					  inline void v_prefetch0(int size, const char *ptr){};  // prefetch utilities
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -2,7 +2,7 @@
 | 
				
			|||||||
/*! @file Grid_vector_types.h
 | 
					/*! @file Grid_vector_types.h
 | 
				
			||||||
  @brief Defines templated class Grid_simd to deal with inner vector types
 | 
					  @brief Defines templated class Grid_simd to deal with inner vector types
 | 
				
			||||||
*/
 | 
					*/
 | 
				
			||||||
// Time-stamp: <2015-06-09 15:00:47 neo>
 | 
					// Time-stamp: <2015-07-10 17:45:33 neo>
 | 
				
			||||||
//---------------------------------------------------------------------------
 | 
					//---------------------------------------------------------------------------
 | 
				
			||||||
#ifndef GRID_VECTOR_TYPES
 | 
					#ifndef GRID_VECTOR_TYPES
 | 
				
			||||||
#define GRID_VECTOR_TYPES
 | 
					#define GRID_VECTOR_TYPES
 | 
				
			||||||
@@ -22,7 +22,7 @@
 | 
				
			|||||||
#if defined QPX
 | 
					#if defined QPX
 | 
				
			||||||
#include "Grid_qpx.h"
 | 
					#include "Grid_qpx.h"
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
#ifdef NEONv7
 | 
					#ifdef NEONv8
 | 
				
			||||||
#include "Grid_neon.h"
 | 
					#include "Grid_neon.h"
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										3
									
								
								scripts/arm_configure.experimental_cortex57
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								scripts/arm_configure.experimental_cortex57
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,3 @@
 | 
				
			|||||||
 | 
					#./configure --host=arm-linux-gnueabihf  CXX=clang++-3.5 CXXFLAGS='-std=c++11 -O3 -target arm-linux-gnueabihf -I/usr/arm-linux-gnueabihf/include/ -I/home/neo/Codes/gmp6.0/gmp-arm/include/ -I/usr/lib/llvm-3.5/lib/clang/3.5.0/include/ -L/home/neo/Codes/gmp6.0/gmp-arm/lib/ -I/home/neo/Codes/mpfr3.1.2/mpfr-arm/include/ -L/home/neo/Codes/mpfr3.1.2/mpfr-arm/lib/ -static -mcpu=cortex-a57' --enable-simd=NEONv7
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					./configure --host=aarch64-linux-gnu  CXX=clang++-3.5 CXXFLAGS='-std=c++11 -O3 -target aarch64-linux-gnu -static -I/home/neo/Codes/gmp6.0/gmp-armv8/include/ -L/home/neo/Codes/gmp6.0/gmp-armv8/lib/ -I/home/neo/Codes/mpfr3.1.2/mpfr-armv8/include/ -L/home/neo/Codes/mpfr3.1.2/mpfr-armv8/lib/ -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/4.8.2/aarch64-linux-gnu/' --enable-simd=NEONv7
 | 
				
			||||||
@@ -24,13 +24,13 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  GridCartesian           Fine(latt_size,simd_layout,mpi_layout);
 | 
					  GridCartesian           Fine(latt_size,simd_layout,mpi_layout);
 | 
				
			||||||
  GridParallelRNG  pRNG(&Fine);
 | 
					  GridParallelRNG  pRNG(&Fine);
 | 
				
			||||||
  pRNG.SeedRandomDevice();
 | 
					  pRNG.SeedRandomDevice();
 | 
				
			||||||
  LatticeLorentzColourMatrix     U(&Fine);
 | 
					  LatticeGaugeField U(&Fine);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  SU3::HotConfiguration(pRNG, U);
 | 
					  SU3::HotConfiguration(pRNG, U);
 | 
				
			||||||
 
 | 
					 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // simplify template declaration? Strip the lorentz from the second template
 | 
					  // simplify template declaration? Strip the lorentz from the second template
 | 
				
			||||||
  WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(6.0);
 | 
					  WilsonGaugeAction<LatticeGaugeField, LatticeColourMatrix> Waction(6.0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  //Collect actions
 | 
					  //Collect actions
 | 
				
			||||||
  ActionLevel Level1;
 | 
					  ActionLevel Level1;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -194,6 +194,31 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  // Insist that operations on random scalars gives
 | 
					  // Insist that operations on random scalars gives
 | 
				
			||||||
  // identical results to on vectors.
 | 
					  // identical results to on vectors.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << "==================================="<<  std::endl;
 | 
				
			||||||
 | 
					  std::cout << "Testing vRealF "<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << "==================================="<<  std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Tester<RealF,vRealF>(funcPlus());
 | 
				
			||||||
 | 
					  Tester<RealF,vRealF>(funcMinus());
 | 
				
			||||||
 | 
					  Tester<RealF,vRealF>(funcTimes());
 | 
				
			||||||
 | 
					  Tester<RealF,vRealF>(funcAdj());
 | 
				
			||||||
 | 
					  Tester<RealF,vRealF>(funcConj());
 | 
				
			||||||
 | 
					  Tester<RealF,vRealF>(funcInnerProduct());
 | 
				
			||||||
 | 
					  ReductionTester<RealF,RealF,vRealF>(funcReduce());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << "==================================="<<  std::endl;
 | 
				
			||||||
 | 
					  std::cout << "Testing vRealD "<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << "==================================="<<  std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Tester<RealD,vRealD>(funcPlus());
 | 
				
			||||||
 | 
					  Tester<RealD,vRealD>(funcMinus());
 | 
				
			||||||
 | 
					  Tester<RealD,vRealD>(funcTimes());
 | 
				
			||||||
 | 
					  Tester<RealD,vRealD>(funcAdj());
 | 
				
			||||||
 | 
					  Tester<RealD,vRealD>(funcConj());
 | 
				
			||||||
 | 
					  Tester<RealD,vRealD>(funcInnerProduct());
 | 
				
			||||||
 | 
					  ReductionTester<RealD,RealD,vRealD>(funcReduce());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  std::cout << "==================================="<<  std::endl;
 | 
					  std::cout << "==================================="<<  std::endl;
 | 
				
			||||||
  std::cout << "Testing vComplexF "<<std::endl;
 | 
					  std::cout << "Testing vComplexF "<<std::endl;
 | 
				
			||||||
  std::cout << "==================================="<<  std::endl;
 | 
					  std::cout << "==================================="<<  std::endl;
 | 
				
			||||||
@@ -223,30 +248,6 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  Tester<ComplexD,vComplexD>(funcInnerProduct());
 | 
					  Tester<ComplexD,vComplexD>(funcInnerProduct());
 | 
				
			||||||
  ReductionTester<ComplexD,ComplexD,vComplexD>(funcReduce());
 | 
					  ReductionTester<ComplexD,ComplexD,vComplexD>(funcReduce());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  std::cout << "==================================="<<  std::endl;
 | 
					 | 
				
			||||||
  std::cout << "Testing vRealF "<<std::endl;
 | 
					 | 
				
			||||||
  std::cout << "==================================="<<  std::endl;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  Tester<RealF,vRealF>(funcPlus());
 | 
					 | 
				
			||||||
  Tester<RealF,vRealF>(funcMinus());
 | 
					 | 
				
			||||||
  Tester<RealF,vRealF>(funcTimes());
 | 
					 | 
				
			||||||
  Tester<RealF,vRealF>(funcAdj());
 | 
					 | 
				
			||||||
  Tester<RealF,vRealF>(funcConj());
 | 
					 | 
				
			||||||
  Tester<RealF,vRealF>(funcInnerProduct());
 | 
					 | 
				
			||||||
  ReductionTester<RealF,RealF,vRealF>(funcReduce());
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  std::cout << "==================================="<<  std::endl;
 | 
					 | 
				
			||||||
  std::cout << "Testing vRealD "<<std::endl;
 | 
					 | 
				
			||||||
  std::cout << "==================================="<<  std::endl;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  Tester<RealD,vRealD>(funcPlus());
 | 
					 | 
				
			||||||
  Tester<RealD,vRealD>(funcMinus());
 | 
					 | 
				
			||||||
  Tester<RealD,vRealD>(funcTimes());
 | 
					 | 
				
			||||||
  Tester<RealD,vRealD>(funcAdj());
 | 
					 | 
				
			||||||
  Tester<RealD,vRealD>(funcConj());
 | 
					 | 
				
			||||||
  Tester<RealD,vRealD>(funcInnerProduct());
 | 
					 | 
				
			||||||
  ReductionTester<RealD,RealD,vRealD>(funcReduce());
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Grid_finalize();
 | 
					  Grid_finalize();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user