mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	Merge branch 'develop' into feature/hmc_generalise
This commit is contained in:
		@@ -5,8 +5,10 @@
 | 
				
			|||||||
    Source file: ./lib/simd/Grid_generic.h
 | 
					    Source file: ./lib/simd/Grid_generic.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Copyright (C) 2015
 | 
					    Copyright (C) 2015
 | 
				
			||||||
 | 
					    Copyright (C) 2017
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Author: Antonin Portelli <antonin.portelli@me.com>
 | 
					Author: Antonin Portelli <antonin.portelli@me.com>
 | 
				
			||||||
 | 
					        Andrew Lawson    <andrew.lawson1991@gmail.com>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    This program is free software; you can redistribute it and/or modify
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
    it under the terms of the GNU General Public License as published by
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
@@ -26,51 +28,10 @@ Author: Antonin Portelli <antonin.portelli@me.com>
 | 
				
			|||||||
    *************************************************************************************/
 | 
					    *************************************************************************************/
 | 
				
			||||||
    /*  END LEGAL */
 | 
					    /*  END LEGAL */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static_assert(GEN_SIMD_WIDTH % 16u == 0, "SIMD vector size is not an integer multiple of 16 bytes");
 | 
					#include "Grid_generic_types.h"
 | 
				
			||||||
 | 
					 | 
				
			||||||
//#define VECTOR_LOOPS
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// playing with compiler pragmas
 | 
					 | 
				
			||||||
#ifdef VECTOR_LOOPS
 | 
					 | 
				
			||||||
#ifdef __clang__
 | 
					 | 
				
			||||||
#define VECTOR_FOR(i, w, inc)\
 | 
					 | 
				
			||||||
_Pragma("clang loop unroll(full) vectorize(enable) interleave(enable) vectorize_width(w)")\
 | 
					 | 
				
			||||||
for (unsigned int i = 0; i < w; i += inc)
 | 
					 | 
				
			||||||
#elif defined __INTEL_COMPILER
 | 
					 | 
				
			||||||
#define VECTOR_FOR(i, w, inc)\
 | 
					 | 
				
			||||||
_Pragma("simd vectorlength(w*8)")\
 | 
					 | 
				
			||||||
for (unsigned int i = 0; i < w; i += inc)
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
#define VECTOR_FOR(i, w, inc)\
 | 
					 | 
				
			||||||
for (unsigned int i = 0; i < w; i += inc)
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
#define VECTOR_FOR(i, w, inc)\
 | 
					 | 
				
			||||||
for (unsigned int i = 0; i < w; i += inc)
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Grid {
 | 
					namespace Grid {
 | 
				
			||||||
namespace Optimization {
 | 
					namespace Optimization {
 | 
				
			||||||
 | 
					 | 
				
			||||||
  // type traits giving the number of elements for each vector type
 | 
					 | 
				
			||||||
  template <typename T> struct W;
 | 
					 | 
				
			||||||
  template <> struct W<double> {
 | 
					 | 
				
			||||||
    constexpr static unsigned int c = GEN_SIMD_WIDTH/16u;
 | 
					 | 
				
			||||||
    constexpr static unsigned int r = GEN_SIMD_WIDTH/8u;
 | 
					 | 
				
			||||||
  };
 | 
					 | 
				
			||||||
  template <> struct W<float> {
 | 
					 | 
				
			||||||
    constexpr static unsigned int c = GEN_SIMD_WIDTH/8u;
 | 
					 | 
				
			||||||
    constexpr static unsigned int r = GEN_SIMD_WIDTH/4u;
 | 
					 | 
				
			||||||
  };
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  // SIMD vector types
 | 
					 | 
				
			||||||
  template <typename T>
 | 
					 | 
				
			||||||
  struct vec {
 | 
					 | 
				
			||||||
    alignas(GEN_SIMD_WIDTH) T v[W<T>::r];
 | 
					 | 
				
			||||||
  };
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  typedef vec<float>   vecf;
 | 
					 | 
				
			||||||
  typedef vec<double>  vecd;
 | 
					 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  struct Vsplat{
 | 
					  struct Vsplat{
 | 
				
			||||||
    // Complex
 | 
					    // Complex
 | 
				
			||||||
@@ -99,11 +60,6 @@ namespace Optimization {
 | 
				
			|||||||
      
 | 
					      
 | 
				
			||||||
      return out;
 | 
					      return out;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    // Integer
 | 
					 | 
				
			||||||
    inline int operator()(Integer a){
 | 
					 | 
				
			||||||
      return a;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  struct Vstore{
 | 
					  struct Vstore{
 | 
				
			||||||
@@ -112,11 +68,6 @@ namespace Optimization {
 | 
				
			|||||||
    inline void operator()(vec<T> a, T *D){
 | 
					    inline void operator()(vec<T> a, T *D){
 | 
				
			||||||
      *((vec<T> *)D) = a;
 | 
					      *((vec<T> *)D) = a;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    //Integer
 | 
					 | 
				
			||||||
    inline void operator()(int a, Integer *I){
 | 
					 | 
				
			||||||
      *I = a;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  struct Vstream{
 | 
					  struct Vstream{
 | 
				
			||||||
@@ -151,11 +102,6 @@ namespace Optimization {
 | 
				
			|||||||
      
 | 
					      
 | 
				
			||||||
      return out;
 | 
					      return out;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Integer
 | 
					 | 
				
			||||||
    inline int operator()(Integer *a){
 | 
					 | 
				
			||||||
      return *a;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  /////////////////////////////////////////////////////
 | 
					  /////////////////////////////////////////////////////
 | 
				
			||||||
@@ -174,11 +120,6 @@ namespace Optimization {
 | 
				
			|||||||
      
 | 
					      
 | 
				
			||||||
      return out;
 | 
					      return out;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    //I nteger
 | 
					 | 
				
			||||||
    inline int operator()(int a, int b){
 | 
					 | 
				
			||||||
      return a + b;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  struct Sub{
 | 
					  struct Sub{
 | 
				
			||||||
@@ -194,11 +135,6 @@ namespace Optimization {
 | 
				
			|||||||
      
 | 
					      
 | 
				
			||||||
      return out;
 | 
					      return out;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    //Integer
 | 
					 | 
				
			||||||
    inline int operator()(int a, int b){
 | 
					 | 
				
			||||||
      return a-b;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  struct Mult{
 | 
					  struct Mult{
 | 
				
			||||||
@@ -214,11 +150,6 @@ namespace Optimization {
 | 
				
			|||||||
      
 | 
					      
 | 
				
			||||||
      return out;
 | 
					      return out;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    // Integer
 | 
					 | 
				
			||||||
    inline int operator()(int a, int b){
 | 
					 | 
				
			||||||
      return a*b;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  #define cmul(a, b, c, i)\
 | 
					  #define cmul(a, b, c, i)\
 | 
				
			||||||
@@ -232,13 +163,26 @@ namespace Optimization {
 | 
				
			|||||||
      
 | 
					      
 | 
				
			||||||
      VECTOR_FOR(i, W<T>::c, 1)
 | 
					      VECTOR_FOR(i, W<T>::c, 1)
 | 
				
			||||||
      {
 | 
					      {
 | 
				
			||||||
         out.v[2*i]   = a[2*i]*b[2*i];
 | 
					         out.v[2*i]   = a.v[2*i]*b.v[2*i];
 | 
				
			||||||
         out.v[2*i+1] = a[2*i]*b[2*i+1];
 | 
					         out.v[2*i+1] = a.v[2*i]*b.v[2*i+1];
 | 
				
			||||||
      }      
 | 
					      }      
 | 
				
			||||||
      return out;
 | 
					      return out;
 | 
				
			||||||
    };
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  struct MaddRealPart{
 | 
				
			||||||
 | 
					    template <typename T>
 | 
				
			||||||
 | 
					    inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
 | 
				
			||||||
 | 
					      vec<T> out;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      VECTOR_FOR(i, W<T>::c, 1)
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
					         out.v[2*i]   = a.v[2*i]*b.v[2*i] + c.v[2*i];
 | 
				
			||||||
 | 
					         out.v[2*i+1] = a.v[2*i]*b.v[2*i+1] + c.v[2*i+1];
 | 
				
			||||||
 | 
					      }      
 | 
				
			||||||
 | 
					      return out;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  struct MultComplex{
 | 
					  struct MultComplex{
 | 
				
			||||||
    // Complex
 | 
					    // Complex
 | 
				
			||||||
@@ -369,6 +313,11 @@ namespace Optimization {
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  struct Rotate{
 | 
					  struct Rotate{
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					    template <int n, typename T> static inline vec<T> tRotate(vec<T> in){
 | 
				
			||||||
 | 
					      return rotate(in, n);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
    template <typename T>
 | 
					    template <typename T>
 | 
				
			||||||
    static inline vec<T> rotate(vec<T> in, int n){
 | 
					    static inline vec<T> rotate(vec<T> in, int n){
 | 
				
			||||||
      vec<T> out;
 | 
					      vec<T> out;
 | 
				
			||||||
@@ -442,8 +391,12 @@ namespace Optimization {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  //Integer Reduce
 | 
					  //Integer Reduce
 | 
				
			||||||
  template<>
 | 
					  template<>
 | 
				
			||||||
  inline Integer Reduce<Integer, int>::operator()(int in){
 | 
					  inline Integer Reduce<Integer, veci>::operator()(veci in){
 | 
				
			||||||
    return in;
 | 
					    Integer a = 0;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    acc(in.v, a, 0, 1, W<Integer>::r);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    return a;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -452,7 +405,7 @@ namespace Optimization {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  typedef Optimization::vecf SIMD_Ftype; // Single precision type
 | 
					  typedef Optimization::vecf SIMD_Ftype; // Single precision type
 | 
				
			||||||
  typedef Optimization::vecd SIMD_Dtype; // Double precision type
 | 
					  typedef Optimization::vecd SIMD_Dtype; // Double precision type
 | 
				
			||||||
  typedef int SIMD_Itype; // Integer type
 | 
					  typedef Optimization::veci SIMD_Itype; // Integer type
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // prefetch utilities
 | 
					  // prefetch utilities
 | 
				
			||||||
  inline void v_prefetch0(int size, const char *ptr){};
 | 
					  inline void v_prefetch0(int size, const char *ptr){};
 | 
				
			||||||
@@ -472,6 +425,7 @@ namespace Optimization {
 | 
				
			|||||||
  typedef Optimization::Mult        MultSIMD;
 | 
					  typedef Optimization::Mult        MultSIMD;
 | 
				
			||||||
  typedef Optimization::MultComplex MultComplexSIMD;
 | 
					  typedef Optimization::MultComplex MultComplexSIMD;
 | 
				
			||||||
  typedef Optimization::MultRealPart MultRealPartSIMD;
 | 
					  typedef Optimization::MultRealPart MultRealPartSIMD;
 | 
				
			||||||
 | 
					  typedef Optimization::MaddRealPart MaddRealPartSIMD;
 | 
				
			||||||
  typedef Optimization::Conj        ConjSIMD;
 | 
					  typedef Optimization::Conj        ConjSIMD;
 | 
				
			||||||
  typedef Optimization::TimesMinusI TimesMinusISIMD;
 | 
					  typedef Optimization::TimesMinusI TimesMinusISIMD;
 | 
				
			||||||
  typedef Optimization::TimesI      TimesISIMD;
 | 
					  typedef Optimization::TimesI      TimesISIMD;
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										80
									
								
								lib/simd/Grid_generic_types.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								lib/simd/Grid_generic_types.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,80 @@
 | 
				
			|||||||
 | 
					    /*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Grid physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Source file: ./lib/simd/Grid_generic_types.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Copyright (C) 2017
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Antonin Portelli <antonin.portelli@me.com>
 | 
				
			||||||
 | 
					        Andrew Lawson    <andrew.lawson1991@gmail.com>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
 | 
					    *************************************************************************************/
 | 
				
			||||||
 | 
					    /*  END LEGAL */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static_assert(GEN_SIMD_WIDTH % 16u == 0, "SIMD vector size is not an integer multiple of 16 bytes");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//#define VECTOR_LOOPS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// playing with compiler pragmas
 | 
				
			||||||
 | 
					#ifdef VECTOR_LOOPS
 | 
				
			||||||
 | 
					#ifdef __clang__
 | 
				
			||||||
 | 
					#define VECTOR_FOR(i, w, inc)\
 | 
				
			||||||
 | 
					_Pragma("clang loop unroll(full) vectorize(enable) interleave(enable) vectorize_width(w)")\
 | 
				
			||||||
 | 
					for (unsigned int i = 0; i < w; i += inc)
 | 
				
			||||||
 | 
					#elif defined __INTEL_COMPILER
 | 
				
			||||||
 | 
					#define VECTOR_FOR(i, w, inc)\
 | 
				
			||||||
 | 
					_Pragma("simd vectorlength(w*8)")\
 | 
				
			||||||
 | 
					for (unsigned int i = 0; i < w; i += inc)
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#define VECTOR_FOR(i, w, inc)\
 | 
				
			||||||
 | 
					for (unsigned int i = 0; i < w; i += inc)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#define VECTOR_FOR(i, w, inc)\
 | 
				
			||||||
 | 
					for (unsigned int i = 0; i < w; i += inc)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace Grid {
 | 
				
			||||||
 | 
					namespace Optimization {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // type traits giving the number of elements for each vector type
 | 
				
			||||||
 | 
					  template <typename T> struct W;
 | 
				
			||||||
 | 
					  template <> struct W<double> {
 | 
				
			||||||
 | 
					    constexpr static unsigned int c = GEN_SIMD_WIDTH/16u;
 | 
				
			||||||
 | 
					    constexpr static unsigned int r = GEN_SIMD_WIDTH/8u;
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					  template <> struct W<float> {
 | 
				
			||||||
 | 
					    constexpr static unsigned int c = GEN_SIMD_WIDTH/8u;
 | 
				
			||||||
 | 
					    constexpr static unsigned int r = GEN_SIMD_WIDTH/4u;
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					  template <> struct W<Integer> {
 | 
				
			||||||
 | 
					    constexpr static unsigned int r = GEN_SIMD_WIDTH/4u;
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  // SIMD vector types
 | 
				
			||||||
 | 
					  template <typename T>
 | 
				
			||||||
 | 
					  struct vec {
 | 
				
			||||||
 | 
					    alignas(GEN_SIMD_WIDTH) T v[W<T>::r];
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  typedef vec<float>   vecf;
 | 
				
			||||||
 | 
					  typedef vec<double>  vecd;
 | 
				
			||||||
 | 
					  typedef vec<Integer> veci;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					}}
 | 
				
			||||||
@@ -5,8 +5,10 @@
 | 
				
			|||||||
 Source file: ./lib/simd/Grid_qpx.h
 | 
					 Source file: ./lib/simd/Grid_qpx.h
 | 
				
			||||||
 
 | 
					 
 | 
				
			||||||
 Copyright (C) 2016
 | 
					 Copyright (C) 2016
 | 
				
			||||||
 | 
					 Copyright (C) 2017
 | 
				
			||||||
 
 | 
					 
 | 
				
			||||||
 Author: Antonin Portelli <antonin.portelli@me.com>
 | 
					 Author: Antonin Portelli <antonin.portelli@me.com>
 | 
				
			||||||
 | 
					         Andrew Lawson    <andrew.lawson1991@gmail.com>
 | 
				
			||||||
 
 | 
					 
 | 
				
			||||||
 This program is free software; you can redistribute it and/or modify
 | 
					 This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 it under the terms of the GNU General Public License as published by
 | 
					 it under the terms of the GNU General Public License as published by
 | 
				
			||||||
@@ -25,6 +27,11 @@
 | 
				
			|||||||
 See the full license in the file "LICENSE" in the top level distribution directory
 | 
					 See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
 ******************************************************************************/
 | 
					 ******************************************************************************/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef GEN_SIMD_WIDTH
 | 
				
			||||||
 | 
					#define GEN_SIMD_WIDTH 32u
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#include "Grid_generic_types.h" // Definitions for simulated integer SIMD.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Grid {
 | 
					namespace Grid {
 | 
				
			||||||
namespace Optimization {
 | 
					namespace Optimization {
 | 
				
			||||||
  typedef struct 
 | 
					  typedef struct 
 | 
				
			||||||
@@ -62,8 +69,15 @@ namespace Optimization {
 | 
				
			|||||||
      return (vector4double){a, a, a, a};
 | 
					      return (vector4double){a, a, a, a};
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    //Integer
 | 
					    //Integer
 | 
				
			||||||
    inline int operator()(Integer a){
 | 
					    inline veci operator()(Integer a){
 | 
				
			||||||
      return a;
 | 
					      veci out;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      VECTOR_FOR(i, W<Integer>::r, 1)
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
					        out.v[i] = a;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      return out;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
@@ -88,9 +102,10 @@ namespace Optimization {
 | 
				
			|||||||
    inline void operator()(vector4double a, double *d){
 | 
					    inline void operator()(vector4double a, double *d){
 | 
				
			||||||
      vec_st(a, 0, d);
 | 
					      vec_st(a, 0, d);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    //Integer
 | 
					    //Integer
 | 
				
			||||||
    inline void operator()(int a, Integer *i){
 | 
					    inline void operator()(veci a, Integer *i){
 | 
				
			||||||
      i[0] = a;
 | 
					      *((veci *)i) = a;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
@@ -142,11 +157,13 @@ namespace Optimization {
 | 
				
			|||||||
      return vec_ld(0, a);
 | 
					      return vec_ld(0, a);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    // Integer
 | 
					    // Integer
 | 
				
			||||||
    inline int operator()(Integer *a){
 | 
					    inline veci operator()(Integer *a){
 | 
				
			||||||
      return a[0];
 | 
					      veci out;
 | 
				
			||||||
    }
 | 
					      
 | 
				
			||||||
    
 | 
					      out = *((veci *)a);
 | 
				
			||||||
    
 | 
					      
 | 
				
			||||||
 | 
					      return out;
 | 
				
			||||||
 | 
					    }    
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  template <typename Out_type, typename In_type>
 | 
					  template <typename Out_type, typename In_type>
 | 
				
			||||||
@@ -200,8 +217,15 @@ namespace Optimization {
 | 
				
			|||||||
    FLOAT_WRAP_2(operator(), inline)
 | 
					    FLOAT_WRAP_2(operator(), inline)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    //Integer
 | 
					    //Integer
 | 
				
			||||||
    inline int operator()(int a, int b){
 | 
					    inline veci operator()(veci a, veci b){
 | 
				
			||||||
      return a + b;
 | 
					      veci out;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      VECTOR_FOR(i, W<Integer>::r, 1)
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
					        out.v[i] = a.v[i] + b.v[i];
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      return out;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
@@ -215,8 +239,15 @@ namespace Optimization {
 | 
				
			|||||||
    FLOAT_WRAP_2(operator(), inline)
 | 
					    FLOAT_WRAP_2(operator(), inline)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    //Integer
 | 
					    //Integer
 | 
				
			||||||
    inline int operator()(int a, int b){
 | 
					    inline veci operator()(veci a, veci b){
 | 
				
			||||||
      return a - b;
 | 
					      veci out;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      VECTOR_FOR(i, W<Integer>::r, 1)
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
					        out.v[i] = a.v[i] - b.v[i];
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      return out;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
@@ -248,8 +279,15 @@ namespace Optimization {
 | 
				
			|||||||
    FLOAT_WRAP_2(operator(), inline)
 | 
					    FLOAT_WRAP_2(operator(), inline)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Integer
 | 
					    // Integer
 | 
				
			||||||
    inline int operator()(int a, int b){
 | 
					    inline veci operator()(veci a, veci b){
 | 
				
			||||||
      return a*b;
 | 
					      veci out;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      VECTOR_FOR(i, W<Integer>::r, 1)
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
					        out.v[i] = a.v[i]*b.v[i];
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      return out;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -263,8 +301,15 @@ namespace Optimization {
 | 
				
			|||||||
    FLOAT_WRAP_2(operator(), inline)
 | 
					    FLOAT_WRAP_2(operator(), inline)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Integer
 | 
					    // Integer
 | 
				
			||||||
    inline int operator()(int a, int b){
 | 
					    inline veci operator()(veci a, veci b){
 | 
				
			||||||
      return a/b;
 | 
					      veci out;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      VECTOR_FOR(i, W<Integer>::r, 1)
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
					        out.v[i] = a.v[i]/b.v[i];
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      return out;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -418,7 +463,7 @@ namespace Optimization {
 | 
				
			|||||||
// Here assign types
 | 
					// Here assign types
 | 
				
			||||||
typedef Optimization::vector4float SIMD_Ftype;  // Single precision type
 | 
					typedef Optimization::vector4float SIMD_Ftype;  // Single precision type
 | 
				
			||||||
typedef vector4double              SIMD_Dtype; // Double precision type
 | 
					typedef vector4double              SIMD_Dtype; // Double precision type
 | 
				
			||||||
typedef int                        SIMD_Itype; // Integer type
 | 
					typedef Optimization::veci         SIMD_Itype; // Integer type
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// prefetch utilities
 | 
					// prefetch utilities
 | 
				
			||||||
inline void v_prefetch0(int size, const char *ptr){};
 | 
					inline void v_prefetch0(int size, const char *ptr){};
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -760,6 +760,15 @@ typedef Grid_simd<std::complex<float>, SIMD_Ftype> vComplexF;
 | 
				
			|||||||
typedef Grid_simd<std::complex<double>, SIMD_Dtype> vComplexD;
 | 
					typedef Grid_simd<std::complex<double>, SIMD_Dtype> vComplexD;
 | 
				
			||||||
typedef Grid_simd<Integer, SIMD_Itype> vInteger;
 | 
					typedef Grid_simd<Integer, SIMD_Itype> vInteger;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Check our vector types are of an appropriate size.
 | 
				
			||||||
 | 
					#if defined QPX
 | 
				
			||||||
 | 
					static_assert(2*sizeof(SIMD_Ftype) == sizeof(SIMD_Dtype), "SIMD vector lengths incorrect");
 | 
				
			||||||
 | 
					static_assert(2*sizeof(SIMD_Ftype) == sizeof(SIMD_Itype), "SIMD vector lengths incorrect");
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					static_assert(sizeof(SIMD_Ftype) == sizeof(SIMD_Dtype), "SIMD vector lengths incorrect");
 | 
				
			||||||
 | 
					static_assert(sizeof(SIMD_Ftype) == sizeof(SIMD_Itype), "SIMD vector lengths incorrect");
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/////////////////////////////////////////
 | 
					/////////////////////////////////////////
 | 
				
			||||||
// Some traits to recognise the types
 | 
					// Some traits to recognise the types
 | 
				
			||||||
/////////////////////////////////////////
 | 
					/////////////////////////////////////////
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -180,6 +180,65 @@ void Tester(const functor &func)
 | 
				
			|||||||
  assert(ok==0);
 | 
					  assert(ok==0);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class functor>
 | 
				
			||||||
 | 
					void IntTester(const functor &func)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  typedef Integer  scal;
 | 
				
			||||||
 | 
					  typedef vInteger vec;
 | 
				
			||||||
 | 
					  GridSerialRNG          sRNG;
 | 
				
			||||||
 | 
					  sRNG.SeedRandomDevice();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int Nsimd = vec::Nsimd();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::vector<scal> input1(Nsimd);
 | 
				
			||||||
 | 
					  std::vector<scal> input2(Nsimd);
 | 
				
			||||||
 | 
					  std::vector<scal> result(Nsimd);
 | 
				
			||||||
 | 
					  std::vector<scal> reference(Nsimd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::vector<vec,alignedAllocator<vec> > buf(3);
 | 
				
			||||||
 | 
					  vec & v_input1 = buf[0];
 | 
				
			||||||
 | 
					  vec & v_input2 = buf[1];
 | 
				
			||||||
 | 
					  vec & v_result = buf[2];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(int i=0;i<Nsimd;i++){
 | 
				
			||||||
 | 
					    input1[i] = (i + 1) * 30;
 | 
				
			||||||
 | 
					    input2[i] = (i + 1) * 20;
 | 
				
			||||||
 | 
					    result[i] = (i + 1) * 10;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  merge<vec,scal>(v_input1,input1);
 | 
				
			||||||
 | 
					  merge<vec,scal>(v_input2,input2);
 | 
				
			||||||
 | 
					  merge<vec,scal>(v_result,result);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  func(v_result,v_input1,v_input2);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(int i=0;i<Nsimd;i++) {
 | 
				
			||||||
 | 
					    func(reference[i],input1[i],input2[i]);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  extract<vec,scal>(v_result,result);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " " << func.name() << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogDebug << v_input1 << std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogDebug << v_input2 << std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogDebug << v_result << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int ok=0;
 | 
				
			||||||
 | 
					  for(int i=0;i<Nsimd;i++){
 | 
				
			||||||
 | 
					    if ( reference[i]-result[i] != 0){
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage<< "*****" << std::endl;
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage<< "["<<i<<"] "<< reference[i]-result[i] << " " <<reference[i]<< " " << result[i]<<std::endl;
 | 
				
			||||||
 | 
					      ok++;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  if ( ok==0 ) {
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << " OK!" <<std::endl;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  assert(ok==0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class reduced,class scal, class vec,class functor > 
 | 
					template<class reduced,class scal, class vec,class functor > 
 | 
				
			||||||
void ReductionTester(const functor &func)
 | 
					void ReductionTester(const functor &func)
 | 
				
			||||||
@@ -472,6 +531,13 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  for(int r=0;r<vComplexD::Nsimd();r++){
 | 
					  for(int r=0;r<vComplexD::Nsimd();r++){
 | 
				
			||||||
    PermTester<ComplexD,vComplexD>(funcRotate(r));
 | 
					    PermTester<ComplexD,vComplexD>(funcRotate(r));
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "==================================="<<  std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "Testing vInteger                   "<<  std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "==================================="<<  std::endl;
 | 
				
			||||||
 | 
					  IntTester(funcPlus());
 | 
				
			||||||
 | 
					  IntTester(funcMinus());
 | 
				
			||||||
 | 
					  IntTester(funcTimes());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Grid_finalize();
 | 
					  Grid_finalize();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user