mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-30 19:44:32 +00:00 
			
		
		
		
	Merge pull request #93 from Lanny91/hotfix/qpx
Some fixes for QPX and generic SIMD types.
This commit is contained in:
		| @@ -5,8 +5,10 @@ | |||||||
|     Source file: ./lib/simd/Grid_generic.h |     Source file: ./lib/simd/Grid_generic.h | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |     Copyright (C) 2015 | ||||||
|  |     Copyright (C) 2017 | ||||||
|  |  | ||||||
| Author: Antonin Portelli <antonin.portelli@me.com> | Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|  |         Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |     This program is free software; you can redistribute it and/or modify | ||||||
|     it under the terms of the GNU General Public License as published by |     it under the terms of the GNU General Public License as published by | ||||||
| @@ -26,52 +28,11 @@ Author: Antonin Portelli <antonin.portelli@me.com> | |||||||
|     *************************************************************************************/ |     *************************************************************************************/ | ||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
|  |  | ||||||
| static_assert(GEN_SIMD_WIDTH % 16u == 0, "SIMD vector size is not an integer multiple of 16 bytes"); | #include "Grid_generic_types.h" | ||||||
|  |  | ||||||
| //#define VECTOR_LOOPS |  | ||||||
|  |  | ||||||
| // playing with compiler pragmas |  | ||||||
| #ifdef VECTOR_LOOPS |  | ||||||
| #ifdef __clang__ |  | ||||||
| #define VECTOR_FOR(i, w, inc)\ |  | ||||||
| _Pragma("clang loop unroll(full) vectorize(enable) interleave(enable) vectorize_width(w)")\ |  | ||||||
| for (unsigned int i = 0; i < w; i += inc) |  | ||||||
| #elif defined __INTEL_COMPILER |  | ||||||
| #define VECTOR_FOR(i, w, inc)\ |  | ||||||
| _Pragma("simd vectorlength(w*8)")\ |  | ||||||
| for (unsigned int i = 0; i < w; i += inc) |  | ||||||
| #else |  | ||||||
| #define VECTOR_FOR(i, w, inc)\ |  | ||||||
| for (unsigned int i = 0; i < w; i += inc) |  | ||||||
| #endif |  | ||||||
| #else |  | ||||||
| #define VECTOR_FOR(i, w, inc)\ |  | ||||||
| for (unsigned int i = 0; i < w; i += inc) |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
| namespace Optimization { | namespace Optimization { | ||||||
|    |    | ||||||
|   // type traits giving the number of elements for each vector type |  | ||||||
|   template <typename T> struct W; |  | ||||||
|   template <> struct W<double> { |  | ||||||
|     constexpr static unsigned int c = GEN_SIMD_WIDTH/16u; |  | ||||||
|     constexpr static unsigned int r = GEN_SIMD_WIDTH/8u; |  | ||||||
|   }; |  | ||||||
|   template <> struct W<float> { |  | ||||||
|     constexpr static unsigned int c = GEN_SIMD_WIDTH/8u; |  | ||||||
|     constexpr static unsigned int r = GEN_SIMD_WIDTH/4u; |  | ||||||
|   }; |  | ||||||
|    |  | ||||||
|   // SIMD vector types |  | ||||||
|   template <typename T> |  | ||||||
|   struct vec { |  | ||||||
|     alignas(GEN_SIMD_WIDTH) T v[W<T>::r]; |  | ||||||
|   }; |  | ||||||
|    |  | ||||||
|   typedef vec<float>   vecf; |  | ||||||
|   typedef vec<double>  vecd; |  | ||||||
|    |  | ||||||
|   struct Vsplat{ |   struct Vsplat{ | ||||||
|     // Complex |     // Complex | ||||||
|     template <typename T> |     template <typename T> | ||||||
| @@ -99,11 +60,6 @@ namespace Optimization { | |||||||
|        |        | ||||||
|       return out; |       return out; | ||||||
|     } |     } | ||||||
|      |  | ||||||
|     // Integer |  | ||||||
|     inline int operator()(Integer a){ |  | ||||||
|       return a; |  | ||||||
|     } |  | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   struct Vstore{ |   struct Vstore{ | ||||||
| @@ -112,11 +68,6 @@ namespace Optimization { | |||||||
|     inline void operator()(vec<T> a, T *D){ |     inline void operator()(vec<T> a, T *D){ | ||||||
|       *((vec<T> *)D) = a; |       *((vec<T> *)D) = a; | ||||||
|     } |     } | ||||||
|     //Integer |  | ||||||
|     inline void operator()(int a, Integer *I){ |  | ||||||
|       *I = a; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   struct Vstream{ |   struct Vstream{ | ||||||
| @@ -151,11 +102,6 @@ namespace Optimization { | |||||||
|        |        | ||||||
|       return out; |       return out; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     // Integer |  | ||||||
|     inline int operator()(Integer *a){ |  | ||||||
|       return *a; |  | ||||||
|     } |  | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   ///////////////////////////////////////////////////// |   ///////////////////////////////////////////////////// | ||||||
| @@ -174,11 +120,6 @@ namespace Optimization { | |||||||
|        |        | ||||||
|       return out; |       return out; | ||||||
|     } |     } | ||||||
|      |  | ||||||
|     //I nteger |  | ||||||
|     inline int operator()(int a, int b){ |  | ||||||
|       return a + b; |  | ||||||
|     } |  | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   struct Sub{ |   struct Sub{ | ||||||
| @@ -194,11 +135,6 @@ namespace Optimization { | |||||||
|        |        | ||||||
|       return out; |       return out; | ||||||
|     } |     } | ||||||
|      |  | ||||||
|     //Integer |  | ||||||
|     inline int operator()(int a, int b){ |  | ||||||
|       return a-b; |  | ||||||
|     } |  | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   struct Mult{ |   struct Mult{ | ||||||
| @@ -214,11 +150,6 @@ namespace Optimization { | |||||||
|        |        | ||||||
|       return out; |       return out; | ||||||
|     } |     } | ||||||
|      |  | ||||||
|     // Integer |  | ||||||
|     inline int operator()(int a, int b){ |  | ||||||
|       return a*b; |  | ||||||
|     } |  | ||||||
|   }; |   }; | ||||||
|    |    | ||||||
|   #define cmul(a, b, c, i)\ |   #define cmul(a, b, c, i)\ | ||||||
| @@ -232,13 +163,26 @@ namespace Optimization { | |||||||
|        |        | ||||||
|       VECTOR_FOR(i, W<T>::c, 1) |       VECTOR_FOR(i, W<T>::c, 1) | ||||||
|       { |       { | ||||||
|          out.v[2*i]   = a[2*i]*b[2*i]; |          out.v[2*i]   = a.v[2*i]*b.v[2*i]; | ||||||
|          out.v[2*i+1] = a[2*i]*b[2*i+1]; |          out.v[2*i+1] = a.v[2*i]*b.v[2*i+1]; | ||||||
|       }       |       }       | ||||||
|       return out; |       return out; | ||||||
|     }; |     } | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|  |   struct MaddRealPart{ | ||||||
|  |     template <typename T> | ||||||
|  |     inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){ | ||||||
|  |       vec<T> out; | ||||||
|  |        | ||||||
|  |       VECTOR_FOR(i, W<T>::c, 1) | ||||||
|  |       { | ||||||
|  |          out.v[2*i]   = a.v[2*i]*b.v[2*i] + c.v[2*i]; | ||||||
|  |          out.v[2*i+1] = a.v[2*i]*b.v[2*i+1] + c.v[2*i+1]; | ||||||
|  |       }       | ||||||
|  |       return out; | ||||||
|  |     } | ||||||
|  |   }; | ||||||
|    |    | ||||||
|   struct MultComplex{ |   struct MultComplex{ | ||||||
|     // Complex |     // Complex | ||||||
| @@ -369,6 +313,11 @@ namespace Optimization { | |||||||
|   } |   } | ||||||
|    |    | ||||||
|   struct Rotate{ |   struct Rotate{ | ||||||
|  |        | ||||||
|  |     template <int n, typename T> static inline vec<T> tRotate(vec<T> in){ | ||||||
|  |       return rotate(in, n); | ||||||
|  |     } | ||||||
|  |      | ||||||
|     template <typename T> |     template <typename T> | ||||||
|     static inline vec<T> rotate(vec<T> in, int n){ |     static inline vec<T> rotate(vec<T> in, int n){ | ||||||
|       vec<T> out; |       vec<T> out; | ||||||
| @@ -442,8 +391,12 @@ namespace Optimization { | |||||||
|  |  | ||||||
|   //Integer Reduce |   //Integer Reduce | ||||||
|   template<> |   template<> | ||||||
|   inline Integer Reduce<Integer, int>::operator()(int in){ |   inline Integer Reduce<Integer, veci>::operator()(veci in){ | ||||||
|     return in; |     Integer a = 0; | ||||||
|  |      | ||||||
|  |     acc(in.v, a, 0, 1, W<Integer>::r); | ||||||
|  |      | ||||||
|  |     return a; | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -452,7 +405,7 @@ namespace Optimization { | |||||||
|  |  | ||||||
|   typedef Optimization::vecf SIMD_Ftype; // Single precision type |   typedef Optimization::vecf SIMD_Ftype; // Single precision type | ||||||
|   typedef Optimization::vecd SIMD_Dtype; // Double precision type |   typedef Optimization::vecd SIMD_Dtype; // Double precision type | ||||||
|   typedef int SIMD_Itype; // Integer type |   typedef Optimization::veci SIMD_Itype; // Integer type | ||||||
|  |  | ||||||
|   // prefetch utilities |   // prefetch utilities | ||||||
|   inline void v_prefetch0(int size, const char *ptr){}; |   inline void v_prefetch0(int size, const char *ptr){}; | ||||||
| @@ -472,6 +425,7 @@ namespace Optimization { | |||||||
|   typedef Optimization::Mult        MultSIMD; |   typedef Optimization::Mult        MultSIMD; | ||||||
|   typedef Optimization::MultComplex MultComplexSIMD; |   typedef Optimization::MultComplex MultComplexSIMD; | ||||||
|   typedef Optimization::MultRealPart MultRealPartSIMD; |   typedef Optimization::MultRealPart MultRealPartSIMD; | ||||||
|  |   typedef Optimization::MaddRealPart MaddRealPartSIMD; | ||||||
|   typedef Optimization::Conj        ConjSIMD; |   typedef Optimization::Conj        ConjSIMD; | ||||||
|   typedef Optimization::TimesMinusI TimesMinusISIMD; |   typedef Optimization::TimesMinusI TimesMinusISIMD; | ||||||
|   typedef Optimization::TimesI      TimesISIMD; |   typedef Optimization::TimesI      TimesISIMD; | ||||||
|   | |||||||
							
								
								
									
										80
									
								
								lib/simd/Grid_generic_types.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								lib/simd/Grid_generic_types.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,80 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/simd/Grid_generic_types.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|  |         Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | static_assert(GEN_SIMD_WIDTH % 16u == 0, "SIMD vector size is not an integer multiple of 16 bytes"); | ||||||
|  |  | ||||||
|  | //#define VECTOR_LOOPS | ||||||
|  |  | ||||||
|  | // playing with compiler pragmas | ||||||
|  | #ifdef VECTOR_LOOPS | ||||||
|  | #ifdef __clang__ | ||||||
|  | #define VECTOR_FOR(i, w, inc)\ | ||||||
|  | _Pragma("clang loop unroll(full) vectorize(enable) interleave(enable) vectorize_width(w)")\ | ||||||
|  | for (unsigned int i = 0; i < w; i += inc) | ||||||
|  | #elif defined __INTEL_COMPILER | ||||||
|  | #define VECTOR_FOR(i, w, inc)\ | ||||||
|  | _Pragma("simd vectorlength(w*8)")\ | ||||||
|  | for (unsigned int i = 0; i < w; i += inc) | ||||||
|  | #else | ||||||
|  | #define VECTOR_FOR(i, w, inc)\ | ||||||
|  | for (unsigned int i = 0; i < w; i += inc) | ||||||
|  | #endif | ||||||
|  | #else | ||||||
|  | #define VECTOR_FOR(i, w, inc)\ | ||||||
|  | for (unsigned int i = 0; i < w; i += inc) | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
|  | namespace Optimization { | ||||||
|  |  | ||||||
|  |   // type traits giving the number of elements for each vector type | ||||||
|  |   template <typename T> struct W; | ||||||
|  |   template <> struct W<double> { | ||||||
|  |     constexpr static unsigned int c = GEN_SIMD_WIDTH/16u; | ||||||
|  |     constexpr static unsigned int r = GEN_SIMD_WIDTH/8u; | ||||||
|  |   }; | ||||||
|  |   template <> struct W<float> { | ||||||
|  |     constexpr static unsigned int c = GEN_SIMD_WIDTH/8u; | ||||||
|  |     constexpr static unsigned int r = GEN_SIMD_WIDTH/4u; | ||||||
|  |   }; | ||||||
|  |   template <> struct W<Integer> { | ||||||
|  |     constexpr static unsigned int r = GEN_SIMD_WIDTH/4u; | ||||||
|  |   }; | ||||||
|  |    | ||||||
|  |   // SIMD vector types | ||||||
|  |   template <typename T> | ||||||
|  |   struct vec { | ||||||
|  |     alignas(GEN_SIMD_WIDTH) T v[W<T>::r]; | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   typedef vec<float>   vecf; | ||||||
|  |   typedef vec<double>  vecd; | ||||||
|  |   typedef vec<Integer> veci; | ||||||
|  |    | ||||||
|  | }} | ||||||
| @@ -5,8 +5,10 @@ | |||||||
|  Source file: ./lib/simd/Grid_qpx.h |  Source file: ./lib/simd/Grid_qpx.h | ||||||
|   |   | ||||||
|  Copyright (C) 2016 |  Copyright (C) 2016 | ||||||
|  |  Copyright (C) 2017 | ||||||
|   |   | ||||||
|  Author: Antonin Portelli <antonin.portelli@me.com> |  Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|  |          Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|   |   | ||||||
|  This program is free software; you can redistribute it and/or modify |  This program is free software; you can redistribute it and/or modify | ||||||
|  it under the terms of the GNU General Public License as published by |  it under the terms of the GNU General Public License as published by | ||||||
| @@ -25,6 +27,11 @@ | |||||||
|  See the full license in the file "LICENSE" in the top level distribution directory |  See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  ******************************************************************************/ |  ******************************************************************************/ | ||||||
|  |  | ||||||
|  | #ifndef GEN_SIMD_WIDTH | ||||||
|  | #define GEN_SIMD_WIDTH 32u | ||||||
|  | #endif | ||||||
|  | #include "Grid_generic_types.h" // Definitions for simulated integer SIMD. | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
| namespace Optimization { | namespace Optimization { | ||||||
|   typedef struct  |   typedef struct  | ||||||
| @@ -62,8 +69,15 @@ namespace Optimization { | |||||||
|       return (vector4double){a, a, a, a}; |       return (vector4double){a, a, a, a}; | ||||||
|     } |     } | ||||||
|     //Integer |     //Integer | ||||||
|     inline int operator()(Integer a){ |     inline veci operator()(Integer a){ | ||||||
|       return a; |       veci out; | ||||||
|  |        | ||||||
|  |       VECTOR_FOR(i, W<Integer>::r, 1) | ||||||
|  |       { | ||||||
|  |         out.v[i] = a; | ||||||
|  |       } | ||||||
|  |        | ||||||
|  |       return out; | ||||||
|     } |     } | ||||||
|   }; |   }; | ||||||
|    |    | ||||||
| @@ -88,9 +102,10 @@ namespace Optimization { | |||||||
|     inline void operator()(vector4double a, double *d){ |     inline void operator()(vector4double a, double *d){ | ||||||
|       vec_st(a, 0, d); |       vec_st(a, 0, d); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     //Integer |     //Integer | ||||||
|     inline void operator()(int a, Integer *i){ |     inline void operator()(veci a, Integer *i){ | ||||||
|       i[0] = a; |       *((veci *)i) = a; | ||||||
|     } |     } | ||||||
|   }; |   }; | ||||||
|    |    | ||||||
| @@ -142,11 +157,13 @@ namespace Optimization { | |||||||
|       return vec_ld(0, a); |       return vec_ld(0, a); | ||||||
|     } |     } | ||||||
|     // Integer |     // Integer | ||||||
|     inline int operator()(Integer *a){ |     inline veci operator()(Integer *a){ | ||||||
|       return a[0]; |       veci out; | ||||||
|  |        | ||||||
|  |       out = *((veci *)a); | ||||||
|  |        | ||||||
|  |       return out; | ||||||
|     }     |     }     | ||||||
|      |  | ||||||
|      |  | ||||||
|   }; |   }; | ||||||
|    |    | ||||||
|   template <typename Out_type, typename In_type> |   template <typename Out_type, typename In_type> | ||||||
| @@ -200,8 +217,15 @@ namespace Optimization { | |||||||
|     FLOAT_WRAP_2(operator(), inline) |     FLOAT_WRAP_2(operator(), inline) | ||||||
|  |  | ||||||
|     //Integer |     //Integer | ||||||
|     inline int operator()(int a, int b){ |     inline veci operator()(veci a, veci b){ | ||||||
|       return a + b; |       veci out; | ||||||
|  |        | ||||||
|  |       VECTOR_FOR(i, W<Integer>::r, 1) | ||||||
|  |       { | ||||||
|  |         out.v[i] = a.v[i] + b.v[i]; | ||||||
|  |       } | ||||||
|  |        | ||||||
|  |       return out; | ||||||
|     } |     } | ||||||
|   }; |   }; | ||||||
|    |    | ||||||
| @@ -215,8 +239,15 @@ namespace Optimization { | |||||||
|     FLOAT_WRAP_2(operator(), inline) |     FLOAT_WRAP_2(operator(), inline) | ||||||
|  |  | ||||||
|     //Integer |     //Integer | ||||||
|     inline int operator()(int a, int b){ |     inline veci operator()(veci a, veci b){ | ||||||
|       return a - b; |       veci out; | ||||||
|  |        | ||||||
|  |       VECTOR_FOR(i, W<Integer>::r, 1) | ||||||
|  |       { | ||||||
|  |         out.v[i] = a.v[i] - b.v[i]; | ||||||
|  |       } | ||||||
|  |        | ||||||
|  |       return out; | ||||||
|     } |     } | ||||||
|   }; |   }; | ||||||
|    |    | ||||||
| @@ -248,8 +279,15 @@ namespace Optimization { | |||||||
|     FLOAT_WRAP_2(operator(), inline) |     FLOAT_WRAP_2(operator(), inline) | ||||||
|  |  | ||||||
|     // Integer |     // Integer | ||||||
|     inline int operator()(int a, int b){ |     inline veci operator()(veci a, veci b){ | ||||||
|       return a*b; |       veci out; | ||||||
|  |        | ||||||
|  |       VECTOR_FOR(i, W<Integer>::r, 1) | ||||||
|  |       { | ||||||
|  |         out.v[i] = a.v[i]*b.v[i]; | ||||||
|  |       } | ||||||
|  |        | ||||||
|  |       return out; | ||||||
|     } |     } | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
| @@ -263,8 +301,15 @@ namespace Optimization { | |||||||
|     FLOAT_WRAP_2(operator(), inline) |     FLOAT_WRAP_2(operator(), inline) | ||||||
|  |  | ||||||
|     // Integer |     // Integer | ||||||
|     inline int operator()(int a, int b){ |     inline veci operator()(veci a, veci b){ | ||||||
|       return a/b; |       veci out; | ||||||
|  |        | ||||||
|  |       VECTOR_FOR(i, W<Integer>::r, 1) | ||||||
|  |       { | ||||||
|  |         out.v[i] = a.v[i]/b.v[i]; | ||||||
|  |       } | ||||||
|  |        | ||||||
|  |       return out; | ||||||
|     } |     } | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
| @@ -418,7 +463,7 @@ namespace Optimization { | |||||||
| // Here assign types | // Here assign types | ||||||
| typedef Optimization::vector4float SIMD_Ftype;  // Single precision type | typedef Optimization::vector4float SIMD_Ftype;  // Single precision type | ||||||
| typedef vector4double              SIMD_Dtype; // Double precision type | typedef vector4double              SIMD_Dtype; // Double precision type | ||||||
| typedef int                        SIMD_Itype; // Integer type | typedef Optimization::veci         SIMD_Itype; // Integer type | ||||||
|  |  | ||||||
| // prefetch utilities | // prefetch utilities | ||||||
| inline void v_prefetch0(int size, const char *ptr){}; | inline void v_prefetch0(int size, const char *ptr){}; | ||||||
|   | |||||||
| @@ -747,6 +747,15 @@ typedef Grid_simd<std::complex<float>, SIMD_Ftype> vComplexF; | |||||||
| typedef Grid_simd<std::complex<double>, SIMD_Dtype> vComplexD; | typedef Grid_simd<std::complex<double>, SIMD_Dtype> vComplexD; | ||||||
| typedef Grid_simd<Integer, SIMD_Itype> vInteger; | typedef Grid_simd<Integer, SIMD_Itype> vInteger; | ||||||
|  |  | ||||||
|  | // Check our vector types are of an appropriate size. | ||||||
|  | #if defined QPX | ||||||
|  | static_assert(2*sizeof(SIMD_Ftype) == sizeof(SIMD_Dtype), "SIMD vector lengths incorrect"); | ||||||
|  | static_assert(2*sizeof(SIMD_Ftype) == sizeof(SIMD_Itype), "SIMD vector lengths incorrect"); | ||||||
|  | #else | ||||||
|  | static_assert(sizeof(SIMD_Ftype) == sizeof(SIMD_Dtype), "SIMD vector lengths incorrect"); | ||||||
|  | static_assert(sizeof(SIMD_Ftype) == sizeof(SIMD_Itype), "SIMD vector lengths incorrect"); | ||||||
|  | #endif | ||||||
|  |  | ||||||
| ///////////////////////////////////////// | ///////////////////////////////////////// | ||||||
| // Some traits to recognise the types | // Some traits to recognise the types | ||||||
| ///////////////////////////////////////// | ///////////////////////////////////////// | ||||||
|   | |||||||
| @@ -180,6 +180,65 @@ void Tester(const functor &func) | |||||||
|   assert(ok==0); |   assert(ok==0); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | template<class functor> | ||||||
|  | void IntTester(const functor &func) | ||||||
|  | { | ||||||
|  |   typedef Integer  scal; | ||||||
|  |   typedef vInteger vec; | ||||||
|  |   GridSerialRNG          sRNG; | ||||||
|  |   sRNG.SeedRandomDevice(); | ||||||
|  |  | ||||||
|  |   int Nsimd = vec::Nsimd(); | ||||||
|  |  | ||||||
|  |   std::vector<scal> input1(Nsimd); | ||||||
|  |   std::vector<scal> input2(Nsimd); | ||||||
|  |   std::vector<scal> result(Nsimd); | ||||||
|  |   std::vector<scal> reference(Nsimd); | ||||||
|  |  | ||||||
|  |   std::vector<vec,alignedAllocator<vec> > buf(3); | ||||||
|  |   vec & v_input1 = buf[0]; | ||||||
|  |   vec & v_input2 = buf[1]; | ||||||
|  |   vec & v_result = buf[2]; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   for(int i=0;i<Nsimd;i++){ | ||||||
|  |     input1[i] = (i + 1) * 30; | ||||||
|  |     input2[i] = (i + 1) * 20; | ||||||
|  |     result[i] = (i + 1) * 10; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   merge<vec,scal>(v_input1,input1); | ||||||
|  |   merge<vec,scal>(v_input2,input2); | ||||||
|  |   merge<vec,scal>(v_result,result); | ||||||
|  |  | ||||||
|  |   func(v_result,v_input1,v_input2); | ||||||
|  |  | ||||||
|  |   for(int i=0;i<Nsimd;i++) { | ||||||
|  |     func(reference[i],input1[i],input2[i]); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   extract<vec,scal>(v_result,result); | ||||||
|  |  | ||||||
|  |   std::cout << GridLogMessage << " " << func.name() << std::endl; | ||||||
|  |  | ||||||
|  |   std::cout << GridLogDebug << v_input1 << std::endl; | ||||||
|  |   std::cout << GridLogDebug << v_input2 << std::endl; | ||||||
|  |   std::cout << GridLogDebug << v_result << std::endl; | ||||||
|  |  | ||||||
|  |   int ok=0; | ||||||
|  |   for(int i=0;i<Nsimd;i++){ | ||||||
|  |     if ( reference[i]-result[i] != 0){ | ||||||
|  |       std::cout<<GridLogMessage<< "*****" << std::endl; | ||||||
|  |       std::cout<<GridLogMessage<< "["<<i<<"] "<< reference[i]-result[i] << " " <<reference[i]<< " " << result[i]<<std::endl; | ||||||
|  |       ok++; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   if ( ok==0 ) { | ||||||
|  |     std::cout<<GridLogMessage << " OK!" <<std::endl; | ||||||
|  |   } | ||||||
|  |   assert(ok==0); | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
| template<class reduced,class scal, class vec,class functor >  | template<class reduced,class scal, class vec,class functor >  | ||||||
| void ReductionTester(const functor &func) | void ReductionTester(const functor &func) | ||||||
| @@ -473,5 +532,12 @@ int main (int argc, char ** argv) | |||||||
|     PermTester<ComplexD,vComplexD>(funcRotate(r)); |     PermTester<ComplexD,vComplexD>(funcRotate(r)); | ||||||
|   } |   } | ||||||
|    |    | ||||||
|  |   std::cout<<GridLogMessage << "==================================="<<  std::endl; | ||||||
|  |   std::cout<<GridLogMessage << "Testing vInteger                   "<<  std::endl; | ||||||
|  |   std::cout<<GridLogMessage << "==================================="<<  std::endl; | ||||||
|  |   IntTester(funcPlus()); | ||||||
|  |   IntTester(funcMinus()); | ||||||
|  |   IntTester(funcTimes()); | ||||||
|  |  | ||||||
|   Grid_finalize(); |   Grid_finalize(); | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user