1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 09:15:38 +01:00

Merge pull request #93 from Lanny91/hotfix/qpx

Some fixes for QPX and generic SIMD types.
This commit is contained in:
Peter Boyle 2017-03-13 09:31:26 +00:00 committed by GitHub
commit dfefc70b57
5 changed files with 250 additions and 96 deletions

View File

@ -5,8 +5,10 @@
Source file: ./lib/simd/Grid_generic.h Source file: ./lib/simd/Grid_generic.h
Copyright (C) 2015 Copyright (C) 2015
Copyright (C) 2017
Author: Antonin Portelli <antonin.portelli@me.com> Author: Antonin Portelli <antonin.portelli@me.com>
Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -26,52 +28,11 @@ Author: Antonin Portelli <antonin.portelli@me.com>
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
static_assert(GEN_SIMD_WIDTH % 16u == 0, "SIMD vector size is not an integer multiple of 16 bytes"); #include "Grid_generic_types.h"
//#define VECTOR_LOOPS
// playing with compiler pragmas
#ifdef VECTOR_LOOPS
#ifdef __clang__
#define VECTOR_FOR(i, w, inc)\
_Pragma("clang loop unroll(full) vectorize(enable) interleave(enable) vectorize_width(w)")\
for (unsigned int i = 0; i < w; i += inc)
#elif defined __INTEL_COMPILER
#define VECTOR_FOR(i, w, inc)\
_Pragma("simd vectorlength(w*8)")\
for (unsigned int i = 0; i < w; i += inc)
#else
#define VECTOR_FOR(i, w, inc)\
for (unsigned int i = 0; i < w; i += inc)
#endif
#else
#define VECTOR_FOR(i, w, inc)\
for (unsigned int i = 0; i < w; i += inc)
#endif
namespace Grid { namespace Grid {
namespace Optimization { namespace Optimization {
// type traits giving the number of elements for each vector type
template <typename T> struct W;
template <> struct W<double> {
constexpr static unsigned int c = GEN_SIMD_WIDTH/16u;
constexpr static unsigned int r = GEN_SIMD_WIDTH/8u;
};
template <> struct W<float> {
constexpr static unsigned int c = GEN_SIMD_WIDTH/8u;
constexpr static unsigned int r = GEN_SIMD_WIDTH/4u;
};
// SIMD vector types
template <typename T>
struct vec {
alignas(GEN_SIMD_WIDTH) T v[W<T>::r];
};
typedef vec<float> vecf;
typedef vec<double> vecd;
struct Vsplat{ struct Vsplat{
// Complex // Complex
template <typename T> template <typename T>
@ -99,11 +60,6 @@ namespace Optimization {
return out; return out;
} }
// Integer
inline int operator()(Integer a){
return a;
}
}; };
struct Vstore{ struct Vstore{
@ -112,11 +68,6 @@ namespace Optimization {
inline void operator()(vec<T> a, T *D){ inline void operator()(vec<T> a, T *D){
*((vec<T> *)D) = a; *((vec<T> *)D) = a;
} }
//Integer
inline void operator()(int a, Integer *I){
*I = a;
}
}; };
struct Vstream{ struct Vstream{
@ -151,11 +102,6 @@ namespace Optimization {
return out; return out;
} }
// Integer
inline int operator()(Integer *a){
return *a;
}
}; };
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
@ -174,11 +120,6 @@ namespace Optimization {
return out; return out;
} }
//I nteger
inline int operator()(int a, int b){
return a + b;
}
}; };
struct Sub{ struct Sub{
@ -194,11 +135,6 @@ namespace Optimization {
return out; return out;
} }
//Integer
inline int operator()(int a, int b){
return a-b;
}
}; };
struct Mult{ struct Mult{
@ -214,11 +150,6 @@ namespace Optimization {
return out; return out;
} }
// Integer
inline int operator()(int a, int b){
return a*b;
}
}; };
#define cmul(a, b, c, i)\ #define cmul(a, b, c, i)\
@ -232,13 +163,26 @@ namespace Optimization {
VECTOR_FOR(i, W<T>::c, 1) VECTOR_FOR(i, W<T>::c, 1)
{ {
out.v[2*i] = a[2*i]*b[2*i]; out.v[2*i] = a.v[2*i]*b.v[2*i];
out.v[2*i+1] = a[2*i]*b[2*i+1]; out.v[2*i+1] = a.v[2*i]*b.v[2*i+1];
} }
return out; return out;
}; }
}; };
// Multiply-add functor: within every two-element group of the vectors, both
// elements of b are scaled by the FIRST element of the matching group of a,
// and the corresponding elements of c are then added.
// NOTE(review): presumably the even slot is the real part and the odd slot the
// imaginary part of a complex number (as the struct name suggests) - confirm.
struct MaddRealPart{
  template <typename T>
  inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
    vec<T> result;

    // W<T>::c groups, two consecutive elements per group
    VECTOR_FOR(k, W<T>::c, 1)
    {
      const unsigned int lo    = 2*k;      // first element of the group
      const unsigned int hi    = 2*k + 1;  // second element of the group
      const T            scale = a.v[lo];  // the single factor taken from a

      result.v[lo] = scale*b.v[lo] + c.v[lo];
      result.v[hi] = scale*b.v[hi] + c.v[hi];
    }

    return result;
  }
};
struct MultComplex{ struct MultComplex{
// Complex // Complex
@ -369,6 +313,11 @@ namespace Optimization {
} }
struct Rotate{ struct Rotate{
// Variant of rotate() whose rotation count n is supplied as a template
// argument; it simply forwards n to the run-time rotate(in, n) overload.
template <int n, typename T>
static inline vec<T> tRotate(vec<T> in){
  vec<T> rotated = rotate(in, n);
  return rotated;
}
template <typename T> template <typename T>
static inline vec<T> rotate(vec<T> in, int n){ static inline vec<T> rotate(vec<T> in, int n){
vec<T> out; vec<T> out;
@ -442,8 +391,12 @@ namespace Optimization {
//Integer Reduce //Integer Reduce
template<> template<>
inline Integer Reduce<Integer, int>::operator()(int in){ inline Integer Reduce<Integer, veci>::operator()(veci in){
return in; Integer a = 0;
acc(in.v, a, 0, 1, W<Integer>::r);
return a;
} }
} }
@ -452,7 +405,7 @@ namespace Optimization {
typedef Optimization::vecf SIMD_Ftype; // Single precision type typedef Optimization::vecf SIMD_Ftype; // Single precision type
typedef Optimization::vecd SIMD_Dtype; // Double precision type typedef Optimization::vecd SIMD_Dtype; // Double precision type
typedef int SIMD_Itype; // Integer type typedef Optimization::veci SIMD_Itype; // Integer type
// prefetch utilities // prefetch utilities
inline void v_prefetch0(int size, const char *ptr){}; inline void v_prefetch0(int size, const char *ptr){};
@ -472,6 +425,7 @@ namespace Optimization {
typedef Optimization::Mult MultSIMD; typedef Optimization::Mult MultSIMD;
typedef Optimization::MultComplex MultComplexSIMD; typedef Optimization::MultComplex MultComplexSIMD;
typedef Optimization::MultRealPart MultRealPartSIMD; typedef Optimization::MultRealPart MultRealPartSIMD;
typedef Optimization::MaddRealPart MaddRealPartSIMD;
typedef Optimization::Conj ConjSIMD; typedef Optimization::Conj ConjSIMD;
typedef Optimization::TimesMinusI TimesMinusISIMD; typedef Optimization::TimesMinusI TimesMinusISIMD;
typedef Optimization::TimesI TimesISIMD; typedef Optimization::TimesI TimesISIMD;

View File

@ -0,0 +1,80 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/simd/Grid_generic_types.h
Copyright (C) 2017
Author: Antonin Portelli <antonin.portelli@me.com>
Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
// Compile-time guard: GEN_SIMD_WIDTH (the SIMD vector size in bytes, per the
// message below) must be an integer multiple of 16. GEN_SIMD_WIDTH has to be
// defined before this point; it is not defined in this header.
static_assert(GEN_SIMD_WIDTH % 16u == 0, "SIMD vector size is not an integer multiple of 16 bytes");
//#define VECTOR_LOOPS

// VECTOR_FOR(i, w, inc) expands to the header of a counting loop
//   for (unsigned int i = 0; i < (w); i += (inc))
// and is followed by the loop body at the call site.
//   i   : name of the loop variable (declared by the macro as unsigned int)
//   w   : exclusive upper bound
//   inc : step added to i after every iteration
// The w and inc arguments are parenthesised in the expansion so that compound
// expressions (for instance a conditional expression) keep their meaning.
//
// playing with compiler pragmas
// When VECTOR_LOOPS is defined, a compiler-specific vectorisation pragma is
// emitted in front of the loop for clang and the Intel compiler.
// NOTE(review): macro parameters are not substituted inside string literals,
// so the `w` that appears in the _Pragma strings below is passed to the
// compiler as the literal identifier `w`, not as the macro argument. This only
// matters when VECTOR_LOOPS is enabled (it is commented out above).
#ifdef VECTOR_LOOPS
#ifdef __clang__
#define VECTOR_FOR(i, w, inc)\
_Pragma("clang loop unroll(full) vectorize(enable) interleave(enable) vectorize_width(w)")\
for (unsigned int i = 0; i < (w); i += (inc))
#elif defined __INTEL_COMPILER
#define VECTOR_FOR(i, w, inc)\
_Pragma("simd vectorlength(w*8)")\
for (unsigned int i = 0; i < (w); i += (inc))
#else
#define VECTOR_FOR(i, w, inc)\
for (unsigned int i = 0; i < (w); i += (inc))
#endif
#else
#define VECTOR_FOR(i, w, inc)\
for (unsigned int i = 0; i < (w); i += (inc))
#endif
namespace Grid {
namespace Optimization {

  // type traits giving the number of elements for each vector type
  //   r : number of elements of type T that fit in GEN_SIMD_WIDTH bytes
  //   c : number of two-element groups, i.e. r/2 (not provided for Integer)
  // The counts are derived from sizeof(T) rather than from hard-coded byte
  // sizes, so that vec<T> always spans GEN_SIMD_WIDTH bytes even if the width
  // of one of the element types (e.g. the Integer typedef) differs from the
  // usual 4 or 8 bytes.
  template <typename T> struct W;

  template <> struct W<double> {
    constexpr static unsigned int c = GEN_SIMD_WIDTH/(2u*sizeof(double));
    constexpr static unsigned int r = GEN_SIMD_WIDTH/sizeof(double);
  };

  template <> struct W<float> {
    constexpr static unsigned int c = GEN_SIMD_WIDTH/(2u*sizeof(float));
    constexpr static unsigned int r = GEN_SIMD_WIDTH/sizeof(float);
  };

  template <> struct W<Integer> {
    constexpr static unsigned int r = GEN_SIMD_WIDTH/sizeof(Integer);
  };

  // SIMD vector types
  // Plain array of W<T>::r elements, aligned to GEN_SIMD_WIDTH bytes.
  // NOTE(review): alignas requires a power-of-two alignment, which is stricter
  // than the "multiple of 16" check applied to GEN_SIMD_WIDTH - confirm that
  // only power-of-two widths are ever configured.
  template <typename T>
  struct vec {
    alignas(GEN_SIMD_WIDTH) T v[W<T>::r];
  };

  typedef vec<float>   vecf;
  typedef vec<double>  vecd;
  typedef vec<Integer> veci;

}}

View File

@ -5,8 +5,10 @@
Source file: ./lib/simd/Grid_qpx.h Source file: ./lib/simd/Grid_qpx.h
Copyright (C) 2016 Copyright (C) 2016
Copyright (C) 2017
Author: Antonin Portelli <antonin.portelli@me.com> Author: Antonin Portelli <antonin.portelli@me.com>
Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -25,6 +27,11 @@
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
******************************************************************************/ ******************************************************************************/
#ifndef GEN_SIMD_WIDTH
#define GEN_SIMD_WIDTH 32u
#endif
#include "Grid_generic_types.h" // Definitions for simulated integer SIMD.
namespace Grid { namespace Grid {
namespace Optimization { namespace Optimization {
typedef struct typedef struct
@ -62,8 +69,15 @@ namespace Optimization {
return (vector4double){a, a, a, a}; return (vector4double){a, a, a, a};
} }
//Integer //Integer
inline int operator()(Integer a){ inline veci operator()(Integer a){
return a; veci out;
VECTOR_FOR(i, W<Integer>::r, 1)
{
out.v[i] = a;
}
return out;
} }
}; };
@ -88,9 +102,10 @@ namespace Optimization {
inline void operator()(vector4double a, double *d){ inline void operator()(vector4double a, double *d){
vec_st(a, 0, d); vec_st(a, 0, d);
} }
//Integer //Integer
inline void operator()(int a, Integer *i){ inline void operator()(veci a, Integer *i){
i[0] = a; *((veci *)i) = a;
} }
}; };
@ -142,11 +157,13 @@ namespace Optimization {
return vec_ld(0, a); return vec_ld(0, a);
} }
// Integer // Integer
inline int operator()(Integer *a){ inline veci operator()(Integer *a){
return a[0]; veci out;
out = *((veci *)a);
return out;
} }
}; };
template <typename Out_type, typename In_type> template <typename Out_type, typename In_type>
@ -200,8 +217,15 @@ namespace Optimization {
FLOAT_WRAP_2(operator(), inline) FLOAT_WRAP_2(operator(), inline)
//Integer //Integer
inline int operator()(int a, int b){ inline veci operator()(veci a, veci b){
return a + b; veci out;
VECTOR_FOR(i, W<Integer>::r, 1)
{
out.v[i] = a.v[i] + b.v[i];
}
return out;
} }
}; };
@ -215,8 +239,15 @@ namespace Optimization {
FLOAT_WRAP_2(operator(), inline) FLOAT_WRAP_2(operator(), inline)
//Integer //Integer
inline int operator()(int a, int b){ inline veci operator()(veci a, veci b){
return a - b; veci out;
VECTOR_FOR(i, W<Integer>::r, 1)
{
out.v[i] = a.v[i] - b.v[i];
}
return out;
} }
}; };
@ -248,8 +279,15 @@ namespace Optimization {
FLOAT_WRAP_2(operator(), inline) FLOAT_WRAP_2(operator(), inline)
// Integer // Integer
inline int operator()(int a, int b){ inline veci operator()(veci a, veci b){
return a*b; veci out;
VECTOR_FOR(i, W<Integer>::r, 1)
{
out.v[i] = a.v[i]*b.v[i];
}
return out;
} }
}; };
@ -263,8 +301,15 @@ namespace Optimization {
FLOAT_WRAP_2(operator(), inline) FLOAT_WRAP_2(operator(), inline)
// Integer // Integer
inline int operator()(int a, int b){ inline veci operator()(veci a, veci b){
return a/b; veci out;
VECTOR_FOR(i, W<Integer>::r, 1)
{
out.v[i] = a.v[i]/b.v[i];
}
return out;
} }
}; };
@ -418,7 +463,7 @@ namespace Optimization {
// Here assign types // Here assign types
typedef Optimization::vector4float SIMD_Ftype; // Single precision type typedef Optimization::vector4float SIMD_Ftype; // Single precision type
typedef vector4double SIMD_Dtype; // Double precision type typedef vector4double SIMD_Dtype; // Double precision type
typedef int SIMD_Itype; // Integer type typedef Optimization::veci SIMD_Itype; // Integer type
// prefetch utilities // prefetch utilities
inline void v_prefetch0(int size, const char *ptr){}; inline void v_prefetch0(int size, const char *ptr){};

View File

@ -747,6 +747,15 @@ typedef Grid_simd<std::complex<float>, SIMD_Ftype> vComplexF;
typedef Grid_simd<std::complex<double>, SIMD_Dtype> vComplexD; typedef Grid_simd<std::complex<double>, SIMD_Dtype> vComplexD;
typedef Grid_simd<Integer, SIMD_Itype> vInteger; typedef Grid_simd<Integer, SIMD_Itype> vInteger;
// Check our vector types are of an appropriate size.
// Compile-time comparison of the byte sizes of the single precision
// (SIMD_Ftype), double precision (SIMD_Dtype) and integer (SIMD_Itype) vector
// storage types selected for the current SIMD target.
#if defined QPX
// QPX: the double and integer types must be exactly twice as large as the
// single precision type.
// NOTE(review): presumably because the QPX single precision type packs four
// floats while the double type holds four doubles - confirm against Grid_qpx.h.
static_assert(2*sizeof(SIMD_Ftype) == sizeof(SIMD_Dtype), "SIMD vector lengths incorrect");
static_assert(2*sizeof(SIMD_Ftype) == sizeof(SIMD_Itype), "SIMD vector lengths incorrect");
#else
// All other targets: the three vector types must have identical size.
static_assert(sizeof(SIMD_Ftype) == sizeof(SIMD_Dtype), "SIMD vector lengths incorrect");
static_assert(sizeof(SIMD_Ftype) == sizeof(SIMD_Itype), "SIMD vector lengths incorrect");
#endif
///////////////////////////////////////// /////////////////////////////////////////
// Some traits to recognise the types // Some traits to recognise the types
///////////////////////////////////////// /////////////////////////////////////////

View File

@ -180,6 +180,65 @@ void Tester(const functor &func)
assert(ok==0); assert(ok==0);
} }
// Checks a binary functor on the integer SIMD type against its scalar version.
//
// func must be callable as func(result, in1, in2) for both vInteger and
// Integer arguments and must provide name() for the log output.
//
// The test fills two input vectors with fixed, lane-dependent values, applies
// func once to the packed vInteger operands and once per lane to the scalar
// operands, and compares the two sets of results lane by lane. Every
// mismatching lane is logged; the function asserts that there were none.
//
// The inputs are fully deterministic, so no random number generator is
// created or seeded here.
template<class functor>
void IntTester(const functor &func)
{
  typedef Integer  scal;
  typedef vInteger vec;

  const int Nsimd = vec::Nsimd();

  std::vector<scal> input1(Nsimd);
  std::vector<scal> input2(Nsimd);
  std::vector<scal> result(Nsimd);
  std::vector<scal> reference(Nsimd);

  // Aligned storage for the three vector operands.
  std::vector<vec,alignedAllocator<vec> > buf(3);
  vec & v_input1 = buf[0];
  vec & v_input2 = buf[1];
  vec & v_result = buf[2];

  // input1 > input2 in every lane, so a subtraction never goes below zero.
  for(int i=0;i<Nsimd;i++){
    input1[i] = (i + 1) * 30;
    input2[i] = (i + 1) * 20;
    result[i] = (i + 1) * 10;
  }

  // Pack the scalar lanes into the vector operands.
  merge<vec,scal>(v_input1,input1);
  merge<vec,scal>(v_input2,input2);
  merge<vec,scal>(v_result,result);

  // Vector evaluation ...
  func(v_result,v_input1,v_input2);

  // ... and the lane-by-lane scalar reference.
  for(int i=0;i<Nsimd;i++) {
    func(reference[i],input1[i],input2[i]);
  }

  // Unpack the vector result for comparison.
  extract<vec,scal>(v_result,result);

  std::cout << GridLogMessage << " " << func.name() << std::endl;
  std::cout << GridLogDebug << v_input1 << std::endl;
  std::cout << GridLogDebug << v_input2 << std::endl;
  std::cout << GridLogDebug << v_result << std::endl;

  int ok=0;
  for(int i=0;i<Nsimd;i++){
    if ( reference[i] != result[i] ){
      std::cout<<GridLogMessage<< "*****" << std::endl;
      std::cout<<GridLogMessage<< "["<<i<<"] "<< reference[i]-result[i] << " " <<reference[i]<< " " << result[i]<<std::endl;
      ok++;
    }
  }
  if ( ok==0 ) {
    std::cout<<GridLogMessage << " OK!" <<std::endl;
  }
  assert(ok==0);
}
template<class reduced,class scal, class vec,class functor > template<class reduced,class scal, class vec,class functor >
void ReductionTester(const functor &func) void ReductionTester(const functor &func)
@ -473,5 +532,12 @@ int main (int argc, char ** argv)
PermTester<ComplexD,vComplexD>(funcRotate(r)); PermTester<ComplexD,vComplexD>(funcRotate(r));
} }
std::cout<<GridLogMessage << "==================================="<< std::endl;
std::cout<<GridLogMessage << "Testing vInteger "<< std::endl;
std::cout<<GridLogMessage << "==================================="<< std::endl;
IntTester(funcPlus());
IntTester(funcMinus());
IntTester(funcTimes());
Grid_finalize(); Grid_finalize();
} }