mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	QPX single precision implementation
This commit is contained in:
		@@ -265,7 +265,7 @@
 | 
			
		||||
	 //	 _mm_prefetch((char *)&_entries[ent],_MM_HINT_T0);
 | 
			
		||||
       }
 | 
			
		||||
       inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) {
 | 
			
		||||
	 _mm_prefetch((char *)&_entries[ent+1],_MM_HINT_T0);
 | 
			
		||||
	 //_mm_prefetch((char *)&_entries[ent+1],_MM_HINT_T0);
 | 
			
		||||
	 local = _entries[ent]._is_local;
 | 
			
		||||
	 perm  = _entries[ent]._permute;
 | 
			
		||||
	 if (perm)  ptype = _permute_type[point]; 
 | 
			
		||||
 
 | 
			
		||||
@@ -27,20 +27,31 @@
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace Optimization {
 | 
			
		||||
  // Four packed single-precision lanes, v0..v3.
  //
  // Objects of this type are reinterpreted as (float *) and handed to
  // vec_ld / vec_st elsewhere in this file. Those accesses move 16 bytes at a
  // time, so the struct is forced onto a 16-byte boundary instead of relying on
  // the 4-byte natural alignment of float.
  // NOTE(review): the 16-byte requirement for single-precision QPX loads/stores
  // is taken from the vendor intrinsic documentation -- confirm on target.
  typedef struct __attribute__((aligned(16)))
  {
    float v0,v1,v2,v3;
  } vector4float;
 | 
			
		||||
 | 
			
		||||
  inline std::ostream & operator<<(std::ostream& stream, const vector4double a)
 | 
			
		||||
  {
 | 
			
		||||
    stream << "{"<<vec_extract(a,0)<<","<<vec_extract(a,1)<<","<<vec_extract(a,2)<<","<<vec_extract(a,3)<<"}";
 | 
			
		||||
    return stream;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  inline std::ostream & operator<<(std::ostream& stream, const vector4float a)
 | 
			
		||||
  {
 | 
			
		||||
    stream << "{"<< a.v0 <<","<< a.v1 <<","<< a.v2 <<","<< a.v3 <<"}";
 | 
			
		||||
    return stream;
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  struct Vsplat{
 | 
			
		||||
    //Complex float
 | 
			
		||||
    inline vector4double operator()(float a, float b){
 | 
			
		||||
      return (vector4double){(double)a, (double)b, (double)a, (double)b};
 | 
			
		||||
    inline vector4float operator()(float a, float b){
 | 
			
		||||
      return (vector4float){a, b, a, b};
 | 
			
		||||
    }
 | 
			
		||||
    // Real float
 | 
			
		||||
    inline vector4double operator()(float a){
 | 
			
		||||
      return (vector4double){(double)a, (double)a, (double)a, (double)a};
 | 
			
		||||
    inline vector4float operator()(float a){
 | 
			
		||||
      return (vector4float){a, a, a, a};
 | 
			
		||||
    }
 | 
			
		||||
    //Complex double
 | 
			
		||||
    inline vector4double operator()(double a, double b){
 | 
			
		||||
@@ -61,6 +72,18 @@ namespace Optimization {
 | 
			
		||||
    // Stores vector `a` to the float buffer `f` with vec_st at byte offset 0.
    inline void operator()(vector4double a, float *f)
    {
      float *dst = f;
      vec_st(a, 0, dst);
    }
 | 
			
		||||
 | 
			
		||||
    // Stores vector `a` into the caller's vector4float `f`: the struct is viewed
    // as a float buffer and written with vec_st at byte offset 0.
    inline void operator()(vector4double a, vector4float &f)
    {
      float *dst = (float *)(&f);
      vec_st(a, 0, dst);
    }
 | 
			
		||||
 | 
			
		||||
    // Copies the four members of `a` (v0..v3, in that order) into f[0]..f[3].
    inline void operator()(vector4float a, float *f)
    {
      const float lanes[4] = {a.v0, a.v1, a.v2, a.v3};
      for (int i = 0; i < 4; ++i) {
        f[i] = lanes[i];
      }
    }
 | 
			
		||||
 | 
			
		||||
    //Double
 | 
			
		||||
    inline void operator()(vector4double a, double *d){
 | 
			
		||||
      vec_st(a, 0, d);
 | 
			
		||||
@@ -76,6 +99,18 @@ namespace Optimization {
 | 
			
		||||
    // Writes vector `a` to the float buffer `f` with vec_st at byte offset 0.
    inline void operator()(float *f, vector4double a)
    {
      float *dst = f;
      vec_st(a, 0, dst);
    }
 | 
			
		||||
 | 
			
		||||
    inline void operator()(vector4float f, vector4double a){
 | 
			
		||||
      vec_st(a, 0, (float *)(&f));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Copies the four members of `a` (v0..v3, in that order) into f[0]..f[3].
    inline void operator()(float *f, vector4float a)
    {
      const float lanes[4] = {a.v0, a.v1, a.v2, a.v3};
      for (int i = 0; i < 4; ++i) {
        f[i] = lanes[i];
      }
    }
 | 
			
		||||
 | 
			
		||||
    //Double
 | 
			
		||||
    inline void operator()(double *d, vector4double a){
 | 
			
		||||
      vec_st(a, 0, d);
 | 
			
		||||
@@ -85,17 +120,23 @@ namespace Optimization {
 | 
			
		||||
  
 | 
			
		||||
  struct Vset{
 | 
			
		||||
    // Complex float
 | 
			
		||||
    inline vector4double operator()(Grid::ComplexF *a){
 | 
			
		||||
      return vec_ld(0, (float *)a);
 | 
			
		||||
    inline vector4float operator()(Grid::ComplexF *a){
 | 
			
		||||
      return (vector4float){a[0].real(), a[0].imag(), a[1].real(), a[1].imag()};
 | 
			
		||||
    }
 | 
			
		||||
    // Complex double
 | 
			
		||||
    // Loads a vector from the memory at `a`, viewed as a double buffer, with
    // vec_ld at byte offset 0.
    inline vector4double operator()(Grid::ComplexD *a)
    {
      double *raw = (double *)a;
      return vec_ld(0, raw);
    }
 | 
			
		||||
 | 
			
		||||
    // Real float
 | 
			
		||||
    inline vector4double operator()(float *a){
 | 
			
		||||
      return vec_ld(0, a);
 | 
			
		||||
    inline vector4float operator()(float *a){
 | 
			
		||||
      return (vector4float){a[0], a[1], a[2], a[3]};
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Builds a vector4double from a vector4float: the by-value copy `a` is
    // viewed as a float buffer and loaded with vec_ld at byte offset 0.
    inline vector4double operator()(vector4float a)
    {
      float *lanes = (float *)(&a);
      return vec_ld(0, lanes);
    }
 | 
			
		||||
 | 
			
		||||
    // Real double
 | 
			
		||||
    inline vector4double operator()(double *a){
 | 
			
		||||
      return vec_ld(0, a);
 | 
			
		||||
@@ -122,11 +163,42 @@ namespace Optimization {
 | 
			
		||||
  /////////////////////////////////////////////////////
 | 
			
		||||
  // Arithmetic operations
 | 
			
		||||
  /////////////////////////////////////////////////////
 | 
			
		||||
  // FLOAT_WRAP_2(fn, pref): emits `pref vector4float fn(vector4float, vector4float)`.
  // The generated body converts both arguments with Vset, calls the
  // vector4double overload of `fn` on the converted values, and writes the
  // result back into a vector4float with Vstore.
  #define FLOAT_WRAP_2(fn, pref)\
  pref vector4float fn(vector4float a, vector4float b)\
  {\
    vector4float  r;\
    vector4double ad = Vset()(a);\
    vector4double bd = Vset()(b);\
    vector4double rd = fn(ad, bd);\
    Vstore()(rd, r);\
    return r;\
  }
 | 
			
		||||
 | 
			
		||||
  // FLOAT_WRAP_1(fn, pref): emits `pref vector4float fn(vector4float)`.
  // The generated body converts the argument with Vset, calls the
  // vector4double overload of `fn` on it, and writes the result back into a
  // vector4float with Vstore.
  #define FLOAT_WRAP_1(fn, pref)\
  pref vector4float fn(vector4float a)\
  {\
    vector4float  r;\
    vector4double ad = Vset()(a);\
    vector4double rd = fn(ad);\
    Vstore()(rd, r);\
    return r;\
  }
 | 
			
		||||
 | 
			
		||||
  struct Sum{
 | 
			
		||||
    //Complex/Real double
 | 
			
		||||
    // Returns vec_add(a, b).
    inline vector4double operator()(vector4double a, vector4double b)
    {
      const vector4double sum = vec_add(a, b);
      return sum;
    }
 | 
			
		||||
 | 
			
		||||
    //Complex/Real float
 | 
			
		||||
    FLOAT_WRAP_2(operator(), inline)
 | 
			
		||||
 | 
			
		||||
    //Integer
 | 
			
		||||
    inline int operator()(int a, int b){
 | 
			
		||||
      return a + b;
 | 
			
		||||
@@ -138,6 +210,10 @@ namespace Optimization {
 | 
			
		||||
    // Returns vec_sub(a, b).
    inline vector4double operator()(vector4double a, vector4double b)
    {
      const vector4double difference = vec_sub(a, b);
      return difference;
    }
 | 
			
		||||
 | 
			
		||||
    //Complex/Real float
 | 
			
		||||
    FLOAT_WRAP_2(operator(), inline)
 | 
			
		||||
 | 
			
		||||
    //Integer
 | 
			
		||||
    inline int operator()(int a, int b){
 | 
			
		||||
      return a - b;
 | 
			
		||||
@@ -149,6 +225,9 @@ namespace Optimization {
 | 
			
		||||
    // Returns vec_xxnpmadd(a, b, vec_xmul(b, a)).
    // NOTE(review): presumably the complex product of packed (re,im) pairs --
    // the enclosing struct is outside this view; confirm.
    inline vector4double operator()(vector4double a, vector4double b)
    {
      const vector4double cross = vec_xmul(b, a);
      return vec_xxnpmadd(a, b, cross);
    }
 | 
			
		||||
 | 
			
		||||
    // Complex float
 | 
			
		||||
    FLOAT_WRAP_2(operator(), inline)
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  struct Mult{
 | 
			
		||||
@@ -156,6 +235,10 @@ namespace Optimization {
 | 
			
		||||
    // Returns vec_mul(a, b).
    inline vector4double operator()(vector4double a, vector4double b)
    {
      const vector4double product = vec_mul(a, b);
      return product;
    }
 | 
			
		||||
 | 
			
		||||
    // Real float
 | 
			
		||||
    FLOAT_WRAP_2(operator(), inline)
 | 
			
		||||
 | 
			
		||||
    // Integer
 | 
			
		||||
    inline int operator()(int a, int b){
 | 
			
		||||
      return a*b;
 | 
			
		||||
@@ -167,6 +250,9 @@ namespace Optimization {
 | 
			
		||||
    // Multiplies `v` by {1, -1, 1, -1} with vec_mul, i.e. flips the sign of
    // lanes 1 and 3 and leaves lanes 0 and 2 unchanged.
    inline vector4double operator()(vector4double v)
    {
      const vector4double signs = (vector4double){1., -1., 1., -1.};
      return vec_mul(v, signs);
    }
 | 
			
		||||
 | 
			
		||||
    // Complex float
 | 
			
		||||
    FLOAT_WRAP_1(operator(), inline)
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  struct TimesMinusI{
 | 
			
		||||
@@ -175,6 +261,9 @@ namespace Optimization {
 | 
			
		||||
      return vec_xxcpnmadd(v, (vector4double){1., 1., 1., 1.},
 | 
			
		||||
                               (vector4double){0., 0., 0., 0.});
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Complex float
 | 
			
		||||
    FLOAT_WRAP_2(operator(), inline)
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  struct TimesI{
 | 
			
		||||
@@ -183,9 +272,13 @@ namespace Optimization {
 | 
			
		||||
      return vec_xxcpnmadd(v, (vector4double){-1., -1., -1., -1.},
 | 
			
		||||
                              (vector4double){0., 0., 0., 0.});
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Complex float
 | 
			
		||||
    FLOAT_WRAP_2(operator(), inline)
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct Permute{
 | 
			
		||||
    //Complex double
 | 
			
		||||
    // Lane reorder 0123 -> 2301, done with vec_perm driven by the
    // vec_gpci(02301) control word.
    static inline vector4double Permute0(vector4double v)
    {
      return vec_perm(v, v, vec_gpci(02301));
    }
 | 
			
		||||
@@ -198,6 +291,12 @@ namespace Optimization {
 | 
			
		||||
    // Identity permutation: hands the input vector back unchanged.
    static inline vector4double Permute3(vector4double v)
    {
      return v;
    }
 | 
			
		||||
 | 
			
		||||
    // Complex float
 | 
			
		||||
    FLOAT_WRAP_1(Permute0, static inline)
 | 
			
		||||
    FLOAT_WRAP_1(Permute1, static inline)
 | 
			
		||||
    FLOAT_WRAP_1(Permute2, static inline)
 | 
			
		||||
    FLOAT_WRAP_1(Permute3, static inline)
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  struct Rotate{
 | 
			
		||||
@@ -218,31 +317,42 @@ namespace Optimization {
 | 
			
		||||
        default: assert(0);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Single-precision rotate: converts `v` with Vset, delegates to the
    // vector4double rotate overload with the same `n`, and writes the result
    // back into a vector4float with Vstore.
    static inline vector4float rotate(vector4float v, int n)
    {
      vector4float  r;
      vector4double vd = Vset()(v);
      vector4double rd = rotate(vd, n);
      Vstore()(rd, r);
      return r;
    }
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  //Complex float Reduce
 | 
			
		||||
  template<>
 | 
			
		||||
  inline Grid::ComplexF
 | 
			
		||||
  Reduce<Grid::ComplexF, vector4double>::operator()(vector4double v) { //2 complex
 | 
			
		||||
    vector4double v1,v2;
 | 
			
		||||
  Reduce<Grid::ComplexF, vector4float>::operator()(vector4float v) { //2 complex
 | 
			
		||||
    vector4float v1,v2;
 | 
			
		||||
    
 | 
			
		||||
    v1 = Optimization::Permute::Permute0(v);
 | 
			
		||||
    v1 = vec_add(v1, v);
 | 
			
		||||
    v1 = Optimization::Sum()(v1, v);
 | 
			
		||||
    
 | 
			
		||||
    return Grid::ComplexF((float)vec_extract(v1, 0), (float)vec_extract(v1, 1));
 | 
			
		||||
    return Grid::ComplexF(v1.v0, v1.v1);
 | 
			
		||||
  }
 | 
			
		||||
  //Real float Reduce
 | 
			
		||||
  template<>
 | 
			
		||||
  inline Grid::RealF
 | 
			
		||||
  Reduce<Grid::RealF, vector4double>::operator()(vector4double v){ //4 floats
 | 
			
		||||
    vector4double v1,v2;
 | 
			
		||||
  Reduce<Grid::RealF, vector4float>::operator()(vector4float v){ //4 floats
 | 
			
		||||
    vector4float v1,v2;
 | 
			
		||||
    
 | 
			
		||||
    v1 = Optimization::Permute::Permute0(v);
 | 
			
		||||
    v1 = vec_add(v1, v);
 | 
			
		||||
    v1 = Optimization::Sum()(v1, v);
 | 
			
		||||
    v2 = Optimization::Permute::Permute1(v1);
 | 
			
		||||
    v1 = vec_add(v1, v2);
 | 
			
		||||
    v1 = Optimization::Sum()(v1, v2);
 | 
			
		||||
    
 | 
			
		||||
    return (float)vec_extract(v1, 0);
 | 
			
		||||
    return v1.v0;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  
 | 
			
		||||
@@ -283,10 +393,9 @@ namespace Optimization {
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Here assign types
 | 
			
		||||
 | 
			
		||||
typedef vector4double SIMD_Ftype;  // Single precision type
 | 
			
		||||
typedef vector4double SIMD_Dtype; // Double precision type
 | 
			
		||||
typedef int SIMD_Itype; // Integer type
 | 
			
		||||
typedef Optimization::vector4float SIMD_Ftype;  // Single precision type
 | 
			
		||||
typedef vector4double              SIMD_Dtype; // Double precision type
 | 
			
		||||
typedef int                        SIMD_Itype; // Integer type
 | 
			
		||||
 | 
			
		||||
// prefetch utilities
 | 
			
		||||
// Prefetch hook with an empty implementation here: both arguments are ignored
// and nothing is read or written.
inline void v_prefetch0(int size, const char *ptr)
{
  (void)size;
  (void)ptr;
}
 | 
			
		||||
 
 | 
			
		||||
@@ -157,10 +157,9 @@ void Tester(const functor &func)
 | 
			
		||||
  std::cout << GridLogMessage << " " << func.name() << std::endl;
 | 
			
		||||
 | 
			
		||||
  std::cout << GridLogDebug << v_input1 << std::endl;
 | 
			
		||||
  std::cout << GridLogDebug << v_input2 << std::endl;
 | 
			
		||||
  std::cout << GridLogDebug << v_result << std::endl;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  int ok=0;
 | 
			
		||||
  for(int i=0;i<Nsimd;i++){
 | 
			
		||||
    if ( abs(reference[i]-result[i])>1.0e-7){
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user