NAMESPACE

2026-05-29 13:34:17 +01:00 · 2018-01-12 18:24:16 +00:00
parent 6ab744c720
commit ec89714cce
1 changed files with 528 additions and 532 deletions
@@ -25,8 +25,8 @@
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
+*************************************************************************************/
-    /*  END LEGAL */
+/*  END LEGAL */
 /*
@@ -45,29 +45,29 @@
 #include "Grid_generic_types.h"
 #include <arm_neon.h>
-namespace Grid {
+NAMESPACE_BEGIN(Grid);
-namespace Optimization {
+NAMESPACE_BEGIN(Optimization);
-  template<class vtype>
+template<class vtype>
-  union uconv {
+union uconv {
  float32x4_t f;
  vtype v;
-  };
+};
-  union u128f {
+union u128f {
  float32x4_t v;
  float f[4];
-  };
+};
-  union u128d {
+union u128d {
  float64x2_t v;
  double f[2];
-  };
+};
-  // half precision
+// half precision
-  union u128h {
+union u128h {
  float16x8_t v;
  uint16_t f[8];
-  };
+};
-  struct Vsplat{
+struct Vsplat{
  //Complex float
  inline float32x4_t operator()(float a, float b){
    float tmp[4]={a,b,a,b};
@@ -90,9 +90,9 @@ namespace Optimization {
  inline uint32x4_t operator()(Integer a){
    return vdupq_n_u32(a);
  }
-  };
+};
-  struct Vstore{
+struct Vstore{
  //Float
  inline void operator()(float32x4_t a, float* F){
    vst1q_f32(F, a);
@@ -106,9 +106,9 @@ namespace Optimization {
    vst1q_u32(I, a);
  }
-  };
+};
-  struct Vstream{ // N:equivalents to _mm_stream_p* in NEON?
+struct Vstream{ // N:equivalents to _mm_stream_p* in NEON?
  //Float // N:generic
  inline void operator()(float * a, float32x4_t b){
    memcpy(a,&b,4*sizeof(float));
@@ -117,13 +117,11 @@ namespace Optimization {
  inline void operator()(double * a, float64x2_t b){
    memcpy(a,&b,2*sizeof(double));
  }
 };
-
+// Nils: Vset untested; not used currently in Grid at all;
-  };
+// git commit 4a8c4ccfba1d05159348d21a9698028ea847e77b
-
+struct Vset{
  // Nils: Vset untested; not used currently in Grid at all;
  // git commit 4a8c4ccfba1d05159348d21a9698028ea847e77b
  struct Vset{
  // Complex float
  inline float32x4_t operator()(Grid::ComplexF *a){
    float tmp[4]={a[1].imag(),a[1].real(),a[0].imag(),a[0].real()};
@@ -148,10 +146,10 @@ namespace Optimization {
  inline uint32x4_t operator()(Integer *a){
    return vld1q_dup_u32(a);
  }
-  };
+};
-  template <typename Out_type, typename In_type>
+template <typename Out_type, typename In_type>
-  struct Reduce{
+struct Reduce{
  //Need templated class to overload output type
  //General form must generate error if compiled
  inline Out_type operator()(In_type in){
@@ -159,12 +157,12 @@ namespace Optimization {
    exit(1);
    return 0;
  }
-  };
+};
-  /////////////////////////////////////////////////////
+/////////////////////////////////////////////////////
-  // Arithmetic operations
+// Arithmetic operations
-  /////////////////////////////////////////////////////
+/////////////////////////////////////////////////////
-  struct Sum{
+struct Sum{
  //Complex/Real float
  inline float32x4_t operator()(float32x4_t a, float32x4_t b){
    return vaddq_f32(a,b);
@@ -177,9 +175,9 @@ namespace Optimization {
  inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){
    return vaddq_u32(a,b);
  }
-  };
+};
-  struct Sub{
+struct Sub{
  //Complex/Real float
  inline float32x4_t operator()(float32x4_t a, float32x4_t b){
    return vsubq_f32(a,b);
@@ -192,9 +190,9 @@ namespace Optimization {
  inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){
    return vsubq_u32(a,b);
  }
-  };
+};
-  struct MultRealPart{
+struct MultRealPart{
  inline float32x4_t operator()(float32x4_t a, float32x4_t b){
    float32x4_t re = vtrn1q_f32(a, a);
    return vmulq_f32(re, b);
@@ -203,9 +201,9 @@ namespace Optimization {
    float64x2_t re = vzip1q_f64(a, a);
    return vmulq_f64(re, b);
  }
-  };
+};
-  struct MaddRealPart{
+struct MaddRealPart{
  inline float32x4_t operator()(float32x4_t a, float32x4_t b, float32x4_t c){
    float32x4_t re = vtrn1q_f32(a, a);
    return vfmaq_f32(c, re, b);
@@ -214,9 +212,9 @@ namespace Optimization {
    float64x2_t re = vzip1q_f64(a, a);
    return vfmaq_f64(c, re, b);
  }
-  };
+};
-  struct Div{
+struct Div{
  // Real float
  inline float32x4_t operator()(float32x4_t a, float32x4_t b){
    return vdivq_f32(a, b);
@@ -225,9 +223,9 @@ namespace Optimization {
  inline float64x2_t operator()(float64x2_t a, float64x2_t b){
    return vdivq_f64(a, b);
  }
-  };
+};
-  struct MultComplex{
+struct MultComplex{
  // Complex float
  inline float32x4_t operator()(float32x4_t a, float32x4_t b){
@@ -275,9 +273,9 @@ namespace Optimization {
    // r5 = vmulq_f64(r0, a);
    // return vaddq_f64(r4, r5);
  }
-  };
+};
-  struct Mult{
+struct Mult{
  // Real float
  inline float32x4_t mac(float32x4_t a, float32x4_t b, float32x4_t c){
    //return vaddq_f32(vmulq_f32(b,c),a);
@@ -298,9 +296,9 @@ namespace Optimization {
  inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){
    return vmulq_u32(a,b);
  }
-  };
+};
-  struct Conj{
+struct Conj{
  // Complex single
  inline float32x4_t operator()(float32x4_t in){
    // ar ai br bi -> ar -ai br -bi
@@ -318,9 +316,9 @@ namespace Optimization {
    return vextq_f64(r0, r1, 1);  //  ar -ai
  }
  // do not define for integer input
-  };
+};
-  struct TimesMinusI{
+struct TimesMinusI{
  //Complex single
  inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
    // ar ai br bi -> ai -ar ai -br
@@ -336,9 +334,9 @@ namespace Optimization {
    tmp = vnegq_f64(in);
    return vextq_f64(in, tmp, 1);
  }
-  };
+};
-  struct TimesI{
+struct TimesI{
  //Complex single
  inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
    // ar ai br bi -> -ai ar -bi br
@@ -354,9 +352,9 @@ namespace Optimization {
    tmp = vnegq_f64(in);
    return vextq_f64(tmp, in, 1);
  }
-  };
+};
-  struct Permute{
+struct Permute{
  static inline float32x4_t Permute0(float32x4_t in){ // N:ok
    // AB CD -> CD AB
@@ -387,9 +385,9 @@ namespace Optimization {
    return in;
  };
-  };
+};
-  struct Rotate{
+struct Rotate{
  static inline float32x4_t rotate(float32x4_t in,int n){ // N:ok
    switch(n){
@@ -423,9 +421,9 @@ namespace Optimization {
  template<int n> static inline float32x4_t tRotate(float32x4_t in){ return vextq_f32(in,in,n%4); };
  template<int n> static inline float64x2_t tRotate(float64x2_t in){ return vextq_f64(in,in,n%2); };
-  };
+};
-  struct PrecisionChange {
+struct PrecisionChange {
  static inline float16x8_t StoH (const float32x4_t &a,const float32x4_t &b) {
    float16x4_t h = vcvt_f16_f32(a);
@@ -464,12 +462,12 @@ namespace Optimization {
    StoD(s1, a, b);
    StoD(s2, c, d);
  }
-  };
+};
-  //////////////////////////////////////////////
+//////////////////////////////////////////////
-  // Exchange support
+// Exchange support
-  struct Exchange{
+struct Exchange{
  static inline void Exchange0(float32x4_t &out1,float32x4_t &out2,float32x4_t in1,float32x4_t in2){
    // in1: ABCD -> out1: ABEF
    // in2: EFGH -> out2: CDGH
@@ -518,82 +516,80 @@ namespace Optimization {
    assert(0);
    return;
  };
-  };
+};
-  //////////////////////////////////////////////
+//////////////////////////////////////////////
-  // Some Template specialization
+// Some Template specialization
-  //Complex float Reduce
+//Complex float Reduce
-  template<>
+template<>
-  inline Grid::ComplexF Reduce<Grid::ComplexF, float32x4_t>::operator()(float32x4_t in){
+inline Grid::ComplexF Reduce<Grid::ComplexF, float32x4_t>::operator()(float32x4_t in){
  float32x4_t v1; // two complex
  v1 = Optimization::Permute::Permute0(in);
  v1 = vaddq_f32(v1,in);
  u128f conv;    conv.v=v1;
  return Grid::ComplexF(conv.f[0],conv.f[1]);
-  }
+}
-  //Real float Reduce
+//Real float Reduce
-  template<>
+template<>
-  inline Grid::RealF Reduce<Grid::RealF, float32x4_t>::operator()(float32x4_t in){
+inline Grid::RealF Reduce<Grid::RealF, float32x4_t>::operator()(float32x4_t in){
  return vaddvq_f32(in);
-  }
+}
-  //Complex double Reduce
+//Complex double Reduce
-  template<>
+template<>
-  inline Grid::ComplexD Reduce<Grid::ComplexD, float64x2_t>::operator()(float64x2_t in){
+inline Grid::ComplexD Reduce<Grid::ComplexD, float64x2_t>::operator()(float64x2_t in){
  u128d conv; conv.v = in;
  return Grid::ComplexD(conv.f[0],conv.f[1]);
  }
  //Real double Reduce
  template<>
  inline Grid::RealD Reduce<Grid::RealD, float64x2_t>::operator()(float64x2_t in){
    return vaddvq_f64(in);
  }
  //Integer Reduce
  template<>
  inline Integer Reduce<Integer, uint32x4_t>::operator()(uint32x4_t in){
    return vaddvq_u32(in);
  }
 }
 //Real double Reduce
 template<>
 inline Grid::RealD Reduce<Grid::RealD, float64x2_t>::operator()(float64x2_t in){
  return vaddvq_f64(in);
 }
 //Integer Reduce
 template<>
 inline Integer Reduce<Integer, uint32x4_t>::operator()(uint32x4_t in){
  return vaddvq_u32(in);
 }
 NAMESPACE_END(Optimization);
 //////////////////////////////////////////////////////////////////////////////////////
 // Here assign types
 // typedef Optimization::vech SIMD_Htype; // Reduced precision type
-  typedef float16x8_t  SIMD_Htype; // Half precision type
+typedef float16x8_t  SIMD_Htype; // Half precision type
-  typedef float32x4_t  SIMD_Ftype; // Single precision type
+typedef float32x4_t  SIMD_Ftype; // Single precision type
-  typedef float64x2_t  SIMD_Dtype; // Double precision type
+typedef float64x2_t  SIMD_Dtype; // Double precision type
-  typedef uint32x4_t   SIMD_Itype; // Integer type
+typedef uint32x4_t   SIMD_Itype; // Integer type
-  inline void v_prefetch0(int size, const char *ptr){};  // prefetch utilities
+inline void v_prefetch0(int size, const char *ptr){};  // prefetch utilities
-  inline void prefetch_HINT_T0(const char *ptr){};
+inline void prefetch_HINT_T0(const char *ptr){};
-  // Function name aliases
+// Function name aliases
-  typedef Optimization::Vsplat   VsplatSIMD;
+typedef Optimization::Vsplat   VsplatSIMD;
-  typedef Optimization::Vstore   VstoreSIMD;
+typedef Optimization::Vstore   VstoreSIMD;
-  typedef Optimization::Vset     VsetSIMD;
+typedef Optimization::Vset     VsetSIMD;
-  typedef Optimization::Vstream  VstreamSIMD;
+typedef Optimization::Vstream  VstreamSIMD;
-  template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
+template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
 // Arithmetic operations
 typedef Optimization::Sum         SumSIMD;
 typedef Optimization::Sub         SubSIMD;
 typedef Optimization::Div         DivSIMD;
 typedef Optimization::Mult        MultSIMD;
 typedef Optimization::MultComplex MultComplexSIMD;
 typedef Optimization::MultRealPart MultRealPartSIMD;
 typedef Optimization::MaddRealPart MaddRealPartSIMD;
 typedef Optimization::Conj        ConjSIMD;
 typedef Optimization::TimesMinusI TimesMinusISIMD;
 typedef Optimization::TimesI      TimesISIMD;
-
+NAMESPACE_END(Grid);
  // Arithmetic operations
  typedef Optimization::Sum         SumSIMD;
  typedef Optimization::Sub         SubSIMD;
  typedef Optimization::Div         DivSIMD;
  typedef Optimization::Mult        MultSIMD;
  typedef Optimization::MultComplex MultComplexSIMD;
  typedef Optimization::MultRealPart MultRealPartSIMD;
  typedef Optimization::MaddRealPart MaddRealPartSIMD;
  typedef Optimization::Conj        ConjSIMD;
  typedef Optimization::TimesMinusI TimesMinusISIMD;
  typedef Optimization::TimesI      TimesISIMD;
 }