NAMESPACE and formatting

2026-02-15 03:10:54 +00:00 · 2018-01-12 18:10:11 +00:00
parent fbc2380cb8
commit bbb657da5c
1 changed files with 363 additions and 361 deletions
--- a/lib/simd/Grid_generic.h
+++ b/lib/simd/Grid_generic.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************
    Grid physics library, www.github.com/paboyle/Grid 
@@ -25,413 +25,413 @@ Author: Antonin Portelli <antonin.portelli@me.com>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
+*************************************************************************************/
-    /*  END LEGAL */
+/*  END LEGAL */
 #include "Grid_generic_types.h"
-namespace Grid {
+NAMESPACE_BEGIN(Grid);
-namespace Optimization {
+NAMESPACE_BEGIN(Optimization);
-  struct Vsplat{
+struct Vsplat{
-    // Complex
+  // Complex
-    template <typename T>
+  template <typename T>
-    inline vec<T> operator()(T a, T b){
+  inline vec<T> operator()(T a, T b){
-      vec<T> out;
+    vec<T> out;
-      VECTOR_FOR(i, W<T>::r, 2)
+    VECTOR_FOR(i, W<T>::r, 2)
      {
        out.v[i]   = a;
        out.v[i+1] = b;
      }
-      return out;
+    return out;
-    }
+  }
-    // Real
+  // Real
-    template <typename T>
+  template <typename T>
-    inline vec<T> operator()(T a){
+  inline vec<T> operator()(T a){
-      vec<T> out;
+    vec<T> out;
-      VECTOR_FOR(i, W<T>::r, 1)
+    VECTOR_FOR(i, W<T>::r, 1)
      {
        out.v[i] = a;
      }
-      return out;
+    return out;
-    }
+  }
-  };
+};
-  struct Vstore{
+struct Vstore{
-    // Real
+  // Real
-    template <typename T>
+  template <typename T>
-    inline void operator()(vec<T> a, T *D){
+  inline void operator()(vec<T> a, T *D){
-      *((vec<T> *)D) = a;
+    *((vec<T> *)D) = a;
-    }
+  }
-  };
+};
-  struct Vstream{
+struct Vstream{
-    // Real
+  // Real
-    template <typename T>
+  template <typename T>
-    inline void operator()(T * a, vec<T> b){
+  inline void operator()(T * a, vec<T> b){
-      *((vec<T> *)a) = b;
+    *((vec<T> *)a) = b;
-    }
+  }
-  };
+};
-  struct Vset{
+struct Vset{
-    // Complex
+  // Complex
-    template <typename T>
+  template <typename T>
-    inline vec<T> operator()(std::complex<T> *a){
+  inline vec<T> operator()(std::complex<T> *a){
-      vec<T> out;
+    vec<T> out;
-      VECTOR_FOR(i, W<T>::c, 1)
+    VECTOR_FOR(i, W<T>::c, 1)
      {
        out.v[2*i]   = a[i].real();
        out.v[2*i+1] = a[i].imag();
      }
-      return out;
+    return out;
-    }
+  }
-    // Real
+  // Real
-    template <typename T>
+  template <typename T>
-    inline vec<T> operator()(T *a){
+  inline vec<T> operator()(T *a){
-      vec<T> out;
+    vec<T> out;
-      out = *((vec<T> *)a);
+    out = *((vec<T> *)a);
-      return out;
+    return out;
-    }
+  }
-  };
+};
-  /////////////////////////////////////////////////////
+/////////////////////////////////////////////////////
-  // Arithmetic operations
+// Arithmetic operations
-  /////////////////////////////////////////////////////
+/////////////////////////////////////////////////////
-  struct Sum{
+struct Sum{
-    // Complex/Real
+  // Complex/Real
-    template <typename T>
+  template <typename T>
-    inline vec<T> operator()(vec<T> a, vec<T> b){
+  inline vec<T> operator()(vec<T> a, vec<T> b){
-      vec<T> out;
+    vec<T> out;
-      VECTOR_FOR(i, W<T>::r, 1)
+    VECTOR_FOR(i, W<T>::r, 1)
      {
        out.v[i] = a.v[i] + b.v[i];
      }
-      return out;
+    return out;
-    }
+  }
-  };
+};
-  struct Sub{
+struct Sub{
-    // Complex/Real
+  // Complex/Real
-    template <typename T>
+  template <typename T>
-    inline vec<T> operator()(vec<T> a, vec<T> b){
+  inline vec<T> operator()(vec<T> a, vec<T> b){
-      vec<T> out;
+    vec<T> out;
-      VECTOR_FOR(i, W<T>::r, 1)
+    VECTOR_FOR(i, W<T>::r, 1)
      {
        out.v[i] = a.v[i] - b.v[i];
      }
-      return out;
+    return out;
-    }
+  }
-  };
+};
-  struct Mult{
+struct Mult{
-    // Real
+  // Real
-    template <typename T>
+  template <typename T>
-    inline vec<T> operator()(vec<T> a, vec<T> b){
+  inline vec<T> operator()(vec<T> a, vec<T> b){
-      vec<T> out;
+    vec<T> out;
-      VECTOR_FOR(i, W<T>::r, 1)
+    VECTOR_FOR(i, W<T>::r, 1)
      {
        out.v[i] = a.v[i]*b.v[i];
      }
-      return out;
+    return out;
-    }
+  }
-  };
+};
-  #define cmul(a, b, c, i)\
+#define cmul(a, b, c, i)			\
-  c[i]   = a[i]*b[i]   - a[i+1]*b[i+1];\
+  c[i]   = a[i]*b[i]   - a[i+1]*b[i+1];		\
  c[i+1] = a[i]*b[i+1] + a[i+1]*b[i];
-  struct MultRealPart{
+struct MultRealPart{
-    template <typename T>
+  template <typename T>
-    inline vec<T> operator()(vec<T> a, vec<T> b){
+  inline vec<T> operator()(vec<T> a, vec<T> b){
-      vec<T> out;
+    vec<T> out;
-      VECTOR_FOR(i, W<T>::c, 1)
+    VECTOR_FOR(i, W<T>::c, 1)
      {
-         out.v[2*i]   = a.v[2*i]*b.v[2*i];
+	out.v[2*i]   = a.v[2*i]*b.v[2*i];
-         out.v[2*i+1] = a.v[2*i]*b.v[2*i+1];
+	out.v[2*i+1] = a.v[2*i]*b.v[2*i+1];
      }      
-      return out;
+    return out;
-    }
+  }
-  };
+};
-  struct MaddRealPart{
+struct MaddRealPart{
-    template <typename T>
+  template <typename T>
-    inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
+  inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
-      vec<T> out;
+    vec<T> out;
-      VECTOR_FOR(i, W<T>::c, 1)
+    VECTOR_FOR(i, W<T>::c, 1)
      {
-         out.v[2*i]   = a.v[2*i]*b.v[2*i] + c.v[2*i];
+	out.v[2*i]   = a.v[2*i]*b.v[2*i] + c.v[2*i];
-         out.v[2*i+1] = a.v[2*i]*b.v[2*i+1] + c.v[2*i+1];
+	out.v[2*i+1] = a.v[2*i]*b.v[2*i+1] + c.v[2*i+1];
      }      
-      return out;
+    return out;
-    }
+  }
-  };
+};
-  struct MultComplex{
+struct MultComplex{
-    // Complex
+  // Complex
-    template <typename T>
+  template <typename T>
-    inline vec<T> operator()(vec<T> a, vec<T> b){
+  inline vec<T> operator()(vec<T> a, vec<T> b){
-      vec<T> out;
+    vec<T> out;
-      VECTOR_FOR(i, W<T>::c, 1)
+    VECTOR_FOR(i, W<T>::c, 1)
      {
        cmul(a.v, b.v, out.v, 2*i);
      }      
-      return out;
+    return out;
-    }
+  }
-  };
+};
-  #undef cmul
+#undef cmul
-  struct Div{
+struct Div{
-    // Real
+  // Real
-    template <typename T>
+  template <typename T>
-    inline vec<T> operator()(vec<T> a, vec<T> b){
+  inline vec<T> operator()(vec<T> a, vec<T> b){
-      vec<T> out;
+    vec<T> out;
-      VECTOR_FOR(i, W<T>::r, 1)
+    VECTOR_FOR(i, W<T>::r, 1)
      {
        out.v[i] = a.v[i]/b.v[i];
      }
-      return out;
+    return out;
-    }
+  }
-  };
+};
-  #define conj(a, b, i)\
+#define conj(a, b, i)				\
-  b[i]   = a[i];\
+  b[i]   = a[i];				\
  b[i+1] = -a[i+1];
-  struct Conj{
+struct Conj{
-    // Complex
+  // Complex
-    template <typename T>
+  template <typename T>
-    inline vec<T> operator()(vec<T> a){
+  inline vec<T> operator()(vec<T> a){
-      vec<T> out;
+    vec<T> out;
-      VECTOR_FOR(i, W<T>::c, 1)
+    VECTOR_FOR(i, W<T>::c, 1)
      {
        conj(a.v, out.v, 2*i);
      }
-      return out;
+    return out;
-    }
+  }
-  };
+};
-  #undef conj
+#undef conj
-  #define timesmi(a, b, i)\
+#define timesmi(a, b, i)			\
-  b[i]   = a[i+1];\
+  b[i]   = a[i+1];				\
  b[i+1] = -a[i];
-  struct TimesMinusI{
+struct TimesMinusI{
-    // Complex
+  // Complex
-    template <typename T>
+  template <typename T>
-    inline vec<T> operator()(vec<T> a, vec<T> b){
+  inline vec<T> operator()(vec<T> a, vec<T> b){
-      vec<T> out;
+    vec<T> out;
-      VECTOR_FOR(i, W<T>::c, 1)
+    VECTOR_FOR(i, W<T>::c, 1)
      {
        timesmi(a.v, out.v, 2*i);
      }
-      return out;
+    return out;
-    }
+  }
-  };
+};
-  #undef timesmi
+#undef timesmi
-  #define timesi(a, b, i)\
+#define timesi(a, b, i)				\
-  b[i]   = -a[i+1];\
+  b[i]   = -a[i+1];				\
  b[i+1] = a[i];
-  struct TimesI{
+struct TimesI{
-    // Complex
+  // Complex
-    template <typename T>
+  template <typename T>
-    inline vec<T> operator()(vec<T> a, vec<T> b){
+  inline vec<T> operator()(vec<T> a, vec<T> b){
-      vec<T> out;
+    vec<T> out;
-      VECTOR_FOR(i, W<T>::c, 1)
+    VECTOR_FOR(i, W<T>::c, 1)
      {
        timesi(a.v, out.v, 2*i);
      }
-      return out;
+    return out;
-    }
+  }
-  };
+};
-  #undef timesi
+#undef timesi
-  struct PrecisionChange {
+struct PrecisionChange {
-    static inline vech StoH (const vecf &a,const vecf &b) {
+  static inline vech StoH (const vecf &a,const vecf &b) {
-      vech ret;
+    vech ret;
 #ifdef USE_FP16
-      vech *ha = (vech *)&a;
+    vech *ha = (vech *)&a;
-      vech *hb = (vech *)&b;
+    vech *hb = (vech *)&b;
-      const int nf = W<float>::r;
+    const int nf = W<float>::r;
-      //      VECTOR_FOR(i, nf,1){ ret.v[i]    = ( (uint16_t *) &a.v[i])[1] ; }
+    //      VECTOR_FOR(i, nf,1){ ret.v[i]    = ( (uint16_t *) &a.v[i])[1] ; }
-      //      VECTOR_FOR(i, nf,1){ ret.v[i+nf] = ( (uint16_t *) &b.v[i])[1] ; }
+    //      VECTOR_FOR(i, nf,1){ ret.v[i+nf] = ( (uint16_t *) &b.v[i])[1] ; }
-      VECTOR_FOR(i, nf,1){ ret.v[i]    = ha->v[2*i+1]; }
+    VECTOR_FOR(i, nf,1){ ret.v[i]    = ha->v[2*i+1]; }
-      VECTOR_FOR(i, nf,1){ ret.v[i+nf] = hb->v[2*i+1]; }
+    VECTOR_FOR(i, nf,1){ ret.v[i+nf] = hb->v[2*i+1]; }
 #else
-      assert(0);
+    assert(0);
 #endif
-      return ret;
+    return ret;
-    }
+  }
-    static inline void  HtoS (vech h,vecf &sa,vecf &sb) {
+  static inline void  HtoS (vech h,vecf &sa,vecf &sb) {
 #ifdef USE_FP16
-      const int nf = W<float>::r;
+    const int nf = W<float>::r;
-      const int nh = W<uint16_t>::r;
+    const int nh = W<uint16_t>::r;
-      vech *ha = (vech *)&sa;
+    vech *ha = (vech *)&sa;
-      vech *hb = (vech *)&sb;
+    vech *hb = (vech *)&sb;
-      VECTOR_FOR(i, nf, 1){ sb.v[i]= sa.v[i] = 0; }
+    VECTOR_FOR(i, nf, 1){ sb.v[i]= sa.v[i] = 0; }
-      //      VECTOR_FOR(i, nf, 1){ ( (uint16_t *) (&sa.v[i]))[1] = h.v[i];}
+    //      VECTOR_FOR(i, nf, 1){ ( (uint16_t *) (&sa.v[i]))[1] = h.v[i];}
-      //      VECTOR_FOR(i, nf, 1){ ( (uint16_t *) (&sb.v[i]))[1] = h.v[i+nf];}
+    //      VECTOR_FOR(i, nf, 1){ ( (uint16_t *) (&sb.v[i]))[1] = h.v[i+nf];}
-      VECTOR_FOR(i, nf, 1){ ha->v[2*i+1]=h.v[i]; }
+    VECTOR_FOR(i, nf, 1){ ha->v[2*i+1]=h.v[i]; }
-      VECTOR_FOR(i, nf, 1){ hb->v[2*i+1]=h.v[i+nf]; }
+    VECTOR_FOR(i, nf, 1){ hb->v[2*i+1]=h.v[i+nf]; }
 #else
-      assert(0);
+    assert(0);
 #endif
-    }
+  }
-    static inline vecf DtoS (vecd a,vecd b) {
+  static inline vecf DtoS (vecd a,vecd b) {
-      const int nd = W<double>::r;
+    const int nd = W<double>::r;
-      const int nf = W<float>::r;
+    const int nf = W<float>::r;
-      vecf ret;
+    vecf ret;
-      VECTOR_FOR(i, nd,1){ ret.v[i]    = a.v[i] ; }
+    VECTOR_FOR(i, nd,1){ ret.v[i]    = a.v[i] ; }
-      VECTOR_FOR(i, nd,1){ ret.v[i+nd] = b.v[i] ; }
+    VECTOR_FOR(i, nd,1){ ret.v[i+nd] = b.v[i] ; }
-      return ret;
+    return ret;
-    }
+  }
-    static inline void StoD (vecf s,vecd &a,vecd &b) {
+  static inline void StoD (vecf s,vecd &a,vecd &b) {
-      const int nd = W<double>::r;
+    const int nd = W<double>::r;
-      VECTOR_FOR(i, nd,1){ a.v[i] = s.v[i] ; }
+    VECTOR_FOR(i, nd,1){ a.v[i] = s.v[i] ; }
-      VECTOR_FOR(i, nd,1){ b.v[i] = s.v[i+nd] ; }
+    VECTOR_FOR(i, nd,1){ b.v[i] = s.v[i+nd] ; }
-    }
+  }
-    static inline vech DtoH (vecd a,vecd b,vecd c,vecd d) {
+  static inline vech DtoH (vecd a,vecd b,vecd c,vecd d) {
-      vecf sa,sb;
+    vecf sa,sb;
-      sa = DtoS(a,b);
+    sa = DtoS(a,b);
-      sb = DtoS(c,d);
+    sb = DtoS(c,d);
-      return StoH(sa,sb);
+    return StoH(sa,sb);
-    }
+  }
-    static inline void HtoD (vech h,vecd &a,vecd &b,vecd &c,vecd &d) {
+  static inline void HtoD (vech h,vecd &a,vecd &b,vecd &c,vecd &d) {
-      vecf sa,sb;
+    vecf sa,sb;
-      HtoS(h,sa,sb);
+    HtoS(h,sa,sb);
-      StoD(sa,a,b);
+    StoD(sa,a,b);
-      StoD(sb,c,d);
+    StoD(sb,c,d);
-    }
+  }
-  };
+};
-  //////////////////////////////////////////////
+//////////////////////////////////////////////
-  // Exchange support
+// Exchange support
-  struct Exchange{
+struct Exchange{
-    template <typename T,int n>
+  template <typename T,int n>
    static inline void ExchangeN(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
-      const int w = W<T>::r;
+    const int w = W<T>::r;
-      unsigned int mask = w >> (n + 1);
+    unsigned int mask = w >> (n + 1);
-      //      std::cout << " Exchange "<<n<<" nsimd "<<w<<" mask 0x" <<std::hex<<mask<<std::dec<<std::endl;
+    //      std::cout << " Exchange "<<n<<" nsimd "<<w<<" mask 0x" <<std::hex<<mask<<std::dec<<std::endl;
-      VECTOR_FOR(i, w, 1) {	
+    VECTOR_FOR(i, w, 1) {	
-	int j1 = i&(~mask);
+      int j1 = i&(~mask);
-	if  ( (i&mask) == 0 ) { out1.v[i]=in1.v[j1];}
+      if  ( (i&mask) == 0 ) { out1.v[i]=in1.v[j1];}
-	else                  { out1.v[i]=in2.v[j1];}
+      else                  { out1.v[i]=in2.v[j1];}
-	int j2 = i|mask;
+      int j2 = i|mask;
-	if  ( (i&mask) == 0 ) { out2.v[i]=in1.v[j2];}
+      if  ( (i&mask) == 0 ) { out2.v[i]=in1.v[j2];}
-	else                  { out2.v[i]=in2.v[j2];}
+      else                  { out2.v[i]=in2.v[j2];}
-      }      
+    }      
-    }
+  }
-    template <typename T>
+  template <typename T>
-    static inline void Exchange0(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
+  static inline void Exchange0(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
-      ExchangeN<T,0>(out1,out2,in1,in2);
+    ExchangeN<T,0>(out1,out2,in1,in2);
    };
    template <typename T>
    static inline void Exchange1(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
      ExchangeN<T,1>(out1,out2,in1,in2);
    };
    template <typename T>
    static inline void Exchange2(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
      ExchangeN<T,2>(out1,out2,in1,in2);
    };
    template <typename T>
    static inline void Exchange3(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
      ExchangeN<T,3>(out1,out2,in1,in2);
    };
  };
  template <typename T>
  static inline void Exchange1(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
    ExchangeN<T,1>(out1,out2,in1,in2);
  };
  template <typename T>
  static inline void Exchange2(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
    ExchangeN<T,2>(out1,out2,in1,in2);
  };
  template <typename T>
  static inline void Exchange3(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
    ExchangeN<T,3>(out1,out2,in1,in2);
  };
 };
-  //////////////////////////////////////////////
+//////////////////////////////////////////////
-  // Some Template specialization
+// Some Template specialization
-  #define perm(a, b, n, w)\
+#define perm(a, b, n, w)			\
-  unsigned int _mask = w >> (n + 1);\
+  unsigned int _mask = w >> (n + 1);		\
-  VECTOR_FOR(i, w, 1)\
+  VECTOR_FOR(i, w, 1)				\
-  {\
+  {						\
-    b[i] = a[i^_mask];\
+    b[i] = a[i^_mask];				\
  }
-  #define DECL_PERMUTE_N(n)\
+#define DECL_PERMUTE_N(n)			\
-  template <typename T>\
+  template <typename T>				\
-  static inline vec<T> Permute##n(vec<T> in) {\
+  static inline vec<T> Permute##n(vec<T> in) {	\
-    vec<T> out;\
+    vec<T> out;					\
-    perm(in.v, out.v, n, W<T>::r);\
+    perm(in.v, out.v, n, W<T>::r);		\
-    return out;\
+    return out;					\
  }
-  struct Permute{
+struct Permute{
-    DECL_PERMUTE_N(0);
+  DECL_PERMUTE_N(0);
-    DECL_PERMUTE_N(1);
+  DECL_PERMUTE_N(1);
-    DECL_PERMUTE_N(2);
+  DECL_PERMUTE_N(2);
-    DECL_PERMUTE_N(3);
+  DECL_PERMUTE_N(3);
-  };
+};
-  #undef perm
+#undef perm
-  #undef DECL_PERMUTE_N
+#undef DECL_PERMUTE_N
-  #define rot(a, b, n, w)\
+#define rot(a, b, n, w)				\
-  VECTOR_FOR(i, w, 1)\
+  VECTOR_FOR(i, w, 1)				\
-  {\
+  {						\
-    b[i] = a[(i + n)%w];\
+    b[i] = a[(i + n)%w];			\
  }
-  struct Rotate{
+struct Rotate{
-    template <int n, typename T> static inline vec<T> tRotate(vec<T> in){
+  template <int n, typename T> static inline vec<T> tRotate(vec<T> in){
-      return rotate(in, n);
+    return rotate(in, n);
-    }
+  }
-    template <typename T>
+  template <typename T>
-    static inline vec<T> rotate(vec<T> in, int n){
+  static inline vec<T> rotate(vec<T> in, int n){
-      vec<T> out;
+    vec<T> out;
-      rot(in.v, out.v, n, W<T>::r);
+    rot(in.v, out.v, n, W<T>::r);
-      return out;
+    return out;
    }
  };
  #undef rot
  #define acc(v, a, off, step, n)\
  for (unsigned int i = off; i < n; i += step)\
  {\
    a += v[i];\
  }
 };
 #undef rot
-  template <typename Out_type, typename In_type>
+#define acc(v, a, off, step, n)			\
  for (unsigned int i = off; i < n; i += step)	\
    {						\
      a += v[i];				\
    }
 template <typename Out_type, typename In_type>
  struct Reduce{
    //Need templated class to overload output type
    //General form must generate error if compiled
@@ -442,89 +442,91 @@ namespace Optimization {
    }
  };
-  //Complex float Reduce
+//Complex float Reduce
-  template <>
+template <>
-  inline Grid::ComplexF Reduce<Grid::ComplexF, vecf>::operator()(vecf in){
+inline Grid::ComplexF Reduce<Grid::ComplexF, vecf>::operator()(vecf in){
-    float a = 0.f, b = 0.f;
+  float a = 0.f, b = 0.f;
-    acc(in.v, a, 0, 2, W<float>::r);
+  acc(in.v, a, 0, 2, W<float>::r);
-    acc(in.v, b, 1, 2, W<float>::r);
+  acc(in.v, b, 1, 2, W<float>::r);
-    return Grid::ComplexF(a, b);
+  return Grid::ComplexF(a, b);
  }
  //Real float Reduce
  template<>
  inline Grid::RealF Reduce<Grid::RealF, vecf>::operator()(vecf in){
    float a = 0.;
    acc(in.v, a, 0, 1, W<float>::r);
    return a;
  }
  //Complex double Reduce
  template<>
  inline Grid::ComplexD Reduce<Grid::ComplexD, vecd>::operator()(vecd in){
    double a = 0., b = 0.;
    acc(in.v, a, 0, 2, W<double>::r);
    acc(in.v, b, 1, 2, W<double>::r);
    return Grid::ComplexD(a, b);
  }
  //Real double Reduce
  template<>
  inline Grid::RealD Reduce<Grid::RealD, vecd>::operator()(vecd in){
    double a = 0.f;
    acc(in.v, a, 0, 1, W<double>::r);
    return a;
  }
  //Integer Reduce
  template<>
  inline Integer Reduce<Integer, veci>::operator()(veci in){
    Integer a = 0;
    acc(in.v, a, 0, 1, W<Integer>::r);
    return a;
  }
  #undef acc  // EIGEN compatibility
 }
 //Real float Reduce
 template<>
 inline Grid::RealF Reduce<Grid::RealF, vecf>::operator()(vecf in){
  float a = 0.;
  acc(in.v, a, 0, 1, W<float>::r);
  return a;
 }
 //Complex double Reduce
 template<>
 inline Grid::ComplexD Reduce<Grid::ComplexD, vecd>::operator()(vecd in){
  double a = 0., b = 0.;
  acc(in.v, a, 0, 2, W<double>::r);
  acc(in.v, b, 1, 2, W<double>::r);
  return Grid::ComplexD(a, b);
 }
 //Real double Reduce
 template<>
 inline Grid::RealD Reduce<Grid::RealD, vecd>::operator()(vecd in){
  double a = 0.f;
  acc(in.v, a, 0, 1, W<double>::r);
  return a;
 }
 //Integer Reduce
 template<>
 inline Integer Reduce<Integer, veci>::operator()(veci in){
  Integer a = 0;
  acc(in.v, a, 0, 1, W<Integer>::r);
  return a;
 }
 #undef acc  // EIGEN compatibility
 NAMESPACE_END(Optimization)
 //////////////////////////////////////////////////////////////////////////////////////
 // Here assign types 
-  typedef Optimization::vech SIMD_Htype; // Reduced precision type
+typedef Optimization::vech SIMD_Htype; // Reduced precision type
-  typedef Optimization::vecf SIMD_Ftype; // Single precision type
+typedef Optimization::vecf SIMD_Ftype; // Single precision type
-  typedef Optimization::vecd SIMD_Dtype; // Double precision type
+typedef Optimization::vecd SIMD_Dtype; // Double precision type
-  typedef Optimization::veci SIMD_Itype; // Integer type
+typedef Optimization::veci SIMD_Itype; // Integer type
-  // prefetch utilities
+// prefetch utilities
-  inline void v_prefetch0(int size, const char *ptr){};
+inline void v_prefetch0(int size, const char *ptr){};
-  inline void prefetch_HINT_T0(const char *ptr){};
+inline void prefetch_HINT_T0(const char *ptr){};
-  // Function name aliases
+// Function name aliases
-  typedef Optimization::Vsplat   VsplatSIMD;
+typedef Optimization::Vsplat   VsplatSIMD;
-  typedef Optimization::Vstore   VstoreSIMD;
+typedef Optimization::Vstore   VstoreSIMD;
-  typedef Optimization::Vset     VsetSIMD;
+typedef Optimization::Vset     VsetSIMD;
-  typedef Optimization::Vstream  VstreamSIMD;
+typedef Optimization::Vstream  VstreamSIMD;
-  template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
+template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
 // Arithmetic operations
 typedef Optimization::Sum         SumSIMD;
 typedef Optimization::Sub         SubSIMD;
 typedef Optimization::Div         DivSIMD;
 typedef Optimization::Mult        MultSIMD;
 typedef Optimization::MultComplex MultComplexSIMD;
 typedef Optimization::MultRealPart MultRealPartSIMD;
 typedef Optimization::MaddRealPart MaddRealPartSIMD;
 typedef Optimization::Conj        ConjSIMD;
 typedef Optimization::TimesMinusI TimesMinusISIMD;
 typedef Optimization::TimesI      TimesISIMD;
 NAMESPACE_END(Grid)
  // Arithmetic operations
  typedef Optimization::Sum         SumSIMD;
  typedef Optimization::Sub         SubSIMD;
  typedef Optimization::Div         DivSIMD;
  typedef Optimization::Mult        MultSIMD;
  typedef Optimization::MultComplex MultComplexSIMD;
  typedef Optimization::MultRealPart MultRealPartSIMD;
  typedef Optimization::MaddRealPart MaddRealPartSIMD;
  typedef Optimization::Conj        ConjSIMD;
  typedef Optimization::TimesMinusI TimesMinusISIMD;
  typedef Optimization::TimesI      TimesISIMD;
 }