Tensor reformatted with NAMESPACE too

2025-08-12 17:27:06 +01:00 · 2018-01-13 00:31:02 +00:00
parent f4272aa6fd
commit c037244874
21 changed files with 1634 additions and 1626 deletions
--- a/lib/tensors/Tensor_Ta.h
+++ b/lib/tensors/Tensor_Ta.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -24,102 +24,101 @@ Author: neo <cossu@post.kek.jp>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_MATH_TA_H
 #define GRID_MATH_TA_H


-namespace Grid {
+NAMESPACE_BEGIN(Grid);

-  /////////////////////////////////////////////// 
-  // Ta function for scalar, vector, matrix
-  /////////////////////////////////////////////// 
-  /*
+/////////////////////////////////////////////// 
+// Ta function for scalar, vector, matrix
+/////////////////////////////////////////////// 
+/*
  inline ComplexF Ta( const ComplexF &arg){    return arg;}
  inline ComplexD Ta( const ComplexD &arg){    return arg;}
  inline RealF Ta( const RealF &arg){    return arg;}
  inline RealD Ta( const RealD &arg){    return arg;}
-  */
-
-  template<class vtype> inline iScalar<vtype> Ta(const iScalar<vtype>&r)
-    {
-      iScalar<vtype> ret;
-      ret._internal = Ta(r._internal);
-      return ret;
-    }
-  template<class vtype,int N> inline iVector<vtype,N> Ta(const iVector<vtype,N>&r)
-    {
-      iVector<vtype,N> ret;
-      for(int i=0;i<N;i++){
-        ret._internal[i] = Ta(r._internal[i]);
-      }
-      return ret;
-    }
-  template<class vtype,int N> inline iMatrix<vtype,N> Ta(const iMatrix<vtype,N> &arg)
-    {
-      iMatrix<vtype,N> ret;
-
-      double factor = (1.0/(double)N);
-      ret= (arg - adj(arg))*0.5;
-      ret=ret - (trace(ret)*factor);
-      return ret;
-    }
-
-
-  /////////////////////////////////////////////// 
-  // ProjectOnGroup function for scalar, vector, matrix 
-  // Projects on orthogonal, unitary group
-  /////////////////////////////////////////////// 
-
-
-  template<class vtype> inline iScalar<vtype> ProjectOnGroup(const iScalar<vtype>&r)
-    {
-      iScalar<vtype> ret;
-      ret._internal = ProjectOnGroup(r._internal);
-      return ret;
-    }
-  template<class vtype,int N> inline iVector<vtype,N> ProjectOnGroup(const iVector<vtype,N>&r)
-    {
-      iVector<vtype,N> ret;
-      for(int i=0;i<N;i++){
-        ret._internal[i] = ProjectOnGroup(r._internal[i]);
-      }
-      return ret;
-    }
-  template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr> 
-    inline iMatrix<vtype,N> ProjectOnGroup(const iMatrix<vtype,N> &arg)
-    {
-      // need a check for the group type?
-      iMatrix<vtype,N> ret(arg);
-      vtype nrm;
-      vtype inner;
-      for(int c1=0;c1<N;c1++){
-	zeroit(inner);	
-	for(int c2=0;c2<N;c2++)
-	  inner += innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]);
-
-	nrm = rsqrt(inner);
-	for(int c2=0;c2<N;c2++)
-	  ret._internal[c1][c2]*= nrm;
-      
-	for (int b=c1+1; b<N; ++b){
-	  decltype(ret._internal[b][b]*ret._internal[b][b]) pr;
-	  zeroit(pr);
-	  for(int c=0; c<N; ++c)
-	    pr += conjugate(ret._internal[c1][c])*ret._internal[b][c];
-	  
-	  for(int c=0; c<N; ++c){
-	    ret._internal[b][c] -= pr * ret._internal[c1][c];
-	  }
-	}
-	  
-      }
-      // assuming the determinant is ok
-      return ret;
-    }
-
-
+*/

+template<class vtype> inline iScalar<vtype> Ta(const iScalar<vtype>&r)
+{
+  iScalar<vtype> ret;
+  ret._internal = Ta(r._internal);
+  return ret;
 }
+template<class vtype,int N> inline iVector<vtype,N> Ta(const iVector<vtype,N>&r)
+{
+  iVector<vtype,N> ret;
+  for(int i=0;i<N;i++){
+    ret._internal[i] = Ta(r._internal[i]);
+  }
+  return ret;
+}
+template<class vtype,int N> inline iMatrix<vtype,N> Ta(const iMatrix<vtype,N> &arg)
+{
+  iMatrix<vtype,N> ret;
+
+  double factor = (1.0/(double)N);
+  ret= (arg - adj(arg))*0.5;
+  ret=ret - (trace(ret)*factor);
+  return ret;
+}
+
+
+/////////////////////////////////////////////// 
+// ProjectOnGroup function for scalar, vector, matrix 
+// Projects on orthogonal, unitary group
+/////////////////////////////////////////////// 
+
+
+template<class vtype> inline iScalar<vtype> ProjectOnGroup(const iScalar<vtype>&r)
+{
+  iScalar<vtype> ret;
+  ret._internal = ProjectOnGroup(r._internal);
+  return ret;
+}
+template<class vtype,int N> inline iVector<vtype,N> ProjectOnGroup(const iVector<vtype,N>&r)
+{
+  iVector<vtype,N> ret;
+  for(int i=0;i<N;i++){
+    ret._internal[i] = ProjectOnGroup(r._internal[i]);
+  }
+  return ret;
+}
+template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr> 
+inline iMatrix<vtype,N> ProjectOnGroup(const iMatrix<vtype,N> &arg)
+{
+  // need a check for the group type?
+  iMatrix<vtype,N> ret(arg);
+  vtype nrm;
+  vtype inner;
+  for(int c1=0;c1<N;c1++){
+    zeroit(inner);	
+    for(int c2=0;c2<N;c2++)
+      inner += innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]);
+
+    nrm = rsqrt(inner);
+    for(int c2=0;c2<N;c2++)
+      ret._internal[c1][c2]*= nrm;
+      
+    for (int b=c1+1; b<N; ++b){
+      decltype(ret._internal[b][b]*ret._internal[b][b]) pr;
+      zeroit(pr);
+      for(int c=0; c<N; ++c)
+	pr += conjugate(ret._internal[c1][c])*ret._internal[b][c];
+	  
+      for(int c=0; c<N; ++c){
+	ret._internal[b][c] -= pr * ret._internal[c1][c];
+      }
+    }
+	  
+  }
+  // assuming the determinant is ok
+  return ret;
+}
+
+NAMESPACE_END(Grid);
+
 #endif
--- a/lib/tensors/Tensor_arith.h
+++ b/lib/tensors/Tensor_arith.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -23,8 +23,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_MATH_ARITH_H
 #define GRID_MATH_ARITH_H

--- a/lib/tensors/Tensor_arith_add.h
+++ b/lib/tensors/Tensor_arith_add.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -24,123 +24,122 @@ Author: neo <cossu@post.kek.jp>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_MATH_ARITH_ADD_H
 #define GRID_MATH_ARITH_ADD_H

-namespace Grid {
+NAMESPACE_BEGIN(Grid);

    
-    ///////////////////////////////////////////////////////////////////////////////////////////////////
-    /////////////////////////////////////////// ADD         ///////////////////////////////////////////
-    ///////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////// ADD         ///////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////
    

 // ADD is simple for now; cannot mix types and straightforward template
 // Scalar +/- Scalar
 // Vector +/- Vector
 // Matrix +/- Matrix
-  template<class vtype,class ltype,class rtype> strong_inline void add(iScalar<vtype> * __restrict__ ret,
-								const iScalar<ltype> * __restrict__ lhs,
-								const iScalar<rtype> * __restrict__ rhs)
-  {
-    add(&ret->_internal,&lhs->_internal,&rhs->_internal);
-  }
-  template<class vtype,class ltype,class rtype,int N> strong_inline void add(iVector<vtype,N> * __restrict__ ret,
-								      const iVector<ltype,N> * __restrict__ lhs,
-								      const iVector<rtype,N> * __restrict__ rhs)
-  {
-    for(int c=0;c<N;c++){
-      ret->_internal[c]=lhs->_internal[c]+rhs->_internal[c];
-    }
-    return;
+template<class vtype,class ltype,class rtype> strong_inline void add(iScalar<vtype> * __restrict__ ret,
+								     const iScalar<ltype> * __restrict__ lhs,
+								     const iScalar<rtype> * __restrict__ rhs)
+{
+  add(&ret->_internal,&lhs->_internal,&rhs->_internal);
+}
+template<class vtype,class ltype,class rtype,int N> strong_inline void add(iVector<vtype,N> * __restrict__ ret,
+									   const iVector<ltype,N> * __restrict__ lhs,
+									   const iVector<rtype,N> * __restrict__ rhs)
+{
+  for(int c=0;c<N;c++){
+    ret->_internal[c]=lhs->_internal[c]+rhs->_internal[c];
  }
+  return;
+}
  
-  template<class vtype,class ltype,class rtype, int N> strong_inline  void add(iMatrix<vtype,N> * __restrict__ ret,
-									const iMatrix<ltype,N> * __restrict__ lhs,
-									const iMatrix<rtype,N> * __restrict__ rhs)
-  {
-    for(int c2=0;c2<N;c2++){
-      for(int c1=0;c1<N;c1++){
-        add(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal[c1][c2]);
-      }}
-    return;
-  }
-  template<class vtype,class ltype,class rtype, int N> strong_inline  void add(iMatrix<vtype,N> * __restrict__ ret,
-									const iScalar<ltype>   * __restrict__ lhs,
-									const iMatrix<rtype,N> * __restrict__ rhs)
-  {
-    for(int c2=0;c2<N;c2++){
-      for(int c1=0;c1<N;c1++){
-	if ( c1==c2)
-	  add(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
-	else
-	  ret->_internal[c1][c2]=lhs->_internal[c1][c2];
-      }}
-    return;
-  }
-  template<class vtype,class ltype,class rtype, int N> strong_inline  void add(iMatrix<vtype,N> * __restrict__ ret,
-									const iMatrix<ltype,N> * __restrict__ lhs,
-									const iScalar<rtype>   * __restrict__ rhs)
-  {
-    for(int c2=0;c2<N;c2++){
-      for(int c1=0;c1<N;c1++){
-        if ( c1==c2)
-	  add(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
-        else
-	  ret->_internal[c1][c2]=lhs->_internal[c1][c2];
-      }}
-    return;
-  }
-
-
-  // + operator for scalar, vector, matrix
-  template<class ltype,class rtype>
-    //strong_inline auto operator + (iScalar<ltype>& lhs,iScalar<rtype>&& rhs) -> iScalar<decltype(lhs._internal + rhs._internal)>
-    strong_inline auto operator + (const iScalar<ltype>& lhs,const iScalar<rtype>& rhs) -> iScalar<decltype(lhs._internal + rhs._internal)>
-  {
-    typedef iScalar<decltype(lhs._internal+rhs._internal)> ret_t;
-    ret_t ret;
-    add(&ret,&lhs,&rhs);
-    return ret;
-  }
-  template<class ltype,class rtype,int N>
-    strong_inline auto operator + (const iVector<ltype,N>& lhs,const iVector<rtype,N>& rhs) ->iVector<decltype(lhs._internal[0]+rhs._internal[0]),N>
-    {
-      typedef iVector<decltype(lhs._internal[0]+rhs._internal[0]),N> ret_t;
-    ret_t ret;
-    add(&ret,&lhs,&rhs);
-    return ret;
-    }
-  template<class ltype,class rtype,int N>
-    strong_inline auto operator + (const iMatrix<ltype,N>& lhs,const iMatrix<rtype,N>& rhs) ->iMatrix<decltype(lhs._internal[0][0]+rhs._internal[0][0]),N>
-    {
-      typedef iMatrix<decltype(lhs._internal[0][0]+rhs._internal[0][0]),N> ret_t;
-      ret_t ret;
-      add(&ret,&lhs,&rhs);
-      return ret;
-    }
-  template<class ltype,class rtype,int N>
-strong_inline auto operator + (const iScalar<ltype>& lhs,const iMatrix<rtype,N>& rhs)->iMatrix<decltype(lhs._internal+rhs._internal[0][0]),N>
-    {
-      typedef iMatrix<decltype(lhs._internal+rhs._internal[0][0]),N> ret_t;
-      ret_t ret;
-      add(&ret,&lhs,&rhs);
-      return ret;
-    }
-
-  template<class ltype,class rtype,int N>
-    strong_inline auto operator + (const iMatrix<ltype,N>& lhs,const iScalar<rtype>& rhs)->iMatrix<decltype(lhs._internal[0][0]+rhs._internal),N>
-    {
-      typedef iMatrix<decltype(lhs._internal[0][0]+rhs._internal),N> ret_t;
-      ret_t ret;
-      add(&ret,&lhs,&rhs);
-      return ret;
-    }
-
-
-
+template<class vtype,class ltype,class rtype, int N> strong_inline  void add(iMatrix<vtype,N> * __restrict__ ret,
+									     const iMatrix<ltype,N> * __restrict__ lhs,
+									     const iMatrix<rtype,N> * __restrict__ rhs)
+{
+  for(int c2=0;c2<N;c2++){
+    for(int c1=0;c1<N;c1++){
+      add(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal[c1][c2]);
+    }}
+  return;
+}
+template<class vtype,class ltype,class rtype, int N> strong_inline  void add(iMatrix<vtype,N> * __restrict__ ret,
+									     const iScalar<ltype>   * __restrict__ lhs,
+									     const iMatrix<rtype,N> * __restrict__ rhs)
+{
+  for(int c2=0;c2<N;c2++){
+    for(int c1=0;c1<N;c1++){
+      if ( c1==c2)
+	add(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
+      else
+	ret->_internal[c1][c2]=lhs->_internal[c1][c2];
+    }}
+  return;
+}
+template<class vtype,class ltype,class rtype, int N> strong_inline  void add(iMatrix<vtype,N> * __restrict__ ret,
+									     const iMatrix<ltype,N> * __restrict__ lhs,
+									     const iScalar<rtype>   * __restrict__ rhs)
+{
+  for(int c2=0;c2<N;c2++){
+    for(int c1=0;c1<N;c1++){
+      if ( c1==c2)
+	add(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
+      else
+	ret->_internal[c1][c2]=lhs->_internal[c1][c2];
+    }}
+  return;
 }

+
+// + operator for scalar, vector, matrix
+template<class ltype,class rtype>
+//strong_inline auto operator + (iScalar<ltype>& lhs,iScalar<rtype>&& rhs) -> iScalar<decltype(lhs._internal + rhs._internal)>
+strong_inline auto operator + (const iScalar<ltype>& lhs,const iScalar<rtype>& rhs) -> iScalar<decltype(lhs._internal + rhs._internal)>
+{
+  typedef iScalar<decltype(lhs._internal+rhs._internal)> ret_t;
+  ret_t ret;
+  add(&ret,&lhs,&rhs);
+  return ret;
+}
+template<class ltype,class rtype,int N>
+strong_inline auto operator + (const iVector<ltype,N>& lhs,const iVector<rtype,N>& rhs) ->iVector<decltype(lhs._internal[0]+rhs._internal[0]),N>
+{
+  typedef iVector<decltype(lhs._internal[0]+rhs._internal[0]),N> ret_t;
+  ret_t ret;
+  add(&ret,&lhs,&rhs);
+  return ret;
+}
+template<class ltype,class rtype,int N>
+strong_inline auto operator + (const iMatrix<ltype,N>& lhs,const iMatrix<rtype,N>& rhs) ->iMatrix<decltype(lhs._internal[0][0]+rhs._internal[0][0]),N>
+{
+  typedef iMatrix<decltype(lhs._internal[0][0]+rhs._internal[0][0]),N> ret_t;
+  ret_t ret;
+  add(&ret,&lhs,&rhs);
+  return ret;
+}
+template<class ltype,class rtype,int N>
+strong_inline auto operator + (const iScalar<ltype>& lhs,const iMatrix<rtype,N>& rhs)->iMatrix<decltype(lhs._internal+rhs._internal[0][0]),N>
+{
+  typedef iMatrix<decltype(lhs._internal+rhs._internal[0][0]),N> ret_t;
+  ret_t ret;
+  add(&ret,&lhs,&rhs);
+  return ret;
+}
+
+template<class ltype,class rtype,int N>
+strong_inline auto operator + (const iMatrix<ltype,N>& lhs,const iScalar<rtype>& rhs)->iMatrix<decltype(lhs._internal[0][0]+rhs._internal),N>
+{
+  typedef iMatrix<decltype(lhs._internal[0][0]+rhs._internal),N> ret_t;
+  ret_t ret;
+  add(&ret,&lhs,&rhs);
+  return ret;
+}
+
+NAMESPACE_END(Grid);
+
+
 #endif
--- a/lib/tensors/Tensor_arith_mac.h
+++ b/lib/tensors/Tensor_arith_mac.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -23,86 +23,86 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_MATH_ARITH_MAC_H
 #define GRID_MATH_ARITH_MAC_H

-namespace Grid {
-
+NAMESPACE_BEGIN(Grid);

 ///////////////////////////////////////////////////////////////////////////////////////////////////
 /////////////////////////////////////////// MAC         ///////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////////////////////////
-    ///////////////////////////
+///////////////////////////

-    ///////////////////////////
-    // Legal multiplication table
-    ///////////////////////////
-    // scal x scal = scal
-    // mat x  mat  = mat
-    // mat  x scal = mat
-    // scal x mat  = mat
-    // mat  x vec  = vec
-    // vec  x scal = vec
-    // scal x vec  = vec
-    ///////////////////////////
+///////////////////////////
+// Legal multiplication table
+///////////////////////////
+// scal x scal = scal
+// mat x  mat  = mat
+// mat  x scal = mat
+// scal x mat  = mat
+// mat  x vec  = vec
+// vec  x scal = vec
+// scal x vec  = vec
+///////////////////////////
 template<class rtype,class vtype,class mtype>
 strong_inline  void mac(iScalar<rtype> * __restrict__ ret,const iScalar<vtype> * __restrict__ lhs,const iScalar<mtype> * __restrict__ rhs)
 {
-    mac(&ret->_internal,&lhs->_internal,&rhs->_internal);
+  mac(&ret->_internal,&lhs->_internal,&rhs->_internal);
 }
 template<class rrtype,class ltype,class rtype,int N>
 strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
-    for(int c3=0;c3<N;c3++){
+  for(int c3=0;c3<N;c3++){
    for(int c1=0;c1<N;c1++){
-    for(int c2=0;c2<N;c2++){
+      for(int c2=0;c2<N;c2++){
        mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
-    }}}
-    return;
+      }}}
+  return;
 }

 template<class rrtype,class ltype,class rtype,int N>
 strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
-    for(int c1=0;c1<N;c1++){
+  for(int c1=0;c1<N;c1++){
    for(int c2=0;c2<N;c2++){
-        mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
+      mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
    }}
-    return;
+  return;
 }
 template<class rrtype,class ltype,class rtype,int N>
 strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iScalar<ltype> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
-    for(int c1=0;c1<N;c1++){
+  for(int c1=0;c1<N;c1++){
    for(int c2=0;c2<N;c2++){
-        mac(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
+      mac(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
    }}
-    return;
+  return;
 }
 template<class rrtype,class ltype,class rtype,int N>
 strong_inline void mac(iVector<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iVector<rtype,N> * __restrict__ rhs)
 {
-    for(int c1=0;c1<N;c1++){
+  for(int c1=0;c1<N;c1++){
    for(int c2=0;c2<N;c2++){
-        mac(&ret->_internal[c1],&lhs->_internal[c1][c2],&rhs->_internal[c2]);
+      mac(&ret->_internal[c1],&lhs->_internal[c1][c2],&rhs->_internal[c2]);
    }}
-    return;
+  return;
 }
 template<class rrtype,class ltype,class rtype,int N>
 strong_inline void mac(iVector<rrtype,N> * __restrict__ ret,const iScalar<ltype> * __restrict__ lhs,const iVector<rtype,N> * __restrict__ rhs)
 {
-    for(int c1=0;c1<N;c1++){
-        mac(&ret->_internal[c1],&lhs->_internal,&rhs->_internal[c1]);
-    }
-    return;
+  for(int c1=0;c1<N;c1++){
+    mac(&ret->_internal[c1],&lhs->_internal,&rhs->_internal[c1]);
+  }
+  return;
 }
 template<class rrtype,class ltype,class rtype,int N>
 strong_inline void mac(iVector<rrtype,N> * __restrict__ ret,const iVector<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs)
 {
-    for(int c1=0;c1<N;c1++){
-        mac(&ret->_internal[c1],&lhs->_internal[c1],&rhs->_internal);
-    }
-    return;
-}
+  for(int c1=0;c1<N;c1++){
+    mac(&ret->_internal[c1],&lhs->_internal[c1],&rhs->_internal);
+  }
+  return;
 }
+NAMESPACE_END(Grid);
+

 #endif
--- a/lib/tensors/Tensor_arith_mul.h
+++ b/lib/tensors/Tensor_arith_mul.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -23,21 +23,20 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_MATH_ARITH_MUL_H
 #define GRID_MATH_ARITH_MUL_H

-namespace Grid {
+NAMESPACE_BEGIN(Grid);

-
-    ///////////////////////////////////////////////////////////////////////////////////////////////////
-    /////////////////////////////////////////// MUL         ///////////////////////////////////////////
-    ///////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////// MUL         ///////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////
    
 template<class rtype,class vtype,class mtype>
 strong_inline void mult(iScalar<rtype> * __restrict__ ret,const iScalar<mtype> * __restrict__ lhs,const iScalar<vtype> * __restrict__ rhs){
-    mult(&ret->_internal,&lhs->_internal,&rhs->_internal);
+  mult(&ret->_internal,&lhs->_internal,&rhs->_internal);
 }

 template<class rrtype,class ltype,class rtype,int N>
@@ -59,48 +58,48 @@ strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype

 template<class rrtype,class ltype,class rtype,int N>
 strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
-    for(int c2=0;c2<N;c2++){
+  for(int c2=0;c2<N;c2++){
    for(int c1=0;c1<N;c1++){
-        mult(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
+      mult(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
    }}
-    return;
+  return;
 }

 template<class rrtype,class ltype,class rtype, int N>
 strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iScalar<ltype>   * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
-    for(int c2=0;c2<N;c2++){
+  for(int c2=0;c2<N;c2++){
    for(int c1=0;c1<N;c1++){
-        mult(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
+      mult(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
    }}
-    return;
+  return;
 }
 // Matrix left multiplies vector
 template<class rtype,class vtype,class mtype,int N>
 strong_inline void mult(iVector<rtype,N> * __restrict__ ret,const iMatrix<mtype,N> * __restrict__ lhs,const iVector<vtype,N> * __restrict__ rhs)
 {
-    for(int c1=0;c1<N;c1++){
-        mult(&ret->_internal[c1],&lhs->_internal[c1][0],&rhs->_internal[0]);
-        for(int c2=1;c2<N;c2++){
-            mac(&ret->_internal[c1],&lhs->_internal[c1][c2],&rhs->_internal[c2]);
-        }
+  for(int c1=0;c1<N;c1++){
+    mult(&ret->_internal[c1],&lhs->_internal[c1][0],&rhs->_internal[0]);
+    for(int c2=1;c2<N;c2++){
+      mac(&ret->_internal[c1],&lhs->_internal[c1][c2],&rhs->_internal[c2]);
    }
-    return;
+  }
+  return;
 }
 template<class rtype,class vtype,class mtype,int N>
 strong_inline void mult(iVector<rtype,N> * __restrict__ ret,
-                 const iScalar<mtype>   * __restrict__ lhs,
-                 const iVector<vtype,N> * __restrict__ rhs){
-    for(int c1=0;c1<N;c1++){
-        mult(&ret->_internal[c1],&lhs->_internal,&rhs->_internal[c1]);
-    }
+			const iScalar<mtype>   * __restrict__ lhs,
+			const iVector<vtype,N> * __restrict__ rhs){
+  for(int c1=0;c1<N;c1++){
+    mult(&ret->_internal[c1],&lhs->_internal,&rhs->_internal[c1]);
+  }
 }
 template<class rtype,class vtype,class mtype,int N>
 strong_inline void mult(iVector<rtype,N> * __restrict__ ret,
-                 const iVector<vtype,N> * __restrict__ rhs,
-                 const iScalar<mtype> * __restrict__ lhs){
-    for(int c1=0;c1<N;c1++){
-        mult(&ret->_internal[c1],&rhs->_internal[c1],&lhs->_internal);
-    }                 
+			const iVector<vtype,N> * __restrict__ rhs,
+			const iScalar<mtype> * __restrict__ lhs){
+  for(int c1=0;c1<N;c1++){
+    mult(&ret->_internal[c1],&rhs->_internal[c1],&lhs->_internal);
+  }                 
 }
    

@@ -108,25 +107,25 @@ strong_inline void mult(iVector<rtype,N> * __restrict__ ret,
 template<class rtype,class vtype,class mtype,int N> strong_inline
 iVector<rtype,N> operator * (const iMatrix<mtype,N>& lhs,const iVector<vtype,N>& rhs)
 {
-    iVector<rtype,N> ret;
-    mult(&ret,&lhs,&rhs);
-    return ret;
+  iVector<rtype,N> ret;
+  mult(&ret,&lhs,&rhs);
+  return ret;
 }

 template<class rtype,class vtype,class mtype,int N> strong_inline
 iVector<rtype,N> operator * (const iScalar<mtype>& lhs,const iVector<vtype,N>& rhs)
 {
-    iVector<rtype,N> ret;
-    mult(&ret,&lhs,&rhs);
-    return ret;
+  iVector<rtype,N> ret;
+  mult(&ret,&lhs,&rhs);
+  return ret;
 }

 template<class rtype,class vtype,class mtype,int N> strong_inline
 iVector<rtype,N> operator * (const iVector<mtype,N>& lhs,const iScalar<vtype>& rhs)
 {
-    iVector<rtype,N> ret;
-    mult(&ret,&lhs,&rhs);
-    return ret;
+  iVector<rtype,N> ret;
+  mult(&ret,&lhs,&rhs);
+  return ret;
 }

 //////////////////////////////////////////////////////////////////
@@ -135,119 +134,119 @@ iVector<rtype,N> operator * (const iVector<mtype,N>& lhs,const iScalar<vtype>& r
 template<class rtype,class vtype> strong_inline
 iScalar<rtype> operator / (const iScalar<rtype>& lhs,const iScalar<vtype>& rhs)
 {
-    iScalar<rtype> ret;
-    ret._internal = lhs._internal/rhs._internal;
-    return ret;
+  iScalar<rtype> ret;
+  ret._internal = lhs._internal/rhs._internal;
+  return ret;
 }
 template<class rtype,class vtype,int N> strong_inline
 iVector<rtype,N> operator / (const iVector<rtype,N>& lhs,const iScalar<vtype>& rhs)
 {
-    iVector<rtype,N> ret;
-    for(int i=0;i<N;i++){
-      ret._internal[i] = lhs._internal[i]/rhs._internal;
-    }
-    return ret;
+  iVector<rtype,N> ret;
+  for(int i=0;i<N;i++){
+    ret._internal[i] = lhs._internal[i]/rhs._internal;
+  }
+  return ret;
 }
 template<class rtype,class vtype,int N> strong_inline
 iMatrix<rtype,N> operator / (const iMatrix<rtype,N>& lhs,const iScalar<vtype>& rhs)
 {
-    iMatrix<rtype,N> ret;
-    for(int i=0;i<N;i++){
+  iMatrix<rtype,N> ret;
+  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      ret._internal[i][j] = lhs._internal[i][j]/rhs._internal;
    }}
-    return ret;
+  return ret;
 }
    
-    //////////////////////////////////////////////////////////////////
-    // Glue operators to mult routines. Must resolve return type cleverly from typeof(internal)
-    // since nesting matrix<scalar> x matrix<matrix>-> matrix<matrix>
-    // while         matrix<scalar> x matrix<scalar>-> matrix<scalar>
-    // so return type depends on argument types in nasty way.
-    //////////////////////////////////////////////////////////////////
-    // scal x scal = scal
-    // mat x  mat  = mat
-    // mat  x scal = mat
-    // scal x mat  = mat
-    // mat  x vec  = vec
-    // vec  x scal = vec
-    // scal x vec  = vec
-    //
-    // We can special case scalar_type ??
+//////////////////////////////////////////////////////////////////
+// Glue operators to mult routines. Must resolve return type cleverly from typeof(internal)
+// since nesting matrix<scalar> x matrix<matrix>-> matrix<matrix>
+// while         matrix<scalar> x matrix<scalar>-> matrix<scalar>
+// so return type depends on argument types in nasty way.
+//////////////////////////////////////////////////////////////////
+// scal x scal = scal
+// mat x  mat  = mat
+// mat  x scal = mat
+// scal x mat  = mat
+// mat  x vec  = vec
+// vec  x scal = vec
+// scal x vec  = vec
+//
+// We can special case scalar_type ??
 template<class l,class r>
 strong_inline auto operator * (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(lhs._internal * rhs._internal)>
 {
-    typedef iScalar<decltype(lhs._internal*rhs._internal)> ret_t;
-    ret_t ret;
-    mult(&ret,&lhs,&rhs);
-    return ret;
+  typedef iScalar<decltype(lhs._internal*rhs._internal)> ret_t;
+  ret_t ret;
+  mult(&ret,&lhs,&rhs);
+  return ret;
 }
 template<class l,class r,int N> strong_inline
 auto operator * (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iMatrix<decltype(lhs._internal[0][0]*rhs._internal[0][0]),N>
 {
-    typedef decltype(lhs._internal[0][0]*rhs._internal[0][0]) ret_t;
-    iMatrix<ret_t,N> ret;
-    mult(&ret,&lhs,&rhs);
-    return ret;
+  typedef decltype(lhs._internal[0][0]*rhs._internal[0][0]) ret_t;
+  iMatrix<ret_t,N> ret;
+  mult(&ret,&lhs,&rhs);
+  return ret;
 }
 template<class l,class r, int N> strong_inline
 auto operator * (const iMatrix<r,N>& lhs,const iScalar<l>& rhs) -> iMatrix<decltype(lhs._internal[0][0]*rhs._internal),N>
 {
-    typedef decltype(lhs._internal[0][0]*rhs._internal) ret_t;
+  typedef decltype(lhs._internal[0][0]*rhs._internal) ret_t;
        
-    iMatrix<ret_t,N> ret;
-    for(int c1=0;c1<N;c1++){
+  iMatrix<ret_t,N> ret;
+  for(int c1=0;c1<N;c1++){
    for(int c2=0;c2<N;c2++){
-        mult(&ret._internal[c1][c2],&lhs._internal[c1][c2],&rhs._internal);
+      mult(&ret._internal[c1][c2],&lhs._internal[c1][c2],&rhs._internal);
    }}
-    return ret;
+  return ret;
 }
 template<class l,class r,int N> strong_inline
 auto operator * (const iScalar<l>& lhs,const iMatrix<r,N>& rhs) -> iMatrix<decltype(lhs._internal*rhs._internal[0][0]),N>
 {
-    typedef decltype(lhs._internal*rhs._internal[0][0]) ret_t;
-    iMatrix<ret_t,N> ret;
-    for(int c1=0;c1<N;c1++){
+  typedef decltype(lhs._internal*rhs._internal[0][0]) ret_t;
+  iMatrix<ret_t,N> ret;
+  for(int c1=0;c1<N;c1++){
    for(int c2=0;c2<N;c2++){
-        mult(&ret._internal[c1][c2],&lhs._internal,&rhs._internal[c1][c2]);
+      mult(&ret._internal[c1][c2],&lhs._internal,&rhs._internal[c1][c2]);
    }}
-    return ret;
+  return ret;
 }
 template<class l,class r,int N> strong_inline
 auto operator * (const iMatrix<l,N>& lhs,const iVector<r,N>& rhs) -> iVector<decltype(lhs._internal[0][0]*rhs._internal[0]),N>
 {
-    typedef decltype(lhs._internal[0][0]*rhs._internal[0]) ret_t;
-    iVector<ret_t,N> ret;
-    for(int c1=0;c1<N;c1++){
-        mult(&ret._internal[c1],&lhs._internal[c1][0],&rhs._internal[0]);
-        for(int c2=1;c2<N;c2++){
-            mac(&ret._internal[c1],&lhs._internal[c1][c2],&rhs._internal[c2]);
-        }
+  typedef decltype(lhs._internal[0][0]*rhs._internal[0]) ret_t;
+  iVector<ret_t,N> ret;
+  for(int c1=0;c1<N;c1++){
+    mult(&ret._internal[c1],&lhs._internal[c1][0],&rhs._internal[0]);
+    for(int c2=1;c2<N;c2++){
+      mac(&ret._internal[c1],&lhs._internal[c1][c2],&rhs._internal[c2]);
    }
-    return ret;
+  }
+  return ret;
 }
 template<class l,class r,int N> strong_inline
 auto operator * (const iScalar<l>& lhs,const iVector<r,N>& rhs) -> iVector<decltype(lhs._internal*rhs._internal[0]),N>
 {
-    typedef decltype(lhs._internal*rhs._internal[0]) ret_t;
-    iVector<ret_t,N> ret;
-    for(int c1=0;c1<N;c1++){
-        mult(&ret._internal[c1],&lhs._internal,&rhs._internal[c1]);
-    }
-    return ret;
+  typedef decltype(lhs._internal*rhs._internal[0]) ret_t;
+  iVector<ret_t,N> ret;
+  for(int c1=0;c1<N;c1++){
+    mult(&ret._internal[c1],&lhs._internal,&rhs._internal[c1]);
+  }
+  return ret;
 }
 template<class l,class r,int N> strong_inline
 auto operator * (const iVector<l,N>& lhs,const iScalar<r>& rhs) -> iVector<decltype(lhs._internal[0]*rhs._internal),N>
 {
-    typedef decltype(lhs._internal[0]*rhs._internal) ret_t;
-    iVector<ret_t,N> ret;
-    for(int c1=0;c1<N;c1++){
-        mult(&ret._internal[c1],&lhs._internal[c1],&rhs._internal);
-    }
-    return ret;
+  typedef decltype(lhs._internal[0]*rhs._internal) ret_t;
+  iVector<ret_t,N> ret;
+  for(int c1=0;c1<N;c1++){
+    mult(&ret._internal[c1],&lhs._internal[c1],&rhs._internal);
+  }
+  return ret;
 }

+NAMESPACE_END(Grid);

-}

 #endif
--- a/lib/tensors/Tensor_arith_scalar.h
+++ b/lib/tensors/Tensor_arith_scalar.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -24,13 +24,12 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_MATH_ARITH_SCALAR_H
 #define GRID_MATH_ARITH_SCALAR_H

-namespace Grid {
-
+NAMESPACE_BEGIN(Grid);

 //////////////////////////////////////////////////////////////////////////////////////////
 // Must support native C++ types Integer, Complex, Real
@@ -283,6 +282,6 @@ template<class l,int N> strong_inline iMatrix<l,N> operator - (Integer lhs,const
  return slhs-rhs;
 }

+NAMESPACE_END(Grid);

-}
 #endif
--- a/lib/tensors/Tensor_arith_sub.h
+++ b/lib/tensors/Tensor_arith_sub.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -24,17 +24,16 @@ Author: neo <cossu@post.kek.jp>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_MATH_ARITH_SUB_H
 #define GRID_MATH_ARITH_SUB_H

-namespace Grid {
+NAMESPACE_BEGIN(Grid);

-
-    ///////////////////////////////////////////////////////////////////////////////////////////////////
-    /////////////////////////////////////////// SUB         ///////////////////////////////////////////
-    ///////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////// SUB         ///////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////
    

 // SUB is simple for now; cannot mix types and straightforward template
@@ -43,102 +42,101 @@ namespace Grid {
 // Matrix +/- Matrix
 // Matrix /- scalar
 template<class vtype,class ltype,class rtype> strong_inline void sub(iScalar<vtype> * __restrict__ ret,
-                                                              const iScalar<ltype> * __restrict__ lhs,
-                                                              const iScalar<rtype> * __restrict__ rhs)
+								     const iScalar<ltype> * __restrict__ lhs,
+								     const iScalar<rtype> * __restrict__ rhs)
 {
-    sub(&ret->_internal,&lhs->_internal,&rhs->_internal);
+  sub(&ret->_internal,&lhs->_internal,&rhs->_internal);
 }

 template<class vtype,class ltype,class rtype,int N> strong_inline void sub(iVector<vtype,N> * __restrict__ ret,
-                                                                    const iVector<ltype,N> * __restrict__ lhs,
-                                                                    const iVector<rtype,N> * __restrict__ rhs)
+									   const iVector<ltype,N> * __restrict__ lhs,
+									   const iVector<rtype,N> * __restrict__ rhs)
 {
-    for(int c=0;c<N;c++){
-        ret->_internal[c]=lhs->_internal[c]-rhs->_internal[c];
-    }
-    return;
+  for(int c=0;c<N;c++){
+    ret->_internal[c]=lhs->_internal[c]-rhs->_internal[c];
+  }
+  return;
 }
 template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMatrix<vtype,N> * __restrict__ ret,
-                                                                     const iMatrix<ltype,N> * __restrict__ lhs,
-                                                                     const iMatrix<rtype,N> * __restrict__ rhs){
-    for(int c2=0;c2<N;c2++){
+									    const iMatrix<ltype,N> * __restrict__ lhs,
+									    const iMatrix<rtype,N> * __restrict__ rhs){
+  for(int c2=0;c2<N;c2++){
    for(int c1=0;c1<N;c1++){
-        sub(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal[c1][c2]);
+      sub(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal[c1][c2]);
    }}
-    return;
+  return;
 }
 template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMatrix<vtype,N> * __restrict__ ret,
-                                                                     const iScalar<ltype> * __restrict__ lhs,
-                                                                     const iMatrix<rtype,N> * __restrict__ rhs){
-    for(int c2=0;c2<N;c2++){
+									    const iScalar<ltype> * __restrict__ lhs,
+									    const iMatrix<rtype,N> * __restrict__ rhs){
+  for(int c2=0;c2<N;c2++){
    for(int c1=0;c1<N;c1++){
-        if ( c1==c2) {
-            sub(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
-        } else {
-            // Fails -- need unary minus. Catalogue other unops?
-            ret->_internal[c1][c2]=zero;
-            ret->_internal[c1][c2]=ret->_internal[c1][c2]-rhs->_internal[c1][c2];
+      if ( c1==c2) {
+	sub(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
+      } else {
+	// Fails -- need unary minus. Catalogue other unops?
+	ret->_internal[c1][c2]=zero;
+	ret->_internal[c1][c2]=ret->_internal[c1][c2]-rhs->_internal[c1][c2];

-        }
+      }
    }}
-    return;
+  return;
 }
 template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMatrix<vtype,N> * __restrict__ ret,
-                                                                     const iMatrix<ltype,N> * __restrict__ lhs,
-                                                                     const iScalar<rtype> * __restrict__ rhs){
-    for(int c2=0;c2<N;c2++){
+									    const iMatrix<ltype,N> * __restrict__ lhs,
+									    const iScalar<rtype> * __restrict__ rhs){
+  for(int c2=0;c2<N;c2++){
    for(int c1=0;c1<N;c1++){
-        if ( c1==c2)
-            sub(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
-        else
-            ret->_internal[c1][c2]=lhs->_internal[c1][c2];
+      if ( c1==c2)
+	sub(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
+      else
+	ret->_internal[c1][c2]=lhs->_internal[c1][c2];
    }}
-    return;
+  return;
 }

-    // - operator for scalar, vector, matrix
+// - operator for scalar, vector, matrix
 template<class ltype,class rtype> strong_inline auto
 operator - (const iScalar<ltype>& lhs, const iScalar<rtype>& rhs) -> iScalar<decltype(lhs._internal - rhs._internal)>
 {
-    typedef iScalar<decltype(lhs._internal-rhs._internal)> ret_t;
-    ret_t ret;
-    sub(&ret,&lhs,&rhs);
-    return ret;
+  typedef iScalar<decltype(lhs._internal-rhs._internal)> ret_t;
+  ret_t ret;
+  sub(&ret,&lhs,&rhs);
+  return ret;
 }
 template<class ltype,class rtype,int N>
 strong_inline auto operator - (const iVector<ltype,N>& lhs,const iVector<rtype,N>& rhs) ->iVector<decltype(lhs._internal[0]-rhs._internal[0]),N>
 {
-    typedef iVector<decltype(lhs._internal[0]-rhs._internal[0]),N> ret_t;
-    ret_t ret;
-    sub(&ret,&lhs,&rhs);
-    return ret;
+  typedef iVector<decltype(lhs._internal[0]-rhs._internal[0]),N> ret_t;
+  ret_t ret;
+  sub(&ret,&lhs,&rhs);
+  return ret;
 }
 template<class ltype,class rtype,int N>
 strong_inline auto operator - (const iMatrix<ltype,N>& lhs,const iMatrix<rtype,N>& rhs) ->iMatrix<decltype(lhs._internal[0][0]-rhs._internal[0][0]),N>
 {
-    typedef iMatrix<decltype(lhs._internal[0][0]-rhs._internal[0][0]),N> ret_t;
-    ret_t ret;
-    sub(&ret,&lhs,&rhs);
-    return ret;
+  typedef iMatrix<decltype(lhs._internal[0][0]-rhs._internal[0][0]),N> ret_t;
+  ret_t ret;
+  sub(&ret,&lhs,&rhs);
+  return ret;
 }
 template<class ltype,class rtype,int N>
 strong_inline auto operator - (const iScalar<ltype>& lhs,const iMatrix<rtype,N>& rhs)->iMatrix<decltype(lhs._internal-rhs._internal[0][0]),N>
 {
-    typedef iMatrix<decltype(lhs._internal-rhs._internal[0][0]),N> ret_t;
-    ret_t ret;
-    sub(&ret,&lhs,&rhs);
-    return ret;
+  typedef iMatrix<decltype(lhs._internal-rhs._internal[0][0]),N> ret_t;
+  ret_t ret;
+  sub(&ret,&lhs,&rhs);
+  return ret;
 }
 template<class ltype,class rtype,int N>
 strong_inline auto operator - (const iMatrix<ltype,N>& lhs,const iScalar<rtype>& rhs)->iMatrix<decltype(lhs._internal[0][0]-rhs._internal),N>
 {
-    typedef iMatrix<decltype(lhs._internal[0][0]-rhs._internal),N> ret_t;
-    ret_t ret;
-    sub(&ret,&lhs,&rhs);
-    return ret;
+  typedef iMatrix<decltype(lhs._internal[0][0]-rhs._internal),N> ret_t;
+  ret_t ret;
+  sub(&ret,&lhs,&rhs);
+  return ret;
 }

-
-}
+NAMESPACE_END(Grid);

 #endif
--- a/lib/tensors/Tensor_class.h
+++ b/lib/tensors/Tensor_class.h
@@ -20,11 +20,11 @@ with this program; if not, write to the Free Software Foundation, Inc.,
 See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
-/*  END LEGAL */
+			   /*  END LEGAL */
 #ifndef GRID_MATH_TENSORS_H
 #define GRID_MATH_TENSORS_H

-namespace Grid {
+NAMESPACE_BEGIN(Grid);

 ///////////////////////////////////////////////////
 // Scalar, Vector, Matrix objects.
@@ -44,7 +44,7 @@ class GridTensorBase {};

 template <class vtype>
 class iScalar {
- public:
+public:
  vtype _internal;

  typedef vtype element;
@@ -69,10 +69,10 @@ class iScalar {
  //  iScalar<GridTypeMapper<vtype>::tensor_reduce_level<Level> >;
  iScalar() = default;
  /*
-  iScalar(const iScalar<vtype> &copyme)=default;
-  iScalar(iScalar<vtype> &&copyme)=default;
-  iScalar<vtype> & operator= (const iScalar<vtype> &copyme) = default;
-  iScalar<vtype> & operator= (iScalar<vtype> &&copyme) = default;
+    iScalar(const iScalar<vtype> &copyme)=default;
+    iScalar(iScalar<vtype> &&copyme)=default;
+    iScalar<vtype> & operator= (const iScalar<vtype> &copyme) = default;
+    iScalar<vtype> & operator= (iScalar<vtype> &&copyme) = default;
  */

  //  template<int N=0>
@@ -109,7 +109,7 @@ class iScalar {
  friend strong_inline void exchange(iScalar<vtype> &out1,iScalar<vtype> &out2,
 				     const iScalar<vtype> &in1,const iScalar<vtype> &in2,int type){
    exchange(out1._internal,out2._internal,
-	      in1._internal, in2._internal,type);
+	     in1._internal, in2._internal,type);
  }

  // Unary negation
@@ -185,13 +185,13 @@ TensorRemove(T arg) {
 }
 template <class vtype>
 strong_inline auto TensorRemove(iScalar<vtype> arg)
-    -> decltype(TensorRemove(arg._internal)) {
+  -> decltype(TensorRemove(arg._internal)) {
  return TensorRemove(arg._internal);
 }

 template <class vtype, int N>
 class iVector {
- public:
+public:
  vtype _internal[N];

  typedef vtype element;
@@ -211,7 +211,7 @@ class iVector {
  typedef iVector<typename GridTypeMapper<vtype>::DoublePrecision, N> DoublePrecision;
  
  template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type
-                         * = nullptr>
+	    * = nullptr>
  strong_inline auto operator=(T arg) -> iVector<vtype, N> {
    zeroit(*this);
    for (int i = 0; i < N; i++) _internal[i] = arg;
@@ -222,10 +222,10 @@ class iVector {
  iVector(const Zero &z) { *this = zero; };
  iVector() = default;
  /*
-  iVector(const iVector<vtype,N> &copyme)=default;
-  iVector(iVector<vtype,N> &&copyme)=default;
-  iVector<vtype,N> & operator= (const iVector<vtype,N> &copyme) = default;
-  iVector<vtype,N> & operator= (iVector<vtype,N> &&copyme) = default;
+    iVector(const iVector<vtype,N> &copyme)=default;
+    iVector(iVector<vtype,N> &&copyme)=default;
+    iVector<vtype,N> & operator= (const iVector<vtype,N> &copyme) = default;
+    iVector<vtype,N> & operator= (iVector<vtype,N> &&copyme) = default;
  */

  iVector<vtype, N> &operator=(const Zero &hero) {
@@ -265,7 +265,7 @@ class iVector {
 				     const iVector<vtype,N> &in1,const iVector<vtype,N> &in2,int type){
    for(int i=0;i<N;i++){
      exchange(out1._internal[i],out2._internal[i],
-	        in1._internal[i], in2._internal[i],type);
+	       in1._internal[i], in2._internal[i],type);
    }
  }

@@ -307,7 +307,7 @@ class iVector {

 template <class vtype, int N>
 class iMatrix {
- public:
+public:
  vtype _internal[N][N];

  typedef vtype element;
@@ -344,10 +344,10 @@ class iMatrix {
  };  // recurse down and hit the constructor for vector_type

  /*
-  iMatrix(const iMatrix<vtype,N> &copyme)=default;
-  iMatrix(iMatrix<vtype,N> &&copyme)=default;
-  iMatrix<vtype,N> & operator= (const iMatrix<vtype,N> &copyme) = default;
-  iMatrix<vtype,N> & operator= (iMatrix<vtype,N> &&copyme) = default;
+    iMatrix(const iMatrix<vtype,N> &copyme)=default;
+    iMatrix(iMatrix<vtype,N> &&copyme)=default;
+    iMatrix<vtype,N> & operator= (const iMatrix<vtype,N> &copyme) = default;
+    iMatrix<vtype,N> & operator= (iMatrix<vtype,N> &&copyme) = default;
  */

  iMatrix<vtype, N> &operator=(const Zero &hero) {
@@ -355,109 +355,109 @@ class iMatrix {
    return *this;
  }
  template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type
-                         * = nullptr>
+	    * = nullptr>
  strong_inline auto operator=(T arg) -> iMatrix<vtype, N> {
    zeroit(*this);
    for (int i = 0; i < N; i++) _internal[i][i] = arg;
    return *this;
  }

-  friend strong_inline void zeroit(iMatrix<vtype,N> &that){
-    for(int i=0;i<N;i++){
-      for(int j=0;j<N;j++){
-	zeroit(that._internal[i][j]);
+friend strong_inline void zeroit(iMatrix<vtype,N> &that){
+  for(int i=0;i<N;i++){
+    for(int j=0;j<N;j++){
+      zeroit(that._internal[i][j]);
    }}
-  }
-  friend strong_inline void prefetch(iMatrix<vtype,N> &that){
-    for(int i=0;i<N;i++) 
+}
+friend strong_inline void prefetch(iMatrix<vtype,N> &that){
+  for(int i=0;i<N;i++) 
    for(int j=0;j<N;j++) 
      prefetch(that._internal[i][j]);
-  }
-  friend strong_inline void vstream(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in){
-      for(int i=0;i<N;i++){
-      for(int j=0;j<N;j++){
-	vstream(out._internal[i][j],in._internal[i][j]);
-      }}
-  }
-  friend strong_inline void vbroadcast(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in,int lane){
-      for(int i=0;i<N;i++){
-      for(int j=0;j<N;j++){
-	vbroadcast(out._internal[i][j],in._internal[i][j],lane);
-      }}
-  }
+}
+friend strong_inline void vstream(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in){
+  for(int i=0;i<N;i++){
+    for(int j=0;j<N;j++){
+      vstream(out._internal[i][j],in._internal[i][j]);
+    }}
+}
+friend strong_inline void vbroadcast(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in,int lane){
+  for(int i=0;i<N;i++){
+    for(int j=0;j<N;j++){
+      vbroadcast(out._internal[i][j],in._internal[i][j],lane);
+    }}
+}

-  friend strong_inline void permute(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in,int permutetype){
-    for(int i=0;i<N;i++){
-      for(int j=0;j<N;j++){
-	permute(out._internal[i][j],in._internal[i][j],permutetype);
+friend strong_inline void permute(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in,int permutetype){
+  for(int i=0;i<N;i++){
+    for(int j=0;j<N;j++){
+      permute(out._internal[i][j],in._internal[i][j],permutetype);
    }}
-  }
-  friend strong_inline void rotate(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in,int rot){
-    for(int i=0;i<N;i++){
-      for(int j=0;j<N;j++){
-	rotate(out._internal[i][j],in._internal[i][j],rot);
+}
+friend strong_inline void rotate(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in,int rot){
+  for(int i=0;i<N;i++){
+    for(int j=0;j<N;j++){
+      rotate(out._internal[i][j],in._internal[i][j],rot);
    }}
-  }
-  friend strong_inline void exchange(iMatrix<vtype,N> &out1,iMatrix<vtype,N> &out2,
-				     const iMatrix<vtype,N> &in1,const iMatrix<vtype,N> &in2,int type){
-    for(int i=0;i<N;i++){
-      for(int j=0;j<N;j++){
-	exchange(out1._internal[i][j],out2._internal[i][j],
-		  in1._internal[i][j], in2._internal[i][j],type);
+}
+friend strong_inline void exchange(iMatrix<vtype,N> &out1,iMatrix<vtype,N> &out2,
+				   const iMatrix<vtype,N> &in1,const iMatrix<vtype,N> &in2,int type){
+  for(int i=0;i<N;i++){
+    for(int j=0;j<N;j++){
+      exchange(out1._internal[i][j],out2._internal[i][j],
+	       in1._internal[i][j], in2._internal[i][j],type);
    }}
-  }
+}

-  // Unary negation
-  friend strong_inline iMatrix<vtype, N> operator-(const iMatrix<vtype, N> &r) {
-    iMatrix<vtype, N> ret;
-    for (int i = 0; i < N; i++) {
-      for (int j = 0; j < N; j++) {
-        ret._internal[i][j] = -r._internal[i][j];
-      }
+// Unary negation
+friend strong_inline iMatrix<vtype, N> operator-(const iMatrix<vtype, N> &r) {
+  iMatrix<vtype, N> ret;
+  for (int i = 0; i < N; i++) {
+    for (int j = 0; j < N; j++) {
+      ret._internal[i][j] = -r._internal[i][j];
    }
-    return ret;
-  }
-  // *=,+=,-= operators inherit from corresponding "*,-,+" behaviour
-  template <class T>
-  strong_inline iMatrix<vtype, N> &operator*=(const T &r) {
-    *this = (*this) * r;
-    return *this;
-  }
-  template <class T>
-  strong_inline iMatrix<vtype, N> &operator-=(const T &r) {
-    *this = (*this) - r;
-    return *this;
-  }
-  template <class T>
-  strong_inline iMatrix<vtype, N> &operator+=(const T &r) {
-    *this = (*this) + r;
-    return *this;
  }
+  return ret;
+}
+// *=,+=,-= operators inherit from corresponding "*,-,+" behaviour
+template <class T>
+strong_inline iMatrix<vtype, N> &operator*=(const T &r) {
+  *this = (*this) * r;
+  return *this;
+}
+template <class T>
+strong_inline iMatrix<vtype, N> &operator-=(const T &r) {
+  *this = (*this) - r;
+  return *this;
+}
+template <class T>
+strong_inline iMatrix<vtype, N> &operator+=(const T &r) {
+  *this = (*this) + r;
+  return *this;
+}

-  // returns an lvalue reference
-  strong_inline vtype &operator()(int i, int j) { return _internal[i][j]; }
-  strong_inline const vtype &operator()(int i, int j) const {
-    return _internal[i][j];
-  }
-  friend std::ostream &operator<<(std::ostream &stream,
-                                  const iMatrix<vtype, N> &o) {
-    stream << "M<" << N << ">{";
-    for (int i = 0; i < N; i++) {
-      stream << "{";
-      for (int j = 0; j < N; j++) {
-        stream << o._internal[i][j];
-        if (i < N - 1) stream << ",";
-      }
-      stream << "}";
-      if (i != N - 1) stream << "\n\t\t";
+// returns an lvalue reference
+strong_inline vtype &operator()(int i, int j) { return _internal[i][j]; }
+strong_inline const vtype &operator()(int i, int j) const {
+  return _internal[i][j];
+}
+friend std::ostream &operator<<(std::ostream &stream,
+				const iMatrix<vtype, N> &o) {
+  stream << "M<" << N << ">{";
+  for (int i = 0; i < N; i++) {
+    stream << "{";
+    for (int j = 0; j < N; j++) {
+      stream << o._internal[i][j];
+      if (i < N - 1) stream << ",";
    }
    stream << "}";
-    return stream;
-  };
+    if (i != N - 1) stream << "\n\t\t";
+  }
+  stream << "}";
+  return stream;
+};

-  //  strong_inline vtype && operator ()(int i,int j) {
-  //    return _internal[i][j];
-  //  }
+//  strong_inline vtype && operator ()(int i,int j) {
+//    return _internal[i][j];
+//  }
 };

 template <class v>
@@ -478,7 +478,9 @@ void vprefetch(const iMatrix<v, N> &vv) {
    }
  }
 }
-}
+
+NAMESPACE_END(Grid);
+
 #endif


--- a/lib/tensors/Tensor_determinant.h
+++ b/lib/tensors/Tensor_determinant.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -23,50 +23,51 @@ Author: neo <cossu@post.kek.jp>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_MATH_DET_H
 #define GRID_MATH_DET_H
-namespace Grid {
-  /////////////////////////////////////////////// 
-  // Determinant function for scalar, vector, matrix
-  /////////////////////////////////////////////// 
-  inline ComplexF Determinant( const ComplexF &arg){    return arg;}
-  inline ComplexD Determinant( const ComplexD &arg){    return arg;}
-  inline RealF Determinant( const RealF &arg){    return arg;}
-  inline RealD Determinant( const RealD &arg){    return arg;}
-
-  template<class vtype> inline auto Determinant(const iScalar<vtype>&r) -> iScalar<decltype(Determinant(r._internal))>
-    {
-      iScalar<decltype(Determinant(r._internal))> ret;
-      ret._internal = Determinant(r._internal);
-      return ret;
-    }
-
-  template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr> 
-    inline iScalar<vtype> Determinant(const iMatrix<vtype,N> &arg)
-    {
-      iMatrix<vtype,N> ret(arg);
-      iScalar<vtype> det = vtype(1.0);
-      /* Conversion of matrix to upper triangular */
-      for(int i = 0; i < N; i++){
-        for(int j = 0; j < N; j++){
-	  if(j>i){
-	    vtype ratio = ret._internal[j][i]/ret._internal[i][i];
-	    for(int k = 0; k < N; k++){
-	      ret._internal[j][k] -= ratio * ret._internal[i][k];
-	    }
-	  }
-        }
-      }      
-
-      for(int i = 0; i < N; i++)
-	det *= ret._internal[i][i];   
-
-      return det;
-    }

+NAMESPACE_BEGIN(Grid);

+/////////////////////////////////////////////// 
+// Determinant function for scalar, vector, matrix
+/////////////////////////////////////////////// 
+inline ComplexF Determinant( const ComplexF &arg){    return arg;}
+inline ComplexD Determinant( const ComplexD &arg){    return arg;}
+inline RealF Determinant( const RealF &arg){    return arg;}
+inline RealD Determinant( const RealD &arg){    return arg;}

+template<class vtype> inline auto Determinant(const iScalar<vtype>&r) -> iScalar<decltype(Determinant(r._internal))>
+{
+  iScalar<decltype(Determinant(r._internal))> ret;
+  ret._internal = Determinant(r._internal);
+  return ret;
 }
+
+template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr> 
+inline iScalar<vtype> Determinant(const iMatrix<vtype,N> &arg)
+{
+  iMatrix<vtype,N> ret(arg);
+  iScalar<vtype> det = vtype(1.0);
+  /* Conversion of matrix to upper triangular */
+  for(int i = 0; i < N; i++){
+    for(int j = 0; j < N; j++){
+      if(j>i){
+	vtype ratio = ret._internal[j][i]/ret._internal[i][i];
+	for(int k = 0; k < N; k++){
+	  ret._internal[j][k] -= ratio * ret._internal[i][k];
+	}
+      }
+    }
+  }      
+
+  for(int i = 0; i < N; i++)
+    det *= ret._internal[i][i];   
+
+  return det;
+}
+
+NAMESPACE_END(Grid);
+
 #endif
--- a/lib/tensors/Tensor_exp.h
+++ b/lib/tensors/Tensor_exp.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -23,122 +23,120 @@ Author: neo <cossu@post.kek.jp>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_MATH_EXP_H
 #define GRID_MATH_EXP_H

 #define DEFAULT_MAT_EXP 12

-namespace Grid {
+NAMESPACE_BEGIN(Grid);

-  /////////////////////////////////////////////// 
-  // Exponentiate function for scalar, vector, matrix
-  /////////////////////////////////////////////// 
+/////////////////////////////////////////////// 
+// Exponentiate function for scalar, vector, matrix
+/////////////////////////////////////////////// 


-  template<class vtype> inline iScalar<vtype> Exponentiate(const iScalar<vtype>&r, RealD alpha ,  Integer Nexp = DEFAULT_MAT_EXP)
-    {
-      iScalar<vtype> ret;
-      ret._internal = Exponentiate(r._internal, alpha, Nexp);
-      return ret;
-    }
+template<class vtype> inline iScalar<vtype> Exponentiate(const iScalar<vtype>&r, RealD alpha ,  Integer Nexp = DEFAULT_MAT_EXP)
+{
+  iScalar<vtype> ret;
+  ret._internal = Exponentiate(r._internal, alpha, Nexp);
+  return ret;
+}

 template<class vtype, int N> inline iVector<vtype, N> Exponentiate(const iVector<vtype,N>&r, RealD alpha ,  Integer Nexp = DEFAULT_MAT_EXP)
-    {
-      iVector<vtype, N> ret;
-      for (int i = 0; i < N; i++)
-        ret._internal[i] = Exponentiate(r._internal[i], alpha, Nexp);
-      return ret;
-    }
+{
+  iVector<vtype, N> ret;
+  for (int i = 0; i < N; i++)
+    ret._internal[i] = Exponentiate(r._internal[i], alpha, Nexp);
+  return ret;
+}



-    // Specialisation: Cayley-Hamilton exponential for SU(3)
-    template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr> 
-    inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha  , Integer Nexp = DEFAULT_MAT_EXP )
-    {
-    // for SU(3) 2x faster than the std implementation using Nexp=12
-    // notice that it actually computes
-    // exp ( input matrix )
-    // the i sign is coming from outside
-    // input matrix is anti-hermitian NOT hermitian
-      typedef iMatrix<vtype,3> mat;
-      typedef iScalar<vtype> scalar;
-      mat unit(1.0);
-      mat temp(unit);
-      const Complex one_over_three = 1.0 / 3.0;
-      const Complex one_over_two = 1.0 / 2.0;
+// Specialisation: Cayley-Hamilton exponential for SU(3)
+template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr> 
+inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha  , Integer Nexp = DEFAULT_MAT_EXP )
+{
+  // for SU(3) 2x faster than the std implementation using Nexp=12
+  // notice that it actually computes
+  // exp ( input matrix )
+  // the i sign is coming from outside
+  // input matrix is anti-hermitian NOT hermitian
+  typedef iMatrix<vtype,3> mat;
+  typedef iScalar<vtype> scalar;
+  mat unit(1.0);
+  mat temp(unit);
+  const Complex one_over_three = 1.0 / 3.0;
+  const Complex one_over_two = 1.0 / 2.0;

-      scalar c0, c1, tmp, c0max, theta, u, w;
-      scalar xi0, u2, w2, cosw;
-      scalar fden, h0, h1, h2;
-      scalar e2iu, emiu, ixi0, qt;
-      scalar f0, f1, f2;
-      scalar unity(1.0);
+  scalar c0, c1, tmp, c0max, theta, u, w;
+  scalar xi0, u2, w2, cosw;
+  scalar fden, h0, h1, h2;
+  scalar e2iu, emiu, ixi0, qt;
+  scalar f0, f1, f2;
+  scalar unity(1.0);
      
-      mat iQ2 = arg*arg*alpha*alpha;
-      mat iQ3 = arg*iQ2*alpha;   
-      // sign in c0 from the conventions on the Ta
-      scalar imQ3, reQ2;
-      imQ3 = imag( trace(iQ3) );
-      reQ2 = real( trace(iQ2) );
-      c0 = -imQ3 * one_over_three;  
-      c1 = -reQ2 * one_over_two;
+  mat iQ2 = arg*arg*alpha*alpha;
+  mat iQ3 = arg*iQ2*alpha;   
+  // sign in c0 from the conventions on the Ta
+  scalar imQ3, reQ2;
+  imQ3 = imag( trace(iQ3) );
+  reQ2 = real( trace(iQ2) );
+  c0 = -imQ3 * one_over_three;  
+  c1 = -reQ2 * one_over_two;

-      // Cayley Hamilton checks to machine precision, tested
-      tmp = c1 * one_over_three;
-      c0max = 2.0 * pow(tmp, 1.5);
+  // Cayley Hamilton checks to machine precision, tested
+  tmp = c1 * one_over_three;
+  c0max = 2.0 * pow(tmp, 1.5);

-      theta = acos(c0 / c0max) * one_over_three;
-      u = sqrt(tmp) * cos(theta);
-      w = sqrt(c1) * sin(theta);
+  theta = acos(c0 / c0max) * one_over_three;
+  u = sqrt(tmp) * cos(theta);
+  w = sqrt(c1) * sin(theta);

-      xi0 = sin(w) / w;
-      u2 = u * u;
-      w2 = w * w;
-      cosw = cos(w);
+  xi0 = sin(w) / w;
+  u2 = u * u;
+  w2 = w * w;
+  cosw = cos(w);

-      ixi0 = timesI(xi0);
-      emiu = cos(u) - timesI(sin(u));
-      e2iu = cos(2.0 * u) + timesI(sin(2.0 * u));
+  ixi0 = timesI(xi0);
+  emiu = cos(u) - timesI(sin(u));
+  e2iu = cos(2.0 * u) + timesI(sin(2.0 * u));

-      h0 = e2iu * (u2 - w2) +
-           emiu * ((8.0 * u2 * cosw) + (2.0 * u * (3.0 * u2 + w2) * ixi0));
-      h1 = e2iu * (2.0 * u) - emiu * ((2.0 * u * cosw) - (3.0 * u2 - w2) * ixi0);
-      h2 = e2iu - emiu * (cosw + (3.0 * u) * ixi0);
+  h0 = e2iu * (u2 - w2) +
+    emiu * ((8.0 * u2 * cosw) + (2.0 * u * (3.0 * u2 + w2) * ixi0));
+  h1 = e2iu * (2.0 * u) - emiu * ((2.0 * u * cosw) - (3.0 * u2 - w2) * ixi0);
+  h2 = e2iu - emiu * (cosw + (3.0 * u) * ixi0);

-      fden = unity / (9.0 * u2 - w2);  // reals
-      f0 = h0 * fden;
-      f1 = h1 * fden;
-      f2 = h2 * fden;
+  fden = unity / (9.0 * u2 - w2);  // reals
+  f0 = h0 * fden;
+  f1 = h1 * fden;
+  f2 = h2 * fden;

-      return (f0 * unit + timesMinusI(f1) * arg*alpha - f2 * iQ2);
-    }
+  return (f0 * unit + timesMinusI(f1) * arg*alpha - f2 * iQ2);
+}



 // General exponential
 template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr> 
-    inline iMatrix<vtype,N> Exponentiate(const iMatrix<vtype,N> &arg, RealD alpha  , Integer Nexp = DEFAULT_MAT_EXP )
-    {
-    // notice that it actually computes
-    // exp ( input matrix )
-    // the i sign is coming from outside
-    // input matrix is anti-hermitian NOT hermitian
-      typedef iMatrix<vtype,N> mat;
-      mat unit(1.0);
-      mat temp(unit);
-      for(int i=Nexp; i>=1;--i){
-	      temp *= alpha/RealD(i);
-	      temp = unit + temp*arg;
-      }
-      return temp;
-
-    }
-
-
-
+inline iMatrix<vtype,N> Exponentiate(const iMatrix<vtype,N> &arg, RealD alpha  , Integer Nexp = DEFAULT_MAT_EXP )
+{
+  // notice that it actually computes
+  // exp ( input matrix )
+  // the i sign is coming from outside
+  // input matrix is anti-hermitian NOT hermitian
+  typedef iMatrix<vtype,N> mat;
+  mat unit(1.0);
+  mat temp(unit);
+  for(int i=Nexp; i>=1;--i){
+    temp *= alpha/RealD(i);
+    temp = unit + temp*arg;
+  }
+  return temp;

 }
+
+NAMESPACE_END(Grid);
+
 #endif
--- a/lib/tensors/Tensor_extract_merge.h
+++ b/lib/tensors/Tensor_extract_merge.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -27,8 +27,8 @@ Author: Christopher Kelly <ckelly@phys.columbia.edu>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_EXTRACT_H
 #define GRID_EXTRACT_H
 /////////////////////////////////////////////////////////////////
@@ -37,256 +37,256 @@ Author: Christopher Kelly <ckelly@phys.columbia.edu>

 namespace Grid{

-////////////////////////////////////////////////////////////////////////////////////////////////
-// Extract/merge a fundamental vector type, to pointer array with offset
-////////////////////////////////////////////////////////////////////////////////////////////////
+  ////////////////////////////////////////////////////////////////////////////////////////////////
+  // Extract/merge a fundamental vector type, to pointer array with offset
+  ////////////////////////////////////////////////////////////////////////////////////////////////

-template<class vsimd,class scalar>
-inline void extract(typename std::enable_if<!isGridTensor<vsimd>::value, const vsimd >::type * y, 
+  template<class vsimd,class scalar>
+  inline void extract(typename std::enable_if<!isGridTensor<vsimd>::value, const vsimd >::type * y, 
+		      std::vector<scalar *> &extracted,int offset){
+    // FIXME: bounce off memory is painful
+    static const int Nsimd=sizeof(vsimd)/sizeof(scalar);
+    int Nextr=extracted.size();
+    int s=Nsimd/Nextr;
+
+    scalar*buf = (scalar *)y;
+    for(int i=0;i<Nextr;i++){
+      extracted[i][offset] = buf[i*s];
+    }
+  };
+  ////////////////////////////////////////////////////////////////////////
+  // Merge simd vector from array of scalars to pointer array with offset
+  ////////////////////////////////////////////////////////////////////////
+  template<class vsimd,class scalar>
+  inline void merge(typename std::enable_if<!isGridTensor<vsimd>::value, vsimd >::type * y, 
 		    std::vector<scalar *> &extracted,int offset){
-  // FIXME: bounce off memory is painful
-  static const int Nsimd=sizeof(vsimd)/sizeof(scalar);
-  int Nextr=extracted.size();
-  int s=Nsimd/Nextr;

-  scalar*buf = (scalar *)y;
-  for(int i=0;i<Nextr;i++){
-    extracted[i][offset] = buf[i*s];
-  }
-};
-////////////////////////////////////////////////////////////////////////
-// Merge simd vector from array of scalars to pointer array with offset
-////////////////////////////////////////////////////////////////////////
-template<class vsimd,class scalar>
-inline void merge(typename std::enable_if<!isGridTensor<vsimd>::value, vsimd >::type * y, 
-		  std::vector<scalar *> &extracted,int offset){
+    static const int Nsimd=sizeof(vsimd)/sizeof(scalar);

-  static const int Nsimd=sizeof(vsimd)/sizeof(scalar);
-
-  int Nextr=extracted.size();
-  int s=Nsimd/Nextr; // can have sparse occupation of simd vector if simd_layout does not fill it
-                     // replicate n-fold. Use to allow Integer masks to 
-                     // predicate floating point of various width assignments and maintain conformable.
-  scalar *buf =(scalar *) y;
-  for(int i=0;i<Nextr;i++){
-    for(int ii=0;ii<s;ii++){
-      buf[i*s+ii]=extracted[i][offset];
-    }
-  }
-};
-
-////////////////////////////////////////////////////////////////////////////////////////////////
-// Extract a fundamental vector type to scalar array 
-////////////////////////////////////////////////////////////////////////////////////////////////
-template<class vsimd,class scalar>
-inline void extract(typename std::enable_if<!isGridTensor<vsimd>::value, const vsimd >::type  &y,std::vector<scalar> &extracted){
-
-  int Nextr=extracted.size();
-  int Nsimd=vsimd::Nsimd();
-  int s=Nsimd/Nextr;
-
-  scalar *buf = (scalar *)&y;
-  for(int i=0;i<Nextr;i++){
-    extracted[i]=buf[i*s];
-#ifdef PARANOID
-    for(int ii=1;ii<s;ii++){
-      if ( buf[i*s]!=buf[i*s+ii] ){
-	std::cout<<GridLogMessage << " SIMD extract failure splat = "<<s<<" ii "<<ii<<" " <<Nextr<<" "<< Nsimd<<" "<<std::endl;
-	for(int vv=0;vv<Nsimd;vv++) {
-	  std::cout<<GridLogMessage<< buf[vv]<<" ";
-	}
-	std::cout<<GridLogMessage<<std::endl;
-	assert(0);
-      }
-      assert(buf[i*s]==buf[i*s+ii]);
-    }
-#endif
-  }
-
-};
-
-////////////////////////////////////////////////////////////////////////
-// Merge simd vector from array of scalars
-////////////////////////////////////////////////////////////////////////
-template<class vsimd,class scalar>
-inline void merge(typename std::enable_if<!isGridTensor<vsimd>::value, vsimd >::type  &y,std::vector<scalar> &extracted){
-  int Nextr=extracted.size();
-  static const int Nsimd=vsimd::Nsimd();
-  int s=Nsimd/Nextr;
-  scalar *buf = (scalar *)&y;
-
-  for(int i=0;i<Nextr;i++){
-    for(int ii=0;ii<s;ii++){
-      buf[i*s+ii]=extracted[i]; // replicates value
-    }
-  }
-};
-
-////////////////////////////////////////////////////////////////////////
-// Extract to contiguous array scalar object
-////////////////////////////////////////////////////////////////////////
-template<class vobj> inline void extract(const vobj &vec,std::vector<typename vobj::scalar_object> &extracted)
-{
-  typedef typename vobj::scalar_type scalar_type ;
-  typedef typename vobj::vector_type vector_type ;
-
-  static const int Nsimd=sizeof(vector_type)/sizeof(scalar_type);
-  static const int words=sizeof(vobj)/sizeof(vector_type);
-  int Nextr=extracted.size();
-  int s=Nsimd/Nextr;
-
-  std::vector<scalar_type *> pointers(Nextr);
-  for(int i=0;i<Nextr;i++) 
-    pointers[i] =(scalar_type *)& extracted[i];
-
-  vector_type *vp = (vector_type *)&vec;
-  for(int w=0;w<words;w++){
-    extract<vector_type,scalar_type>(&vp[w],pointers,w);
-  }
-}
-////////////////////////////////////////////////////////////////////////
-// Extract to a bunch of scalar object pointers, with offset
-////////////////////////////////////////////////////////////////////////
-template<class vobj> inline 
-void extract(const vobj &vec,std::vector<typename vobj::scalar_object *> &extracted, int offset)
-{
-  typedef typename vobj::scalar_type scalar_type ;
-  typedef typename vobj::vector_type vector_type ;
-
-  static const int words=sizeof(vobj)/sizeof(vector_type);
-  static const int Nsimd=vobj::vector_type::Nsimd();
-
-  int Nextr=extracted.size();
-  int s = Nsimd/Nextr;
-  scalar_type * vp = (scalar_type *)&vec;
-
-  for(int w=0;w<words;w++){
-    for(int i=0;i<Nextr;i++){
-      scalar_type * pointer = (scalar_type *)& extracted[i][offset];
-      pointer[w] = vp[i*s+w*Nsimd];
-    }
-  }
-}
-
-////////////////////////////////////////////////////////////////////////
-// Extract to a bunch of scalar object pointers of different scalar type, with offset. Useful for precision change
-////////////////////////////////////////////////////////////////////////
-template<class vobj, class sobj> inline 
-void extract1(const vobj &vec,std::vector<sobj*> &extracted, int offset)
-{
-  typedef typename vobj::scalar_type vobj_scalar_type ;
-  typedef typename vobj::vector_type vobj_vector_type ;
-
-  typedef typename sobj::scalar_type sobj_scalar_type ;
-  
-  static const int words=sizeof(vobj)/sizeof(vobj_vector_type);
-  static const int Nsimd=vobj_vector_type::Nsimd();
-
-  int Nextr=extracted.size();
-  int s = Nsimd/Nextr;
-  vobj_scalar_type * vp = (vobj_scalar_type *)&vec;
-
-  for(int w=0;w<words;w++){
-    for(int i=0;i<Nextr;i++){
-      sobj_scalar_type * pointer = (sobj_scalar_type *)& extracted[i][offset];
-      pointer[w] = vp[i*s+w*Nsimd];
-    }
-  }
-}
-
-  
-////////////////////////////////////////////////////////////////////////
-// Merge a contiguous array of scalar objects
-////////////////////////////////////////////////////////////////////////
-template<class vobj> inline 
-void merge(vobj &vec,std::vector<typename vobj::scalar_object> &extracted)
-{
-  typedef typename vobj::scalar_type scalar_type ;
-  typedef typename vobj::vector_type vector_type ;
-  
-  static const int Nsimd=sizeof(vector_type)/sizeof(scalar_type);
-  static const int words=sizeof(vobj)/sizeof(vector_type);
-
-  int Nextr = extracted.size();
-  int splat=Nsimd/Nextr;
-
-  std::vector<scalar_type *> pointers(Nextr);
-  for(int i=0;i<Nextr;i++) 
-    pointers[i] =(scalar_type *)& extracted[i];
-  
-  vector_type *vp = (vector_type *)&vec;
-  for(int w=0;w<words;w++){
-    merge<vector_type,scalar_type>(&vp[w],pointers,w);
-  }
-}
-
-////////////////////////////////////////////////////////////////////////
-// Merge a bunch of different scalar object pointers, with offset
-////////////////////////////////////////////////////////////////////////
-template<class vobj> inline 
-void merge(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
-{
-  typedef typename vobj::scalar_type scalar_type ;
-  typedef typename vobj::vector_type vector_type ;
-  
-  const int Nsimd=sizeof(vector_type)/sizeof(scalar_type);
-  const int words=sizeof(vobj)/sizeof(vector_type);
-
-  int Nextr=extracted.size();
-  int s=Nsimd/Nextr;
-
-  scalar_type *pointer;
-  scalar_type *vp = (scalar_type *)&vec;
-
-  //  assert( (((uint64_t)vp)&(sizeof(scalar_type)-1)) == 0);
-
-  for(int w=0;w<words;w++){
+    int Nextr=extracted.size();
+    int s=Nsimd/Nextr; // can have sparse occupation of simd vector if simd_layout does not fill it
+    // replicate n-fold. Use to allow Integer masks to 
+    // predicate floating point of various width assignments and maintain conformable.
+    scalar *buf =(scalar *) y;
    for(int i=0;i<Nextr;i++){
      for(int ii=0;ii<s;ii++){
-	pointer=(scalar_type *)&extracted[i][offset];
-	vp[w*Nsimd+i*s+ii] = pointer[w];
+	buf[i*s+ii]=extracted[i][offset];
+      }
+    }
+  };
+
+  ////////////////////////////////////////////////////////////////////////////////////////////////
+  // Extract a fundamental vector type to scalar array 
+  ////////////////////////////////////////////////////////////////////////////////////////////////
+  template<class vsimd,class scalar>
+  inline void extract(typename std::enable_if<!isGridTensor<vsimd>::value, const vsimd >::type  &y,std::vector<scalar> &extracted){
+
+    int Nextr=extracted.size();
+    int Nsimd=vsimd::Nsimd();
+    int s=Nsimd/Nextr;
+
+    scalar *buf = (scalar *)&y;
+    for(int i=0;i<Nextr;i++){
+      extracted[i]=buf[i*s];
+#ifdef PARANOID
+      for(int ii=1;ii<s;ii++){
+	if ( buf[i*s]!=buf[i*s+ii] ){
+	  std::cout<<GridLogMessage << " SIMD extract failure splat = "<<s<<" ii "<<ii<<" " <<Nextr<<" "<< Nsimd<<" "<<std::endl;
+	  for(int vv=0;vv<Nsimd;vv++) {
+	    std::cout<<GridLogMessage<< buf[vv]<<" ";
+	  }
+	  std::cout<<GridLogMessage<<std::endl;
+	  assert(0);
+	}
+	assert(buf[i*s]==buf[i*s+ii]);
+      }
+#endif
+    }
+
+  };
+
+  ////////////////////////////////////////////////////////////////////////
+  // Merge simd vector from array of scalars
+  ////////////////////////////////////////////////////////////////////////
+  template<class vsimd,class scalar>
+  inline void merge(typename std::enable_if<!isGridTensor<vsimd>::value, vsimd >::type  &y,std::vector<scalar> &extracted){
+    int Nextr=extracted.size();
+    static const int Nsimd=vsimd::Nsimd();
+    int s=Nsimd/Nextr;
+    scalar *buf = (scalar *)&y;
+
+    for(int i=0;i<Nextr;i++){
+      for(int ii=0;ii<s;ii++){
+	buf[i*s+ii]=extracted[i]; // replicates value
+      }
+    }
+  };
+
+  ////////////////////////////////////////////////////////////////////////
+  // Extract to contiguous array scalar object
+  ////////////////////////////////////////////////////////////////////////
+  template<class vobj> inline void extract(const vobj &vec,std::vector<typename vobj::scalar_object> &extracted)
+  {
+    typedef typename vobj::scalar_type scalar_type ;
+    typedef typename vobj::vector_type vector_type ;
+
+    static const int Nsimd=sizeof(vector_type)/sizeof(scalar_type);
+    static const int words=sizeof(vobj)/sizeof(vector_type);
+    int Nextr=extracted.size();
+    int s=Nsimd/Nextr;
+
+    std::vector<scalar_type *> pointers(Nextr);
+    for(int i=0;i<Nextr;i++) 
+      pointers[i] =(scalar_type *)& extracted[i];
+
+    vector_type *vp = (vector_type *)&vec;
+    for(int w=0;w<words;w++){
+      extract<vector_type,scalar_type>(&vp[w],pointers,w);
+    }
+  }
+  ////////////////////////////////////////////////////////////////////////
+  // Extract to a bunch of scalar object pointers, with offset
+  ////////////////////////////////////////////////////////////////////////
+  template<class vobj> inline 
+  void extract(const vobj &vec,std::vector<typename vobj::scalar_object *> &extracted, int offset)
+  {
+    typedef typename vobj::scalar_type scalar_type ;
+    typedef typename vobj::vector_type vector_type ;
+
+    static const int words=sizeof(vobj)/sizeof(vector_type);
+    static const int Nsimd=vobj::vector_type::Nsimd();
+
+    int Nextr=extracted.size();
+    int s = Nsimd/Nextr;
+    scalar_type * vp = (scalar_type *)&vec;
+
+    for(int w=0;w<words;w++){
+      for(int i=0;i<Nextr;i++){
+	scalar_type * pointer = (scalar_type *)& extracted[i][offset];
+	pointer[w] = vp[i*s+w*Nsimd];
      }
    }
  }
- }

-template<class vobj> inline void merge1(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
-{
-  typedef typename vobj::scalar_type scalar_type ;
-  typedef typename vobj::vector_type vector_type ;
+  ////////////////////////////////////////////////////////////////////////
+  // Extract to a bunch of scalar object pointers of different scalar type, with offset. Useful for precision change
+  ////////////////////////////////////////////////////////////////////////
+  template<class vobj, class sobj> inline 
+  void extract1(const vobj &vec,std::vector<sobj*> &extracted, int offset)
+  {
+    typedef typename vobj::scalar_type vobj_scalar_type ;
+    typedef typename vobj::vector_type vobj_vector_type ;
+
+    typedef typename sobj::scalar_type sobj_scalar_type ;
  
-  static const int Nsimd=vobj::vector_type::Nsimd();
-  static const int words=sizeof(vobj)/sizeof(vector_type);
+    static const int words=sizeof(vobj)/sizeof(vobj_vector_type);
+    static const int Nsimd=vobj_vector_type::Nsimd();

-  scalar_type *vp = (scalar_type *)&vec;
+    int Nextr=extracted.size();
+    int s = Nsimd/Nextr;
+    vobj_scalar_type * vp = (vobj_scalar_type *)&vec;

-  //  assert( (((uint64_t)vp)&(sizeof(scalar_type)-1)) == 0);
-
-  for(int w=0;w<words;w++){
-  for(int i=0;i<Nsimd;i++){
-      vp[w*Nsimd+i] = ((scalar_type *)&extracted[i][offset])[w];
-  }}
-}
-
-template<class vobj> inline void merge2(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
-{
-  typedef typename vobj::scalar_type scalar_type ;
-  typedef typename vobj::vector_type vector_type ;
-  
-  const int Nsimd=vobj::vector_type::Nsimd();
-  const int words=sizeof(vobj)/sizeof(vector_type);
-
-  scalar_type *pointer;
-  scalar_type *vp = (scalar_type *)&vec;
-  //  assert( (((uint64_t)vp)&(sizeof(scalar_type)-1)) == 0);
-
-  for(int w=0;w<words;w++){
-    for(int i=0;i<Nsimd;i++){
-      pointer=(scalar_type *)&extracted[i][offset];
-      vp[w*Nsimd+i] =pointer[w];
+    for(int w=0;w<words;w++){
+      for(int i=0;i<Nextr;i++){
+	sobj_scalar_type * pointer = (sobj_scalar_type *)& extracted[i][offset];
+	pointer[w] = vp[i*s+w*Nsimd];
+      }
+    }
+  }
+
+  
+  ////////////////////////////////////////////////////////////////////////
+  // Merge a contiguous array of scalar objects
+  ////////////////////////////////////////////////////////////////////////
+  template<class vobj> inline 
+  void merge(vobj &vec,std::vector<typename vobj::scalar_object> &extracted)
+  {
+    typedef typename vobj::scalar_type scalar_type ;
+    typedef typename vobj::vector_type vector_type ;
+  
+    static const int Nsimd=sizeof(vector_type)/sizeof(scalar_type);
+    static const int words=sizeof(vobj)/sizeof(vector_type);
+
+    int Nextr = extracted.size();
+    int splat=Nsimd/Nextr;
+
+    std::vector<scalar_type *> pointers(Nextr);
+    for(int i=0;i<Nextr;i++) 
+      pointers[i] =(scalar_type *)& extracted[i];
+  
+    vector_type *vp = (vector_type *)&vec;
+    for(int w=0;w<words;w++){
+      merge<vector_type,scalar_type>(&vp[w],pointers,w);
+    }
+  }
+
+  ////////////////////////////////////////////////////////////////////////
+  // Merge a bunch of different scalar object pointers, with offset
+  ////////////////////////////////////////////////////////////////////////
+  template<class vobj> inline 
+  void merge(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
+  {
+    typedef typename vobj::scalar_type scalar_type ;
+    typedef typename vobj::vector_type vector_type ;
+  
+    const int Nsimd=sizeof(vector_type)/sizeof(scalar_type);
+    const int words=sizeof(vobj)/sizeof(vector_type);
+
+    int Nextr=extracted.size();
+    int s=Nsimd/Nextr;
+
+    scalar_type *pointer;
+    scalar_type *vp = (scalar_type *)&vec;
+
+    //  assert( (((uint64_t)vp)&(sizeof(scalar_type)-1)) == 0);
+
+    for(int w=0;w<words;w++){
+      for(int i=0;i<Nextr;i++){
+	for(int ii=0;ii<s;ii++){
+	  pointer=(scalar_type *)&extracted[i][offset];
+	  vp[w*Nsimd+i*s+ii] = pointer[w];
+	}
+      }
+    }
+  }
+
+  template<class vobj> inline void merge1(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
+  {
+    typedef typename vobj::scalar_type scalar_type ;
+    typedef typename vobj::vector_type vector_type ;
+  
+    static const int Nsimd=vobj::vector_type::Nsimd();
+    static const int words=sizeof(vobj)/sizeof(vector_type);
+
+    scalar_type *vp = (scalar_type *)&vec;
+
+    //  assert( (((uint64_t)vp)&(sizeof(scalar_type)-1)) == 0);
+
+    for(int w=0;w<words;w++){
+      for(int i=0;i<Nsimd;i++){
+	vp[w*Nsimd+i] = ((scalar_type *)&extracted[i][offset])[w];
+      }}
+  }
+
+  template<class vobj> inline void merge2(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
+  {
+    typedef typename vobj::scalar_type scalar_type ;
+    typedef typename vobj::vector_type vector_type ;
+  
+    const int Nsimd=vobj::vector_type::Nsimd();
+    const int words=sizeof(vobj)/sizeof(vector_type);
+
+    scalar_type *pointer;
+    scalar_type *vp = (scalar_type *)&vec;
+    //  assert( (((uint64_t)vp)&(sizeof(scalar_type)-1)) == 0);
+
+    for(int w=0;w<words;w++){
+      for(int i=0;i<Nsimd;i++){
+	pointer=(scalar_type *)&extracted[i][offset];
+	vp[w*Nsimd+i] =pointer[w];
+      }
    }
  }
-}

 }

--- a/lib/tensors/Tensor_index.h
+++ b/lib/tensors/Tensor_index.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -23,8 +23,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_TENSOR_INDEX_H
 #define GRID_TENSOR_INDEX_H

@@ -35,18 +35,18 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 // trace of a different index can distribute across the vector index in a replicated way
 // but we do not trace a vector index.

-namespace Grid {
+NAMESPACE_BEGIN(Grid);

-  /* Needed?
-template<int Level> inline ComplexF traceIndex(const ComplexF arg) { return arg;}
-template<int Level> inline ComplexD traceIndex(const ComplexD arg) { return arg;}
-template<int Level> inline RealF traceIndex(const RealF arg) { return arg;}
-template<int Level> inline RealD traceIndex(const RealD arg) { return arg;}
-  */
+/* Needed?
+   template<int Level> inline ComplexF traceIndex(const ComplexF arg) { return arg;}
+   template<int Level> inline ComplexD traceIndex(const ComplexD arg) { return arg;}
+   template<int Level> inline RealF traceIndex(const RealF arg) { return arg;}
+   template<int Level> inline RealD traceIndex(const RealD arg) { return arg;}
+*/
 template<int Level> 
 class TensorIndexRecursion {

- public:
+public:

  ////////////////////////////////////////////////////
  // Type Queries
@@ -76,158 +76,158 @@ class TensorIndexRecursion {
    ret._internal = TensorIndexRecursion<Level-1>::traceIndex(arg._internal);
    return ret;
  }
-  template<class vtype,int N>
-  static auto traceIndex(const iVector<vtype,N> arg) ->  iVector<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal[0])),N> 
-  {
-    iVector<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal[0])),N> ret;
-    for(int i=0;i<N;i++){
-      ret._internal[i] = TensorIndexRecursion<Level-1>::traceIndex(arg._internal[i]);
-    }
-    return ret;
+template<class vtype,int N>
+static auto traceIndex(const iVector<vtype,N> arg) ->  iVector<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal[0])),N> 
+{
+  iVector<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal[0])),N> ret;
+  for(int i=0;i<N;i++){
+    ret._internal[i] = TensorIndexRecursion<Level-1>::traceIndex(arg._internal[i]);
  }
+  return ret;
+}
  template<class vtype,int N>
  static auto traceIndex(const iMatrix<vtype,N> arg) ->  iMatrix<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal[0][0])),N> 
  {
    iMatrix<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal[0][0])),N> ret;
    for(int i=0;i<N;i++){
-    for(int j=0;j<N;j++){
-      ret._internal[i][j] = TensorIndexRecursion<Level-1>::traceIndex(arg._internal[i][j]);
-    }}
-    return ret;
-  }
-  ////////////////////////////////////////////
-  // Recursion for peeking a specific index
-  ////////////////////////////////////////////
-  template<class vtype>
-  static auto peekIndex(const iScalar<vtype> arg,int i) ->  iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal,0))> 
-  {
-    iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal,0))> ret;
-    ret._internal = TensorIndexRecursion<Level-1>::peekIndex(arg._internal,i);
-    return ret;
-  }
-  template<class vtype>
-  static auto peekIndex(const iScalar<vtype> arg,int i,int j) ->  iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal,0,0))> 
-  {
-    iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal,0,0))> ret;
-    ret._internal = TensorIndexRecursion<Level-1>::peekIndex(arg._internal,i,j);
+      for(int j=0;j<N;j++){
+	ret._internal[i][j] = TensorIndexRecursion<Level-1>::traceIndex(arg._internal[i][j]);
+      }}
    return ret;
  }
+////////////////////////////////////////////
+// Recursion for peeking a specific index
+////////////////////////////////////////////
+template<class vtype>
+static auto peekIndex(const iScalar<vtype> arg,int i) ->  iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal,0))> 
+{
+  iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal,0))> ret;
+  ret._internal = TensorIndexRecursion<Level-1>::peekIndex(arg._internal,i);
+  return ret;
+}
+template<class vtype>
+static auto peekIndex(const iScalar<vtype> arg,int i,int j) ->  iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal,0,0))> 
+{
+  iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal,0,0))> ret;
+  ret._internal = TensorIndexRecursion<Level-1>::peekIndex(arg._internal,i,j);
+  return ret;
+}

-  template<class vtype,int N>
-  static auto peekIndex(const iVector<vtype,N> arg,int ii) ->  iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0],0)),N> 
-  {
-    iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0],0)),N> ret;
-    for(int i=0;i<N;i++){
-      ret._internal[i] = TensorIndexRecursion<Level-1>::peekIndex(arg._internal[i],ii);
-    }
-    return ret;
+template<class vtype,int N>
+static auto peekIndex(const iVector<vtype,N> arg,int ii) ->  iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0],0)),N> 
+{
+  iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0],0)),N> ret;
+  for(int i=0;i<N;i++){
+    ret._internal[i] = TensorIndexRecursion<Level-1>::peekIndex(arg._internal[i],ii);
  }
-  template<class vtype,int N>
-  static auto peekIndex(const iVector<vtype,N> arg,int ii,int jj) ->  iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0],0,0)),N> 
-  {
-    iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0],0,0)),N> ret;
-    for(int i=0;i<N;i++){
-      ret._internal[i] = TensorIndexRecursion<Level-1>::peekIndex(arg._internal[i],ii,jj);
-    }
-    return ret;
+  return ret;
+}
+template<class vtype,int N>
+static auto peekIndex(const iVector<vtype,N> arg,int ii,int jj) ->  iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0],0,0)),N> 
+{
+  iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0],0,0)),N> ret;
+  for(int i=0;i<N;i++){
+    ret._internal[i] = TensorIndexRecursion<Level-1>::peekIndex(arg._internal[i],ii,jj);
  }
+  return ret;
+}

-  template<class vtype,int N>
-  static auto peekIndex(const iMatrix<vtype,N> arg,int ii) ->  iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0][0],0)),N> 
-  {
-    iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0][0],0)),N> ret;
-    for(int i=0;i<N;i++){
+template<class vtype,int N>
+static auto peekIndex(const iMatrix<vtype,N> arg,int ii) ->  iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0][0],0)),N> 
+{
+  iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0][0],0)),N> ret;
+  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      ret._internal[i][j] = TensorIndexRecursion<Level-1>::peekIndex(arg._internal[i][j],ii);
    }}
-    return ret;
-  }
-  template<class vtype,int N>
-  static auto peekIndex(const iMatrix<vtype,N> arg,int ii,int jj) ->  iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0][0],0,0)),N> 
-  {
-    iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0][0],0,0)),N> ret;
-    for(int i=0;i<N;i++){
+  return ret;
+}
+template<class vtype,int N>
+static auto peekIndex(const iMatrix<vtype,N> arg,int ii,int jj) ->  iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0][0],0,0)),N> 
+{
+  iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0][0],0,0)),N> ret;
+  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      ret._internal[i][j] = TensorIndexRecursion<Level-1>::peekIndex(arg._internal[i][j],ii,jj);
    }}
-    return ret;
+  return ret;
+}
+////////////////////////////////////////////
+// Recursion for poking a specific index
+////////////////////////////////////////////
+
+template<class vtype> inline static 
+void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal,0))> &arg, int i)
+{
+  TensorIndexRecursion<Level-1>::pokeIndex(ret._internal,arg._internal,i);
+}
+template<class vtype> inline static 
+void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal,0,0))> &arg, int i,int j)
+{
+  TensorIndexRecursion<Level-1>::pokeIndex(ret._internal,arg._internal,i,j);
+}
+
+template<class vtype,int N> inline static 
+void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0],0)),N> &arg, int i)
+{
+  for(int ii=0;ii<N;ii++){
+    TensorIndexRecursion<Level-1>::pokeIndex(ret._internal[ii],arg._internal[ii],i);
  }
-  ////////////////////////////////////////////
-  // Recursion for poking a specific index
-  ////////////////////////////////////////////
-
-  template<class vtype> inline static 
-    void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal,0))> &arg, int i)
-    {
-      TensorIndexRecursion<Level-1>::pokeIndex(ret._internal,arg._internal,i);
-    }
-  template<class vtype> inline static 
-    void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal,0,0))> &arg, int i,int j)
-    {
-      TensorIndexRecursion<Level-1>::pokeIndex(ret._internal,arg._internal,i,j);
-    }
-
-  template<class vtype,int N> inline static 
-    void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0],0)),N> &arg, int i)
-    {
-      for(int ii=0;ii<N;ii++){
-	TensorIndexRecursion<Level-1>::pokeIndex(ret._internal[ii],arg._internal[ii],i);
-      }
-    }
-  template<class vtype,int N> inline static 
-    void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0],0,0)),N> &arg, int i,int j)
-    {
-      for(int ii=0;ii<N;ii++){
-	TensorIndexRecursion<Level-1>::pokeIndex(ret._internal[ii],arg._internal[ii],i,j);
-      }
-    }
-
-  template<class vtype,int N> inline static 
-    void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0][0],0)),N> &arg, int i)
-    {
-      for(int ii=0;ii<N;ii++){
-      for(int jj=0;jj<N;jj++){
-	TensorIndexRecursion<Level-1>::pokeIndex(ret._internal[ii][jj],arg._internal[ii][jj],i);
-      }}
-    }
-  template<class vtype,int N> inline static 
-    void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0][0],0,0)),N> &arg, int i,int j)
-    {
-      for(int ii=0;ii<N;ii++){
-      for(int jj=0;jj<N;jj++){
-	TensorIndexRecursion<Level-1>::pokeIndex(ret._internal[ii][jj],arg._internal[ii][jj],i,j);
-      }}
-    }
-
-  ////////////////////////////////////////////
-  // Recursion for transposing a specific index
-  ////////////////////////////////////////////
-  template<class vtype>
-  static auto transposeIndex(const iScalar<vtype> arg) ->  iScalar<vtype> 
-  {
-    iScalar<vtype> ret;
-    ret._internal = TensorIndexRecursion<Level-1>::transposeIndex(arg._internal);
-    return ret;
+}
+template<class vtype,int N> inline static 
+void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0],0,0)),N> &arg, int i,int j)
+{
+  for(int ii=0;ii<N;ii++){
+    TensorIndexRecursion<Level-1>::pokeIndex(ret._internal[ii],arg._internal[ii],i,j);
  }
-  template<class vtype,int N>
-  static auto transposeIndex(const iVector<vtype,N> arg) ->  iVector<vtype,N> 
-  {
-    iVector<vtype,N> ret;
-    for(int i=0;i<N;i++){
-      ret._internal[i] = TensorIndexRecursion<Level-1>::transposeIndex(arg._internal[i]);
-    }
-    return ret;
+}
+
+template<class vtype,int N> inline static 
+void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0][0],0)),N> &arg, int i)
+{
+  for(int ii=0;ii<N;ii++){
+    for(int jj=0;jj<N;jj++){
+      TensorIndexRecursion<Level-1>::pokeIndex(ret._internal[ii][jj],arg._internal[ii][jj],i);
+    }}
+}
+template<class vtype,int N> inline static 
+void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0][0],0,0)),N> &arg, int i,int j)
+{
+  for(int ii=0;ii<N;ii++){
+    for(int jj=0;jj<N;jj++){
+      TensorIndexRecursion<Level-1>::pokeIndex(ret._internal[ii][jj],arg._internal[ii][jj],i,j);
+    }}
+}
+
+////////////////////////////////////////////
+// Recursion for transposing a specific index
+////////////////////////////////////////////
+template<class vtype>
+static auto transposeIndex(const iScalar<vtype> arg) ->  iScalar<vtype> 
+{
+  iScalar<vtype> ret;
+  ret._internal = TensorIndexRecursion<Level-1>::transposeIndex(arg._internal);
+  return ret;
+}
+template<class vtype,int N>
+static auto transposeIndex(const iVector<vtype,N> arg) ->  iVector<vtype,N> 
+{
+  iVector<vtype,N> ret;
+  for(int i=0;i<N;i++){
+    ret._internal[i] = TensorIndexRecursion<Level-1>::transposeIndex(arg._internal[i]);
  }
-  template<class vtype,int N>
-  static auto transposeIndex(const iMatrix<vtype,N> arg) ->  iMatrix<vtype,N> 
-  {
-    iMatrix<vtype,N> ret;
-    for(int i=0;i<N;i++){
+  return ret;
+}
+template<class vtype,int N>
+static auto transposeIndex(const iMatrix<vtype,N> arg) ->  iMatrix<vtype,N> 
+{
+  iMatrix<vtype,N> ret;
+  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      ret._internal[i][j] = TensorIndexRecursion<Level-1>::transposeIndex(arg._internal[i][j]);
    }}
-    return ret;
-  }
+  return ret;
+}
 };

 ////////////////////////////
@@ -236,7 +236,7 @@ class TensorIndexRecursion {
 #define RemoveCRV(a) typename std::remove_const<typename std::remove_reference<decltype(a)>::type>::type
 template<>
 class TensorIndexRecursion<0> {
- public:
+public:
  ////////////////////////////////////////////////////
  // Type Queries
  ////////////////////////////////////////////////////
@@ -266,16 +266,16 @@ class TensorIndexRecursion<0> {
    ret._internal = arg._internal;
    return ret;
  }
-  template<class vtype,int N>
-  static auto traceIndex(const iVector<vtype,N> arg) ->  iScalar<RemoveCRV(arg._internal[0])>
-  {
-    iScalar<RemoveCRV(arg._internal[0])> ret;
-    ret._internal=zero;
-    for(int i=0;i<N;i++){
-      ret._internal = ret._internal+ arg._internal[i];
-    }
-    return ret;
+template<class vtype,int N>
+static auto traceIndex(const iVector<vtype,N> arg) ->  iScalar<RemoveCRV(arg._internal[0])>
+{
+  iScalar<RemoveCRV(arg._internal[0])> ret;
+  ret._internal=zero;
+  for(int i=0;i<N;i++){
+    ret._internal = ret._internal+ arg._internal[i];
  }
+  return ret;
+}
  template<class vtype,int N>
  static auto traceIndex(const iMatrix<vtype,N> arg) ->  iScalar<RemoveCRV(arg._internal[0][0])> 
  {
@@ -286,56 +286,56 @@ class TensorIndexRecursion<0> {
    }
    return ret;
  }
-  /////////////////////////////////////////
-  // Ends recursion for transpose scalar/matrix ; no way to terminate on vector
-  /////////////////////////////////////////
-  template<class vtype>
-  static auto transposeIndex(const iScalar<vtype> arg) ->  iScalar<vtype>
-  {
-    iScalar<vtype> ret;
-    ret._internal = arg._internal;
-    return ret;
-  }
-  template<class vtype,int N>
-  static auto transposeIndex(const iMatrix<vtype,N> arg)  ->  iMatrix<vtype,N> 
-  {
-    iMatrix<vtype,N> ret;
-    ret=zero;
-    for(int i=0;i<N;i++){
+/////////////////////////////////////////
+// Ends recursion for transpose scalar/matrix ; no way to terminate on vector
+/////////////////////////////////////////
+template<class vtype>
+static auto transposeIndex(const iScalar<vtype> arg) ->  iScalar<vtype>
+{
+  iScalar<vtype> ret;
+  ret._internal = arg._internal;
+  return ret;
+}
+template<class vtype,int N>
+static auto transposeIndex(const iMatrix<vtype,N> arg)  ->  iMatrix<vtype,N> 
+{
+  iMatrix<vtype,N> ret;
+  ret=zero;
+  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      ret._internal[i][j] = ret._internal[i][j]+arg._internal[i][j];
    }}
-    return ret;
-  }
-  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-  // End recursion for peeking a specific index; single index on vector, double index on matrix
-  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-  template<class vtype,int N>
-  static auto peekIndex(const iVector<vtype,N> arg,int ii) ->  iScalar<vtype> 
-  {
-    iScalar<vtype> ret;
-    ret._internal = arg._internal[ii];
-    return ret;
-  }
-  template<class vtype,int N>
-  static auto peekIndex(const iMatrix<vtype,N> arg,int ii,int jj) ->  iScalar<vtype>
-  {
-    iScalar<vtype> ret;
-    ret._internal = arg._internal[ii][jj];
-    return ret;
-  }
-  // Vector poke, one index
-  template<class vtype,int N> inline static 
-    void pokeIndex(iVector<vtype,N> &ret, const iScalar<vtype> &arg,int i)
-    {
-      ret._internal[i] = arg._internal;
-    }
-  // Matrix poke two indices
-  template<class vtype,int N> inline static 
-    void pokeIndex(iMatrix<vtype,N> &ret, const iScalar<vtype> &arg,int i,int j)
-    {
-      ret._internal[i][j] = arg._internal;
-    }
+  return ret;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// End recursion for peeking a specific index; single index on vector, double index on matrix
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+template<class vtype,int N>
+static auto peekIndex(const iVector<vtype,N> arg,int ii) ->  iScalar<vtype> 
+{
+  iScalar<vtype> ret;
+  ret._internal = arg._internal[ii];
+  return ret;
+}
+template<class vtype,int N>
+static auto peekIndex(const iMatrix<vtype,N> arg,int ii,int jj) ->  iScalar<vtype>
+{
+  iScalar<vtype> ret;
+  ret._internal = arg._internal[ii][jj];
+  return ret;
+}
+// Vector poke, one index
+template<class vtype,int N> inline static 
+void pokeIndex(iVector<vtype,N> &ret, const iScalar<vtype> &arg,int i)
+{
+  ret._internal[i] = arg._internal;
+}
+// Matrix poke two indices
+template<class vtype,int N> inline static 
+void pokeIndex(iMatrix<vtype,N> &ret, const iScalar<vtype> &arg,int i,int j)
+{
+  ret._internal[i][j] = arg._internal;
+}

 };

@@ -404,5 +404,6 @@ void pokeIndex (vtype &ret,const decltype(TensorIndexRecursion<Level>::peekIndex

 #undef RemoveCRV

-}
+NAMESPACE_END(Grid);
+
 #endif
--- a/lib/tensors/Tensor_inner.h
+++ b/lib/tensors/Tensor_inner.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -24,24 +24,26 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_MATH_INNER_H
 #define GRID_MATH_INNER_H
-namespace Grid {
-  ///////////////////////////////////////////////////////////////////////////////////////
-  // innerProduct Scalar x Scalar -> Scalar
-  // innerProduct Vector x Vector -> Scalar
-  // innerProduct Matrix x Matrix -> Scalar
-  ///////////////////////////////////////////////////////////////////////////////////////
-  template<class sobj> inline RealD norm2(const sobj &arg){
-    auto nrm = innerProductD(arg,arg);
-    RealD ret = real(nrm);
-    return ret;
-  }
-  //////////////////////////////////////
-  // If single promote to double and sum 2x
-  //////////////////////////////////////
+
+NAMESPACE_BEGIN(Grid);
+
+///////////////////////////////////////////////////////////////////////////////////////
+// innerProduct Scalar x Scalar -> Scalar
+// innerProduct Vector x Vector -> Scalar
+// innerProduct Matrix x Matrix -> Scalar
+///////////////////////////////////////////////////////////////////////////////////////
+template<class sobj> inline RealD norm2(const sobj &arg){
+  auto nrm = innerProductD(arg,arg);
+  RealD ret = real(nrm);
+  return ret;
+}
+//////////////////////////////////////
+// If single promote to double and sum 2x
+//////////////////////////////////////

 inline ComplexD innerProductD(const ComplexF &l,const ComplexF &r){  return innerProduct(l,r); }
 inline ComplexD innerProductD(const ComplexD &l,const ComplexD &r){  return innerProduct(l,r); }
@@ -65,73 +67,74 @@ inline vRealD innerProductD(const vRealF &l,const vRealF &r){
  return innerProduct(la,ra) + innerProduct(lb,rb); 
 }

-  template<class l,class r,int N> inline
-  auto innerProductD (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProductD(lhs._internal[0],rhs._internal[0]))>
-  {
-    typedef decltype(innerProductD(lhs._internal[0],rhs._internal[0])) ret_t;
-    iScalar<ret_t> ret;
-    ret=zero;
-    for(int c1=0;c1<N;c1++){
-      ret._internal += innerProductD(lhs._internal[c1],rhs._internal[c1]);
-    }
-    return ret;
+template<class l,class r,int N> inline
+auto innerProductD (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProductD(lhs._internal[0],rhs._internal[0]))>
+{
+  typedef decltype(innerProductD(lhs._internal[0],rhs._internal[0])) ret_t;
+  iScalar<ret_t> ret;
+  ret=zero;
+  for(int c1=0;c1<N;c1++){
+    ret._internal += innerProductD(lhs._internal[c1],rhs._internal[c1]);
  }
-  template<class l,class r,int N> inline
-  auto innerProductD (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProductD(lhs._internal[0][0],rhs._internal[0][0]))>
-  {
-    typedef decltype(innerProductD(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
-    iScalar<ret_t> ret;
-    iScalar<ret_t> tmp;
-    ret=zero;
-    for(int c1=0;c1<N;c1++){
+  return ret;
+}
+template<class l,class r,int N> inline
+auto innerProductD (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProductD(lhs._internal[0][0],rhs._internal[0][0]))>
+{
+  typedef decltype(innerProductD(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
+  iScalar<ret_t> ret;
+  iScalar<ret_t> tmp;
+  ret=zero;
+  for(int c1=0;c1<N;c1++){
    for(int c2=0;c2<N;c2++){
      ret._internal+=innerProductD(lhs._internal[c1][c2],rhs._internal[c1][c2]);
    }}
-    return ret;
+  return ret;
+}
+template<class l,class r> inline
+auto innerProductD (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProductD(lhs._internal,rhs._internal))>
+{
+  typedef decltype(innerProductD(lhs._internal,rhs._internal)) ret_t;
+  iScalar<ret_t> ret;
+  ret._internal = innerProductD(lhs._internal,rhs._internal);
+  return ret;
+}
+//////////////////////
+// Keep same precison
+//////////////////////
+template<class l,class r,int N> inline
+auto innerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0],rhs._internal[0]))>
+{
+  typedef decltype(innerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
+  iScalar<ret_t> ret;
+  ret=zero;
+  for(int c1=0;c1<N;c1++){
+    ret._internal += innerProduct(lhs._internal[c1],rhs._internal[c1]);
  }
-  template<class l,class r> inline
-  auto innerProductD (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProductD(lhs._internal,rhs._internal))>
-  {
-    typedef decltype(innerProductD(lhs._internal,rhs._internal)) ret_t;
-    iScalar<ret_t> ret;
-    ret._internal = innerProductD(lhs._internal,rhs._internal);
-    return ret;
-  }
-  //////////////////////
-  // Keep same precison
-  //////////////////////
-  template<class l,class r,int N> inline
-  auto innerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0],rhs._internal[0]))>
-  {
-    typedef decltype(innerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
-    iScalar<ret_t> ret;
-    ret=zero;
-    for(int c1=0;c1<N;c1++){
-      ret._internal += innerProduct(lhs._internal[c1],rhs._internal[c1]);
-    }
-    return ret;
-  }
-  template<class l,class r,int N> inline
-  auto innerProduct (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0]))>
-  {
-    typedef decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
-    iScalar<ret_t> ret;
-    iScalar<ret_t> tmp;
-    ret=zero;
-    for(int c1=0;c1<N;c1++){
+  return ret;
+}
+template<class l,class r,int N> inline
+auto innerProduct (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0]))>
+{
+  typedef decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
+  iScalar<ret_t> ret;
+  iScalar<ret_t> tmp;
+  ret=zero;
+  for(int c1=0;c1<N;c1++){
    for(int c2=0;c2<N;c2++){
      ret._internal+=innerProduct(lhs._internal[c1][c2],rhs._internal[c1][c2]);
    }}
-    return ret;
-  }
-  template<class l,class r> inline
-  auto innerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProduct(lhs._internal,rhs._internal))>
-  {
-    typedef decltype(innerProduct(lhs._internal,rhs._internal)) ret_t;
-    iScalar<ret_t> ret;
-    ret._internal = innerProduct(lhs._internal,rhs._internal);
-    return ret;
-  }
-
+  return ret;
 }
+template<class l,class r> inline
+auto innerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProduct(lhs._internal,rhs._internal))>
+{
+  typedef decltype(innerProduct(lhs._internal,rhs._internal)) ret_t;
+  iScalar<ret_t> ret;
+  ret._internal = innerProduct(lhs._internal,rhs._internal);
+  return ret;
+}
+
+NAMESPACE_END(Grid);
+
 #endif
--- a/lib/tensors/Tensor_logical.h
+++ b/lib/tensors/Tensor_logical.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -23,37 +23,38 @@ Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_TENSOR_LOGICAL_H
 #define GRID_TENSOR_LOGICAL_H

-namespace Grid {
+NAMESPACE_BEGIN(Grid);

-#define LOGICAL_BINOP(Op)\
-template<class v> strong_inline iScalar<v> operator Op (const iScalar<v>& lhs,const iScalar<v>& rhs) \
-{\
-  iScalar<v> ret;\
-  ret._internal = lhs._internal Op rhs._internal ;\
-  return ret;\
-}\
-template<class l> strong_inline iScalar<l> operator Op (const iScalar<l>& lhs,Integer rhs) \
-{\
-  typename iScalar<l>::scalar_type t; t=rhs;\
-  typename iScalar<l>::tensor_reduced srhs; srhs=t;\
-  return lhs Op srhs;\
-}\
-template<class l> strong_inline iScalar<l> operator Op (Integer lhs,const iScalar<l>& rhs) \
-{\
-  typename iScalar<l>::scalar_type t;t=lhs;\
-  typename iScalar<l>::tensor_reduced slhs;slhs=t;\
-  return slhs Op rhs;\
-}
+#define LOGICAL_BINOP(Op)						\
+  template<class v> strong_inline iScalar<v> operator Op (const iScalar<v>& lhs,const iScalar<v>& rhs) \
+  {									\
+    iScalar<v> ret;							\
+    ret._internal = lhs._internal Op rhs._internal ;			\
+    return ret;								\
+  }									\
+  template<class l> strong_inline iScalar<l> operator Op (const iScalar<l>& lhs,Integer rhs) \
+  {									\
+    typename iScalar<l>::scalar_type t; t=rhs;				\
+    typename iScalar<l>::tensor_reduced srhs; srhs=t;			\
+    return lhs Op srhs;							\
+  }									\
+  template<class l> strong_inline iScalar<l> operator Op (Integer lhs,const iScalar<l>& rhs) \
+  {									\
+    typename iScalar<l>::scalar_type t;t=lhs;				\
+    typename iScalar<l>::tensor_reduced slhs;slhs=t;			\
+    return slhs Op rhs;							\
+  }

 LOGICAL_BINOP(|);
 LOGICAL_BINOP(&);
 LOGICAL_BINOP(||);
 LOGICAL_BINOP(&&);

-}
+NAMESPACE_END(Grid);
+
 #endif
--- a/lib/tensors/Tensor_outer.h
+++ b/lib/tensors/Tensor_outer.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -23,36 +23,38 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_MATH_OUTER_H
 #define GRID_MATH_OUTER_H
-namespace Grid {
-    ///////////////////////////////////////////////////////////////////////////////////////
-    // outerProduct Scalar x Scalar -> Scalar
-    //              Vector x Vector -> Matrix
-    ///////////////////////////////////////////////////////////////////////////////////////
+
+NAMESPACE_BEGIN(Grid);
+
+///////////////////////////////////////////////////////////////////////////////////////
+// outerProduct Scalar x Scalar -> Scalar
+//              Vector x Vector -> Matrix
+///////////////////////////////////////////////////////////////////////////////////////

 template<class l,class r,int N> inline
 auto outerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iMatrix<decltype(outerProduct(lhs._internal[0],rhs._internal[0])),N>
 {
-    typedef decltype(outerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
-    iMatrix<ret_t,N> ret;
-    for(int c1=0;c1<N;c1++){
+  typedef decltype(outerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
+  iMatrix<ret_t,N> ret;
+  for(int c1=0;c1<N;c1++){
    for(int c2=0;c2<N;c2++){
-        ret._internal[c1][c2] = outerProduct(lhs._internal[c1],rhs._internal[c2]);
+      ret._internal[c1][c2] = outerProduct(lhs._internal[c1],rhs._internal[c2]);
    }}
-    return ret;
+  return ret;
 }


 template<class l,class r> inline
 auto outerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(outerProduct(lhs._internal,rhs._internal))>
 {
-    typedef decltype(outerProduct(lhs._internal,rhs._internal)) ret_t;
-    iScalar<ret_t> ret;
-    ret._internal = outerProduct(lhs._internal,rhs._internal);
-    return ret;
+  typedef decltype(outerProduct(lhs._internal,rhs._internal)) ret_t;
+  iScalar<ret_t> ret;
+  ret._internal = outerProduct(lhs._internal,rhs._internal);
+  return ret;
 }

  
@@ -75,5 +77,6 @@ inline RealD outerProduct(const RealD &l, const RealD& r)
  return l*r;
 }

-}
+NAMESPACE_END(Grid);
+
 #endif
--- a/lib/tensors/Tensor_reality.h
+++ b/lib/tensors/Tensor_reality.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -24,20 +24,21 @@ Author: neo <cossu@post.kek.jp>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_MATH_REALITY_H
 #define GRID_MATH_REALITY_H
-namespace Grid {
+
+NAMESPACE_BEGIN(Grid);

 /////////////////////////////////////////////// 
 // multiply by I; make recursive.
 /////////////////////////////////////////////// 
 template<class vtype> inline iScalar<vtype> timesI(const iScalar<vtype>&r) 
 {
-    iScalar<vtype> ret;
-    timesI(ret._internal,r._internal);
-    return ret;
+  iScalar<vtype> ret;
+  timesI(ret._internal,r._internal);
+  return ret;
 }
 template<class vtype,int N> inline iVector<vtype,N> timesI(const iVector<vtype,N>&r) 
 {
@@ -51,9 +52,9 @@ template<class vtype,int N> inline iMatrix<vtype,N> timesI(const iMatrix<vtype,N
 {
  iMatrix<vtype,N> ret;
  for(int i=0;i<N;i++){
-  for(int j=0;j<N;j++){
-    timesI(ret._internal[i][j],r._internal[i][j]);
-  }}
+    for(int j=0;j<N;j++){
+      timesI(ret._internal[i][j],r._internal[i][j]);
+    }}
  return ret;
 }

@@ -70,17 +71,17 @@ template<class vtype,int N> inline void timesI(iVector<vtype,N> &ret,const iVect
 template<class vtype,int N> inline void  timesI(iMatrix<vtype,N> &ret,const iMatrix<vtype,N>&r)
 {
  for(int i=0;i<N;i++){
-  for(int j=0;j<N;j++){
-    timesI(ret._internal[i][j],r._internal[i][j]);
-  }}
+    for(int j=0;j<N;j++){
+      timesI(ret._internal[i][j],r._internal[i][j]);
+    }}
 }


 template<class vtype> inline iScalar<vtype> timesMinusI(const iScalar<vtype>&r) 
 {
-    iScalar<vtype> ret;
-    timesMinusI(ret._internal,r._internal);
-    return ret;
+  iScalar<vtype> ret;
+  timesMinusI(ret._internal,r._internal);
+  return ret;
 }
 template<class vtype,int N> inline iVector<vtype,N> timesMinusI(const iVector<vtype,N>&r) 
 {
@@ -94,9 +95,9 @@ template<class vtype,int N> inline iMatrix<vtype,N> timesMinusI(const iMatrix<vt
 {
  iMatrix<vtype,N> ret;
  for(int i=0;i<N;i++){
-  for(int j=0;j<N;j++){
-    timesMinusI(ret._internal[i][j],r._internal[i][j]);
-  }}
+    for(int j=0;j<N;j++){
+      timesMinusI(ret._internal[i][j],r._internal[i][j]);
+    }}
  return ret;
 }

@@ -113,9 +114,9 @@ template<class vtype,int N> inline void timesMinusI(iVector<vtype,N> &ret,const
 template<class vtype,int N> inline void  timesMinusI(iMatrix<vtype,N> &ret,const iMatrix<vtype,N>&r)
 {
  for(int i=0;i<N;i++){
-  for(int j=0;j<N;j++){
-    timesMinusI(ret._internal[i][j],r._internal[i][j]);
-  }}
+    for(int j=0;j<N;j++){
+      timesMinusI(ret._internal[i][j],r._internal[i][j]);
+    }}
 }


@@ -124,9 +125,9 @@ template<class vtype,int N> inline void  timesMinusI(iMatrix<vtype,N> &ret,const
 /////////////////////////////////////////////// 
 template<class vtype> inline iScalar<vtype> conjugate(const iScalar<vtype>&r)
 {
-    iScalar<vtype> ret;
-    ret._internal = conjugate(r._internal);
-    return ret;
+  iScalar<vtype> ret;
+  ret._internal = conjugate(r._internal);
+  return ret;
 }
 template<class vtype,int N> inline iVector<vtype,N> conjugate(const iVector<vtype,N>&r)
 {
@@ -140,9 +141,9 @@ template<class vtype,int N> inline iMatrix<vtype,N> conjugate(const iMatrix<vtyp
 {
  iMatrix<vtype,N> ret;
  for(int i=0;i<N;i++){
-  for(int j=0;j<N;j++){
-    ret._internal[i][j] = conjugate(r._internal[i][j]);
-  }}
+    for(int j=0;j<N;j++){
+      ret._internal[i][j] = conjugate(r._internal[i][j]);
+    }}
  return ret;
 }

@@ -151,26 +152,26 @@ template<class vtype,int N> inline iMatrix<vtype,N> conjugate(const iMatrix<vtyp
 /////////////////////////////////////////////// 
 template<class vtype> inline iScalar<vtype> adj(const iScalar<vtype>&r)
 {
-    iScalar<vtype> ret;
-    ret._internal = adj(r._internal);
-    return ret;
+  iScalar<vtype> ret;
+  ret._internal = adj(r._internal);
+  return ret;
 }
 template<class vtype,int N> inline iVector<vtype,N> adj(const iVector<vtype,N>&r)
 {
-    iVector<vtype,N> ret;
-    for(int i=0;i<N;i++){
-        ret._internal[i] = adj(r._internal[i]);
-    }
-    return ret;
+  iVector<vtype,N> ret;
+  for(int i=0;i<N;i++){
+    ret._internal[i] = adj(r._internal[i]);
+  }
+  return ret;
 }
 template<class vtype,int N> inline iMatrix<vtype,N> adj(const iMatrix<vtype,N> &arg)
 {
-    iMatrix<vtype,N> ret;
-    for(int c1=0;c1<N;c1++){
+  iMatrix<vtype,N> ret;
+  for(int c1=0;c1<N;c1++){
    for(int c2=0;c2<N;c2++){
-        ret._internal[c1][c2]=adj(arg._internal[c2][c1]);
+      ret._internal[c1][c2]=adj(arg._internal[c2][c1]);
    }}
-    return ret;
+  return ret;
 }


@@ -184,52 +185,52 @@ template<class vtype,int N> inline iMatrix<vtype,N> adj(const iMatrix<vtype,N> &
 /////////////////////////////////////////////////////////////////
 template<class itype> inline auto real(const iScalar<itype> &z) -> iScalar<decltype(real(z._internal))>
 {
-    iScalar<decltype(real(z._internal))> ret;
-    ret._internal = real(z._internal);
-    return ret;
+  iScalar<decltype(real(z._internal))> ret;
+  ret._internal = real(z._internal);
+  return ret;
 }
 template<class itype,int N> inline auto real(const iMatrix<itype,N> &z) -> iMatrix<decltype(real(z._internal[0][0])),N>
 {
-    iMatrix<decltype(real(z._internal[0][0])),N> ret;
-    for(int c1=0;c1<N;c1++){
+  iMatrix<decltype(real(z._internal[0][0])),N> ret;
+  for(int c1=0;c1<N;c1++){
    for(int c2=0;c2<N;c2++){
-        ret._internal[c1][c2] = real(z._internal[c1][c2]);
+      ret._internal[c1][c2] = real(z._internal[c1][c2]);
    }}
-    return ret;
+  return ret;
 }
 template<class itype,int N> inline auto real(const iVector<itype,N> &z) -> iVector<decltype(real(z._internal[0])),N>
 {
-    iVector<decltype(real(z._internal[0])),N> ret;
-    for(int c1=0;c1<N;c1++){
-        ret._internal[c1] = real(z._internal[c1]);
-    }
-    return ret;
+  iVector<decltype(real(z._internal[0])),N> ret;
+  for(int c1=0;c1<N;c1++){
+    ret._internal[c1] = real(z._internal[c1]);
+  }
+  return ret;
 }
    
 template<class itype> inline auto imag(const iScalar<itype> &z) -> iScalar<decltype(imag(z._internal))>
 {
-    iScalar<decltype(imag(z._internal))> ret;
-    ret._internal = imag(z._internal);
-    return ret;
+  iScalar<decltype(imag(z._internal))> ret;
+  ret._internal = imag(z._internal);
+  return ret;
 }
 template<class itype,int N> inline auto imag(const iMatrix<itype,N> &z) -> iMatrix<decltype(imag(z._internal[0][0])),N>
 {
-    iMatrix<decltype(imag(z._internal[0][0])),N> ret;
-    for(int c1=0;c1<N;c1++){
+  iMatrix<decltype(imag(z._internal[0][0])),N> ret;
+  for(int c1=0;c1<N;c1++){
    for(int c2=0;c2<N;c2++){
-        ret._internal[c1][c2] = imag(z._internal[c1][c2]);
+      ret._internal[c1][c2] = imag(z._internal[c1][c2]);
    }}
-    return ret;
+  return ret;
 }
 template<class itype,int N> inline auto imag(const iVector<itype,N> &z) -> iVector<decltype(imag(z._internal[0])),N>
 {
-    iVector<decltype(imag(z._internal[0])),N> ret;
-    for(int c1=0;c1<N;c1++){
-        ret._internal[c1] = imag(z._internal[c1]);
-    }
-    return ret;
+  iVector<decltype(imag(z._internal[0])),N> ret;
+  for(int c1=0;c1<N;c1++){
+    ret._internal[c1] = imag(z._internal[c1]);
+  }
+  return ret;
 }

+NAMESPACE_END(Grid);

-}
 #endif
--- a/lib/tensors/Tensor_trace.h
+++ b/lib/tensors/Tensor_trace.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -24,11 +24,12 @@ Author: neo <cossu@post.kek.jp>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_MATH_TRACE_H
 #define GRID_MATH_TRACE_H
-namespace Grid {
+
+NAMESPACE_BEGIN(Grid);

 //////////////////////////////////////////////////////////////////
 // Traces: both all indices and a specific index. Indices must be
@@ -43,24 +44,24 @@ inline RealD trace( const RealD &arg){    return arg;}
 template<class vtype,int N>
 inline auto trace(const iMatrix<vtype,N> &arg) -> iScalar<decltype(trace(arg._internal[0][0]))>
 {
-    iScalar<decltype( trace(arg._internal[0][0] )) > ret;
-    zeroit(ret._internal);
-    for(int i=0;i<N;i++){
-        ret._internal=ret._internal+trace(arg._internal[i][i]);
-    }
-    return ret;
+  iScalar<decltype( trace(arg._internal[0][0] )) > ret;
+  zeroit(ret._internal);
+  for(int i=0;i<N;i++){
+    ret._internal=ret._internal+trace(arg._internal[i][i]);
+  }
+  return ret;
 }

 template<class vtype>
 inline auto trace(const iScalar<vtype> &arg) -> iScalar<decltype(trace(arg._internal))>
 {
-    iScalar<decltype(trace(arg._internal))> ret;
-    ret._internal=trace(arg._internal);
-    return ret;
+  iScalar<decltype(trace(arg._internal))> ret;
+  ret._internal=trace(arg._internal);
+  return ret;
 }

 template<class vtype,int N>
-  inline auto trace(const iVector<vtype,N> &arg) -> iVector<decltype(trace(arg._internal[0])),N>
+inline auto trace(const iVector<vtype,N> &arg) -> iVector<decltype(trace(arg._internal[0])),N>
 {
  iVector<decltype(trace(arg._internal[0])),N> ret;
  for(int i=0;i<N;i++){
@@ -69,6 +70,6 @@ template<class vtype,int N>
  return ret;
 }

+NAMESPACE_END(Grid);

-}
 #endif
--- a/lib/tensors/Tensor_traits.h
+++ b/lib/tensors/Tensor_traits.h
@@ -1,4 +1,4 @@
-   /*************************************************************************************
+/*************************************************************************************
    Grid physics library, www.github.com/paboyle/Grid 
    Source file: ./lib/tensors/Tensor_traits.h
    Copyright (C) 2015
@@ -17,14 +17,14 @@ Author: Christopher Kelly <ckelly@phys.columbia.edu>
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_MATH_TRAITS_H
 #define GRID_MATH_TRAITS_H

 #include <type_traits>

-namespace Grid {
+NAMESPACE_BEGIN(Grid);

 //////////////////////////////////////////////////////////////////////////////////
 // Want to recurse: GridTypeMapper<Matrix<vComplexD> >::scalar_type == ComplexD.
@@ -41,254 +41,256 @@ namespace Grid {
 //
 //////////////////////////////////////////////////////////////////////////////////
  
-  template <class T> class GridTypeMapper {
-  public:
-    typedef typename T::scalar_type scalar_type;
-    typedef typename T::vector_type vector_type;
-    typedef typename T::vector_typeD vector_typeD;
-    typedef typename T::tensor_reduced tensor_reduced;
-    typedef typename T::scalar_object scalar_object;
-    typedef typename T::Complexified Complexified;
-    typedef typename T::Realified Realified;
-    typedef typename T::DoublePrecision DoublePrecision;
-    enum { TensorLevel = T::TensorLevel };
-  };
+template <class T> class GridTypeMapper {
+public:
+  typedef typename T::scalar_type scalar_type;
+  typedef typename T::vector_type vector_type;
+  typedef typename T::vector_typeD vector_typeD;
+  typedef typename T::tensor_reduced tensor_reduced;
+  typedef typename T::scalar_object scalar_object;
+  typedef typename T::Complexified Complexified;
+  typedef typename T::Realified Realified;
+  typedef typename T::DoublePrecision DoublePrecision;
+  enum { TensorLevel = T::TensorLevel };
+};

 //////////////////////////////////////////////////////////////////////////////////
 // Recursion stops with these template specialisations
 //////////////////////////////////////////////////////////////////////////////////
-  template<> class GridTypeMapper<RealF> {
-  public:
-    typedef RealF scalar_type;
-    typedef RealF vector_type;
-    typedef RealD vector_typeD;
-    typedef RealF tensor_reduced ;
-    typedef RealF scalar_object;
-    typedef ComplexF Complexified;
-    typedef RealF Realified;
-    typedef RealD DoublePrecision;
-    enum { TensorLevel = 0 };
-  };
-  template<> class GridTypeMapper<RealD> {
-  public:
-    typedef RealD scalar_type;
-    typedef RealD vector_type;
-    typedef RealD vector_typeD;
-    typedef RealD tensor_reduced;
-    typedef RealD scalar_object;
-    typedef ComplexD Complexified;
-    typedef RealD Realified;
-    typedef RealD DoublePrecision;
-    enum { TensorLevel = 0 };
-  };
-  template<> class GridTypeMapper<ComplexF> {
-  public:
-    typedef ComplexF scalar_type;
-    typedef ComplexF vector_type;
-    typedef ComplexD vector_typeD;
-    typedef ComplexF tensor_reduced;
-    typedef ComplexF scalar_object;
-    typedef ComplexF Complexified;
-    typedef RealF Realified;
-    typedef ComplexD DoublePrecision;
-    enum { TensorLevel = 0 };
-  };
-  template<> class GridTypeMapper<ComplexD> {
-  public:
-    typedef ComplexD scalar_type;
-    typedef ComplexD vector_type;
-    typedef ComplexD vector_typeD;
-    typedef ComplexD tensor_reduced;
-    typedef ComplexD scalar_object;
-    typedef ComplexD Complexified;
-    typedef RealD Realified;
-    typedef ComplexD DoublePrecision;
-    enum { TensorLevel = 0 };
-  };
-  template<> class GridTypeMapper<Integer> {
-  public:
-    typedef Integer scalar_type;
-    typedef Integer vector_type;
-    typedef Integer vector_typeD;
-    typedef Integer tensor_reduced;
-    typedef Integer scalar_object;
-    typedef void Complexified;
-    typedef void Realified;
-    typedef void DoublePrecision;
-    enum { TensorLevel = 0 };
-  };
+template<> class GridTypeMapper<RealF> {
+public:
+  typedef RealF scalar_type;
+  typedef RealF vector_type;
+  typedef RealD vector_typeD;
+  typedef RealF tensor_reduced ;
+  typedef RealF scalar_object;
+  typedef ComplexF Complexified;
+  typedef RealF Realified;
+  typedef RealD DoublePrecision;
+  enum { TensorLevel = 0 };
+};
+template<> class GridTypeMapper<RealD> {
+public:
+  typedef RealD scalar_type;
+  typedef RealD vector_type;
+  typedef RealD vector_typeD;
+  typedef RealD tensor_reduced;
+  typedef RealD scalar_object;
+  typedef ComplexD Complexified;
+  typedef RealD Realified;
+  typedef RealD DoublePrecision;
+  enum { TensorLevel = 0 };
+};
+template<> class GridTypeMapper<ComplexF> {
+public:
+  typedef ComplexF scalar_type;
+  typedef ComplexF vector_type;
+  typedef ComplexD vector_typeD;
+  typedef ComplexF tensor_reduced;
+  typedef ComplexF scalar_object;
+  typedef ComplexF Complexified;
+  typedef RealF Realified;
+  typedef ComplexD DoublePrecision;
+  enum { TensorLevel = 0 };
+};
+template<> class GridTypeMapper<ComplexD> {
+public:
+  typedef ComplexD scalar_type;
+  typedef ComplexD vector_type;
+  typedef ComplexD vector_typeD;
+  typedef ComplexD tensor_reduced;
+  typedef ComplexD scalar_object;
+  typedef ComplexD Complexified;
+  typedef RealD Realified;
+  typedef ComplexD DoublePrecision;
+  enum { TensorLevel = 0 };
+};
+template<> class GridTypeMapper<Integer> {
+public:
+  typedef Integer scalar_type;
+  typedef Integer vector_type;
+  typedef Integer vector_typeD;
+  typedef Integer tensor_reduced;
+  typedef Integer scalar_object;
+  typedef void Complexified;
+  typedef void Realified;
+  typedef void DoublePrecision;
+  enum { TensorLevel = 0 };
+};

-  template<> class GridTypeMapper<vRealF> {
-  public:
-    typedef RealF  scalar_type;
-    typedef vRealF vector_type;
-    typedef vRealD vector_typeD;
-    typedef vRealF tensor_reduced;
-    typedef RealF  scalar_object;
-    typedef vComplexF Complexified;
-    typedef vRealF Realified;
-    typedef vRealD DoublePrecision;
-    enum { TensorLevel = 0 };
-  };
-  template<> class GridTypeMapper<vRealD> {
-  public:
-    typedef RealD  scalar_type;
-    typedef vRealD vector_type;
-    typedef vRealD vector_typeD;
-    typedef vRealD tensor_reduced;
-    typedef RealD  scalar_object;
-    typedef vComplexD Complexified;
-    typedef vRealD Realified;
-    typedef vRealD DoublePrecision;
-    enum { TensorLevel = 0 };
-  };
-  template<> class GridTypeMapper<vComplexH> {
-  public:
-    typedef ComplexF  scalar_type;
-    typedef vComplexH vector_type;
-    typedef vComplexD vector_typeD;
-    typedef vComplexH tensor_reduced;
-    typedef ComplexF  scalar_object;
-    typedef vComplexH Complexified;
-    typedef vRealH Realified;
-    typedef vComplexD DoublePrecision;
-    enum { TensorLevel = 0 };
-  };
-  template<> class GridTypeMapper<vComplexF> {
-  public:
-    typedef ComplexF  scalar_type;
-    typedef vComplexF vector_type;
-    typedef vComplexD vector_typeD;
-    typedef vComplexF tensor_reduced;
-    typedef ComplexF  scalar_object;
-    typedef vComplexF Complexified;
-    typedef vRealF Realified;
-    typedef vComplexD DoublePrecision;
-    enum { TensorLevel = 0 };
-  };
-  template<> class GridTypeMapper<vComplexD> {
-  public:
-    typedef ComplexD  scalar_type;
-    typedef vComplexD vector_type;
-    typedef vComplexD vector_typeD;
-    typedef vComplexD tensor_reduced;
-    typedef ComplexD  scalar_object;
-    typedef vComplexD Complexified;
-    typedef vRealD Realified;
-    typedef vComplexD DoublePrecision;
-    enum { TensorLevel = 0 };
-  };
-  template<> class GridTypeMapper<vInteger> {
-  public:
-    typedef  Integer scalar_type;
-    typedef vInteger vector_type;
-    typedef vInteger vector_typeD;
-    typedef vInteger tensor_reduced;
-    typedef  Integer scalar_object;
-    typedef void Complexified;
-    typedef void Realified;
-    typedef void DoublePrecision;
-    enum { TensorLevel = 0 };
-  };
+template<> class GridTypeMapper<vRealF> {
+public:
+  typedef RealF  scalar_type;
+  typedef vRealF vector_type;
+  typedef vRealD vector_typeD;
+  typedef vRealF tensor_reduced;
+  typedef RealF  scalar_object;
+  typedef vComplexF Complexified;
+  typedef vRealF Realified;
+  typedef vRealD DoublePrecision;
+  enum { TensorLevel = 0 };
+};
+template<> class GridTypeMapper<vRealD> {
+public:
+  typedef RealD  scalar_type;
+  typedef vRealD vector_type;
+  typedef vRealD vector_typeD;
+  typedef vRealD tensor_reduced;
+  typedef RealD  scalar_object;
+  typedef vComplexD Complexified;
+  typedef vRealD Realified;
+  typedef vRealD DoublePrecision;
+  enum { TensorLevel = 0 };
+};
+template<> class GridTypeMapper<vComplexH> {
+public:
+  typedef ComplexF  scalar_type;
+  typedef vComplexH vector_type;
+  typedef vComplexD vector_typeD;
+  typedef vComplexH tensor_reduced;
+  typedef ComplexF  scalar_object;
+  typedef vComplexH Complexified;
+  typedef vRealH Realified;
+  typedef vComplexD DoublePrecision;
+  enum { TensorLevel = 0 };
+};
+template<> class GridTypeMapper<vComplexF> {
+public:
+  typedef ComplexF  scalar_type;
+  typedef vComplexF vector_type;
+  typedef vComplexD vector_typeD;
+  typedef vComplexF tensor_reduced;
+  typedef ComplexF  scalar_object;
+  typedef vComplexF Complexified;
+  typedef vRealF Realified;
+  typedef vComplexD DoublePrecision;
+  enum { TensorLevel = 0 };
+};
+template<> class GridTypeMapper<vComplexD> {
+public:
+  typedef ComplexD  scalar_type;
+  typedef vComplexD vector_type;
+  typedef vComplexD vector_typeD;
+  typedef vComplexD tensor_reduced;
+  typedef ComplexD  scalar_object;
+  typedef vComplexD Complexified;
+  typedef vRealD Realified;
+  typedef vComplexD DoublePrecision;
+  enum { TensorLevel = 0 };
+};
+template<> class GridTypeMapper<vInteger> {
+public:
+  typedef  Integer scalar_type;
+  typedef vInteger vector_type;
+  typedef vInteger vector_typeD;
+  typedef vInteger tensor_reduced;
+  typedef  Integer scalar_object;
+  typedef void Complexified;
+  typedef void Realified;
+  typedef void DoublePrecision;
+  enum { TensorLevel = 0 };
+};

-  // First some of my own traits
-  template<typename T> struct isGridTensor {
-    static const bool value = true;
-    static const bool notvalue = false;
-  };
-  template<> struct isGridTensor<int > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
-  template<> struct isGridTensor<RealD > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
-  template<> struct isGridTensor<RealF > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
-  template<> struct isGridTensor<ComplexD > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
-  template<> struct isGridTensor<ComplexF > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
-  template<> struct isGridTensor<Integer > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
-  template<> struct isGridTensor<vRealD > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
-  template<> struct isGridTensor<vRealF > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
-  template<> struct isGridTensor<vComplexD > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
-  template<> struct isGridTensor<vComplexF > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
-  template<> struct isGridTensor<vInteger > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
+// First some of my own traits
+template<typename T> struct isGridTensor {
+  static const bool value = true;
+  static const bool notvalue = false;
+};
+template<> struct isGridTensor<int > {
+  static const bool value = false;
+  static const bool notvalue = true;
+};
+template<> struct isGridTensor<RealD > {
+  static const bool value = false;
+  static const bool notvalue = true;
+};
+template<> struct isGridTensor<RealF > {
+  static const bool value = false;
+  static const bool notvalue = true;
+};
+template<> struct isGridTensor<ComplexD > {
+  static const bool value = false;
+  static const bool notvalue = true;
+};
+template<> struct isGridTensor<ComplexF > {
+  static const bool value = false;
+  static const bool notvalue = true;
+};
+template<> struct isGridTensor<Integer > {
+  static const bool value = false;
+  static const bool notvalue = true;
+};
+template<> struct isGridTensor<vRealD > {
+  static const bool value = false;
+  static const bool notvalue = true;
+};
+template<> struct isGridTensor<vRealF > {
+  static const bool value = false;
+  static const bool notvalue = true;
+};
+template<> struct isGridTensor<vComplexD > {
+  static const bool value = false;
+  static const bool notvalue = true;
+};
+template<> struct isGridTensor<vComplexF > {
+  static const bool value = false;
+  static const bool notvalue = true;
+};
+template<> struct isGridTensor<vInteger > {
+  static const bool value = false;
+  static const bool notvalue = true;
+};

-  // Match the index
-  template<typename T,int Level> struct matchGridTensorIndex {
-    static const bool value = (Level==T::TensorLevel);
-    static const bool notvalue = (Level!=T::TensorLevel);
-  };
-  // What is the vtype
-  template<typename T> struct isComplex {
-    static const bool value = false;
-  };
-  template<> struct isComplex<ComplexF> {
-    static const bool value = true;
-  };
-  template<> struct isComplex<ComplexD> {
-    static const bool value = true;
-  };
+// Match the index
+template<typename T,int Level> struct matchGridTensorIndex {
+  static const bool value = (Level==T::TensorLevel);
+  static const bool notvalue = (Level!=T::TensorLevel);
+};
+// What is the vtype
+template<typename T> struct isComplex {
+  static const bool value = false;
+};
+template<> struct isComplex<ComplexF> {
+  static const bool value = true;
+};
+template<> struct isComplex<ComplexD> {
+  static const bool value = true;
+};

-  //Get the SIMD vector type from a Grid tensor or Lattice<Tensor>
-  template<typename T>
-  struct getVectorType{
-    typedef T type;
-  };
+//Get the SIMD vector type from a Grid tensor or Lattice<Tensor>
+template<typename T>
+struct getVectorType{
+  typedef T type;
+};
  
-  //Query if a tensor or Lattice<Tensor> is SIMD vector or scalar
-  template<typename T>
-  class isSIMDvectorized{
-    template<typename U>
-    static typename std::enable_if< !std::is_same< typename GridTypeMapper<typename getVectorType<U>::type>::scalar_type,   
-      typename GridTypeMapper<typename getVectorType<U>::type>::vector_type>::value, char>::type test(void *);
+//Query if a tensor or Lattice<Tensor> is SIMD vector or scalar
+template<typename T>
+class isSIMDvectorized{
+  template<typename U>
+  static typename std::enable_if< !std::is_same< typename GridTypeMapper<typename getVectorType<U>::type>::scalar_type,   
+						 typename GridTypeMapper<typename getVectorType<U>::type>::vector_type>::value, char>::type test(void *);

-    template<typename U>
-    static double test(...);
+  template<typename U>
+  static double test(...);
  
-  public:
-    enum {value = sizeof(test<T>(0)) == sizeof(char) };
-  };
+public:
+  enum {value = sizeof(test<T>(0)) == sizeof(char) };
+};
  
-  //Get the precision of a Lattice, tensor or scalar type in units of sizeof(float)
-  template<typename T>
-  class getPrecision{
-  public:
-    //get the vector_obj (i.e. a grid Tensor) if its a Lattice<vobj>, do nothing otherwise (i.e. if fundamental or grid Tensor)
-    typedef typename getVectorType<T>::type vector_obj; 
-    typedef typename GridTypeMapper<vector_obj>::scalar_type scalar_type; //get the associated scalar type. Works on fundamental and tensor types
-    typedef typename GridTypeMapper<scalar_type>::Realified real_scalar_type; //remove any std::complex wrapper, should get us to the fundamental type
+//Get the precision of a Lattice, tensor or scalar type in units of sizeof(float)
+template<typename T>
+class getPrecision{
+public:
+  //get the vector_obj (i.e. a grid Tensor) if its a Lattice<vobj>, do nothing otherwise (i.e. if fundamental or grid Tensor)
+  typedef typename getVectorType<T>::type vector_obj; 
+  typedef typename GridTypeMapper<vector_obj>::scalar_type scalar_type; //get the associated scalar type. Works on fundamental and tensor types
+  typedef typename GridTypeMapper<scalar_type>::Realified real_scalar_type; //remove any std::complex wrapper, should get us to the fundamental type
+
+  enum { value = sizeof(real_scalar_type)/sizeof(float) };
+};
+
+NAMESPACE_END(Grid);

-    enum { value = sizeof(real_scalar_type)/sizeof(float) };
-  };
-}

 #endif

--- a/lib/tensors/Tensor_transpose.h
+++ b/lib/tensors/Tensor_transpose.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -23,13 +23,12 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_MATH_TRANSPOSE_H
 #define GRID_MATH_TRANSPOSE_H
-namespace Grid {
-

+NAMESPACE_BEGIN(Grid);

 /////////////////////////////////////////////////////////////////
 // Transpose all indices
@@ -41,45 +40,45 @@ inline RealD transpose(RealD &rhs){  return rhs;}
 inline RealF transpose(RealF &rhs){  return rhs;}

 template<class vtype,int N>
-  inline typename std::enable_if<isGridTensor<vtype>::value, iMatrix<vtype,N> >::type 
-  transpose(iMatrix<vtype,N> arg)
-  {
-    iMatrix<vtype,N> ret;
-    for(int i=0;i<N;i++){
-      for(int j=0;j<N;j++){
-	ret._internal[i][j] = transpose(arg._internal[j][i]); // NB recurses
-      }}
-    return ret;
-  }
+inline typename std::enable_if<isGridTensor<vtype>::value, iMatrix<vtype,N> >::type 
+transpose(iMatrix<vtype,N> arg)
+{
+  iMatrix<vtype,N> ret;
+  for(int i=0;i<N;i++){
+    for(int j=0;j<N;j++){
+      ret._internal[i][j] = transpose(arg._internal[j][i]); // NB recurses
+    }}
+  return ret;
+}
 template<class vtype,int N>
-  inline typename std::enable_if<isGridTensor<vtype>::notvalue, iMatrix<vtype,N> >::type 
-  transpose(iMatrix<vtype,N> arg)
-  {
-    iMatrix<vtype,N> ret;
-    for(int i=0;i<N;i++){
-      for(int j=0;j<N;j++){
-	ret._internal[i][j] = arg._internal[j][i]; // Stop recursion if not a tensor type
-      }}
-    return ret;
-  }
+inline typename std::enable_if<isGridTensor<vtype>::notvalue, iMatrix<vtype,N> >::type 
+transpose(iMatrix<vtype,N> arg)
+{
+  iMatrix<vtype,N> ret;
+  for(int i=0;i<N;i++){
+    for(int j=0;j<N;j++){
+      ret._internal[i][j] = arg._internal[j][i]; // Stop recursion if not a tensor type
+    }}
+  return ret;
+}

 template<class vtype>
-  inline typename std::enable_if<isGridTensor<vtype>::value, iScalar<vtype> >::type 
-  transpose(iScalar<vtype> arg)
-  {
-    iScalar<vtype> ret;
-    ret._internal = transpose(arg._internal); // NB recurses
-    return ret;
-  }
+inline typename std::enable_if<isGridTensor<vtype>::value, iScalar<vtype> >::type 
+transpose(iScalar<vtype> arg)
+{
+  iScalar<vtype> ret;
+  ret._internal = transpose(arg._internal); // NB recurses
+  return ret;
+}

 template<class vtype>
-  inline typename std::enable_if<isGridTensor<vtype>::notvalue, iScalar<vtype> >::type 
-  transpose(iScalar<vtype> arg)
-  {
-    iScalar<vtype> ret;
-    ret._internal = arg._internal; // NB recursion stops
-    return ret;
-  }
+inline typename std::enable_if<isGridTensor<vtype>::notvalue, iScalar<vtype> >::type 
+transpose(iScalar<vtype> arg)
+{
+  iScalar<vtype> ret;
+  ret._internal = arg._internal; // NB recursion stops
+  return ret;
+}


 ////////////////////////////////////////////////////////////////////////////////////////////
@@ -88,14 +87,14 @@ template<class vtype>
 ////////////////////////////////////////////////////////////////////////////////////////////
 #if 0
 template<int Level,class vtype,int N> inline 
-  typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,Level>::value, iMatrix<vtype,N> >::type 
+typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,Level>::value, iMatrix<vtype,N> >::type 
 transposeIndex (const iMatrix<vtype,N> &arg)
 {
  iMatrix<vtype,N> ret;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      ret._internal[i][j] = arg._internal[j][i]; 
-  }}
+    }}
  return ret;
 }
 // or not
@@ -107,7 +106,7 @@ transposeIndex (const iMatrix<vtype,N> &arg)
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      ret._internal[i][j] = transposeIndex<Level>(arg._internal[i][j]); 
-  }}
+    }}
  return ret;
 }
 template<int Level,class vtype> inline 
@@ -126,5 +125,6 @@ transposeIndex (const iScalar<vtype> &arg)
 }
 #endif

-}
+NAMESPACE_END(Grid);
+
 #endif
--- a/lib/tensors/Tensor_unary.h
+++ b/lib/tensors/Tensor_unary.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -25,62 +25,63 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_TENSOR_UNARY_H
 #define GRID_TENSOR_UNARY_H
-namespace Grid {

-#define UNARY(func)\
-template<class obj> inline auto func(const iScalar<obj> &z) -> iScalar<obj>\
-{\
-    iScalar<obj> ret;\
-    ret._internal = func( (z._internal));\
-    return ret;\
-}\
-template<class obj,int N> inline auto func(const iVector<obj,N> &z) -> iVector<obj,N>\
-{\
-    iVector<obj,N> ret;\
-    for(int c1=0;c1<N;c1++){\
-      ret._internal[c1] = func( (z._internal[c1]));\
-    }\
-    return ret;\
-}\
-template<class obj,int N> inline auto func(const iMatrix<obj,N> &z) -> iMatrix<obj,N>\
-{\
-    iMatrix<obj,N> ret;\
-    for(int c1=0;c1<N;c1++){\
-    for(int c2=0;c2<N;c2++){\
-      ret._internal[c1][c2] = func( (z._internal[c1][c2]));\
-    }}\
-    return ret;\
-}
+NAMESPACE_BEGIN(Grid);
+
+#define UNARY(func)							\
+  template<class obj> inline auto func(const iScalar<obj> &z) -> iScalar<obj> \
+  {									\
+    iScalar<obj> ret;							\
+    ret._internal = func( (z._internal));				\
+    return ret;								\
+  }									\
+  template<class obj,int N> inline auto func(const iVector<obj,N> &z) -> iVector<obj,N>	\
+  {									\
+    iVector<obj,N> ret;							\
+    for(int c1=0;c1<N;c1++){						\
+      ret._internal[c1] = func( (z._internal[c1]));			\
+    }									\
+    return ret;								\
+  }									\
+  template<class obj,int N> inline auto func(const iMatrix<obj,N> &z) -> iMatrix<obj,N>	\
+  {									\
+    iMatrix<obj,N> ret;							\
+    for(int c1=0;c1<N;c1++){						\
+      for(int c2=0;c2<N;c2++){						\
+	ret._internal[c1][c2] = func( (z._internal[c1][c2]));		\
+      }}								\
+    return ret;								\
+  }


 #define BINARY_RSCALAR(func,scal)					\
-template<class obj> inline iScalar<obj> func(const iScalar<obj> &z,scal y)	\
-{\
-    iScalar<obj> ret;\
-    ret._internal = func(z._internal,y);	\
-    return ret;\
-}\
- template<class obj,int N> inline iVector<obj,N> func(const iVector<obj,N> &z,scal y) \
-{\
-    iVector<obj,N> ret;\
-    for(int c1=0;c1<N;c1++){\
-      ret._internal[c1] = func(z._internal[c1],y);	\
-    }\
-    return ret;\
-}\
- template<class obj,int N> inline  iMatrix<obj,N> func(const iMatrix<obj,N> &z, scal y) \
-{\
-    iMatrix<obj,N> ret;\
-    for(int c1=0;c1<N;c1++){\
-    for(int c2=0;c2<N;c2++){\
-      ret._internal[c1][c2] = func(z._internal[c1][c2],y);	\
-    }}\
-    return ret;\
-}
+  template<class obj> inline iScalar<obj> func(const iScalar<obj> &z,scal y) \
+  {									\
+    iScalar<obj> ret;							\
+    ret._internal = func(z._internal,y);				\
+    return ret;								\
+  }									\
+  template<class obj,int N> inline iVector<obj,N> func(const iVector<obj,N> &z,scal y) \
+  {									\
+    iVector<obj,N> ret;							\
+    for(int c1=0;c1<N;c1++){						\
+      ret._internal[c1] = func(z._internal[c1],y);			\
+    }									\
+    return ret;								\
+  }									\
+  template<class obj,int N> inline  iMatrix<obj,N> func(const iMatrix<obj,N> &z, scal y) \
+  {									\
+    iMatrix<obj,N> ret;							\
+    for(int c1=0;c1<N;c1++){						\
+      for(int c2=0;c2<N;c2++){						\
+	ret._internal[c1][c2] = func(z._internal[c1][c2],y);		\
+      }}								\
+    return ret;								\
+  }

 UNARY(sqrt);
 UNARY(rsqrt);
@@ -100,7 +101,7 @@ template<class obj> inline auto toReal(const iScalar<obj> &z) -> typename iScala
  ret._internal = toReal(z._internal);
  return ret;
 }
- template<class obj,int N> inline auto toReal(const iVector<obj,N> &z) -> typename iVector<obj,N>::Realified
+template<class obj,int N> inline auto toReal(const iVector<obj,N> &z) -> typename iVector<obj,N>::Realified
 {
  typename iVector<obj,N>::Realified ret;
  for(int c1=0;c1<N;c1++){  
@@ -112,9 +113,9 @@ template<class obj,int N> inline auto toReal(const iMatrix<obj,N> &z) -> typenam
 {
  typename iMatrix<obj,N>::Realified ret;
  for(int c1=0;c1<N;c1++){
-  for(int c2=0;c2<N;c2++){
-    ret._internal[c1][c2] = toReal(z._internal[c1][c2]);
-  }}
+    for(int c2=0;c2<N;c2++){
+      ret._internal[c1][c2] = toReal(z._internal[c1][c2]);
+    }}
  return ret;
 }

@@ -124,7 +125,7 @@ template<class obj> inline auto toComplex(const iScalar<obj> &z) -> typename iSc
  ret._internal = toComplex(z._internal);
  return ret;
 }
- template<class obj,int N> inline auto toComplex(const iVector<obj,N> &z) -> typename iVector<obj,N>::Complexified
+template<class obj,int N> inline auto toComplex(const iVector<obj,N> &z) -> typename iVector<obj,N>::Complexified
 {
  typename iVector<obj,N>::Complexified ret;
  for(int c1=0;c1<N;c1++){  
@@ -136,9 +137,9 @@ template<class obj,int N> inline auto toComplex(const iMatrix<obj,N> &z) -> type
 {
  typename iMatrix<obj,N>::Complexified ret;
  for(int c1=0;c1<N;c1++){
-  for(int c2=0;c2<N;c2++){
-    ret._internal[c1][c2] = toComplex(z._internal[c1][c2]);
-  }}
+    for(int c2=0;c2<N;c2++){
+      ret._internal[c1][c2] = toComplex(z._internal[c1][c2]);
+    }}
  return ret;
 }

@@ -149,6 +150,6 @@ BINARY_RSCALAR(pow,RealD);
 #undef UNARY
 #undef BINARY_RSCALAR

+NAMESPACE_END(Grid);

-}
 #endif
--- a/lib/tensors/Tensors.h
+++ b/lib/tensors/Tensors.h
@@ -1,4 +1,4 @@
-    /*************************************************************************************
+/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -25,8 +25,8 @@ Author: neo <cossu@post.kek.jp>
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_MATH_H
 #define GRID_MATH_H