Merge branch 'develop' of https://github.com/paboyle/Grid into develop

2026-01-09 03:19:34 +00:00 · 2016-07-15 19:26:06 +01:00
parent da34d75841 8b9301a74c
commit f4dd5062d7
62 changed files with 12722 additions and 2805 deletions
--- a/lib/tensors/Tensor_class.h
+++ b/lib/tensors/Tensor_class.h
@@ -1,31 +1,32 @@
-    /*************************************************************************************
+/*************************************************************************************

-    Grid physics library, www.github.com/paboyle/Grid 
+Grid physics library, www.github.com/paboyle/Grid

-    Source file: ./lib/tensors/Tensor_class.h
+Source file: ./lib/tensors/Tensor_class.h

-    Copyright (C) 2015
+Copyright (C) 2015

 Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>

-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.

-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.

-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_MATH_TENSORS_H
 #define GRID_MATH_TENSORS_H

@@ -38,17 +39,18 @@ namespace Grid {

 // It is useful to NOT have any constructors
 // so that these classes assert "is_pod<class> == true"
-// because then the standard C++ valarray container eliminates fill overhead on new allocation and 
+// because then the standard C++ valarray container eliminates fill overhead on
+// new allocation and
 // non-move copying.
 //
-// However note that doing this eliminates some syntactical sugar such as 
+// However note that doing this eliminates some syntactical sugar such as
 // calling the constructor explicitly or implicitly
 //
 class GridTensorBase {};

-template<class vtype> class iScalar 
-{
-public:
+template <class vtype>
+class iScalar {
+ public:
  vtype _internal;

  typedef vtype element;
@@ -60,13 +62,14 @@ public:
  typedef iScalar<recurse_scalar_object> scalar_object;

  // substitutes a real or complex version with same tensor structure
-  typedef iScalar<typename GridTypeMapper<vtype>::Complexified > Complexified;
-  typedef iScalar<typename GridTypeMapper<vtype>::Realified >    Realified;
+  typedef iScalar<typename GridTypeMapper<vtype>::Complexified> Complexified;
+  typedef iScalar<typename GridTypeMapper<vtype>::Realified> Realified;

-  enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1};
+  enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1 };

  // Scalar no action
-  //  template<int Level> using tensor_reduce_level = typename iScalar<GridTypeMapper<vtype>::tensor_reduce_level<Level> >;
+  //  template<int Level> using tensor_reduce_level = typename
+  //  iScalar<GridTypeMapper<vtype>::tensor_reduce_level<Level> >;
  iScalar() = default;
  /*
  iScalar(const iScalar<vtype> &copyme)=default;
@@ -74,15 +77,17 @@ public:
  iScalar<vtype> & operator= (const iScalar<vtype> &copyme) = default;
  iScalar<vtype> & operator= (iScalar<vtype> &&copyme) = default;
  */
-  iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type
-  iScalar(const Zero &z){ *this = zero; };
+  iScalar(scalar_type s)
+      : _internal(s){};  // recurse down and hit the constructor for vector_type
+  iScalar(const Zero &z) { *this = zero; };

-  iScalar<vtype> & operator= (const Zero &hero){
+  iScalar<vtype> &operator=(const Zero &hero) {
    zeroit(*this);
    return *this;
  }
-  friend strong_inline void vstream(iScalar<vtype> &out,const iScalar<vtype> &in){
-    vstream(out._internal,in._internal);
+  friend strong_inline void vstream(iScalar<vtype> &out,
+                                    const iScalar<vtype> &in) {
+    vstream(out._internal, in._internal);
  }
  friend strong_inline void vbroadcast(iScalar<vtype> &out,const iScalar<vtype> &in,int lane){
    vbroadcast(out._internal,in._internal,lane);
@@ -90,73 +95,94 @@ public:
  friend strong_inline void zeroit(iScalar<vtype> &that){
    zeroit(that._internal);
  }
-  friend strong_inline void prefetch(iScalar<vtype> &that){
+  friend strong_inline void prefetch(iScalar<vtype> &that) {
    prefetch(that._internal);
  }
-  friend strong_inline void permute(iScalar<vtype> &out,const iScalar<vtype> &in,int permutetype){
-    permute(out._internal,in._internal,permutetype);
+  friend strong_inline void permute(iScalar<vtype> &out,
+                                    const iScalar<vtype> &in, int permutetype) {
+    permute(out._internal, in._internal, permutetype);
  }
  friend strong_inline void rotate(iScalar<vtype> &out,const iScalar<vtype> &in,int rot){
    rotate(out._internal,in._internal,rot);
  }

  // Unary negation
-  friend strong_inline iScalar<vtype> operator -(const iScalar<vtype> &r) {
+  friend strong_inline iScalar<vtype> operator-(const iScalar<vtype> &r) {
    iScalar<vtype> ret;
-    ret._internal= -r._internal;
+    ret._internal = -r._internal;
    return ret;
  }
  // *=,+=,-= operators inherit from corresponding "*,-,+" behaviour
-  strong_inline iScalar<vtype> &operator *=(const iScalar<vtype> &r) {
-    *this = (*this)*r;
+  strong_inline iScalar<vtype> &operator*=(const iScalar<vtype> &r) {
+    *this = (*this) * r;
    return *this;
  }
-  strong_inline iScalar<vtype> &operator -=(const iScalar<vtype> &r) {
-    *this = (*this)-r;
+  strong_inline iScalar<vtype> &operator-=(const iScalar<vtype> &r) {
+    *this = (*this) - r;
    return *this;
  }
-  strong_inline iScalar<vtype> &operator +=(const iScalar<vtype> &r) {
-    *this = (*this)+r;
+  strong_inline iScalar<vtype> &operator+=(const iScalar<vtype> &r) {
+    *this = (*this) + r;
    return *this;
  }
-  strong_inline vtype & operator ()(void) {
-    return _internal;
-  }
-  strong_inline const vtype & operator ()(void) const {
-    return _internal;
-  }
+  strong_inline vtype &operator()(void) { return _internal; }
+  strong_inline const vtype &operator()(void) const { return _internal; }

  // Type casts meta programmed, must be pure scalar to match TensorRemove
-  template<class U=vtype,class V=scalar_type,IfComplex<V> = 0,IfNotSimd<U> = 0> operator ComplexF () const { return(TensorRemove(_internal)); };
-  template<class U=vtype,class V=scalar_type,IfComplex<V> = 0,IfNotSimd<U> = 0> operator ComplexD () const { return(TensorRemove(_internal)); };
-  //  template<class U=vtype,class V=scalar_type,IfComplex<V> = 0,IfNotSimd<U> = 0> operator RealD    () const { return(real(TensorRemove(_internal))); }
-  template<class U=vtype,class V=scalar_type,IfReal<V>    = 0,IfNotSimd<U> = 0> operator RealD    () const { return TensorRemove(_internal); }
-  template<class U=vtype,class V=scalar_type,IfInteger<V> = 0,IfNotSimd<U> = 0> operator Integer  () const { return Integer(TensorRemove(_internal)); }
-  
-  // convert from a something to a scalar via constructor of something arg
-  template<class T,typename std::enable_if<!isGridTensor<T>::value, T>::type* = nullptr > strong_inline iScalar<vtype> operator = (T arg)
-    { 
-      _internal = arg;
-      return *this;
-    }
+  template <class U = vtype, class V = scalar_type, IfComplex<V> = 0,
+            IfNotSimd<U> = 0>
+  operator ComplexF() const {
+    return (TensorRemove(_internal));
+  };
+  template <class U = vtype, class V = scalar_type, IfComplex<V> = 0,
+            IfNotSimd<U> = 0>
+  operator ComplexD() const {
+    return (TensorRemove(_internal));
+  };
+  //  template<class U=vtype,class V=scalar_type,IfComplex<V> = 0,IfNotSimd<U> =
+  //  0> operator RealD    () const { return(real(TensorRemove(_internal))); }
+  template <class U = vtype, class V = scalar_type, IfReal<V> = 0,
+            IfNotSimd<U> = 0>
+  operator RealD() const {
+    return TensorRemove(_internal);
+  }
+  template <class U = vtype, class V = scalar_type, IfInteger<V> = 0,
+            IfNotSimd<U> = 0>
+  operator Integer() const {
+    return Integer(TensorRemove(_internal));
+  }

-    friend std::ostream& operator<< (std::ostream& stream, const iScalar<vtype> &o){
-      stream<< "S {"<<o._internal<<"}";
-      return stream;
-    };
+  // convert from a something to a scalar via constructor of something arg
+  template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type
+                         * = nullptr>
+  strong_inline iScalar<vtype> operator=(T arg) {
+    _internal = arg;
+    return *this;
+  }
+
+  friend std::ostream &operator<<(std::ostream &stream,
+                                  const iScalar<vtype> &o) {
+    stream << "S {" << o._internal << "}";
+    return stream;
+  };
 };
 ///////////////////////////////////////////////////////////
 // Allows to turn scalar<scalar<scalar<double>>>> back to double.
 ///////////////////////////////////////////////////////////
-template<class T>     strong_inline typename std::enable_if<!isGridTensor<T>::value, T>::type TensorRemove(T arg) { return arg;}
-template<class vtype> strong_inline auto TensorRemove(iScalar<vtype> arg) -> decltype(TensorRemove(arg._internal))
-{
+template <class T>  // base case: selected only when T is not a Grid tensor
+strong_inline typename std::enable_if<!isGridTensor<T>::value, T>::type
+TensorRemove(T arg) {
+  return arg;  // nothing to strip, hand the value back unchanged
+}
+template <class vtype>  // recursive case: peel one iScalar layer per call
+strong_inline auto TensorRemove(iScalar<vtype> arg)
+    -> decltype(TensorRemove(arg._internal)) {
   return TensorRemove(arg._internal);
 }
-    
-template<class vtype,int N> class iVector 
-{
-public:
+
+template <class vtype, int N>
+class iVector {
+ public:
  vtype _internal[N];

  typedef vtype element;
@@ -165,23 +191,23 @@ public:
  typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
  typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object;
  typedef iScalar<tensor_reduced_v> tensor_reduced;
-  typedef iVector<recurse_scalar_object,N> scalar_object;
+  typedef iVector<recurse_scalar_object, N> scalar_object;

  // substitutes a real or complex version with same tensor structure
-  typedef iVector<typename GridTypeMapper<vtype>::Complexified,N > Complexified;
-  typedef iVector<typename GridTypeMapper<vtype>::Realified,N >    Realified;
+  typedef iVector<typename GridTypeMapper<vtype>::Complexified, N> Complexified;
+  typedef iVector<typename GridTypeMapper<vtype>::Realified, N> Realified;

-  template<class T,typename std::enable_if<!isGridTensor<T>::value, T>::type* = nullptr > strong_inline auto operator = (T arg) -> iVector<vtype,N>
-    { 
-      zeroit(*this);
-      for(int i=0;i<N;i++)
-	_internal[i] = arg;
-      return *this;
-    }
+  template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type
+                         * = nullptr>
+  strong_inline auto operator=(T arg) -> iVector<vtype, N> {
+    zeroit(*this);
+    for (int i = 0; i < N; i++) _internal[i] = arg;
+    return *this;
+  }

-  enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1};
-  iVector(const Zero &z){ *this = zero; };
-  iVector() =default;
+  enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1 };
+  iVector(const Zero &z) { *this = zero; };
+  iVector() = default;
  /*
  iVector(const iVector<vtype,N> &copyme)=default;
  iVector(iVector<vtype,N> &&copyme)=default;
@@ -189,23 +215,25 @@ public:
  iVector<vtype,N> & operator= (iVector<vtype,N> &&copyme) = default;
  */

-  iVector<vtype,N> & operator= (const Zero &hero){
+  iVector<vtype, N> &operator=(const Zero &hero) {
    zeroit(*this);
    return *this;
  }
-  friend strong_inline void zeroit(iVector<vtype,N> &that){
-    for(int i=0;i<N;i++){
+  friend strong_inline void zeroit(iVector<vtype, N> &that) {
+    for (int i = 0; i < N; i++) {
      zeroit(that._internal[i]);
    }
  }
-  friend strong_inline void prefetch(iVector<vtype,N> &that){
-    for(int i=0;i<N;i++) prefetch(that._internal[i]);
+  friend strong_inline void prefetch(iVector<vtype, N> &that) {
+    for (int i = 0; i < N; i++) prefetch(that._internal[i]);
  }
-  friend strong_inline void vstream(iVector<vtype,N> &out,const iVector<vtype,N> &in){
-    for(int i=0;i<N;i++){
-      vstream(out._internal[i],in._internal[i]);
+  friend strong_inline void vstream(iVector<vtype, N> &out,
+                                    const iVector<vtype, N> &in) {
+    for (int i = 0; i < N; i++) {
+      vstream(out._internal[i], in._internal[i]);
    }
  }
+  // vbroadcast / permute: forward the call to each of the N components.
   friend strong_inline void vbroadcast(iVector<vtype,N> &out,const iVector<vtype,N> &in,int lane){
     for(int i=0;i<N;i++){
       vbroadcast(out._internal[i],in._internal[i],lane);
@@ -214,6 +242,8 @@ public:
-  friend strong_inline void permute(iVector<vtype,N> &out,const iVector<vtype,N> &in,int permutetype){
-    for(int i=0;i<N;i++){
-      permute(out._internal[i],in._internal[i],permutetype);
+  friend strong_inline void permute(iVector<vtype, N> &out,
+                                    const iVector<vtype, N> &in,
+                                    int permutetype) {
+    for (int i = 0; i < N; i++) {
+      permute(out._internal[i], in._internal[i], permutetype);
     }
   }
  friend strong_inline void rotate(iVector<vtype,N> &out,const iVector<vtype,N> &in,int rot){
@@ -223,47 +258,44 @@ public:
  }

  // Unary negation
-  friend strong_inline iVector<vtype,N> operator -(const iVector<vtype,N> &r) {
-    iVector<vtype,N> ret;
-    for(int i=0;i<N;i++) ret._internal[i]= -r._internal[i];
+  friend strong_inline iVector<vtype, N> operator-(const iVector<vtype, N> &r) {
+    iVector<vtype, N> ret;
+    for (int i = 0; i < N; i++) ret._internal[i] = -r._internal[i];
    return ret;
  }
  // *=,+=,-= operators inherit from corresponding "*,-,+" behaviour
-  strong_inline iVector<vtype,N> &operator *=(const iScalar<vtype> &r) {
-    *this = (*this)*r;
+  strong_inline iVector<vtype, N> &operator*=(const iScalar<vtype> &r) {
+    *this = (*this) * r;
    return *this;
  }
-  strong_inline iVector<vtype,N> &operator -=(const iVector<vtype,N> &r) {
-    *this = (*this)-r;
+  strong_inline iVector<vtype, N> &operator-=(const iVector<vtype, N> &r) {
+    *this = (*this) - r;
    return *this;
  }
-  strong_inline iVector<vtype,N> &operator +=(const iVector<vtype,N> &r) {
-    *this = (*this)+r;
+  strong_inline iVector<vtype, N> &operator+=(const iVector<vtype, N> &r) {
+    *this = (*this) + r;
    return *this;
  }
-  strong_inline vtype & operator ()(int i) {
-    return _internal[i];
-  }
-  strong_inline const vtype & operator ()(int i) const {
-    return _internal[i];
-  }
-  friend std::ostream& operator<< (std::ostream& stream, const iVector<vtype,N> &o){
-    stream<< "V<"<<N<<">{";
-    for(int i=0;i<N;i++) {
-      stream<<o._internal[i];
-      if (i<N-1)	stream<<",";
+  strong_inline vtype &operator()(int i) { return _internal[i]; }
+  strong_inline const vtype &operator()(int i) const { return _internal[i]; }
+  friend std::ostream &operator<<(std::ostream &stream,
+                                  const iVector<vtype, N> &o) {
+    stream << "V<" << N << ">{";
+    for (int i = 0; i < N; i++) {
+      stream << o._internal[i];
+      if (i < N - 1) stream << ",";
    }
-    stream<<"}";
+    stream << "}";
    return stream;
  };
  //    strong_inline vtype && operator ()(int i) {
  //      return _internal[i];
  //    }
 };
-    
-template<class vtype,int N> class iMatrix 
-{
-public:
+
+template <class vtype, int N>
+class iMatrix {
+ public:
  vtype _internal[N][N];

  typedef vtype element;
@@ -273,29 +305,27 @@ public:
  typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object;

  // substitutes a real or complex version with same tensor structure
-  typedef iMatrix<typename GridTypeMapper<vtype>::Complexified,N > Complexified;
-  typedef iMatrix<typename GridTypeMapper<vtype>::Realified,N >    Realified;
+  typedef iMatrix<typename GridTypeMapper<vtype>::Complexified, N> Complexified;
+  typedef iMatrix<typename GridTypeMapper<vtype>::Realified, N> Realified;

  // Tensor removal
  typedef iScalar<tensor_reduced_v> tensor_reduced;
-  typedef iMatrix<recurse_scalar_object,N> scalar_object;
+  typedef iMatrix<recurse_scalar_object, N> scalar_object;

-  enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1};
+  enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1 };

+  iMatrix(const Zero &z) { *this = zero; };
+  iMatrix() = default;

-  iMatrix(const Zero &z){ *this = zero; };
-  iMatrix() =default;
-  
-  iMatrix& operator=(const iMatrix& rhs){
-    for(int i=0;i<N;i++)
-      for(int j=0;j<N;j++)
-	vstream(_internal[i][j],rhs._internal[i][j]);
+  iMatrix &operator=(const iMatrix &rhs) {
+    for (int i = 0; i < N; i++)
+      for (int j = 0; j < N; j++) vstream(_internal[i][j], rhs._internal[i][j]);
    return *this;
-  }; 
-  
- 
+  };

-  iMatrix(scalar_type s)  { (*this) = s ;};// recurse down and hit the constructor for vector_type
+  iMatrix(scalar_type s) {
+    (*this) = s;
+  };  // recurse down and hit the constructor for vector_type

  /*
  iMatrix(const iMatrix<vtype,N> &copyme)=default;
@@ -304,20 +334,19 @@ public:
  iMatrix<vtype,N> & operator= (iMatrix<vtype,N> &&copyme) = default;
  */

-
-
-  iMatrix<vtype,N> & operator= (const Zero &hero){
+  iMatrix<vtype, N> &operator=(const Zero &hero) {
    zeroit(*this);
    return *this;
  }
-  template<class T,typename std::enable_if<!isGridTensor<T>::value, T>::type* = nullptr > strong_inline auto operator = (T arg) -> iMatrix<vtype,N>
-    { 
-      zeroit(*this);
-      for(int i=0;i<N;i++)
-	_internal[i][i] = arg;
-      return *this;
-    }
+  template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type
+                         * = nullptr>
+  strong_inline auto operator=(T arg) -> iMatrix<vtype, N> {
+    zeroit(*this);
+    for (int i = 0; i < N; i++) _internal[i][i] = arg;
+    return *this;
+  }

+  // Per-entry helpers: zeroit and permute loop over i,j and forward to _internal[i][j].
   friend strong_inline void zeroit(iMatrix<vtype,N> &that){
     for(int i=0;i<N;i++){
       for(int j=0;j<N;j++){
@@ -347,6 +376,6 @@ public:
       for(int j=0;j<N;j++){
 	permute(out._internal[i][j],in._internal[i][j],permutetype);
     }}
   }
   friend strong_inline void rotate(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in,int rot){
     for(int i=0;i<N;i++){
@@ -355,79 +404,77 @@ public:
     }}
   }


  // Unary negation
-  friend strong_inline iMatrix<vtype,N> operator -(const iMatrix<vtype,N> &r) {
-    iMatrix<vtype,N> ret;
-    for(int i=0;i<N;i++){
-      for(int j=0;j<N;j++){
-	ret._internal[i][j]= -r._internal[i][j];
-    }}
+  friend strong_inline iMatrix<vtype, N> operator-(const iMatrix<vtype, N> &r) {
+    iMatrix<vtype, N> ret;
+    for (int i = 0; i < N; i++) {
+      for (int j = 0; j < N; j++) {
+        ret._internal[i][j] = -r._internal[i][j];
+      }
+    }
    return ret;
  }
  // *=,+=,-= operators inherit from corresponding "*,-,+" behaviour
-  template<class T>
-  strong_inline iMatrix<vtype,N> &operator *=(const T &r) {
-    *this = (*this)*r;
+  template <class T>
+  strong_inline iMatrix<vtype, N> &operator*=(const T &r) {
+    *this = (*this) * r;
    return *this;
  }
-  template<class T>
-  strong_inline iMatrix<vtype,N> &operator -=(const T &r) {
-    *this = (*this)-r;
+  template <class T>
+  strong_inline iMatrix<vtype, N> &operator-=(const T &r) {
+    *this = (*this) - r;
    return *this;
  }
-  template<class T>
-  strong_inline iMatrix<vtype,N> &operator +=(const T &r) {
-    *this = (*this)+r;
+  template <class T>
+  strong_inline iMatrix<vtype, N> &operator+=(const T &r) {
+    *this = (*this) + r;
    return *this;
  }

  // returns an lvalue reference
-  strong_inline vtype & operator ()(int i,int j) {
+  strong_inline vtype &operator()(int i, int j) { return _internal[i][j]; }
+  strong_inline const vtype &operator()(int i, int j) const {
    return _internal[i][j];
  }
-  strong_inline const vtype & operator ()(int i,int j) const {
-    return _internal[i][j];
-  }
-  friend std::ostream& operator<< (std::ostream& stream, const iMatrix<vtype,N> &o){
-    stream<< "M<"<<N<<">{";
-    for(int i=0;i<N;i++) {
-      stream<< "{";
-      for(int j=0;j<N;j++) {
-	stream<<o._internal[i][j];
-	if (i<N-1)	stream<<",";
+  friend std::ostream &operator<<(std::ostream &stream,
+                                  const iMatrix<vtype, N> &o) {
+    stream << "M<" << N << ">{";  // header, then one brace group per row
+    for (int i = 0; i < N; i++) {
+      stream << "{";
+      for (int j = 0; j < N; j++) {
+        stream << o._internal[i][j];
+        if (j < N - 1) stream << ",";  // comma between columns of row i only
       }
-      stream<<"}";
-      if(i!=N-1) stream<<"\n\t\t";
+      stream << "}";
+      if (i != N - 1) stream << "\n\t\t";
     }
-    stream<<"}";
+    stream << "}";
     return stream;
   };

  //  strong_inline vtype && operator ()(int i,int j) {
  //    return _internal[i][j];
  //  }
-
 };

-template<class v> void vprefetch(const iScalar<v> &vv)
-{
+template <class v>  // iScalar overload: forwards to the single wrapped element
+void vprefetch(const iScalar<v> &vv) {
   vprefetch(vv._internal);
 }
-template<class v,int N> void vprefetch(const iVector<v,N> &vv)
-{
-  for(int i=0;i<N;i++){
+template <class v, int N>  // iVector overload: one vprefetch per component
+void vprefetch(const iVector<v, N> &vv) {
+  for (int i = 0; i < N; i++) {
     vprefetch(vv._internal[i]);
   }
 }
-template<class v,int N> void vprefetch(const iMatrix<v,N> &vv)
-{
-  for(int i=0;i<N;i++){
-  for(int j=0;j<N;j++){
-    vprefetch(vv._internal[i][j]);
-  }}
+template <class v, int N>  // iMatrix overload: one vprefetch per [i][j] entry
+void vprefetch(const iMatrix<v, N> &vv) {
+  for (int i = 0; i < N; i++) {
+    for (int j = 0; j < N; j++) {
+      vprefetch(vv._internal[i][j]);
+    }
+  }
 }
-
-
 }
 #endif
--- a/lib/tensors/Tensor_extract_merge.h
+++ b/lib/tensors/Tensor_extract_merge.h
@@ -10,6 +10,7 @@ Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 Author: neo <cossu@post.kek.jp>
 Author: paboyle <paboyle@ph.ed.ac.uk>
+Author: Christopher Kelly <ckelly@phys.columbia.edu>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -167,6 +168,33 @@ void extract(const vobj &vec,std::vector<typename vobj::scalar_object *> &extrac
  }
 }

+////////////////////////////////////////////////////////////////////////
+// Extract to a bunch of scalar object pointers of different scalar type, with offset. Useful for precision change
+////////////////////////////////////////////////////////////////////////
+template<class vobj, class sobj> inline
+void extract1(const vobj &vec,std::vector<sobj*> &extracted, int offset)
+{
+  // Copies SIMD lanes of vec into extracted[i][offset], converting scalar type.
+  typedef typename vobj::scalar_type vobj_scalar_type ;
+  typedef typename vobj::vector_type vobj_vector_type ;
+  typedef typename sobj::scalar_type sobj_scalar_type ;
+
+  static const int words=sizeof(vobj)/sizeof(vobj_vector_type);
+  static const int Nsimd=vobj_vector_type::Nsimd();
+  const int Nextr=extracted.size();
+  if (Nextr==0) return; // nothing to fill; also avoids dividing by zero below
+  const int s = Nsimd/Nextr; // lane stride between consecutive outputs
+  const vobj_scalar_type * vp = (const vobj_scalar_type *)&vec; // vec stays const
+
+  for(int w=0;w<words;w++){
+    for(int i=0;i<Nextr;i++){
+      sobj_scalar_type * pointer = (sobj_scalar_type *)& extracted[i][offset];
+      pointer[w] = vp[i*s+w*Nsimd]; // word w of lane i*s, converted on assignment
+    }
+  }
+}
+
+  
 ////////////////////////////////////////////////////////////////////////
 // Merge a contiguous array of scalar objects
 ////////////////////////////////////////////////////////////////////////
--- a/lib/tensors/Tensor_index.h
+++ b/lib/tensors/Tensor_index.h
@@ -146,14 +146,14 @@ class TensorIndexRecursion {
    }

  template<class vtype,int N> inline static 
-    void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal,0)),N> &arg, int i)
+    void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0],0)),N> &arg, int i)
    {
      for(int ii=0;ii<N;ii++){
 	TensorIndexRecursion<Level-1>::pokeIndex(ret._internal[ii],arg._internal[ii],i);
      }
    }
  template<class vtype,int N> inline static 
-    void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal,0)),N> &arg, int i,int j)
+    void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0],0)),N> &arg, int i,int j)
    {
      for(int ii=0;ii<N;ii++){
 	TensorIndexRecursion<Level-1>::pokeIndex(ret._internal[ii],arg._internal[ii],i,j);
@@ -161,7 +161,7 @@ class TensorIndexRecursion {
    }

  template<class vtype,int N> inline static 
-    void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal,0)),N> &arg, int i)
+    void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0][0],0)),N> &arg, int i)
    {
      for(int ii=0;ii<N;ii++){
      for(int jj=0;jj<N;jj++){
@@ -169,7 +169,7 @@ class TensorIndexRecursion {
      }}
    }
  template<class vtype,int N> inline static 
-    void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal,0)),N> &arg, int i,int j)
+    void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0][0],0)),N> &arg, int i,int j)
    {
      for(int ii=0;ii<N;ii++){
      for(int jj=0;jj<N;jj++){
--- a/lib/tensors/Tensor_traits.h
+++ b/lib/tensors/Tensor_traits.h
@@ -8,6 +8,7 @@

 Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+Author: Christopher Kelly <ckelly@phys.columbia.edu>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -230,6 +231,35 @@ namespace Grid {
    static const bool value = true;
  };

+  //Get the SIMD vector type from a Grid tensor or Lattice<Tensor>. NOTE(review): only the primary template is visible here; a Lattice<vobj> specialisation is presumably defined elsewhere - confirm
+  template<typename T>
+  struct getVectorType{
+    typedef T type; //general case: the type maps to itself
+  };
+  
+  //Query if a tensor or Lattice<Tensor> is SIMD vector or scalar: value is 1 when GridTypeMapper reports different scalar_type and vector_type, 0 when they are the same type
+  template<typename T>
+  class isSIMDvectorized{
+    template<typename U>
+    static typename std::enable_if< !std::is_same< typename GridTypeMapper<typename getVectorType<U>::type>::scalar_type,   typename GridTypeMapper<typename getVectorType<U>::type>::vector_type>::value, char>::type test(void *);
+    //the overload above only exists when the two types differ; the variadic one below is the fallback
+    template<typename U>
+    static double test(...);
+  
+  public:
+    enum {value = sizeof(test<T>(0)) == sizeof(char) }; //the return type's size tells which overload was picked
+  };
+  
+  //Get the precision of a Lattice, tensor or scalar type in units of sizeof(float), i.e. value = sizeof(underlying real type)/sizeof(float)
+  template<typename T>
+  class getPrecision{
+    typedef typename getVectorType<T>::type vector_obj; //get the vector_obj (i.e. a grid Tensor) if it's a Lattice<vobj>, do nothing otherwise (i.e. if fundamental or grid Tensor)
+  
+    typedef typename GridTypeMapper<vector_obj>::scalar_type scalar_type; //get the associated scalar type. Works on fundamental and tensor types
+    typedef typename GridTypeMapper<scalar_type>::Realified real_scalar_type; //remove any std::complex wrapper, should get us to the fundamental type
+  public:
+    enum { value = sizeof(real_scalar_type)/sizeof(float) }; //integer division of the two sizes
+  };
 }

 #endif
--- a/lib/tensors/Tensor_unary.h
+++ b/lib/tensors/Tensor_unary.h
@@ -86,6 +86,8 @@ UNARY(sqrt);
 UNARY(rsqrt);
 UNARY(sin);
 UNARY(cos);
+UNARY(asin);
+UNARY(acos);
 UNARY(log);
 UNARY(exp);
 UNARY(abs);