Merge branch 'develop' into feature/hmc_generalise

2026-01-03 11:19:28 +00:00 · 2017-05-01 12:13:56 +01:00
parent 8c540333d5 99220f6531
commit 3344788fa1
69 changed files with 3971 additions and 3179 deletions
--- a/lib/tensors/Tensor_class.h
+++ b/lib/tensors/Tensor_class.h
@@ -1,9 +1,6 @@
 /*************************************************************************************
-
 Grid physics library, www.github.com/paboyle/Grid
-
 Source file: ./lib/tensors/Tensor_class.h
-
 Copyright (C) 2015

 Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
@@ -13,16 +10,13 @@ This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.
-
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
-
 You should have received a copy of the GNU General Public License along
 with this program; if not, write to the Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
 See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
@@ -56,18 +50,18 @@ class iScalar {
  typedef vtype element;
  typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
  typedef typename GridTypeMapper<vtype>::vector_type vector_type;
+  typedef typename GridTypeMapper<vtype>::vector_typeD vector_typeD;
  typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
-  typedef iScalar<tensor_reduced_v> tensor_reduced;
  typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object;
+  typedef iScalar<tensor_reduced_v> tensor_reduced;
  typedef iScalar<recurse_scalar_object> scalar_object;
-
  // substitutes a real or complex version with same tensor structure
  typedef iScalar<typename GridTypeMapper<vtype>::Complexified> Complexified;
  typedef iScalar<typename GridTypeMapper<vtype>::Realified> Realified;

  // get double precision version
  typedef iScalar<typename GridTypeMapper<vtype>::DoublePrecision> DoublePrecision;
-
+  
  enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1 };

  // Scalar no action
@@ -80,29 +74,18 @@ class iScalar {
  iScalar<vtype> & operator= (const iScalar<vtype> &copyme) = default;
  iScalar<vtype> & operator= (iScalar<vtype> &&copyme) = default;
  */
-  iScalar(scalar_type s)
-      : _internal(s){};  // recurse down and hit the constructor for vector_type
+
+  //  template<int N=0>
+  //  iScalar(EnableIf<isSIMDvectorized<vector_type>, vector_type> s) : _internal(s){};  // recurse down and hit the constructor for vector_type
+
+  iScalar(scalar_type s) : _internal(s){};  // recurse down and hit the constructor for vector_type
+
  iScalar(const Zero &z) { *this = zero; };

  iScalar<vtype> &operator=(const Zero &hero) {
    zeroit(*this);
    return *this;
  }
-
-
-  // managing the internal vector structure
-  strong_inline scalar_object getlane(int lane){
-    scalar_object ret;
-    ret._internal = _internal.getlane(lane);
-    return ret;
-  }
-
-  strong_inline void putlane(scalar_object &s, int lane){
-    _internal.putlane(s._internal,lane);
-  }
-
-
-  
  friend strong_inline void vstream(iScalar<vtype> &out,
                                    const iScalar<vtype> &in) {
    vstream(out._internal, in._internal);
@@ -152,42 +135,38 @@ class iScalar {
  strong_inline const vtype &operator()(void) const { return _internal; }

  // Type casts meta programmed, must be pure scalar to match TensorRemove
-  template <class U = vtype, class V = scalar_type, IfComplex<V> = 0,
-            IfNotSimd<U> = 0>
+  template <class U = vtype, class V = scalar_type, IfComplex<V> = 0, IfNotSimd<U> = 0>
  operator ComplexF() const {
    return (TensorRemove(_internal));
  };
-  template <class U = vtype, class V = scalar_type, IfComplex<V> = 0,
-            IfNotSimd<U> = 0>
+  template <class U = vtype, class V = scalar_type, IfComplex<V> = 0, IfNotSimd<U> = 0>
  operator ComplexD() const {
    return (TensorRemove(_internal));
  };
  //  template<class U=vtype,class V=scalar_type,IfComplex<V> = 0,IfNotSimd<U> =
  //  0> operator RealD    () const { return(real(TensorRemove(_internal))); }
-  template <class U = vtype, class V = scalar_type, IfReal<V> = 0,
-            IfNotSimd<U> = 0>
+  template <class U = vtype, class V = scalar_type, IfReal<V> = 0,IfNotSimd<U> = 0>
  operator RealD() const {
    return TensorRemove(_internal);
  }
-  template <class U = vtype, class V = scalar_type, IfInteger<V> = 0,
-            IfNotSimd<U> = 0>
+  template <class U = vtype, class V = scalar_type, IfInteger<V> = 0, IfNotSimd<U> = 0>
  operator Integer() const {
    return Integer(TensorRemove(_internal));
  }

  // convert from a something to a scalar via constructor of something arg
-  template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type
-                         * = nullptr>
-  strong_inline iScalar<vtype> operator=(T arg) {
+  template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type * = nullptr>
+    strong_inline iScalar<vtype> operator=(T arg) {
    _internal = arg;
    return *this;
  }

-  friend std::ostream &operator<<(std::ostream &stream,
-                                  const iScalar<vtype> &o) {
+  friend std::ostream &operator<<(std::ostream &stream,const iScalar<vtype> &o) {
    stream << "S {" << o._internal << "}";
    return stream;
  };
+
+
 };
 ///////////////////////////////////////////////////////////
 // Allows to turn scalar<scalar<scalar<double>>>> back to double.
@@ -211,6 +190,7 @@ class iVector {
  typedef vtype element;
  typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
  typedef typename GridTypeMapper<vtype>::vector_type vector_type;
+  typedef typename GridTypeMapper<vtype>::vector_typeD vector_typeD;
  typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
  typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object;
  typedef iScalar<tensor_reduced_v> tensor_reduced;
@@ -222,8 +202,7 @@ class iVector {

  // get double precision version
  typedef iVector<typename GridTypeMapper<vtype>::DoublePrecision, N> DoublePrecision;
-
-
+  
  template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type
                         * = nullptr>
  strong_inline auto operator=(T arg) -> iVector<vtype, N> {
@@ -246,20 +225,6 @@ class iVector {
    zeroit(*this);
    return *this;
  }
-
-  strong_inline scalar_object getlane(int lane){
-    scalar_object ret;
-    for (int i = 0; i < N; i++) ret._internal[i] = _internal[i].getlane(lane);
-    return ret;
-  }
-
-  strong_inline void putlane(scalar_object &s, int lane){
-    for (int i = 0; i < N; i++) _internal[i].putlane(s._internal[i],lane);
-  }
-  
-
-
-  
  friend strong_inline void zeroit(iVector<vtype, N> &that) {
    for (int i = 0; i < N; i++) {
      zeroit(that._internal[i]);
@@ -341,6 +306,7 @@ class iMatrix {
  typedef vtype element;
  typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
  typedef typename GridTypeMapper<vtype>::vector_type vector_type;
+  typedef typename GridTypeMapper<vtype>::vector_typeD vector_typeD;
  typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
  typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object;

@@ -350,8 +316,7 @@ class iMatrix {

  // get double precision version
  typedef iMatrix<typename GridTypeMapper<vtype>::DoublePrecision, N> DoublePrecision;
-
-
+  
  // Tensor removal
  typedef iScalar<tensor_reduced_v> tensor_reduced;
  typedef iMatrix<recurse_scalar_object, N> scalar_object;
@@ -390,25 +355,6 @@ class iMatrix {
    return *this;
  }

-
-  strong_inline scalar_object getlane(int lane){
-    scalar_object ret;
-    for (int i = 0; i < N; i++) {
-      for (int j = 0; j < N; j++) {
-	ret._internal[i][j] = _internal[i][j].getlane(lane);
-      }
-    }
-    return ret;
-  }
-
-  strong_inline void putlane(scalar_object &s, int lane){
-    for (int i = 0; i < N; i++)
-      for (int j = 0; j < N; j++) _internal[i][j].putlane(s._internal[i][j],lane);
-  }
-  
-
-  
-
  friend strong_inline void zeroit(iMatrix<vtype,N> &that){
    for(int i=0;i<N;i++){
      for(int j=0;j<N;j++){
@@ -527,3 +473,6 @@ void vprefetch(const iMatrix<v, N> &vv) {
 }
 }
 #endif
+
+
+
--- a/lib/tensors/Tensor_inner.h
+++ b/lib/tensors/Tensor_inner.h
@@ -29,51 +29,109 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #ifndef GRID_MATH_INNER_H
 #define GRID_MATH_INNER_H
 namespace Grid {
-    ///////////////////////////////////////////////////////////////////////////////////////
-    // innerProduct Scalar x Scalar -> Scalar
-    // innerProduct Vector x Vector -> Scalar
-    // innerProduct Matrix x Matrix -> Scalar
-    ///////////////////////////////////////////////////////////////////////////////////////
-    template<class sobj> inline RealD norm2(const sobj &arg){
-      typedef typename sobj::scalar_type scalar;
-      decltype(innerProduct(arg,arg)) nrm;
-      nrm = innerProduct(arg,arg);
-      RealD ret = real(nrm);
-      return ret;
-    }
+  ///////////////////////////////////////////////////////////////////////////////////////
+  // innerProduct Scalar x Scalar -> Scalar
+  // innerProduct Vector x Vector -> Scalar
+  // innerProduct Matrix x Matrix -> Scalar
+  ///////////////////////////////////////////////////////////////////////////////////////
+  template<class sobj> inline RealD norm2(const sobj &arg){
+    auto nrm = innerProductD(arg,arg);
+    RealD ret = real(nrm);
+    return ret;
+  }
+  //////////////////////////////////////
+  // If single precision, promote to double and sum the two double-precision halves
+  //////////////////////////////////////

-    template<class l,class r,int N> inline
-    auto innerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0],rhs._internal[0]))>
-    {
-        typedef decltype(innerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
-        iScalar<ret_t> ret;
-	ret=zero;
-        for(int c1=0;c1<N;c1++){
-            ret._internal += innerProduct(lhs._internal[c1],rhs._internal[c1]);
-        }
-        return ret;
+inline ComplexD innerProductD(const ComplexF &l,const ComplexF &r){  return innerProduct(l,r); }
+inline ComplexD innerProductD(const ComplexD &l,const ComplexD &r){  return innerProduct(l,r); }
+inline RealD    innerProductD(const RealD    &l,const RealD    &r){  return innerProduct(l,r); }
+inline RealD    innerProductD(const RealF    &l,const RealF    &r){  return innerProduct(l,r); }
+
+inline vComplexD innerProductD(const vComplexD &l,const vComplexD &r){  return innerProduct(l,r); }
+inline vRealD    innerProductD(const vRealD    &l,const vRealD    &r){  return innerProduct(l,r); }
+inline vComplexD innerProductD(const vComplexF &l,const vComplexF &r){  
+  vComplexD la,lb;
+  vComplexD ra,rb;
+  Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
+  Optimization::PrecisionChange::StoD(r.v,ra.v,rb.v);
+  return innerProduct(la,ra) + innerProduct(lb,rb); 
+}
+inline vRealD innerProductD(const vRealF &l,const vRealF &r){  
+  vRealD la,lb;
+  vRealD ra,rb;
+  Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
+  Optimization::PrecisionChange::StoD(r.v,ra.v,rb.v);
+  return innerProduct(la,ra) + innerProduct(lb,rb); 
+}
+
+  template<class l,class r,int N> inline
+  auto innerProductD (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProductD(lhs._internal[0],rhs._internal[0]))>
+  {
+    typedef decltype(innerProductD(lhs._internal[0],rhs._internal[0])) ret_t;
+    iScalar<ret_t> ret;
+    ret=zero;
+    for(int c1=0;c1<N;c1++){
+      ret._internal += innerProductD(lhs._internal[c1],rhs._internal[c1]);
    }
-    template<class l,class r,int N> inline
-    auto innerProduct (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0]))>
-    {
-        typedef decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
-        iScalar<ret_t> ret;
-        iScalar<ret_t> tmp;
-	ret=zero;
-        for(int c1=0;c1<N;c1++){
-        for(int c2=0;c2<N;c2++){
-	  ret._internal+=innerProduct(lhs._internal[c1][c2],rhs._internal[c1][c2]);
-        }}
-        return ret;
-    }
-    template<class l,class r> inline
-    auto innerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProduct(lhs._internal,rhs._internal))>
-    {
-        typedef decltype(innerProduct(lhs._internal,rhs._internal)) ret_t;
-        iScalar<ret_t> ret;
-        ret._internal = innerProduct(lhs._internal,rhs._internal);
-        return ret;
+    return ret;
+  }
+  template<class l,class r,int N> inline
+  auto innerProductD (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProductD(lhs._internal[0][0],rhs._internal[0][0]))>
+  {
+    typedef decltype(innerProductD(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
+    iScalar<ret_t> ret;
+    iScalar<ret_t> tmp;
+    ret=zero;
+    for(int c1=0;c1<N;c1++){
+    for(int c2=0;c2<N;c2++){
+      ret._internal+=innerProductD(lhs._internal[c1][c2],rhs._internal[c1][c2]);
+    }}
+    return ret;
+  }
+  template<class l,class r> inline
+  auto innerProductD (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProductD(lhs._internal,rhs._internal))>
+  {
+    typedef decltype(innerProductD(lhs._internal,rhs._internal)) ret_t;
+    iScalar<ret_t> ret;
+    ret._internal = innerProductD(lhs._internal,rhs._internal);
+    return ret;
+  }
+  //////////////////////
+  // Keep same precision
+  //////////////////////
+  template<class l,class r,int N> inline
+  auto innerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0],rhs._internal[0]))>
+  {
+    typedef decltype(innerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
+    iScalar<ret_t> ret;
+    ret=zero;
+    for(int c1=0;c1<N;c1++){
+      ret._internal += innerProduct(lhs._internal[c1],rhs._internal[c1]);
    }
+    return ret;
+  }
+  template<class l,class r,int N> inline
+  auto innerProduct (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0]))>
+  {
+    typedef decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
+    iScalar<ret_t> ret;
+    iScalar<ret_t> tmp;
+    ret=zero;
+    for(int c1=0;c1<N;c1++){
+    for(int c2=0;c2<N;c2++){
+      ret._internal+=innerProduct(lhs._internal[c1][c2],rhs._internal[c1][c2]);
+    }}
+    return ret;
+  }
+  template<class l,class r> inline
+  auto innerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProduct(lhs._internal,rhs._internal))>
+  {
+    typedef decltype(innerProduct(lhs._internal,rhs._internal)) ret_t;
+    iScalar<ret_t> ret;
+    ret._internal = innerProduct(lhs._internal,rhs._internal);
+    return ret;
+  }

 }
 #endif
--- a/lib/tensors/Tensor_traits.h
+++ b/lib/tensors/Tensor_traits.h
@@ -1,29 +1,21 @@
-    /*************************************************************************************
-
+   /*************************************************************************************
    Grid physics library, www.github.com/paboyle/Grid 
-
    Source file: ./lib/tensors/Tensor_traits.h
-
    Copyright (C) 2015
-
 Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 Author: Christopher Kelly <ckelly@phys.columbia.edu>
-
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
-
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
-
    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
@@ -53,6 +45,7 @@ namespace Grid {
  public:
    typedef typename T::scalar_type scalar_type;
    typedef typename T::vector_type vector_type;
+    typedef typename T::vector_typeD vector_typeD;
    typedef typename T::tensor_reduced tensor_reduced;
    typedef typename T::scalar_object scalar_object;
    typedef typename T::Complexified Complexified;
@@ -68,6 +61,7 @@ namespace Grid {
  public:
    typedef RealF scalar_type;
    typedef RealF vector_type;
+    typedef RealD vector_typeD;
    typedef RealF tensor_reduced ;
    typedef RealF scalar_object;
    typedef ComplexF Complexified;
@@ -79,6 +73,7 @@ namespace Grid {
  public:
    typedef RealD scalar_type;
    typedef RealD vector_type;
+    typedef RealD vector_typeD;
    typedef RealD tensor_reduced;
    typedef RealD scalar_object;
    typedef ComplexD Complexified;
@@ -90,6 +85,7 @@ namespace Grid {
  public:
    typedef ComplexF scalar_type;
    typedef ComplexF vector_type;
+    typedef ComplexD vector_typeD;
    typedef ComplexF tensor_reduced;
    typedef ComplexF scalar_object;
    typedef ComplexF Complexified;
@@ -101,6 +97,7 @@ namespace Grid {
  public:
    typedef ComplexD scalar_type;
    typedef ComplexD vector_type;
+    typedef ComplexD vector_typeD;
    typedef ComplexD tensor_reduced;
    typedef ComplexD scalar_object;
    typedef ComplexD Complexified;
@@ -112,6 +109,7 @@ namespace Grid {
  public:
    typedef Integer scalar_type;
    typedef Integer vector_type;
+    typedef Integer vector_typeD;
    typedef Integer tensor_reduced;
    typedef Integer scalar_object;
    typedef void Complexified;
@@ -124,6 +122,7 @@ namespace Grid {
  public:
    typedef RealF  scalar_type;
    typedef vRealF vector_type;
+    typedef vRealD vector_typeD;
    typedef vRealF tensor_reduced;
    typedef RealF  scalar_object;
    typedef vComplexF Complexified;
@@ -135,6 +134,7 @@ namespace Grid {
  public:
    typedef RealD  scalar_type;
    typedef vRealD vector_type;
+    typedef vRealD vector_typeD;
    typedef vRealD tensor_reduced;
    typedef RealD  scalar_object;
    typedef vComplexD Complexified;
@@ -142,10 +142,23 @@ namespace Grid {
    typedef vRealD DoublePrecision;
    enum { TensorLevel = 0 };
  };
+  template<> class GridTypeMapper<vComplexH> {
+  public:
+    typedef ComplexF  scalar_type;
+    typedef vComplexH vector_type;
+    typedef vComplexD vector_typeD;
+    typedef vComplexH tensor_reduced;
+    typedef ComplexF  scalar_object;
+    typedef vComplexH Complexified;
+    typedef vRealH Realified;
+    typedef vComplexD DoublePrecision;
+    enum { TensorLevel = 0 };
+  };
  template<> class GridTypeMapper<vComplexF> {
  public:
    typedef ComplexF  scalar_type;
    typedef vComplexF vector_type;
+    typedef vComplexD vector_typeD;
    typedef vComplexF tensor_reduced;
    typedef ComplexF  scalar_object;
    typedef vComplexF Complexified;
@@ -157,6 +170,7 @@ namespace Grid {
  public:
    typedef ComplexD  scalar_type;
    typedef vComplexD vector_type;
+    typedef vComplexD vector_typeD;
    typedef vComplexD tensor_reduced;
    typedef ComplexD  scalar_object;
    typedef vComplexD Complexified;
@@ -168,6 +182,7 @@ namespace Grid {
  public:
    typedef  Integer scalar_type;
    typedef vInteger vector_type;
+    typedef vInteger vector_typeD;
    typedef vInteger tensor_reduced;
    typedef  Integer scalar_object;
    typedef void Complexified;
@@ -252,7 +267,8 @@ namespace Grid {
  template<typename T>
  class isSIMDvectorized{
    template<typename U>
-    static typename std::enable_if< !std::is_same< typename GridTypeMapper<typename getVectorType<U>::type>::scalar_type,   typename GridTypeMapper<typename getVectorType<U>::type>::vector_type>::value, char>::type test(void *);
+    static typename std::enable_if< !std::is_same< typename GridTypeMapper<typename getVectorType<U>::type>::scalar_type,   
+      typename GridTypeMapper<typename getVectorType<U>::type>::vector_type>::value, char>::type test(void *);

    template<typename U>
    static double test(...);
@@ -264,13 +280,15 @@ namespace Grid {
  //Get the precision of a Lattice, tensor or scalar type in units of sizeof(float)
  template<typename T>
  class getPrecision{
+  public:
    typedef typename getVectorType<T>::type vector_obj; //get the vector_obj (i.e. a grid Tensor) if its a Lattice<vobj>, do nothing otherwise (i.e. if fundamental or grid Tensor)
  
    typedef typename GridTypeMapper<vector_obj>::scalar_type scalar_type; //get the associated scalar type. Works on fundamental and tensor types
-  public:
    typedef typename GridTypeMapper<scalar_type>::Realified real_scalar_type; //remove any std::complex wrapper, should get us to the fundamental type
+
    enum { value = sizeof(real_scalar_type)/sizeof(float) };
  };
 }

 #endif
+