Merge branch 'develop' into feature/gpu-port

2025-10-26 09:39:34 +00:00 · 2019-07-16 11:55:17 +01:00
parent 7c11525d1a c3d0c176ab
commit fa9cd50c5b
274 changed files with 7120 additions and 4663 deletions
--- a/Grid/simd/Grid_gpu.h
+++ b/Grid/simd/Grid_gpu.h
@@ -1,652 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/simd/Grid_gpu.h
-
-    Copyright (C) 2018
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-//----------------------------------------------------------------------
-/*! @file Grid_gpu.h
-  @brief Optimization libraries for GPU
-  Use float4, double2 
-*/
-//----------------------------------------------------------------------
-
-#include <cuda_fp16.h>
-
-namespace Grid {
-
-  // re im, re, im, re, im etc..
-struct half8 {
-  half ax, ay, az, aw, bx, by, bz, bw;
-};
-accelerator_inline float half2float(half h)
-{
-  float f;
-#ifdef __CUDA_ARCH__
-  f = __half2float(h);
-#else 
-  //f = __half2float(h);
-  __half_raw hr(h);
-  Grid_half hh; 
-  hh.x = hr.x;
-  f=  sfw_half_to_float(hh);
-#endif
-  return f;
-}
-accelerator_inline half float2half(float f)
-{
-  half h;
-#ifdef __CUDA_ARCH__
-  h = __float2half(f);
-#else
-  Grid_half hh = sfw_float_to_half(f);
-  __half_raw hr;  
-  hr.x = hh.x;
-  h = __half(hr);
-#endif
-  return h;
-}
-
-namespace Optimization {
-  
-  inline accelerator float4 operator*(float4 a,float4 b) {return make_float4(a.x*b.x,a.y*b.y,a.z*b.z,a.w*b.w);}
-  inline accelerator float4 operator+(float4 a,float4 b) {return make_float4(a.x+b.x,a.y+b.y,a.z+b.z,a.w+b.w);}
-  inline accelerator float4 operator-(float4 a,float4 b) {return make_float4(a.x-b.x,a.y-b.y,a.z-b.z,a.w-b.w);}
-  inline accelerator float4 operator/(float4 a,float4 b) {return make_float4(a.x/b.x,a.y/b.y,a.z/b.z,a.w/b.w);}
-
-  inline accelerator double2 operator*(double2 a,double2 b) {return make_double2(a.x*b.x,a.y*b.y);}
-  inline accelerator double2 operator+(double2 a,double2 b) {return make_double2(a.x+b.x,a.y+b.y);}
-  inline accelerator double2 operator-(double2 a,double2 b) {return make_double2(a.x-b.x,a.y-b.y);}
-  inline accelerator double2 operator/(double2 a,double2 b) {return make_double2(a.x/b.x,a.y/b.y);}
-
-  inline accelerator int4 operator*(int4 a,int4 b) {return make_int4(a.x*b.x,a.y*b.y,a.z*b.z,a.w*b.w);}
-  inline accelerator int4 operator+(int4 a,int4 b) {return make_int4(a.x+b.x,a.y+b.y,a.z+b.z,a.w+b.w);}
-  inline accelerator int4 operator-(int4 a,int4 b) {return make_int4(a.x-b.x,a.y-b.y,a.z-b.z,a.w-b.w);}
-  inline accelerator int4 operator/(int4 a,int4 b) {return make_int4(a.x/b.x,a.y/b.y,a.z/b.z,a.w/b.w);}
-
-  struct Vsplat{
-    //Complex float
-    accelerator_inline float4 operator()(float a, float b){
-      float4 ret;
-      ret.x=ret.z=a;
-      ret.y=ret.w=b;
-      return ret;
-    }
-    // Real float
-    accelerator_inline float4 operator()(float a){
-      float4 ret;
-      ret.x=ret.y=ret.z=ret.w = a;
-      return ret;
-    }
-    //Complex double
-    accelerator_inline double2 operator()(double a, double b){
-      double2 ret;
-      ret.x=a;
-      ret.y=b;
-      return ret;
-    }
-    //Real double
-    accelerator_inline double2 operator()(double a){
-      double2 ret; 
-      ret.x = ret.y = a;
-      return ret;
-    }
-    //Integer
-    accelerator_inline int4 operator()(Integer a){
-      int4 ret;
-      ret.x=ret.y=ret.z=ret.w=a;
-      return ret;
-    }
-  };
-
-  struct Vstore{
-    //Float 
-    accelerator_inline void operator()(float4 a, float* F){
-      float4 *F4 = (float4 *)F;
-      *F4 = a;
-    }
-    //Double
-    accelerator_inline void operator()(double2 a, double* D){
-      double2 *D2 = (double2 *)D;
-      *D2 = a;
-    }
-    //Integer
-    accelerator_inline void operator()(int4 a, Integer* I){
-      int4 *I4 = (int4 *)I;
-      *I4 = a;
-    }
-
-  };
-
-  struct Vstream{
-    //Float
-    accelerator_inline void operator()(float * a, float4 b){
-      float4 * a4 = (float4 *)a;
-      *a4 = b;
-    }
-    //Double
-    accelerator_inline void operator()(double * a, double2 b){
-      double2 * a2 = (double2 *)a;
-      *a2 = b;
-    }
-
-  };
-
-  struct Vset{
-    // Complex float 
-    accelerator_inline float4 operator()(Grid::ComplexF *a){
-      float4 ret;
-      ret.x = a[0].real();
-      ret.y = a[0].imag();
-      ret.z = a[1].real();
-      ret.w = a[1].imag();
-      return ret;
-    }
-    // Complex double 
-    accelerator_inline double2 operator()(Grid::ComplexD *a){
-      double2 ret;
-      ret.x = a[0].real();
-      ret.y = a[0].imag();
-      return ret;
-    }
-    // Real float 
-    accelerator_inline float4 operator()(float *a){
-      float4 ret;
-      ret.x = a[0];
-      ret.y = a[1];
-      ret.z = a[2];
-      ret.w = a[3];
-      return ret;
-    }
-    // Real double
-    accelerator_inline double2 operator()(double *a){
-      double2 ret;
-      ret.x = a[0];
-      ret.y = a[1];
-      return ret;
-    }
-    // Integer
-    accelerator_inline int4 operator()(Integer *a){
-      int4 ret;
-      ret.x = a[0];
-      ret.y = a[1];
-      ret.z = a[2];
-      ret.w = a[3];
-      return ret;
-    }
-
-  };
-
-  template <typename Out_type, typename In_type>
-  struct Reduce{
-    //Need templated class to overload output type
-    //General form must generate error if compiled
-    accelerator_inline Out_type operator()(In_type in){
-      printf("Error, using wrong Reduce function\n");
-      exit(1);
-      return 0;
-    }
-  };
-
-  /////////////////////////////////////////////////////
-  // Arithmetic operations
-  /////////////////////////////////////////////////////
-  struct Sum{
-    //Complex/Real float
-    accelerator_inline float4 operator()(float4 a, float4 b){
-      return a+b;
-    }
-    //Complex/Real double
-    accelerator_inline double2 operator()(double2 a, double2 b){
-      return a+b;
-    }
-    //Integer
-    accelerator_inline int4 operator()(int4 a,int4 b){
-      return a+b;
-    }
-  };
-
-  struct Sub{
-    //Complex/Real float
-    accelerator_inline float4 operator()(float4 a, float4 b){
-      return a-b;
-    }
-    //Complex/Real double
-    accelerator_inline double2 operator()(double2 a, double2 b){
-      return a-b;
-    }
-    //Integer
-    accelerator_inline int4 operator()(int4 a, int4 b){
-      return a-b;
-    }
-  };
-
-  struct MultRealPart{
-    accelerator_inline float4 operator()(float4 a, float4 b){
-      float4 ymm0;
-      ymm0.x = a.x;
-      ymm0.y = a.x;
-      ymm0.z = a.z;
-      ymm0.w = a.z;
-      return  ymm0*b;
-      // ymm0 = _mm_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ymm0 <- ar ar,
-      // return _mm_mul_ps(ymm0,b);                       // ymm0 <- ar bi, ar br
-    }
-    accelerator_inline double2 operator()(double2 a, double2 b){
-      double2 ymm0;
-      ymm0.x = a.x;
-      ymm0.y = a.x;
-      return ymm0*b;
-      //      ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar, ar,ar b'00,00
-      //      return _mm_mul_pd(ymm0,b);      // ymm0 <- ar bi, ar br
-    }
-  };
-  struct MaddRealPart{
-    accelerator_inline float4 operator()(float4 a, float4 b, float4 c){
-      float4 ymm0; // =  _mm_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ymm0 <- ar ar,
-      ymm0.x = a.x;
-      ymm0.y = a.x;
-      ymm0.z = a.z;
-      ymm0.w = a.z;
-      return c+ymm0*b;
-    }
-    accelerator_inline double2 operator()(double2 a, double2 b, double2 c){
-      //      ymm0 = _mm_shuffle_pd( a, a, 0x0 );
-      double2 ymm0;
-      ymm0.x = a.x;
-      ymm0.y = a.x;
-      return c+ymm0*b;
-    }
-  };
-
-  struct MultComplex{
-    // Complex float
-    accelerator_inline float4 operator()(float4 a, float4 b){
-      float4 ymm0;
-      ymm0.x = a.x*b.x -  a.y*b.y ; // rr - ii
-      ymm0.y = a.x*b.y +  a.y*b.x ; // ir + ri
-      ymm0.z = a.z*b.z -  a.w*b.w ; // rr - ii
-      ymm0.w = a.w*b.z +  a.z*b.w ; // ir + ri
-      return ymm0;
-    }
-    // Complex double
-    accelerator_inline double2 operator()(double2 a, double2 b){
-      double2 ymm0;
-      ymm0.x = a.x*b.x -  a.y*b.y ; // rr - ii
-      ymm0.y = a.x*b.y +  a.y*b.x ; // ir + ri
-      return ymm0;
-    }
-  };
-
-  struct Mult{
-
-    accelerator_inline void mac(float4 &a, float4 b, float4 c){
-      a= a+b*c;
-    }
-
-    accelerator_inline void mac(double2 &a, double2 b, double2 c){
-      a= a+b*c;
-    }
-
-    // Real float
-    accelerator_inline float4 operator()(float4 a, float4 b){
-      return a*b;
-    }
-    // Real double
-    accelerator_inline double2 operator()(double2 a, double2 b){
-      return a*b;
-    }
-    // Integer
-    accelerator_inline int4 operator()(int4 a, int4 b){
-      return a*b;
-    }
-  };
-
-  struct Div{
-    // Real float
-    accelerator_inline float4 operator()(float4 a, float4 b){
-      return a/b;
-    }
-    // Real double
-    accelerator_inline double2 operator()(double2 a, double2 b){
-      return a/b;
-    }
-  };
-
-
-  struct Conj{
-    // Complex single
-    accelerator_inline float4 operator()(float4 in){
-      float4 ret;
-      ret.x =   in.x;
-      ret.y = - in.y;
-      ret.z =   in.z;
-      ret.w = -  in.w;
-      return ret;
-    }
-    // Complex double
-    accelerator_inline double2 operator()(double2 in){
-      double2 ret;
-      ret.x =   in.x;
-      ret.y = - in.y;
-      return ret;
-    }
-    // do not define for integer input
-  };
-
-  struct TimesMinusI{
-    //Complex single
-    accelerator_inline float4 operator()(float4 in, float4 ret){
-      float4 tmp;
-      tmp.x =   in.y;
-      tmp.y = - in.x;
-      tmp.z =   in.w;
-      tmp.w = - in.z;
-      return tmp;
-    }
-    //Complex double
-    accelerator_inline double2 operator()(double2 in, double2 ret){
-      double2 tmp;
-      tmp.x =   in.y;
-      tmp.y = - in.x;
-      return tmp;
-    }
-  };
-
-  struct TimesI{
-    //Complex single
-    accelerator_inline float4 operator()(float4 in, float4 ret){
-      float4 tmp;
-      tmp.x = - in.y;
-      tmp.y =   in.x;
-      tmp.z = - in.w;
-      tmp.w =   in.z;
-      return tmp;
-    }
-    //Complex double
-    accelerator_inline double2 operator()(double2 in, double2 ret){
-      double2 tmp ;
-      tmp.x = - in.y;
-      tmp.y =   in.x;
-      return tmp;
-    }
-  };
-
-  struct Permute{
-
-    static accelerator_inline float4 Permute0(float4 in){
-      float4 tmp;
-      tmp.x = in.z;
-      tmp.y = in.w;
-      tmp.z = in.x;
-      tmp.w = in.y;
-      return tmp;
-    };
-    static accelerator_inline float4 Permute1(float4 in){
-      float4 tmp;
-      tmp.x = in.y;
-      tmp.y = in.x;
-      tmp.z = in.w;
-      tmp.w = in.z;
-      return tmp;
-    };
-    static accelerator_inline float4 Permute2(float4 in){
-      return in;
-    };
-    static accelerator_inline float4 Permute3(float4 in){
-      return in;
-    };
-
-    static accelerator_inline double2 Permute0(double2 in){ //AB -> BA
-      double2 tmp;
-      tmp.x = in.y;
-      tmp.y = in.x;
-      return tmp;
-    };
-    static accelerator_inline double2 Permute1(double2 in){
-      return in;
-    };
-    static accelerator_inline double2 Permute2(double2 in){
-      return in;
-    };
-    static accelerator_inline double2 Permute3(double2 in){
-      return in;
-    };
-  };
-
-  struct PrecisionChange {
-    static accelerator_inline half8 StoH (float4 a,float4 b) {
-      half8 h;
-      h.ax = float2half(a.x);
-      h.ay = float2half(a.y);
-      h.az = float2half(a.z);
-      h.aw = float2half(a.w);
-      h.bx = float2half(b.x);
-      h.by = float2half(b.y);
-      h.bz = float2half(b.z);
-      h.bw = float2half(b.w);
-      return h;
-    }
-    static accelerator_inline void  HtoS (half8 h,float4 &sa,float4 &sb) {
-      sa.x = half2float(h.ax);
-      sa.y = half2float(h.ay);
-      sa.z = half2float(h.az);
-      sa.w = half2float(h.aw);
-      sb.x = half2float(h.bx);
-      sb.y = half2float(h.by);
-      sb.z = half2float(h.bz);
-      sb.w = half2float(h.bw);
-    }
-    static accelerator_inline float4 DtoS (double2 a,double2 b) {
-      float4 s;
-      s.x = a.x;
-      s.y = a.y;
-      s.z = b.x;
-      s.w = b.y;
-      return s;
-    }
-    static accelerator_inline void StoD (float4 s,double2 &a,double2 &b) {
-      a.x = s.x;
-      a.y = s.y;
-      b.x = s.z;
-      b.y = s.w;
-    }
-    static accelerator_inline half8 DtoH (double2 a,double2 b,double2 c,double2 d) {
-      float4 sa,sb;
-      sa = DtoS(a,b);
-      sb = DtoS(c,d);
-      return StoH(sa,sb);
-    }
-    static accelerator_inline void HtoD (half8 h,double2 &a,double2 &b,double2 &c,double2 &d) {
-      float4 sa,sb;
-      HtoS(h,sa,sb);
-      StoD(sa,a,b);
-      StoD(sb,c,d);
-    }
-  };
-
-  struct Exchange{
-    // 3210 ordering
-
-    static accelerator_inline void Exchange0(float4 &out1,float4 &out2,float4 in1,float4 in2){
-      out1.x = in1.x;      
-      out1.y = in1.y;
-      out1.z = in2.x;      
-      out1.w = in2.y;
-
-      out2.x = in1.z;      
-      out2.y = in1.w;
-      out2.z = in2.z;      
-      out2.w = in2.w;
-
-	//      out1= _mm_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(1,0,1,0));
-	//      out2= _mm_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,2,3,2));
-      return;
-    };
-
-    static accelerator_inline void Exchange1(float4 &out1,float4 &out2,float4 in1,float4 in2){
-
-      out1.x = in1.x;      
-      out1.y = in2.x;
-      out1.z = in1.z;      
-      out1.w = in2.z;
-
-      out2.x = in1.y;      
-      out2.y = in2.y;
-      out2.z = in1.w;      
-      out2.w = in2.w;
-
-      //      out1= _mm_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0)); /*ACEG*/
-      //      out2= _mm_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1)); /*BDFH*/
-      //      out1= _mm_shuffle_ps(out1,out1,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/
-      //      out2= _mm_shuffle_ps(out2,out2,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/
-    };
-    static accelerator_inline void Exchange2(float4 &out1,float4 &out2,float4 in1,float4 in2){
-      assert(0);
-      return;
-    };
-    static accelerator_inline void Exchange3(float4 &out1,float4 &out2,float4 in1,float4 in2){
-      assert(0);
-      return;
-    };
-
-    static accelerator_inline void Exchange0(double2 &out1,double2 &out2,double2 in1,double2 in2){
-      out1.x = in1.x;      
-      out1.y = in2.x;
-      out2.x = in1.y;      
-      out2.y = in2.y;
-      //      out1= _mm_shuffle_pd(in1,in2,0x0);
-      //      out2= _mm_shuffle_pd(in1,in2,0x3);
-    };
-    static accelerator_inline void Exchange1(double2 &out1,double2 &out2,double2 in1,double2 in2){
-      assert(0);
-      return;
-    };
-    static accelerator_inline void Exchange2(double2 &out1,double2 &out2,double2 in1,double2 in2){
-      assert(0);
-      return;
-    };
-    static accelerator_inline void Exchange3(double2 &out1,double2 &out2,double2 in1,double2 in2){
-      assert(0);
-      return;
-    };
-  };
-
-  struct Rotate{
-
-    static accelerator_inline float4 rotate(float4 in,int n){ 
-      float4 ret;
-      switch(n){
-      case 0: ret = in ; break;
-      case 1: ret.x = in.y; ret.y = in.z ; ret.z = in.w ; ret.w = in.x; break;
-      case 2: ret.x = in.z; ret.y = in.w ; ret.z = in.x ; ret.w = in.y; break;
-      case 3: ret.x = in.w; ret.y = in.x ; ret.z = in.y ; ret.w = in.z; break;
-      default: break;
-      }
-      return ret;
-    }
-    static accelerator_inline double2 rotate(double2 in,int n){ 
-      double2 ret;
-      switch(n){
-      case 0: ret = in; break;
-      case 1: ret.x = in.y; ret.y = in.x ; break;
-      default: break;
-      }
-      return ret;
-    }
-
-    template<int n> static accelerator_inline float4  tRotate(float4  in){ 
-      return rotate(in,n);
-    };
-    template<int n> static accelerator_inline double2 tRotate(double2 in){ 
-      return rotate(in,n);
-    };
-
-  };
-  //////////////////////////////////////////////
-  // Some Template specialization
-
-  //Complex float Reduce
-  template<>
-  accelerator_inline Grid::ComplexF Reduce<Grid::ComplexF, float4>::operator()(float4 in){
-    Grid::ComplexF ret(in.x+in.z,in.y+in.w);
-    return ret;
-  }
-  //Real float Reduce
-  template<>
-  accelerator_inline Grid::RealF Reduce<Grid::RealF, float4>::operator()(float4 in){
-    return in.x+in.y+in.z+in.w;
-  }
-  
-  //Complex double Reduce
-  template<>
-  accelerator_inline Grid::ComplexD Reduce<Grid::ComplexD, double2>::operator()(double2 in){
-    return Grid::ComplexD(in.x,in.y);
-  }
-  
-  //Real double Reduce
-  template<>
-  accelerator_inline Grid::RealD Reduce<Grid::RealD, double2>::operator()(double2 in){
-    return in.x+in.y;
-  }
-
-  //Integer Reduce
-  template<>
-  accelerator_inline Integer Reduce<Integer, int4>::operator()(int4 in){
-    return in.x+in.y+in.z+in.w;
-  }
-}
-
-//////////////////////////////////////////////////////////////////////////////////////
-// Here assign types 
-//////////////////////////////////////////////////////////////////////////////////////
-  typedef half8   SIMD_Htype;  // Single precision type
-  typedef float4  SIMD_Ftype;  // Single precision type
-  typedef double2 SIMD_Dtype; // Double precision type
-  typedef int4    SIMD_Itype; // Integer type
-
-  // prefetch utilities
-  accelerator_inline void v_prefetch0(int size, const char *ptr){};
-  accelerator_inline void prefetch_HINT_T0(const char *ptr){};
-
-  // Function name aliases
-  typedef Optimization::Vsplat   VsplatSIMD;
-  typedef Optimization::Vstore   VstoreSIMD;
-  typedef Optimization::Vset     VsetSIMD;
-  typedef Optimization::Vstream  VstreamSIMD;
-  template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
-
-  // Arithmetic operations
-  typedef Optimization::Sum         SumSIMD;
-  typedef Optimization::Sub         SubSIMD;
-  typedef Optimization::Div         DivSIMD;
-  typedef Optimization::Mult        MultSIMD;
-  typedef Optimization::MultComplex MultComplexSIMD;
-  typedef Optimization::MultRealPart MultRealPartSIMD;
-  typedef Optimization::MaddRealPart MaddRealPartSIMD;
-  typedef Optimization::Conj        ConjSIMD;
-  typedef Optimization::TimesMinusI TimesMinusISIMD;
-  typedef Optimization::TimesI      TimesISIMD;
-
-}
--- a/Grid/simd/Grid_vector_types.h
+++ b/Grid/simd/Grid_vector_types.h
@@ -11,6 +11,7 @@ Author: Guido Cossu <cossu@iroiro-pc.kek.jp>
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 Author: neo <cossu@post.kek.jp>
 Author: paboyle <paboyle@ph.ed.ac.uk>
+Author: Michael Marshall <michael.marshall@ed.ac.au>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -109,9 +110,6 @@ accelerator_inline Grid_half sfw_float_to_half(float ff) {
 #ifdef GPU_VEC
 #include "Grid_gpu_vec.h"
 #endif
-#ifdef GPU
-#include "Grid_gpu.h"
-#endif
 #ifdef GEN
 #include "Grid_generic.h"
 #endif
@@ -162,17 +160,25 @@ template <typename Condition, typename ReturnType> using NotEnableIf = Invoke<st
 ////////////////////////////////////////////////////////
 // Check for complexity with type traits
 template <typename T> struct is_complex : public std::false_type {};
-template <> struct is_complex<complex<double> > : public std::true_type {};
-template <> struct is_complex<complex<float> >  : public std::true_type {};
+template <> struct is_complex<ComplexD> : public std::true_type {};
+template <> struct is_complex<ComplexF> : public std::true_type {};

-template <typename T>              using IfReal    = Invoke<std::enable_if<std::is_floating_point<T>::value, int> >;
+template<typename T, typename V=void> struct is_real : public std::false_type {};
+template<typename T> struct is_real<T, typename std::enable_if<std::is_floating_point<T>::value,
+  void>::type> : public std::true_type {};
+
+template<typename T, typename V=void> struct is_integer : public std::false_type {};
+template<typename T> struct is_integer<T, typename std::enable_if<std::is_integral<T>::value,
+  void>::type> : public std::true_type {};
+  
+template <typename T>              using IfReal    = Invoke<std::enable_if<is_real<T>::value, int> >;
 template <typename T>              using IfComplex = Invoke<std::enable_if<is_complex<T>::value, int> >;
-template <typename T>              using IfInteger = Invoke<std::enable_if<std::is_integral<T>::value, int> >;
+template <typename T>              using IfInteger = Invoke<std::enable_if<is_integer<T>::value, int> >;
 template <typename T1,typename T2> using IfSame    = Invoke<std::enable_if<std::is_same<T1,T2>::value, int> >;

-template <typename T>              using IfNotReal    = Invoke<std::enable_if<!std::is_floating_point<T>::value, int> >;
+template <typename T>              using IfNotReal    = Invoke<std::enable_if<!is_real<T>::value, int> >;
 template <typename T>              using IfNotComplex = Invoke<std::enable_if<!is_complex<T>::value, int> >;
-template <typename T>              using IfNotInteger = Invoke<std::enable_if<!std::is_integral<T>::value, int> >;
+template <typename T>              using IfNotInteger = Invoke<std::enable_if<!is_integer<T>::value, int> >;
 template <typename T1,typename T2> using IfNotSame    = Invoke<std::enable_if<!std::is_same<T1,T2>::value, int> >;

 ////////////////////////////////////////////////////////
@@ -957,8 +963,10 @@ template <typename T>
 struct is_simd : public std::false_type {};
 template <> struct is_simd<vRealF>     : public std::true_type {};
 template <> struct is_simd<vRealD>     : public std::true_type {};
+template <> struct is_simd<vRealH>     : public std::true_type {};
 template <> struct is_simd<vComplexF>  : public std::true_type {};
 template <> struct is_simd<vComplexD>  : public std::true_type {};
+template <> struct is_simd<vComplexH>  : public std::true_type {};
 template <> struct is_simd<vInteger>   : public std::true_type {};

 template <typename T> using IfSimd    = Invoke<std::enable_if<is_simd<T>::value, int> >;