mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	first commit for QPX intrinsics
This commit is contained in:
		@@ -125,11 +125,14 @@ case ${ax_cv_cxx_compiler_vendor} in
 | 
				
			|||||||
        AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
 | 
					        AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
 | 
				
			||||||
        SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
 | 
					        SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
 | 
				
			||||||
      IMCI|KNC)
 | 
					      IMCI|KNC)
 | 
				
			||||||
        AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner])
 | 
					        AC_DEFINE([IMCI],[1],[IMCI intrinsics for Knights Corner])
 | 
				
			||||||
        SIMD_FLAGS='';;
 | 
					        SIMD_FLAGS='';;
 | 
				
			||||||
      GEN)
 | 
					      GEN)
 | 
				
			||||||
        AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
 | 
					        AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
 | 
				
			||||||
        SIMD_FLAGS='';;
 | 
					        SIMD_FLAGS='';;
 | 
				
			||||||
 | 
					      QPX|BGQ)
 | 
				
			||||||
 | 
					        AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q])
 | 
				
			||||||
 | 
					        SIMD_FLAGS='';;
 | 
				
			||||||
      *)
 | 
					      *)
 | 
				
			||||||
        AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the GCC/Clang compiler"]);;
 | 
					        AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the GCC/Clang compiler"]);;
 | 
				
			||||||
    esac;;
 | 
					    esac;;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,300 +1,312 @@
 | 
				
			|||||||
    /*************************************************************************************
 | 
					/*******************************************************************************
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
					 Grid physics library, www.github.com/paboyle/Grid
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
    Source file: ./lib/simd/Grid_qpx.h
 | 
					 Source file: ./lib/simd/Grid_qpx.h
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
    Copyright (C) 2015
 | 
					 Copyright (C) 2016
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
Author: neo <cossu@post.kek.jp>
 | 
					 Author: Antonin Portelli <antonin.portelli@me.com>
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
    This program is free software; you can redistribute it and/or modify
 | 
					 This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
    it under the terms of the GNU General Public License as published by
 | 
					 it under the terms of the GNU General Public License as published by
 | 
				
			||||||
    the Free Software Foundation; either version 2 of the License, or
 | 
					 the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
    (at your option) any later version.
 | 
					 (at your option) any later version.
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
    This program is distributed in the hope that it will be useful,
 | 
					 This program is distributed in the hope that it will be useful,
 | 
				
			||||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
					 but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
					 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
    GNU General Public License for more details.
 | 
					 GNU General Public License for more details.
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
    You should have received a copy of the GNU General Public License along
 | 
					 You should have received a copy of the GNU General Public License along
 | 
				
			||||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
					 with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
					 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
					 See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
    *************************************************************************************/
 | 
					 ******************************************************************************/
 | 
				
			||||||
    /*  END LEGAL */
 | 
					 | 
				
			||||||
//----------------------------------------------------------------------
 | 
					 | 
				
			||||||
/*! @file Grid_qpx.h
 | 
					 | 
				
			||||||
  @brief Optimization libraries for QPX instructions set for BG/Q
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  Using intrinsics
 | 
					 | 
				
			||||||
*/
 | 
					 | 
				
			||||||
// Time-stamp: <2015-05-27 11:30:21 neo>
 | 
					 | 
				
			||||||
//----------------------------------------------------------------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// lot of undefined functions
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace Grid {
 | 
				
			||||||
namespace Optimization {
 | 
					namespace Optimization {
 | 
				
			||||||
 | 
					  inline std::ostream & operator<<(std::ostream& stream, const vector4double a)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    stream << "{"<<vec_extract(a,0)<<","<<vec_extract(a,1)<<","<<vec_extract(a,2)<<","<<vec_extract(a,3)<<"}";
 | 
				
			||||||
 | 
					    return stream;
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  struct Vsplat{
 | 
					  struct Vsplat{
 | 
				
			||||||
    //Complex float
 | 
					    //Complex float
 | 
				
			||||||
    inline float operator()(float a, float b){
 | 
					    inline vector4double operator()(float a, float b){
 | 
				
			||||||
      return {a,b,a,b};
 | 
					      return (vector4double){(double)a, (double)b, (double)a, (double)b};
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    // Real float
 | 
					    // Real float
 | 
				
			||||||
    inline float operator()(float a){
 | 
					    inline vector4double operator()(float a){
 | 
				
			||||||
      return {a,a,a,a};
 | 
					      return (vector4double){(double)a, (double)a, (double)a, (double)a};
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    //Complex double
 | 
					    //Complex double
 | 
				
			||||||
    inline vector4double operator()(double a, double b){
 | 
					    inline vector4double operator()(double a, double b){
 | 
				
			||||||
      return {a,b,a,b};
 | 
					      return (vector4double){a, b, a, b};
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    //Real double
 | 
					    //Real double
 | 
				
			||||||
    inline vector4double operator()(double a){
 | 
					    inline vector4double operator()(double a){
 | 
				
			||||||
      return {a,a,a,a};
 | 
					      return (vector4double){a, a, a, a};
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    //Integer
 | 
					    //Integer
 | 
				
			||||||
    inline int operator()(Integer a){
 | 
					    inline int operator()(Integer a){
 | 
				
			||||||
#error
 | 
					      return a;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
  struct Vstore{
 | 
					  struct Vstore{
 | 
				
			||||||
    //Float 
 | 
					    //Float
 | 
				
			||||||
    inline void operator()(float a, float* F){
 | 
					    inline void operator()(vector4double a, float *f){
 | 
				
			||||||
      assert(0);
 | 
					      vec_st(a, 0, f);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    //Double
 | 
					    //Double
 | 
				
			||||||
    inline void operator()(vector4double a, double* D){
 | 
					    inline void operator()(vector4double a, double *d){
 | 
				
			||||||
      assert(0);
 | 
					      vec_st(a, 0, d);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    //Integer
 | 
					    //Integer
 | 
				
			||||||
    inline void operator()(int a, Integer* I){
 | 
					    inline void operator()(int a, Integer *i){
 | 
				
			||||||
      assert(0);
 | 
					      i[0] = a;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					 | 
				
			||||||
  struct Vstream{
 | 
					  struct Vstream{
 | 
				
			||||||
    //Float
 | 
					    //Float
 | 
				
			||||||
    inline void operator()(float * a, float b){
 | 
					    inline void operator()(float *f, vector4double a){
 | 
				
			||||||
      assert(0);
 | 
					      vec_st(a, 0, f);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    //Double
 | 
					    //Double
 | 
				
			||||||
    inline void operator()(double * a, vector4double b){
 | 
					    inline void operator()(double *d, vector4double a){
 | 
				
			||||||
      assert(0);
 | 
					      vec_st(a, 0, d);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  struct Vset{
 | 
					  struct Vset{
 | 
				
			||||||
    // Complex float 
 | 
					    // Complex float
 | 
				
			||||||
    inline float operator()(Grid::ComplexF *a){
 | 
					    inline vector4double operator()(Grid::ComplexF *a){
 | 
				
			||||||
      return {a[0].real(),a[0].imag(),a[1].real(),a[1].imag(),a[2].real(),a[2].imag(),a[3].real(),a[3].imag()};
 | 
					      return vec_ld(0, (float *)a);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    // Complex double 
 | 
					    // Complex double
 | 
				
			||||||
    inline vector4double operator()(Grid::ComplexD *a){
 | 
					    inline vector4double operator()(Grid::ComplexD *a){
 | 
				
			||||||
      return {a[0].real(),a[0].imag(),a[1].real(),a[1].imag(),a[2].real(),a[2].imag(),a[3].real(),a[3].imag()};
 | 
					      return vec_ld(0, (double *)a);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    // Real float 
 | 
					    // Real float
 | 
				
			||||||
    inline float operator()(float *a){
 | 
					    inline vector4double operator()(float *a){
 | 
				
			||||||
      return {a[0],a[1],a[2],a[3],a[4],a[5],a[6],a[7]};
 | 
					      return vec_ld(0, a);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    // Real double
 | 
					    // Real double
 | 
				
			||||||
    inline vector4double operator()(double *a){
 | 
					    inline vector4double operator()(double *a){
 | 
				
			||||||
      return {a[0],a[1],a[2],a[3],a[4],a[5],a[6],a[7]};
 | 
					      return vec_ld(0, a);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    // Integer
 | 
					    // Integer
 | 
				
			||||||
    inline int operator()(Integer *a){
 | 
					    inline int operator()(Integer *a){
 | 
				
			||||||
#error
 | 
					      return a[0];
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
  template <typename Out_type, typename In_type>
 | 
					  template <typename Out_type, typename In_type>
 | 
				
			||||||
    struct Reduce{
 | 
					  struct Reduce{
 | 
				
			||||||
      //Need templated class to overload output type
 | 
					    //Need templated class to overload output type
 | 
				
			||||||
      //General form must generate error if compiled
 | 
					    //General form must generate error if compiled
 | 
				
			||||||
      inline Out_type operator()(In_type in){
 | 
					    inline Out_type operator()(In_type in){
 | 
				
			||||||
	printf("Error, using wrong Reduce function\n");
 | 
					      printf("Error, using wrong Reduce function\n");
 | 
				
			||||||
	exit(1);
 | 
					      exit(1);
 | 
				
			||||||
	return 0;
 | 
					      return 0;
 | 
				
			||||||
      }
 | 
					    }
 | 
				
			||||||
    };
 | 
					  };
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					 | 
				
			||||||
 
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  /////////////////////////////////////////////////////
 | 
					  /////////////////////////////////////////////////////
 | 
				
			||||||
  // Arithmetic operations
 | 
					  // Arithmetic operations
 | 
				
			||||||
  /////////////////////////////////////////////////////
 | 
					  /////////////////////////////////////////////////////
 | 
				
			||||||
  struct Sum{
 | 
					  struct Sum{
 | 
				
			||||||
    //Complex/Real float
 | 
					 | 
				
			||||||
    inline float operator()(float a, float b){
 | 
					 | 
				
			||||||
#error
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    //Complex/Real double
 | 
					    //Complex/Real double
 | 
				
			||||||
    inline vector4double operator()(vector4double a, vector4double b){
 | 
					    inline vector4double operator()(vector4double a, vector4double b){
 | 
				
			||||||
      return vec_add(a,b);
 | 
					      return vec_add(a, b);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    //Integer
 | 
					    //Integer
 | 
				
			||||||
    inline int operator()(int a, int b){
 | 
					    inline int operator()(int a, int b){
 | 
				
			||||||
#error
 | 
					      return a + b;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
  struct Sub{
 | 
					  struct Sub{
 | 
				
			||||||
    //Complex/Real float
 | 
					 | 
				
			||||||
    inline float operator()(float a, float b){
 | 
					 | 
				
			||||||
#error
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    //Complex/Real double
 | 
					    //Complex/Real double
 | 
				
			||||||
    inline vector4double operator()(vector4double a, vector4double b){
 | 
					    inline vector4double operator()(vector4double a, vector4double b){
 | 
				
			||||||
#error
 | 
					      return vec_sub(a, b);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    //Integer
 | 
					    //Integer
 | 
				
			||||||
    inline floati operator()(int a, int b){
 | 
					    inline int operator()(int a, int b){
 | 
				
			||||||
#error
 | 
					      return a - b;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					 | 
				
			||||||
  struct MultComplex{
 | 
					  struct MultComplex{
 | 
				
			||||||
    // Complex float
 | 
					 | 
				
			||||||
    inline float operator()(float a, float b){
 | 
					 | 
				
			||||||
#error
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    // Complex double
 | 
					    // Complex double
 | 
				
			||||||
    inline vector4double operator()(vector4double a, vector4double b){
 | 
					    inline vector4double operator()(vector4double a, vector4double b){
 | 
				
			||||||
#error
 | 
					      return vec_xxnpmadd(a, b, vec_xmul(b, a));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
  struct Mult{
 | 
					  struct Mult{
 | 
				
			||||||
    // Real float
 | 
					 | 
				
			||||||
    inline float operator()(float a, float b){
 | 
					 | 
				
			||||||
#error
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    // Real double
 | 
					    // Real double
 | 
				
			||||||
    inline vector4double operator()(vector4double a, vector4double b){
 | 
					    inline vector4double operator()(vector4double a, vector4double b){
 | 
				
			||||||
#error
 | 
					      return vec_mul(a, b);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    // Integer
 | 
					    // Integer
 | 
				
			||||||
    inline int operator()(int a, int b){
 | 
					    inline int operator()(int a, int b){
 | 
				
			||||||
#error
 | 
					      return a*b;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					 | 
				
			||||||
  struct Conj{
 | 
					  struct Conj{
 | 
				
			||||||
    // Complex single
 | 
					 | 
				
			||||||
    inline float operator()(float in){
 | 
					 | 
				
			||||||
      assert(0);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    // Complex double
 | 
					    // Complex double
 | 
				
			||||||
    inline vector4double operator()(vector4double in){
 | 
					    inline vector4double operator()(vector4double v){
 | 
				
			||||||
      assert(0);
 | 
					      return vec_mul(v, (vector4double){1., -1., 1., -1.});
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    // do not define for integer input
 | 
					 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
  struct TimesMinusI{
 | 
					  struct TimesMinusI{
 | 
				
			||||||
    //Complex single
 | 
					 | 
				
			||||||
    inline float operator()(float in, float ret){
 | 
					 | 
				
			||||||
      assert(0);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    //Complex double
 | 
					    //Complex double
 | 
				
			||||||
    inline vector4double operator()(vector4double in, vector4double ret){
 | 
					    inline vector4double operator()(vector4double v, vector4double ret){
 | 
				
			||||||
      assert(0);
 | 
					      return vec_xxcpnmadd(v, (vector4double){1., 1., 1., 1.},
 | 
				
			||||||
 | 
					                               (vector4double){0., 0., 0., 0.});
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
  struct TimesI{
 | 
					  struct TimesI{
 | 
				
			||||||
    //Complex single
 | 
					 | 
				
			||||||
    inline float operator()(float in, float ret){
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    //Complex double
 | 
					    //Complex double
 | 
				
			||||||
    inline vector4double operator()(vector4double in, vector4double ret){
 | 
					    inline vector4double operator()(vector4double v, vector4double ret){
 | 
				
			||||||
  
 | 
					      return vec_xxcpnmadd(v, (vector4double){-1., -1., -1., -1.},
 | 
				
			||||||
 | 
					                              (vector4double){0., 0., 0., 0.});
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  struct Permute{
 | 
				
			||||||
 | 
					    static inline vector4double Permute0(vector4double v){ //0123 -> 2301
 | 
				
			||||||
 | 
					      return vec_perm(v, v, vec_gpci(02301));
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    static inline vector4double Permute1(vector4double v){ //0123 -> 1032
 | 
				
			||||||
 | 
					      return vec_perm(v, v, vec_gpci(01032));
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    static inline vector4double Permute2(vector4double v){
 | 
				
			||||||
 | 
					      return v;
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    static inline vector4double Permute3(vector4double v){
 | 
				
			||||||
 | 
					      return v;
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
 | 
					  struct Rotate{
 | 
				
			||||||
 | 
					    static inline vector4double rotate(vector4double v, int n){
 | 
				
			||||||
  //////////////////////////////////////////////
 | 
					      switch(n){
 | 
				
			||||||
  // Some Template specialization
 | 
					        case 0:
 | 
				
			||||||
 | 
					          return v;
 | 
				
			||||||
 | 
					          break;
 | 
				
			||||||
 | 
					        case 1:
 | 
				
			||||||
 | 
					          return vec_perm(v, v, vec_gpci(01230));
 | 
				
			||||||
 | 
					          break;
 | 
				
			||||||
 | 
					        case 2:
 | 
				
			||||||
 | 
					          return vec_perm(v, v, vec_gpci(02301));
 | 
				
			||||||
 | 
					          break;
 | 
				
			||||||
 | 
					        case 3:
 | 
				
			||||||
 | 
					          return vec_perm(v, v, vec_gpci(03012));
 | 
				
			||||||
 | 
					          break;
 | 
				
			||||||
 | 
					        default: assert(0);
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  //Complex float Reduce
 | 
					  //Complex float Reduce
 | 
				
			||||||
  template<>
 | 
					  template<>
 | 
				
			||||||
    inline Grid::ComplexF Reduce<Grid::ComplexF, float>::operator()(float in){
 | 
					  inline Grid::ComplexF
 | 
				
			||||||
    assert(0);
 | 
					  Reduce<Grid::ComplexF, vector4double>::operator()(vector4double v) { //2 complex
 | 
				
			||||||
 | 
					    vector4double v1,v2;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    v1 = Optimization::Permute::Permute0(v);
 | 
				
			||||||
 | 
					    v1 = vec_add(v1, v);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    return Grid::ComplexF((float)vec_extract(v1, 0), (float)vec_extract(v1, 1));
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  //Real float Reduce
 | 
					  //Real float Reduce
 | 
				
			||||||
  template<>
 | 
					  template<>
 | 
				
			||||||
    inline Grid::RealF Reduce<Grid::RealF, float>::operator()(float in){
 | 
					  inline Grid::RealF
 | 
				
			||||||
    assert(0);
 | 
					  Reduce<Grid::RealF, vector4double>::operator()(vector4double v){ //4 floats
 | 
				
			||||||
 | 
					    vector4double v1,v2;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    v1 = Optimization::Permute::Permute0(v);
 | 
				
			||||||
 | 
					    v1 = vec_add(v1, v);
 | 
				
			||||||
 | 
					    v2 = Optimization::Permute::Permute1(v1);
 | 
				
			||||||
 | 
					    v1 = vec_add(v1, v2);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    return (float)vec_extract(v1, 0);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  //Complex double Reduce
 | 
					  //Complex double Reduce
 | 
				
			||||||
  template<>
 | 
					  template<>
 | 
				
			||||||
    inline Grid::ComplexD Reduce<Grid::ComplexD, vector4double>::operator()(vector4double in){
 | 
					  inline Grid::ComplexD
 | 
				
			||||||
    assert(0);
 | 
					  Reduce<Grid::ComplexD, vector4double>::operator()(vector4double v){ //2 complex
 | 
				
			||||||
 | 
					    vector4double v1;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    v1 = Optimization::Permute::Permute0(v);
 | 
				
			||||||
 | 
					    v1 = vec_add(v1, v);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    return Grid::ComplexD(vec_extract(v1, 0), vec_extract(v1, 1));
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  //Real double Reduce
 | 
					  //Real double Reduce
 | 
				
			||||||
  template<>
 | 
					  template<>
 | 
				
			||||||
    inline Grid::RealD Reduce<Grid::RealD, vector4double>::operator()(vector4double in){
 | 
					  inline Grid::RealD
 | 
				
			||||||
    assert(0);
 | 
					  Reduce<Grid::RealD, vector4double>::operator()(vector4double v){ //4 doubles
 | 
				
			||||||
  }
 | 
					    vector4double v1,v2;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    v1 = Optimization::Permute::Permute0(v);
 | 
				
			||||||
 | 
					    v1 = vec_add(v1, v);
 | 
				
			||||||
 | 
					    v2 = Optimization::Permute::Permute1(v1);
 | 
				
			||||||
 | 
					    v1 = vec_add(v1, v2);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return vec_extract(v1, 0);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
  //Integer Reduce
 | 
					  //Integer Reduce
 | 
				
			||||||
  template<>
 | 
					  template<>
 | 
				
			||||||
    inline Integer Reduce<Integer, floati>::operator()(float in){
 | 
					  inline Integer Reduce<Integer, int>::operator()(int in){
 | 
				
			||||||
 | 
					    // FIXME unimplemented
 | 
				
			||||||
 | 
					    printf("Reduce : Missing integer implementation -> FIX\n");
 | 
				
			||||||
    assert(0);
 | 
					    assert(0);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
//////////////////////////////////////////////////////////////////////////////////////
 | 
					////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
// Here assign types 
 | 
					// Here assign types
 | 
				
			||||||
namespace Grid {
 | 
					 | 
				
			||||||
  typedef float SIMD_Ftype  __attribute__ ((vector_size (16)));         // Single precision type
 | 
					 | 
				
			||||||
  typedef vector4double SIMD_Dtype; // Double precision type
 | 
					 | 
				
			||||||
  typedef int SIMD_Itype;           // Integer type
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  inline void v_prefetch0(int size, const char *ptr){};
 | 
					typedef vector4double SIMD_Ftype;  // Single precision type
 | 
				
			||||||
 | 
					typedef vector4double SIMD_Dtype; // Double precision type
 | 
				
			||||||
 | 
					typedef int SIMD_Itype; // Integer type
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Function name aliases
 | 
					// prefetch utilities
 | 
				
			||||||
  typedef Optimization::Vsplat   VsplatSIMD;
 | 
					inline void v_prefetch0(int size, const char *ptr){};
 | 
				
			||||||
  typedef Optimization::Vstore   VstoreSIMD;
 | 
					inline void prefetch_HINT_T0(const char *ptr){};
 | 
				
			||||||
  typedef Optimization::Vset     VsetSIMD;
 | 
					 | 
				
			||||||
  typedef Optimization::Vstream  VstreamSIMD;
 | 
					 | 
				
			||||||
  template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Arithmetic operations
 | 
					// Function name aliases
 | 
				
			||||||
  typedef Optimization::Sum         SumSIMD;
 | 
					typedef Optimization::Vsplat   VsplatSIMD;
 | 
				
			||||||
  typedef Optimization::Sub         SubSIMD;
 | 
					typedef Optimization::Vstore   VstoreSIMD;
 | 
				
			||||||
  typedef Optimization::Mult        MultSIMD;
 | 
					typedef Optimization::Vset     VsetSIMD;
 | 
				
			||||||
  typedef Optimization::MultComplex MultComplexSIMD;
 | 
					typedef Optimization::Vstream  VstreamSIMD;
 | 
				
			||||||
  typedef Optimization::Conj        ConjSIMD;
 | 
					template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
 | 
				
			||||||
  typedef Optimization::TimesMinusI TimesMinusISIMD;
 | 
					 | 
				
			||||||
  typedef Optimization::TimesI      TimesISIMD;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Arithmetic operations
 | 
				
			||||||
 | 
					typedef Optimization::Sum         SumSIMD;
 | 
				
			||||||
 | 
					typedef Optimization::Sub         SubSIMD;
 | 
				
			||||||
 | 
					typedef Optimization::Mult        MultSIMD;
 | 
				
			||||||
 | 
					typedef Optimization::MultComplex MultComplexSIMD;
 | 
				
			||||||
 | 
					typedef Optimization::Conj        ConjSIMD;
 | 
				
			||||||
 | 
					typedef Optimization::TimesMinusI TimesMinusISIMD;
 | 
				
			||||||
 | 
					typedef Optimization::TimesI      TimesISIMD;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user