1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 09:15:38 +01:00

Merge branch 'develop' into feature/hirep

This commit is contained in:
Guido Cossu 2016-09-23 11:06:06 +01:00
commit 5c190a1b8c
6 changed files with 330 additions and 211 deletions

4
.gitignore vendored
View File

@ -94,6 +94,10 @@ build.sh
################ ################
lib/Eigen/* lib/Eigen/*
# FFTW source #
################
lib/fftw/*
# libtool macros # # libtool macros #
################## ##################
m4/lt* m4/lt*

View File

@ -1,15 +1,20 @@
AC_PREREQ([2.63]) AC_PREREQ([2.63])
AC_INIT([Grid], [0.5.1-dev], [https://github.com/paboyle/Grid], [Grid]) AC_INIT([Grid], [0.5.1-dev], [https://github.com/paboyle/Grid], [Grid])
AC_CANONICAL_BUILD
AC_CANONICAL_HOST
AC_CANONICAL_TARGET
AM_INIT_AUTOMAKE(subdir-objects) AM_INIT_AUTOMAKE(subdir-objects)
AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_SRCDIR([lib/Grid.h]) AC_CONFIG_SRCDIR([lib/Grid.h])
AC_CONFIG_HEADERS([lib/Config.h]) AC_CONFIG_HEADERS([lib/Config.h])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
############### Checks for programs ############### Checks for programs
AC_LANG(C++) AC_LANG(C++)
CXXFLAGS="-O3 $CXXFLAGS" CXXFLAGS="-O3 $CXXFLAGS"
AC_PROG_CXX AC_PROG_CXX
AC_PROG_RANLIB
############ openmp ############### ############ openmp ###############
AC_OPENMP AC_OPENMP
@ -22,9 +27,6 @@ AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS"
AM_LDFLAGS="$OPENMP_CXXFLAGS $AM_LDFLAGS" AM_LDFLAGS="$OPENMP_CXXFLAGS $AM_LDFLAGS"
fi fi
############ libtool ###############
LT_INIT
############### Checks for header files ############### Checks for header files
AC_CHECK_HEADERS(stdint.h) AC_CHECK_HEADERS(stdint.h)
AC_CHECK_HEADERS(mm_malloc.h) AC_CHECK_HEADERS(mm_malloc.h)
@ -75,23 +77,6 @@ AC_ARG_WITH([fftw],
[AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"] [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"]) [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
#
# What about the MKL library replacement for fftw3 ? How do we know if fftw_execute
# can be found in MKL?
#
AC_CHECK_LIB([fftw3],[fftw_execute],
[AC_DEFINE([HAVE_FFTW],[1],[Define to 1 if you have the `FFTW' library (-lfftw3).])] [ac_fftw=yes],
[ac_fftw=no])
case ${ac_fftw} in
no)
echo WARNING libfftw3 not found FFT routines will not work
;;
yes)
AM_LDFLAGS="$AM_LDFLAGS -lfftw3 -lfftw3f"
esac
################ Get compiler informations ################ Get compiler informations
AC_LANG([C++]) AC_LANG([C++])
AX_CXX_COMPILE_STDCXX_11([noext],[mandatory]) AX_CXX_COMPILE_STDCXX_11([noext],[mandatory])
@ -105,7 +90,6 @@ AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
############### Checks for library functions ############### Checks for library functions
CXXFLAGS_CPY=$CXXFLAGS CXXFLAGS_CPY=$CXXFLAGS
LDFLAGS_CPY=$LDFLAGS LDFLAGS_CPY=$LDFLAGS
LIBS_CPY=$LIBS
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS" CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
LDFLAGS="$AM_LDFLAGS $LDFLAGS" LDFLAGS="$AM_LDFLAGS $LDFLAGS"
AC_CHECK_FUNCS([gettimeofday]) AC_CHECK_FUNCS([gettimeofday])
@ -124,6 +108,11 @@ if test "${ac_LAPACK}x" != "nox"; then
AC_CHECK_LIB([lapack],[LAPACKE_sbdsdc],[], AC_CHECK_LIB([lapack],[LAPACKE_sbdsdc],[],
[AC_MSG_ERROR("LAPACK enabled but library not found")]) [AC_MSG_ERROR("LAPACK enabled but library not found")])
fi fi
AC_CHECK_LIB([fftw3],[fftw_execute],
[AC_DEFINE([HAVE_FFTW],[1],[Define to 1 if you have the `FFTW' library (-lfftw3).])]
[have_fftw=true]
[LIBS="$LIBS -lfftw3 -lfftw3f"],
[AC_MSG_WARN([**** FFTW library not found, Grid can still compile but FFT-based routines will not work ****])])
CXXFLAGS=$CXXFLAGS_CPY CXXFLAGS=$CXXFLAGS_CPY
LDFLAGS=$LDFLAGS_CPY LDFLAGS=$LDFLAGS_CPY
@ -151,11 +140,14 @@ case ${ax_cv_cxx_compiler_vendor} in
AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';; SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
IMCI|KNC) IMCI|KNC)
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner]) AC_DEFINE([IMCI],[1],[IMCI intrinsics for Knights Corner])
SIMD_FLAGS='';; SIMD_FLAGS='';;
GEN) GEN)
AC_DEFINE([GENERIC_VEC],[1],[generic vector code]) AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
SIMD_FLAGS='';; SIMD_FLAGS='';;
QPX|BGQ)
AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q])
SIMD_FLAGS='';;
*) *)
AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the GCC/Clang compiler"]);; AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the GCC/Clang compiler"]);;
esac;; esac;;
@ -336,12 +328,15 @@ Summary of configuration for $PACKAGE v$VERSION
- RNG choice : ${ac_RNG} - RNG choice : ${ac_RNG}
- GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` - GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
- LAPACK : ${ac_LAPACK} - LAPACK : ${ac_LAPACK}
- FFTW : ${ac_fftw} - FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
- build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi` - build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
- graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi` - graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
----- BUILD FLAGS ------------------------------------- ----- BUILD FLAGS -------------------------------------
- CXXFLAGS: "${AM_CXXFLAGS} ${CXXFLAGS}" - CXXFLAGS:
- LDFLAGS: "${AM_LDFLAGS} ${LDFLAGS}" `echo ${AM_CXXFLAGS} ${CXXFLAGS} | sed 's/ -/\n\t-/g' | sed 's/^-/\t-/g'`
- LIBS: "${LIBS} " - LDFLAGS:
`echo ${AM_LDFLAGS} ${LDFLAGS} | sed 's/ -/\n\t-/g' | sed 's/^-/\t-/g'`
- LIBS:
`echo ${LIBS} | sed 's/ -/\n\t-/g' | sed 's/^-/\t-/g'`
------------------------------------------------------- -------------------------------------------------------
" "

View File

@ -17,8 +17,8 @@ endif
include Make.inc include Make.inc
include Eigen.inc include Eigen.inc
lib_LTLIBRARIES = libGrid.la lib_LIBRARIES = libGrid.a
libGrid_la_SOURCES = $(CCFILES) $(extra_sources) libGrid_a_SOURCES = $(CCFILES) $(extra_sources)
libGrid_ladir = $(pkgincludedir) libGrid_adir = $(pkgincludedir)
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h

View File

@ -265,7 +265,7 @@
// _mm_prefetch((char *)&_entries[ent],_MM_HINT_T0); // _mm_prefetch((char *)&_entries[ent],_MM_HINT_T0);
} }
inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) { inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) {
_mm_prefetch((char *)&_entries[ent+1],_MM_HINT_T0); //_mm_prefetch((char *)&_entries[ent+1],_MM_HINT_T0);
local = _entries[ent]._is_local; local = _entries[ent]._is_local;
perm = _entries[ent]._permute; perm = _entries[ent]._permute;
if (perm) ptype = _permute_type[point]; if (perm) ptype = _permute_type[point];

View File

@ -1,300 +1,421 @@
/************************************************************************************* /*******************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/simd/Grid_qpx.h Source file: ./lib/simd/Grid_qpx.h
Copyright (C) 2015 Copyright (C) 2016
Author: neo <cossu@post.kek.jp> Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ ******************************************************************************/
/* END LEGAL */
//----------------------------------------------------------------------
/*! @file Grid_qpx.h
@brief Optimization libraries for QPX instructions set for BG/Q
Using intrinsics
*/
// Time-stamp: <2015-05-27 11:30:21 neo>
//----------------------------------------------------------------------
// lot of undefined functions
namespace Grid {
namespace Optimization { namespace Optimization {
typedef struct
{
float v0,v1,v2,v3;
} vector4float;
inline std::ostream & operator<<(std::ostream& stream, const vector4double a)
{
stream << "{"<<vec_extract(a,0)<<","<<vec_extract(a,1)<<","<<vec_extract(a,2)<<","<<vec_extract(a,3)<<"}";
return stream;
};
inline std::ostream & operator<<(std::ostream& stream, const vector4float a)
{
stream << "{"<< a.v0 <<","<< a.v1 <<","<< a.v2 <<","<< a.v3 <<"}";
return stream;
};
struct Vsplat{ struct Vsplat{
//Complex float //Complex float
inline float operator()(float a, float b){ inline vector4float operator()(float a, float b){
return {a,b,a,b}; return (vector4float){a, b, a, b};
} }
// Real float // Real float
inline float operator()(float a){ inline vector4float operator()(float a){
return {a,a,a,a}; return (vector4float){a, a, a, a};
} }
//Complex double //Complex double
inline vector4double operator()(double a, double b){ inline vector4double operator()(double a, double b){
return {a,b,a,b}; return (vector4double){a, b, a, b};
} }
//Real double //Real double
inline vector4double operator()(double a){ inline vector4double operator()(double a){
return {a,a,a,a}; return (vector4double){a, a, a, a};
} }
//Integer //Integer
inline int operator()(Integer a){ inline int operator()(Integer a){
#error return a;
} }
}; };
struct Vstore{ struct Vstore{
//Float //Float
inline void operator()(float a, float* F){ inline void operator()(vector4double a, float *f){
assert(0); vec_st(a, 0, f);
} }
inline void operator()(vector4double a, vector4float &f){
vec_st(a, 0, (float *)(&f));
}
inline void operator()(vector4float a, float *f){
f[0] = a.v0;
f[1] = a.v1;
f[2] = a.v2;
f[3] = a.v3;
}
//Double //Double
inline void operator()(vector4double a, double* D){ inline void operator()(vector4double a, double *d){
assert(0); vec_st(a, 0, d);
} }
//Integer //Integer
inline void operator()(int a, Integer* I){ inline void operator()(int a, Integer *i){
assert(0); i[0] = a;
} }
}; };
struct Vstream{ struct Vstream{
//Float //Float
inline void operator()(float * a, float b){ inline void operator()(float *f, vector4double a){
assert(0); vec_st(a, 0, f);
}
//Double
inline void operator()(double * a, vector4double b){
assert(0);
} }
inline void operator()(vector4float f, vector4double a){
vec_st(a, 0, (float *)(&f));
}
inline void operator()(float *f, vector4float a){
f[0] = a.v0;
f[1] = a.v1;
f[2] = a.v2;
f[3] = a.v3;
}
//Double
inline void operator()(double *d, vector4double a){
vec_st(a, 0, d);
}
}; };
struct Vset{ struct Vset{
// Complex float // Complex float
inline float operator()(Grid::ComplexF *a){ inline vector4float operator()(Grid::ComplexF *a){
return {a[0].real(),a[0].imag(),a[1].real(),a[1].imag(),a[2].real(),a[2].imag(),a[3].real(),a[3].imag()}; return (vector4float){a[0].real(), a[0].imag(), a[1].real(), a[1].imag()};
} }
// Complex double // Complex double
inline vector4double operator()(Grid::ComplexD *a){ inline vector4double operator()(Grid::ComplexD *a){
return {a[0].real(),a[0].imag(),a[1].real(),a[1].imag(),a[2].real(),a[2].imag(),a[3].real(),a[3].imag()}; return vec_ld(0, (double *)a);
} }
// Real float
inline float operator()(float *a){ // Real float
return {a[0],a[1],a[2],a[3],a[4],a[5],a[6],a[7]}; inline vector4float operator()(float *a){
return (vector4float){a[0], a[1], a[2], a[3]};
} }
inline vector4double operator()(vector4float a){
return vec_ld(0, (float *)(&a));
}
// Real double // Real double
inline vector4double operator()(double *a){ inline vector4double operator()(double *a){
return {a[0],a[1],a[2],a[3],a[4],a[5],a[6],a[7]}; return vec_ld(0, a);
} }
// Integer // Integer
inline int operator()(Integer *a){ inline int operator()(Integer *a){
#error return a[0];
} }
}; };
template <typename Out_type, typename In_type> template <typename Out_type, typename In_type>
struct Reduce{ struct Reduce{
//Need templated class to overload output type //Need templated class to overload output type
//General form must generate error if compiled //General form must generate error if compiled
inline Out_type operator()(In_type in){ inline Out_type operator()(In_type in){
printf("Error, using wrong Reduce function\n"); printf("Error, using wrong Reduce function\n");
exit(1); exit(1);
return 0; return 0;
} }
}; };
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
// Arithmetic operations // Arithmetic operations
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
#define FLOAT_WRAP_2(fn, pref)\
pref vector4float fn(vector4float a, vector4float b)\
{\
vector4double ad, bd, rd;\
vector4float r;\
\
ad = Vset()(a);\
bd = Vset()(b);\
rd = fn(ad, bd);\
Vstore()(rd, r);\
\
return r;\
}
#define FLOAT_WRAP_1(fn, pref)\
pref vector4float fn(vector4float a)\
{\
vector4double ad, rd;\
vector4float r;\
\
ad = Vset()(a);\
rd = fn(ad);\
Vstore()(rd, r);\
\
return r;\
}
struct Sum{ struct Sum{
//Complex/Real float
inline float operator()(float a, float b){
#error
}
//Complex/Real double //Complex/Real double
inline vector4double operator()(vector4double a, vector4double b){ inline vector4double operator()(vector4double a, vector4double b){
return vec_add(a,b); return vec_add(a, b);
} }
//Complex/Real float
FLOAT_WRAP_2(operator(), inline)
//Integer //Integer
inline int operator()(int a, int b){ inline int operator()(int a, int b){
#error return a + b;
} }
}; };
struct Sub{ struct Sub{
//Complex/Real float
inline float operator()(float a, float b){
#error
}
//Complex/Real double //Complex/Real double
inline vector4double operator()(vector4double a, vector4double b){ inline vector4double operator()(vector4double a, vector4double b){
#error return vec_sub(a, b);
} }
//Complex/Real float
FLOAT_WRAP_2(operator(), inline)
//Integer //Integer
inline floati operator()(int a, int b){ inline int operator()(int a, int b){
#error return a - b;
} }
}; };
struct MultComplex{ struct MultComplex{
// Complex float
inline float operator()(float a, float b){
#error
}
// Complex double // Complex double
inline vector4double operator()(vector4double a, vector4double b){ inline vector4double operator()(vector4double a, vector4double b){
#error return vec_xxnpmadd(a, b, vec_xmul(b, a));
} }
};
// Complex float
FLOAT_WRAP_2(operator(), inline)
};
struct Mult{ struct Mult{
// Real float
inline float operator()(float a, float b){
#error
}
// Real double // Real double
inline vector4double operator()(vector4double a, vector4double b){ inline vector4double operator()(vector4double a, vector4double b){
#error return vec_mul(a, b);
} }
// Real float
FLOAT_WRAP_2(operator(), inline)
// Integer // Integer
inline int operator()(int a, int b){ inline int operator()(int a, int b){
#error return a*b;
} }
}; };
struct Conj{ struct Conj{
// Complex single
inline float operator()(float in){
assert(0);
}
// Complex double // Complex double
inline vector4double operator()(vector4double in){ inline vector4double operator()(vector4double v){
assert(0); return vec_mul(v, (vector4double){1., -1., 1., -1.});
} }
// do not define for integer input
};
// Complex float
FLOAT_WRAP_1(operator(), inline)
};
struct TimesMinusI{ struct TimesMinusI{
//Complex single
inline float operator()(float in, float ret){
assert(0);
}
//Complex double //Complex double
inline vector4double operator()(vector4double in, vector4double ret){ inline vector4double operator()(vector4double v, vector4double ret){
assert(0); return vec_xxcpnmadd(v, (vector4double){1., 1., 1., 1.},
(vector4double){0., 0., 0., 0.});
} }
// Complex float
FLOAT_WRAP_2(operator(), inline)
}; };
struct TimesI{ struct TimesI{
//Complex single
inline float operator()(float in, float ret){
}
//Complex double //Complex double
inline vector4double operator()(vector4double in, vector4double ret){ inline vector4double operator()(vector4double v, vector4double ret){
return vec_xxcpnmadd(v, (vector4double){-1., -1., -1., -1.},
(vector4double){0., 0., 0., 0.});
} }
// Complex float
FLOAT_WRAP_2(operator(), inline)
}; };
struct Permute{
//Complex double
static inline vector4double Permute0(vector4double v){ //0123 -> 2301
return vec_perm(v, v, vec_gpci(02301));
};
static inline vector4double Permute1(vector4double v){ //0123 -> 1032
return vec_perm(v, v, vec_gpci(01032));
};
static inline vector4double Permute2(vector4double v){
return v;
};
static inline vector4double Permute3(vector4double v){
return v;
};
// Complex float
FLOAT_WRAP_1(Permute0, static inline)
FLOAT_WRAP_1(Permute1, static inline)
FLOAT_WRAP_1(Permute2, static inline)
FLOAT_WRAP_1(Permute3, static inline)
};
struct Rotate{
static inline vector4double rotate(vector4double v, int n){
switch(n){
case 0:
return v;
break;
case 1:
return vec_perm(v, v, vec_gpci(01230));
break;
case 2:
return vec_perm(v, v, vec_gpci(02301));
break;
case 3:
return vec_perm(v, v, vec_gpci(03012));
break;
default: assert(0);
}
}
static inline vector4float rotate(vector4float v, int n){
vector4double vd, rd;
vector4float r;
////////////////////////////////////////////// vd = Vset()(v);
// Some Template specialization rd = rotate(vd, n);
Vstore()(rd, r);
return r;
}
};
//Complex float Reduce //Complex float Reduce
template<> template<>
inline Grid::ComplexF Reduce<Grid::ComplexF, float>::operator()(float in){ inline Grid::ComplexF
assert(0); Reduce<Grid::ComplexF, vector4float>::operator()(vector4float v) { //2 complex
vector4float v1,v2;
v1 = Optimization::Permute::Permute0(v);
v1 = Optimization::Sum()(v1, v);
return Grid::ComplexF(v1.v0, v1.v1);
} }
//Real float Reduce //Real float Reduce
template<> template<>
inline Grid::RealF Reduce<Grid::RealF, float>::operator()(float in){ inline Grid::RealF
assert(0); Reduce<Grid::RealF, vector4float>::operator()(vector4float v){ //4 floats
vector4float v1,v2;
v1 = Optimization::Permute::Permute0(v);
v1 = Optimization::Sum()(v1, v);
v2 = Optimization::Permute::Permute1(v1);
v1 = Optimization::Sum()(v1, v2);
return v1.v0;
} }
//Complex double Reduce //Complex double Reduce
template<> template<>
inline Grid::ComplexD Reduce<Grid::ComplexD, vector4double>::operator()(vector4double in){ inline Grid::ComplexD
assert(0); Reduce<Grid::ComplexD, vector4double>::operator()(vector4double v){ //2 complex
vector4double v1;
v1 = Optimization::Permute::Permute0(v);
v1 = vec_add(v1, v);
return Grid::ComplexD(vec_extract(v1, 0), vec_extract(v1, 1));
} }
//Real double Reduce //Real double Reduce
template<> template<>
inline Grid::RealD Reduce<Grid::RealD, vector4double>::operator()(vector4double in){ inline Grid::RealD
assert(0); Reduce<Grid::RealD, vector4double>::operator()(vector4double v){ //4 doubles
} vector4double v1,v2;
v1 = Optimization::Permute::Permute0(v);
v1 = vec_add(v1, v);
v2 = Optimization::Permute::Permute1(v1);
v1 = vec_add(v1, v2);
return vec_extract(v1, 0);
}
//Integer Reduce //Integer Reduce
template<> template<>
inline Integer Reduce<Integer, floati>::operator()(float in){ inline Integer Reduce<Integer, int>::operator()(int in){
// FIXME unimplemented
printf("Reduce : Missing integer implementation -> FIX\n");
assert(0); assert(0);
} }
} }
////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Here assign types // Here assign types
namespace Grid { typedef Optimization::vector4float SIMD_Ftype; // Single precision type
typedef float SIMD_Ftype __attribute__ ((vector_size (16))); // Single precision type typedef vector4double SIMD_Dtype; // Double precision type
typedef vector4double SIMD_Dtype; // Double precision type typedef int SIMD_Itype; // Integer type
typedef int SIMD_Itype; // Integer type
inline void v_prefetch0(int size, const char *ptr){}; // prefetch utilities
inline void v_prefetch0(int size, const char *ptr){};
// Function name aliases inline void prefetch_HINT_T0(const char *ptr){};
typedef Optimization::Vsplat VsplatSIMD;
typedef Optimization::Vstore VstoreSIMD;
typedef Optimization::Vset VsetSIMD;
typedef Optimization::Vstream VstreamSIMD;
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
// Arithmetic operations // Function name aliases
typedef Optimization::Sum SumSIMD; typedef Optimization::Vsplat VsplatSIMD;
typedef Optimization::Sub SubSIMD; typedef Optimization::Vstore VstoreSIMD;
typedef Optimization::Mult MultSIMD; typedef Optimization::Vset VsetSIMD;
typedef Optimization::MultComplex MultComplexSIMD; typedef Optimization::Vstream VstreamSIMD;
typedef Optimization::Conj ConjSIMD; template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
typedef Optimization::TimesMinusI TimesMinusISIMD;
typedef Optimization::TimesI TimesISIMD;
// Arithmetic operations
typedef Optimization::Sum SumSIMD;
typedef Optimization::Sub SubSIMD;
typedef Optimization::Mult MultSIMD;
typedef Optimization::MultComplex MultComplexSIMD;
typedef Optimization::Conj ConjSIMD;
typedef Optimization::TimesMinusI TimesMinusISIMD;
typedef Optimization::TimesI TimesISIMD;
} }

View File

@ -157,10 +157,9 @@ void Tester(const functor &func)
std::cout << GridLogMessage << " " << func.name() << std::endl; std::cout << GridLogMessage << " " << func.name() << std::endl;
std::cout << GridLogDebug << v_input1 << std::endl; std::cout << GridLogDebug << v_input1 << std::endl;
std::cout << GridLogDebug << v_input2 << std::endl;
std::cout << GridLogDebug << v_result << std::endl; std::cout << GridLogDebug << v_result << std::endl;
int ok=0; int ok=0;
for(int i=0;i<Nsimd;i++){ for(int i=0;i<Nsimd;i++){
if ( abs(reference[i]-result[i])>1.0e-7){ if ( abs(reference[i]-result[i])>1.0e-7){