1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 15:55:37 +00:00

Merge branch 'coppolachan-master'

This commit is contained in:
Peter Boyle 2015-05-19 15:05:32 +01:00
commit 5f0530b68a
16 changed files with 526 additions and 128 deletions

2
README
View File

@ -33,6 +33,8 @@ MPI parallelism is UNIMPLEMENTED and for now only OpenMP and SIMD parallelism is
by setting variables in the command line or in the environment. Here by setting variables in the command line or in the environment. Here
is are examples: is are examples:
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX1 ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX1
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2 ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2

View File

@ -37,6 +37,8 @@ MPI, OpenMP, and SIMD parallelism are present in the library.
by setting variables in the command line or in the environment. Here by setting variables in the command line or in the environment. Here
are examples: are examples:
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX1 ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX1
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2 ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2

104
configure vendored
View File

@ -4503,6 +4503,11 @@ _ACEOF
# Checks for library functions. # Checks for library functions.
echo
echo Checking libraries
echo :::::::::::::::::::::::::::::::::::::::::::
for ac_func in gettimeofday for ac_func in gettimeofday
do : do :
ac_fn_cxx_check_func "$LINENO" "gettimeofday" "ac_cv_func_gettimeofday" ac_fn_cxx_check_func "$LINENO" "gettimeofday" "ac_cv_func_gettimeofday"
@ -4515,6 +4520,105 @@ fi
done done
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __gmpf_init in -lgmp" >&5
$as_echo_n "checking for __gmpf_init in -lgmp... " >&6; }
if ${ac_cv_lib_gmp___gmpf_init+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
LIBS="-lgmp $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char __gmpf_init ();
int
main ()
{
return __gmpf_init ();
;
return 0;
}
_ACEOF
if ac_fn_cxx_try_link "$LINENO"; then :
ac_cv_lib_gmp___gmpf_init=yes
else
ac_cv_lib_gmp___gmpf_init=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gmp___gmpf_init" >&5
$as_echo "$ac_cv_lib_gmp___gmpf_init" >&6; }
if test "x$ac_cv_lib_gmp___gmpf_init" = xyes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE_LIBGMP 1
_ACEOF
LIBS="-lgmp $LIBS"
else
as_fn_error $? "GNU Multiple Precision GMP library was not found in your system.
Please install or provide the correct path to your installation
Info at: http://www.gmplib.org" "$LINENO" 5
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for mpfr_init in -lmpfr" >&5
$as_echo_n "checking for mpfr_init in -lmpfr... " >&6; }
if ${ac_cv_lib_mpfr_mpfr_init+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
LIBS="-lmpfr $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char mpfr_init ();
int
main ()
{
return mpfr_init ();
;
return 0;
}
_ACEOF
if ac_fn_cxx_try_link "$LINENO"; then :
ac_cv_lib_mpfr_mpfr_init=yes
else
ac_cv_lib_mpfr_mpfr_init=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mpfr_mpfr_init" >&5
$as_echo "$ac_cv_lib_mpfr_mpfr_init" >&6; }
if test "x$ac_cv_lib_mpfr_mpfr_init" = xyes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE_LIBMPFR 1
_ACEOF
LIBS="-lmpfr $LIBS"
else
as_fn_error $? "GNU Multiple Precision MPFR library was not found in your system.
Please install or provide the correct path to your installation
Info at: http://www.mpfr.org/" "$LINENO" 5
fi

View File

@ -3,7 +3,7 @@
# #
# Project Grid package # Project Grid package
# #
# Time-stamp: <2015-05-18 17:14:20 neo> # Time-stamp: <2015-05-19 13:51:08 neo>
AC_PREREQ([2.69]) AC_PREREQ([2.69])
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk]) AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
@ -46,8 +46,22 @@ AC_TYPE_UINT32_T
AC_TYPE_UINT64_T AC_TYPE_UINT64_T
# Checks for library functions. # Checks for library functions.
echo
echo Checking libraries
echo :::::::::::::::::::::::::::::::::::::::::::
AC_CHECK_FUNCS([gettimeofday]) AC_CHECK_FUNCS([gettimeofday])
AC_CHECK_LIB([gmp],[__gmpf_init],,
[AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
Please install or provide the correct path to your installation
Info at: http://www.gmplib.org)])
AC_CHECK_LIB([mpfr],[mpfr_init],,
[AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
Please install or provide the correct path to your installation
Info at: http://www.mpfr.org/)])

View File

@ -36,6 +36,12 @@
/* Define to 1 if you have the <inttypes.h> header file. */ /* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1 #define HAVE_INTTYPES_H 1
/* Define to 1 if you have the `gmp' library (-lgmp). */
#define HAVE_LIBGMP 1
/* Define to 1 if you have the `mpfr' library (-lmpfr). */
#define HAVE_LIBMPFR 1
/* Define to 1 if you have the <malloc.h> header file. */ /* Define to 1 if you have the <malloc.h> header file. */
#define HAVE_MALLOC_H 1 #define HAVE_MALLOC_H 1

View File

@ -35,6 +35,12 @@
/* Define to 1 if you have the <inttypes.h> header file. */ /* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H #undef HAVE_INTTYPES_H
/* Define to 1 if you have the `gmp' library (-lgmp). */
#undef HAVE_LIBGMP
/* Define to 1 if you have the `mpfr' library (-lmpfr). */
#undef HAVE_LIBMPFR
/* Define to 1 if you have the <malloc.h> header file. */ /* Define to 1 if you have the <malloc.h> header file. */
#undef HAVE_MALLOC_H #undef HAVE_MALLOC_H

View File

@ -95,5 +95,8 @@ nobase_include_HEADERS = algorithms/approx/bigfloat.h\
simd/Grid_vComplexF.h \ simd/Grid_vComplexF.h \
simd/Grid_vInteger.h \ simd/Grid_vInteger.h \
simd/Grid_vRealD.h \ simd/Grid_vRealD.h \
simd/Grid_vRealF.h simd/Grid_vRealF.h \
simd/Grid_vector_types.h \
simd/Grid_sse4.h

0
lib/algorithms/approx/Remez.cc Executable file → Normal file
View File

0
lib/algorithms/approx/Remez.h Executable file → Normal file
View File

0
lib/algorithms/approx/Zolotarev.cc Executable file → Normal file
View File

0
lib/algorithms/approx/Zolotarev.h Executable file → Normal file
View File

0
lib/algorithms/approx/bigfloat.h Executable file → Normal file
View File

0
lib/simd/.dirstamp Normal file
View File

194
lib/simd/Grid_sse4.h Normal file
View File

@ -0,0 +1,194 @@
//----------------------------------------------------------------------
/*! @file Grid_sse4.h
@brief Optimization libraries
*/
// Time-stamp: <2015-05-19 17:06:51 neo>
//----------------------------------------------------------------------
#include <pmmintrin.h>
namespace Optimization {
struct Vsplat{
//Complex float
inline __m128 operator()(float a, float b){
return _mm_set_ps(b,a,b,a);
}
// Real float
inline __m128 operator()(float a){
return _mm_set_ps(a,a,a,a);
}
//Complex double
inline __m128d operator()(double a, double b){
return _mm_set_pd(b,a);
}
//Real double
inline __m128d operator()(double a){
return _mm_set_pd(a,a);
}
//Integer
inline __m128i operator()(Integer a){
return _mm_set1_epi32(a);
}
};
struct Vstore{
//Float
inline void operator()(__m128 a, float* F){
_mm_store_ps(F,a);
}
//Double
inline void operator()(__m128d a, double* D){
_mm_store_pd(D,a);
}
//Integer
inline void operator()(__m128i a, Integer* I){
_mm_store_si128((__m128i *)I,a);
}
};
struct Vset{
// Complex float
inline __m128 operator()(Grid::ComplexF *a){
return _mm_set_ps(a[1].imag(), a[1].real(),a[0].imag(),a[0].real());
}
// Complex double
inline __m128d operator()(Grid::ComplexD *a){
return _mm_set_pd(a[0].imag(),a[0].real());
}
// Real float
inline __m128 operator()(float *a){
return _mm_set_ps(a[3],a[2],a[1],a[0]);
}
// Real double
inline __m128d operator()(double *a){
return _mm_set_pd(a[1],a[0]);
}
// Integer
inline __m128i operator()(Integer *a){
return _mm_set_epi32(a[0],a[1],a[2],a[3]);
}
};
struct Reduce{
//Complex float
inline Grid::ComplexF operator()(__m128 in){
union {
__m128 v1;
float f[4];
} u128;
u128.v1 = _mm_add_ps(in, _mm_shuffle_ps(in,in, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros
return Grid::ComplexF(u128.f[0], u128.f[1]);
}
//Complex double
inline Grid::ComplexD operator()(__m128d in){
printf("Missing complex double implementation -> FIX\n");
return Grid::ComplexD(0,0); // FIXME wrong
}
};
/////////////////////////////////////////////////////
// Arithmetic operations
/////////////////////////////////////////////////////
struct Sum{
//Complex/Real float
inline __m128 operator()(__m128 a, __m128 b){
return _mm_add_ps(a,b);
}
//Complex/Real double
inline __m128d operator()(__m128d a, __m128d b){
return _mm_add_pd(a,b);
}
//Integer
inline __m128i operator()(__m128i a, __m128i b){
return _mm_add_epi32(a,b);
}
};
struct Sub{
//Complex/Real float
inline __m128 operator()(__m128 a, __m128 b){
return _mm_sub_ps(a,b);
}
//Complex/Real double
inline __m128d operator()(__m128d a, __m128d b){
return _mm_sub_pd(a,b);
}
//Integer
inline __m128i operator()(__m128i a, __m128i b){
return _mm_sub_epi32(a,b);
}
};
struct MultComplex{
// Complex float
inline __m128 operator()(__m128 a, __m128 b){
__m128 ymm0,ymm1,ymm2;
ymm0 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(2,2,0,0)); // ymm0 <- ar ar,
ymm0 = _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br
ymm1 = _mm_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1)); // ymm1 <- br,bi
ymm2 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(3,3,1,1)); // ymm2 <- ai,ai
ymm1 = _mm_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi
return _mm_addsub_ps(ymm0,ymm1);
}
// Complex double
inline __m128d operator()(__m128d a, __m128d b){
__m128d ymm0,ymm1,ymm2;
ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar,
ymm0 = _mm_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br
ymm1 = _mm_shuffle_pd(b,b,0x1); // ymm1 <- br,bi b01
ymm2 = _mm_shuffle_pd(a,a,0x3); // ymm2 <- ai,ai b11
ymm1 = _mm_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi
return _mm_addsub_pd(ymm0,ymm1);
}
};
struct Mult{
// Real float
inline __m128 operator()(__m128 a, __m128 b){
return _mm_mul_ps(a,b);
}
// Real double
inline __m128d operator()(__m128d a, __m128d b){
return _mm_mul_pd(a,b);
}
// Integer
inline __m128i operator()(__m128i a, __m128i b){
return _mm_mul_epi32(a,b);
}
};
}
// Here assign types
namespace Grid {
typedef __m128 SIMD_Ftype; // Single precision type
typedef __m128d SIMD_Dtype; // Double precision type
typedef __m128i SIMD_Itype; // Integer type
// Function names
typedef Optimization::Vsplat VsplatSIMD;
typedef Optimization::Vstore VstoreSIMD;
// Arithmetic operations
typedef Optimization::Sum SumSIMD;
typedef Optimization::Sub SubSIMD;
typedef Optimization::Mult MultSIMD;
typedef Optimization::MultComplex MultComplexSIMD;
typedef Optimization::Vset VsetSIMD;
}

View File

@ -1,6 +1,14 @@
//---------------------------------------------------------------------------
/*! @file Grid_vector_types.h
@brief Defines templated class Grid_simd to deal with inner vector types
*/
// Time-stamp: <2015-05-19 17:20:36 neo>
//---------------------------------------------------------------------------
#ifndef GRID_VECTOR_TYPES #ifndef GRID_VECTOR_TYPES
#define GRID_VECTOR_TYPES #define GRID_VECTOR_TYPES
#include "Grid_sse4.h"
namespace Grid { namespace Grid {
@ -13,31 +21,32 @@ namespace Grid {
struct RealPart< std::complex<T> >{ struct RealPart< std::complex<T> >{
typedef T type; typedef T type;
}; };
////////////////////////////////////////////////////////
////////////////////////////////////////////////////////
// Check for complexity with type traits // Check for complexity with type traits
template <typename T> template <typename T>
struct is_complex : std::false_type {}; struct is_complex : std::false_type {};
template < typename T > template < typename T >
struct is_complex< std::complex<T> >: std::true_type {}; struct is_complex< std::complex<T> >: std::true_type {};
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
// Define the operation templates functors // Define the operation templates functors
template < class SIMD, class Operation > // general forms to allow for vsplat syntax
SIMD binary(SIMD src_1, SIMD src_2, Operation op){ // need explicit declaration of types when used since
// clang cannot automatically determine the output type sometimes
template < class Out, class Input1, class Input2, class Operation >
Out binary(Input1 src_1, Input2 src_2, Operation op){
return op(src_1, src_2); return op(src_1, src_2);
} }
template < class SIMD, class Operation > template < class SIMDout, class Input, class Operation >
SIMD unary(SIMD src, Operation op){ SIMDout unary(Input src, Operation op){
return op(src); return op(src);
} }
/////////////////////////////////////////////// ///////////////////////////////////////////////
/* /*
@brief Grid_simd class for the SIMD vector type operations @brief Grid_simd class for the SIMD vector type operations
*/ */
template < class Scalar_type, class Vector_type > template < class Scalar_type, class Vector_type >
class Grid_simd { class Grid_simd {
@ -70,75 +79,90 @@ namespace Grid {
/////////////////////////////////////////////// ///////////////////////////////////////////////
// mac, mult, sub, add, adj // mac, mult, sub, add, adj
// Should do an AVX2 version with mac.
/////////////////////////////////////////////// ///////////////////////////////////////////////
friend inline void mac (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ a,const Grid_simd *__restrict__ x){ *y = (*a)*(*x)+(*y); }; friend inline void mac (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ a,const Grid_simd *__restrict__ x){ *y = (*a)*(*x)+(*y); };
friend inline void mult(Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) * (*r); } friend inline void mult(Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) * (*r); }
friend inline void sub (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); } friend inline void sub (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); }
friend inline void add (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); } friend inline void add (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); }
//not for integer types... FIXME
friend inline Grid_simd adj(const Grid_simd &in){ return conjugate(in); } friend inline Grid_simd adj(const Grid_simd &in){ return conjugate(in); }
////////////////////////////////// ///////////////////////////////////////////////
// Initialise to 1,0,i // Initialise to 1,0,i for the correct types
////////////////////////////////// ///////////////////////////////////////////////
// if not complex overload here // if not complex overload here
template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 >
friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0); } friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0); }
template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 >
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0); } friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0); }
// overload for complex type // overload for complex type
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 > template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 >
friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0,0.0); } friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0,0.0); }
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 > template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 >
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0,0.0); } friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0,0.0); }// use xor?
// For integral type // For integral type
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
friend inline void vone(Grid_simd &ret) { vsplat(ret,1); } friend inline void vone(Grid_simd &ret) { vsplat(ret,1); }
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0); } friend inline void vzero(Grid_simd &ret) { vsplat(ret,0); }
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
friend inline void vtrue (Grid_simd &ret){vsplat(ret,0xFFFFFFFF);}
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
friend inline void vfalse(vInteger &ret){vsplat(ret,0);}
// do not compile if real or integer, send an error message from the compiler // do not compile if real or integer, send an error message from the compiler
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 > template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 >
friend inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,0.0,1.0);} friend inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,0.0,1.0);}
//////////////////////////////////// ////////////////////////////////////
// Arithmetic operator overloads +,-,* // Arithmetic operator overloads +,-,*
//////////////////////////////////// ////////////////////////////////////
friend inline Grid_simd operator + (Grid_simd a, Grid_simd b) friend inline Grid_simd operator + (Grid_simd a, Grid_simd b)
{ {
vComplexF ret; Grid_simd ret;
// FIXME call the binary op ret.v = binary<Vector_type>(a.v, b.v, SumSIMD());
return ret; return ret;
}; };
friend inline Grid_simd operator - (Grid_simd a, Grid_simd b) friend inline Grid_simd operator - (Grid_simd a, Grid_simd b)
{ {
vComplexF ret; Grid_simd ret;
// FIXME call the binary op ret.v = binary<Vector_type>(a.v, b.v, SubSIMD());
return ret; return ret;
}; };
// Distinguish between complex types and others
template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 >
friend inline Grid_simd operator * (Grid_simd a, Grid_simd b) friend inline Grid_simd operator * (Grid_simd a, Grid_simd b)
{ {
vComplexF ret; Grid_simd ret;
// FIXME call the binary op ret.v = binary<Vector_type>(a.v,b.v, MultComplexSIMD());
return ret; return ret;
}; };
// Real/Integer types
template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 >
friend inline Grid_simd operator * (Grid_simd a, Grid_simd b)
{
Grid_simd ret;
ret.v = binary<Vector_type>(a.v,b.v, MultSIMD());
return ret;
};
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// FIXME: gonna remove these load/store, get, set, prefetch // FIXME: gonna remove these load/store, get, set, prefetch
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
friend inline void vset(Grid_simd &ret, Scalar_type *a){ friend inline void vset(Grid_simd &ret, Scalar_type *a){
// FIXME set ret.v = unary<Vector_type>(a, VsetSIMD());
} }
/////////////////////// ///////////////////////
@ -155,26 +179,25 @@ namespace Grid {
// this only for the complex version // this only for the complex version
template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 > template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 >
friend inline void vsplat(Grid_simd &ret,Real a, Real b){ friend inline void vsplat(Grid_simd &ret,Real a, Real b){
// FIXME add operator ret.v = binary<Vector_type>(a, b, VsplatSIMD());
} }
//if real fill with a, if complex fill with a in the real part //if real fill with a, if complex fill with a in the real part (first function above)
friend inline void vsplat(Grid_simd &ret,Real a){ friend inline void vsplat(Grid_simd &ret,Real a){
// FIXME add operator ret.v = unary<Vector_type>(a, VsplatSIMD());
} }
friend inline void vstore(const Grid_simd &ret, Scalar_type *a){ friend inline void vstore(const Grid_simd &ret, Scalar_type *a){
//FIXME binary<void>(ret.v, (Real*)a, VstoreSIMD());
} }
friend inline void vprefetch(const Grid_simd &v) friend inline void vprefetch(const Grid_simd &v)
{ {
_mm_prefetch((const char*)&v.v,_MM_HINT_T0); _mm_prefetch((const char*)&v.v,_MM_HINT_T0);
} }
friend inline Scalar_type Reduce(const Grid_simd & in) friend inline Scalar_type Reduce(const Grid_simd & in)
{ {
// FIXME add operator // FIXME add operator
@ -221,6 +244,7 @@ namespace Grid {
inline Grid_simd &operator *=(const Grid_simd &r) { inline Grid_simd &operator *=(const Grid_simd &r) {
*this = (*this)*r; *this = (*this)*r;
return *this; return *this;
// return (*this)*r; ?
} }
inline Grid_simd &operator +=(const Grid_simd &r) { inline Grid_simd &operator +=(const Grid_simd &r) {
*this = *this+r; *this = *this+r;
@ -233,6 +257,12 @@ namespace Grid {
friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
{
Gpermute<Grid_simd>(y,b,perm);
}
/*
friend inline void permute(Grid_simd &y,Grid_simd b,int perm) friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
{ {
Gpermute<Grid_simd>(y,b,perm); Gpermute<Grid_simd>(y,b,perm);
@ -253,7 +283,7 @@ namespace Grid {
{ {
Gextract<Grid_simd,Scalar_type>(y,extracted); Gextract<Grid_simd,Scalar_type>(y,extracted);
} }
*/
};// end of Grid_simd class definition };// end of Grid_simd class definition
@ -286,11 +316,11 @@ namespace Grid {
// Define available types (now change names to avoid clashing) // Define available types (now change names to avoid clashing)
typedef __m128 SIMD_type;// decided at compilation time
typedef Grid_simd< float , SIMD_type > MyRealF; typedef Grid_simd< float , SIMD_Ftype > MyRealF;
typedef Grid_simd< double , SIMD_type > MyRealD; typedef Grid_simd< double , SIMD_Dtype > MyRealD;
typedef Grid_simd< std::complex< float > , SIMD_type > MyComplexF; typedef Grid_simd< std::complex< float > , SIMD_Ftype > MyComplexF;
typedef Grid_simd< std::complex< double >, SIMD_type > MyComplexD; typedef Grid_simd< std::complex< double >, SIMD_Dtype > MyComplexD;

View File

@ -1,5 +1,9 @@
#include "Grid.h" #include "Grid.h"
//DEBUG
#include "simd/Grid_vector_types.h"
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;
using namespace Grid::QCD; using namespace Grid::QCD;
@ -151,6 +155,39 @@ int main (int argc, char ** argv)
scMat = sMat*scMat; // LatticeSpinColourMatrix = LatticeSpinMatrix * LatticeSpinColourMatrix scMat = sMat*scMat; // LatticeSpinColourMatrix = LatticeSpinMatrix * LatticeSpinColourMatrix
#ifdef SSE4
///////// Tests the new class Grid_simd
std::complex<double> ctest(3.0,2.0);
std::complex<float> ctestf(3.0,2.0);
MyComplexF TestMe1(1.0); // fill real part
MyComplexD TestMe2(ctest);
MyComplexD TestMe3(ctest);// compiler generate conversion of basic types
//MyRealF TestMe5(ctest);// Must generate compiler error
MyRealD TestMe4(2.0);
MyComplexF TestMe6(ctestf);
MyComplexF TestMe7(ctestf);
MyComplexD TheSum= TestMe2*TestMe3;
MyComplexF TheSumF= TestMe6*TestMe7;
double dsum[2];
_mm_store_pd(dsum, TheSum.v);
for (int i =0; i< 2; i++)
printf("%f\n", dsum[i]);
float fsum[4];
_mm_store_ps(fsum, TheSumF.v);
for (int i =0; i< 4; i++)
printf("%f\n", fsum[i]);
vstore(TheSum, &ctest);
std::cout << ctest<< std::endl;
#endif
///////////////////////
// Non-lattice (const objects) * Lattice // Non-lattice (const objects) * Lattice
ColourMatrix cm; ColourMatrix cm;
SpinColourMatrix scm; SpinColourMatrix scm;