mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Merged
Merge branch 'master' of https://github.com/coppolachan/Grid into coppolachan-master Conflicts: lib/simd/Grid_vector_types.h
This commit is contained in:
commit
fde7f8d6b9
2
README
2
README
@ -33,6 +33,8 @@ MPI parallelism is UNIMPLEMENTED and for now only OpenMP and SIMD parallelism is
|
|||||||
by setting variables in the command line or in the environment. Here
|
by setting variables in the command line or in the environment. Here
|
||||||
is are examples:
|
is are examples:
|
||||||
|
|
||||||
|
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4
|
||||||
|
|
||||||
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX1
|
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX1
|
||||||
|
|
||||||
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2
|
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2
|
||||||
|
@ -37,6 +37,8 @@ MPI, OpenMP, and SIMD parallelism are present in the library.
|
|||||||
by setting variables in the command line or in the environment. Here
|
by setting variables in the command line or in the environment. Here
|
||||||
are examples:
|
are examples:
|
||||||
|
|
||||||
|
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4
|
||||||
|
|
||||||
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX1
|
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX1
|
||||||
|
|
||||||
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2
|
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2
|
||||||
|
104
configure
vendored
104
configure
vendored
@ -4503,6 +4503,11 @@ _ACEOF
|
|||||||
|
|
||||||
|
|
||||||
# Checks for library functions.
|
# Checks for library functions.
|
||||||
|
echo
|
||||||
|
echo Checking libraries
|
||||||
|
echo :::::::::::::::::::::::::::::::::::::::::::
|
||||||
|
|
||||||
|
|
||||||
for ac_func in gettimeofday
|
for ac_func in gettimeofday
|
||||||
do :
|
do :
|
||||||
ac_fn_cxx_check_func "$LINENO" "gettimeofday" "ac_cv_func_gettimeofday"
|
ac_fn_cxx_check_func "$LINENO" "gettimeofday" "ac_cv_func_gettimeofday"
|
||||||
@ -4515,6 +4520,105 @@ fi
|
|||||||
done
|
done
|
||||||
|
|
||||||
|
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __gmpf_init in -lgmp" >&5
|
||||||
|
$as_echo_n "checking for __gmpf_init in -lgmp... " >&6; }
|
||||||
|
if ${ac_cv_lib_gmp___gmpf_init+:} false; then :
|
||||||
|
$as_echo_n "(cached) " >&6
|
||||||
|
else
|
||||||
|
ac_check_lib_save_LIBS=$LIBS
|
||||||
|
LIBS="-lgmp $LIBS"
|
||||||
|
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||||
|
/* end confdefs.h. */
|
||||||
|
|
||||||
|
/* Override any GCC internal prototype to avoid an error.
|
||||||
|
Use char because int might match the return type of a GCC
|
||||||
|
builtin and then its argument prototype would still apply. */
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
char __gmpf_init ();
|
||||||
|
int
|
||||||
|
main ()
|
||||||
|
{
|
||||||
|
return __gmpf_init ();
|
||||||
|
;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
_ACEOF
|
||||||
|
if ac_fn_cxx_try_link "$LINENO"; then :
|
||||||
|
ac_cv_lib_gmp___gmpf_init=yes
|
||||||
|
else
|
||||||
|
ac_cv_lib_gmp___gmpf_init=no
|
||||||
|
fi
|
||||||
|
rm -f core conftest.err conftest.$ac_objext \
|
||||||
|
conftest$ac_exeext conftest.$ac_ext
|
||||||
|
LIBS=$ac_check_lib_save_LIBS
|
||||||
|
fi
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gmp___gmpf_init" >&5
|
||||||
|
$as_echo "$ac_cv_lib_gmp___gmpf_init" >&6; }
|
||||||
|
if test "x$ac_cv_lib_gmp___gmpf_init" = xyes; then :
|
||||||
|
cat >>confdefs.h <<_ACEOF
|
||||||
|
#define HAVE_LIBGMP 1
|
||||||
|
_ACEOF
|
||||||
|
|
||||||
|
LIBS="-lgmp $LIBS"
|
||||||
|
|
||||||
|
else
|
||||||
|
as_fn_error $? "GNU Multiple Precision GMP library was not found in your system.
|
||||||
|
Please install or provide the correct path to your installation
|
||||||
|
Info at: http://www.gmplib.org" "$LINENO" 5
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for mpfr_init in -lmpfr" >&5
|
||||||
|
$as_echo_n "checking for mpfr_init in -lmpfr... " >&6; }
|
||||||
|
if ${ac_cv_lib_mpfr_mpfr_init+:} false; then :
|
||||||
|
$as_echo_n "(cached) " >&6
|
||||||
|
else
|
||||||
|
ac_check_lib_save_LIBS=$LIBS
|
||||||
|
LIBS="-lmpfr $LIBS"
|
||||||
|
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||||
|
/* end confdefs.h. */
|
||||||
|
|
||||||
|
/* Override any GCC internal prototype to avoid an error.
|
||||||
|
Use char because int might match the return type of a GCC
|
||||||
|
builtin and then its argument prototype would still apply. */
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
char mpfr_init ();
|
||||||
|
int
|
||||||
|
main ()
|
||||||
|
{
|
||||||
|
return mpfr_init ();
|
||||||
|
;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
_ACEOF
|
||||||
|
if ac_fn_cxx_try_link "$LINENO"; then :
|
||||||
|
ac_cv_lib_mpfr_mpfr_init=yes
|
||||||
|
else
|
||||||
|
ac_cv_lib_mpfr_mpfr_init=no
|
||||||
|
fi
|
||||||
|
rm -f core conftest.err conftest.$ac_objext \
|
||||||
|
conftest$ac_exeext conftest.$ac_ext
|
||||||
|
LIBS=$ac_check_lib_save_LIBS
|
||||||
|
fi
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mpfr_mpfr_init" >&5
|
||||||
|
$as_echo "$ac_cv_lib_mpfr_mpfr_init" >&6; }
|
||||||
|
if test "x$ac_cv_lib_mpfr_mpfr_init" = xyes; then :
|
||||||
|
cat >>confdefs.h <<_ACEOF
|
||||||
|
#define HAVE_LIBMPFR 1
|
||||||
|
_ACEOF
|
||||||
|
|
||||||
|
LIBS="-lmpfr $LIBS"
|
||||||
|
|
||||||
|
else
|
||||||
|
as_fn_error $? "GNU Multiple Precision MPFR library was not found in your system.
|
||||||
|
Please install or provide the correct path to your installation
|
||||||
|
Info at: http://www.mpfr.org/" "$LINENO" 5
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
16
configure.ac
16
configure.ac
@ -3,7 +3,7 @@
|
|||||||
#
|
#
|
||||||
# Project Grid package
|
# Project Grid package
|
||||||
#
|
#
|
||||||
# Time-stamp: <2015-05-18 17:14:20 neo>
|
# Time-stamp: <2015-05-19 13:51:08 neo>
|
||||||
|
|
||||||
AC_PREREQ([2.69])
|
AC_PREREQ([2.69])
|
||||||
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
|
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
|
||||||
@ -46,8 +46,22 @@ AC_TYPE_UINT32_T
|
|||||||
AC_TYPE_UINT64_T
|
AC_TYPE_UINT64_T
|
||||||
|
|
||||||
# Checks for library functions.
|
# Checks for library functions.
|
||||||
|
echo
|
||||||
|
echo Checking libraries
|
||||||
|
echo :::::::::::::::::::::::::::::::::::::::::::
|
||||||
|
|
||||||
|
|
||||||
AC_CHECK_FUNCS([gettimeofday])
|
AC_CHECK_FUNCS([gettimeofday])
|
||||||
|
|
||||||
|
AC_CHECK_LIB([gmp],[__gmpf_init],,
|
||||||
|
[AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
|
||||||
|
Please install or provide the correct path to your installation
|
||||||
|
Info at: http://www.gmplib.org)])
|
||||||
|
|
||||||
|
AC_CHECK_LIB([mpfr],[mpfr_init],,
|
||||||
|
[AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
|
||||||
|
Please install or provide the correct path to your installation
|
||||||
|
Info at: http://www.mpfr.org/)])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -36,6 +36,12 @@
|
|||||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||||
#define HAVE_INTTYPES_H 1
|
#define HAVE_INTTYPES_H 1
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `gmp' library (-lgmp). */
|
||||||
|
#define HAVE_LIBGMP 1
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `mpfr' library (-lmpfr). */
|
||||||
|
#define HAVE_LIBMPFR 1
|
||||||
|
|
||||||
/* Define to 1 if you have the <malloc.h> header file. */
|
/* Define to 1 if you have the <malloc.h> header file. */
|
||||||
#define HAVE_MALLOC_H 1
|
#define HAVE_MALLOC_H 1
|
||||||
|
|
||||||
|
@ -35,6 +35,12 @@
|
|||||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||||
#undef HAVE_INTTYPES_H
|
#undef HAVE_INTTYPES_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `gmp' library (-lgmp). */
|
||||||
|
#undef HAVE_LIBGMP
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `mpfr' library (-lmpfr). */
|
||||||
|
#undef HAVE_LIBMPFR
|
||||||
|
|
||||||
/* Define to 1 if you have the <malloc.h> header file. */
|
/* Define to 1 if you have the <malloc.h> header file. */
|
||||||
#undef HAVE_MALLOC_H
|
#undef HAVE_MALLOC_H
|
||||||
|
|
||||||
|
@ -95,5 +95,8 @@ nobase_include_HEADERS = algorithms/approx/bigfloat.h\
|
|||||||
simd/Grid_vComplexF.h \
|
simd/Grid_vComplexF.h \
|
||||||
simd/Grid_vInteger.h \
|
simd/Grid_vInteger.h \
|
||||||
simd/Grid_vRealD.h \
|
simd/Grid_vRealD.h \
|
||||||
simd/Grid_vRealF.h
|
simd/Grid_vRealF.h \
|
||||||
|
simd/Grid_vector_types.h \
|
||||||
|
simd/Grid_sse4.h
|
||||||
|
|
||||||
|
|
||||||
|
0
lib/algorithms/approx/Remez.cc
Executable file → Normal file
0
lib/algorithms/approx/Remez.cc
Executable file → Normal file
0
lib/algorithms/approx/Remez.h
Executable file → Normal file
0
lib/algorithms/approx/Remez.h
Executable file → Normal file
0
lib/algorithms/approx/Zolotarev.cc
Executable file → Normal file
0
lib/algorithms/approx/Zolotarev.cc
Executable file → Normal file
0
lib/algorithms/approx/Zolotarev.h
Executable file → Normal file
0
lib/algorithms/approx/Zolotarev.h
Executable file → Normal file
0
lib/algorithms/approx/bigfloat.h
Executable file → Normal file
0
lib/algorithms/approx/bigfloat.h
Executable file → Normal file
0
lib/simd/.dirstamp
Normal file
0
lib/simd/.dirstamp
Normal file
194
lib/simd/Grid_sse4.h
Normal file
194
lib/simd/Grid_sse4.h
Normal file
@ -0,0 +1,194 @@
|
|||||||
|
//----------------------------------------------------------------------
|
||||||
|
/*! @file Grid_sse4.h
|
||||||
|
@brief Optimization libraries
|
||||||
|
*/
|
||||||
|
// Time-stamp: <2015-05-19 17:06:51 neo>
|
||||||
|
//----------------------------------------------------------------------
|
||||||
|
|
||||||
|
#include <pmmintrin.h>
|
||||||
|
|
||||||
|
namespace Optimization {
|
||||||
|
|
||||||
|
struct Vsplat{
|
||||||
|
//Complex float
|
||||||
|
inline __m128 operator()(float a, float b){
|
||||||
|
return _mm_set_ps(b,a,b,a);
|
||||||
|
}
|
||||||
|
// Real float
|
||||||
|
inline __m128 operator()(float a){
|
||||||
|
return _mm_set_ps(a,a,a,a);
|
||||||
|
}
|
||||||
|
//Complex double
|
||||||
|
inline __m128d operator()(double a, double b){
|
||||||
|
return _mm_set_pd(b,a);
|
||||||
|
}
|
||||||
|
//Real double
|
||||||
|
inline __m128d operator()(double a){
|
||||||
|
return _mm_set_pd(a,a);
|
||||||
|
}
|
||||||
|
//Integer
|
||||||
|
inline __m128i operator()(Integer a){
|
||||||
|
return _mm_set1_epi32(a);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Vstore{
|
||||||
|
//Float
|
||||||
|
inline void operator()(__m128 a, float* F){
|
||||||
|
_mm_store_ps(F,a);
|
||||||
|
}
|
||||||
|
//Double
|
||||||
|
inline void operator()(__m128d a, double* D){
|
||||||
|
_mm_store_pd(D,a);
|
||||||
|
}
|
||||||
|
//Integer
|
||||||
|
inline void operator()(__m128i a, Integer* I){
|
||||||
|
_mm_store_si128((__m128i *)I,a);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
struct Vset{
|
||||||
|
// Complex float
|
||||||
|
inline __m128 operator()(Grid::ComplexF *a){
|
||||||
|
return _mm_set_ps(a[1].imag(), a[1].real(),a[0].imag(),a[0].real());
|
||||||
|
}
|
||||||
|
// Complex double
|
||||||
|
inline __m128d operator()(Grid::ComplexD *a){
|
||||||
|
return _mm_set_pd(a[0].imag(),a[0].real());
|
||||||
|
}
|
||||||
|
// Real float
|
||||||
|
inline __m128 operator()(float *a){
|
||||||
|
return _mm_set_ps(a[3],a[2],a[1],a[0]);
|
||||||
|
}
|
||||||
|
// Real double
|
||||||
|
inline __m128d operator()(double *a){
|
||||||
|
return _mm_set_pd(a[1],a[0]);
|
||||||
|
}
|
||||||
|
// Integer
|
||||||
|
inline __m128i operator()(Integer *a){
|
||||||
|
return _mm_set_epi32(a[0],a[1],a[2],a[3]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Reduce{
|
||||||
|
//Complex float
|
||||||
|
inline Grid::ComplexF operator()(__m128 in){
|
||||||
|
union {
|
||||||
|
__m128 v1;
|
||||||
|
float f[4];
|
||||||
|
} u128;
|
||||||
|
u128.v1 = _mm_add_ps(in, _mm_shuffle_ps(in,in, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros
|
||||||
|
return Grid::ComplexF(u128.f[0], u128.f[1]);
|
||||||
|
}
|
||||||
|
//Complex double
|
||||||
|
inline Grid::ComplexD operator()(__m128d in){
|
||||||
|
printf("Missing complex double implementation -> FIX\n");
|
||||||
|
return Grid::ComplexD(0,0); // FIXME wrong
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
// Arithmetic operations
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
struct Sum{
|
||||||
|
//Complex/Real float
|
||||||
|
inline __m128 operator()(__m128 a, __m128 b){
|
||||||
|
return _mm_add_ps(a,b);
|
||||||
|
}
|
||||||
|
//Complex/Real double
|
||||||
|
inline __m128d operator()(__m128d a, __m128d b){
|
||||||
|
return _mm_add_pd(a,b);
|
||||||
|
}
|
||||||
|
//Integer
|
||||||
|
inline __m128i operator()(__m128i a, __m128i b){
|
||||||
|
return _mm_add_epi32(a,b);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Sub{
|
||||||
|
//Complex/Real float
|
||||||
|
inline __m128 operator()(__m128 a, __m128 b){
|
||||||
|
return _mm_sub_ps(a,b);
|
||||||
|
}
|
||||||
|
//Complex/Real double
|
||||||
|
inline __m128d operator()(__m128d a, __m128d b){
|
||||||
|
return _mm_sub_pd(a,b);
|
||||||
|
}
|
||||||
|
//Integer
|
||||||
|
inline __m128i operator()(__m128i a, __m128i b){
|
||||||
|
return _mm_sub_epi32(a,b);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct MultComplex{
|
||||||
|
// Complex float
|
||||||
|
inline __m128 operator()(__m128 a, __m128 b){
|
||||||
|
__m128 ymm0,ymm1,ymm2;
|
||||||
|
ymm0 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(2,2,0,0)); // ymm0 <- ar ar,
|
||||||
|
ymm0 = _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br
|
||||||
|
ymm1 = _mm_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1)); // ymm1 <- br,bi
|
||||||
|
ymm2 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(3,3,1,1)); // ymm2 <- ai,ai
|
||||||
|
ymm1 = _mm_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
||||||
|
return _mm_addsub_ps(ymm0,ymm1);
|
||||||
|
}
|
||||||
|
// Complex double
|
||||||
|
inline __m128d operator()(__m128d a, __m128d b){
|
||||||
|
__m128d ymm0,ymm1,ymm2;
|
||||||
|
ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar,
|
||||||
|
ymm0 = _mm_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br
|
||||||
|
ymm1 = _mm_shuffle_pd(b,b,0x1); // ymm1 <- br,bi b01
|
||||||
|
ymm2 = _mm_shuffle_pd(a,a,0x3); // ymm2 <- ai,ai b11
|
||||||
|
ymm1 = _mm_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
||||||
|
return _mm_addsub_pd(ymm0,ymm1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Mult{
|
||||||
|
// Real float
|
||||||
|
inline __m128 operator()(__m128 a, __m128 b){
|
||||||
|
return _mm_mul_ps(a,b);
|
||||||
|
}
|
||||||
|
// Real double
|
||||||
|
inline __m128d operator()(__m128d a, __m128d b){
|
||||||
|
return _mm_mul_pd(a,b);
|
||||||
|
}
|
||||||
|
// Integer
|
||||||
|
inline __m128i operator()(__m128i a, __m128i b){
|
||||||
|
return _mm_mul_epi32(a,b);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Here assign types
|
||||||
|
namespace Grid {
|
||||||
|
typedef __m128 SIMD_Ftype; // Single precision type
|
||||||
|
typedef __m128d SIMD_Dtype; // Double precision type
|
||||||
|
typedef __m128i SIMD_Itype; // Integer type
|
||||||
|
|
||||||
|
|
||||||
|
// Function names
|
||||||
|
typedef Optimization::Vsplat VsplatSIMD;
|
||||||
|
typedef Optimization::Vstore VstoreSIMD;
|
||||||
|
|
||||||
|
// Arithmetic operations
|
||||||
|
typedef Optimization::Sum SumSIMD;
|
||||||
|
typedef Optimization::Sub SubSIMD;
|
||||||
|
typedef Optimization::Mult MultSIMD;
|
||||||
|
typedef Optimization::MultComplex MultComplexSIMD;
|
||||||
|
typedef Optimization::Vset VsetSIMD;
|
||||||
|
|
||||||
|
}
|
@ -1,6 +1,14 @@
|
|||||||
|
//---------------------------------------------------------------------------
|
||||||
|
/*! @file Grid_vector_types.h
|
||||||
|
@brief Defines templated class Grid_simd to deal with inner vector types
|
||||||
|
*/
|
||||||
|
// Time-stamp: <2015-05-19 17:20:36 neo>
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
#ifndef GRID_VECTOR_TYPES
|
#ifndef GRID_VECTOR_TYPES
|
||||||
#define GRID_VECTOR_TYPES
|
#define GRID_VECTOR_TYPES
|
||||||
|
|
||||||
|
#include "Grid_sse4.h"
|
||||||
|
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
@ -13,31 +21,32 @@ namespace Grid {
|
|||||||
struct RealPart< std::complex<T> >{
|
struct RealPart< std::complex<T> >{
|
||||||
typedef T type;
|
typedef T type;
|
||||||
};
|
};
|
||||||
////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
// Check for complexity with type traits
|
// Check for complexity with type traits
|
||||||
template <typename T>
|
template <typename T>
|
||||||
struct is_complex : std::false_type {};
|
struct is_complex : std::false_type {};
|
||||||
template < typename T >
|
template < typename T >
|
||||||
struct is_complex< std::complex<T> >: std::true_type {};
|
struct is_complex< std::complex<T> >: std::true_type {};
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
// Define the operation templates functors
|
// Define the operation templates functors
|
||||||
template < class SIMD, class Operation >
|
// general forms to allow for vsplat syntax
|
||||||
SIMD binary(SIMD src_1, SIMD src_2, Operation op){
|
// need explicit declaration of types when used since
|
||||||
|
// clang cannot automatically determine the output type sometimes
|
||||||
|
template < class Out, class Input1, class Input2, class Operation >
|
||||||
|
Out binary(Input1 src_1, Input2 src_2, Operation op){
|
||||||
return op(src_1, src_2);
|
return op(src_1, src_2);
|
||||||
}
|
}
|
||||||
|
|
||||||
template < class SIMD, class Operation >
|
template < class SIMDout, class Input, class Operation >
|
||||||
SIMD unary(SIMD src, Operation op){
|
SIMDout unary(Input src, Operation op){
|
||||||
return op(src);
|
return op(src);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@brief Grid_simd class for the SIMD vector type operations
|
@brief Grid_simd class for the SIMD vector type operations
|
||||||
|
|
||||||
*/
|
*/
|
||||||
template < class Scalar_type, class Vector_type >
|
template < class Scalar_type, class Vector_type >
|
||||||
class Grid_simd {
|
class Grid_simd {
|
||||||
@ -70,75 +79,90 @@ namespace Grid {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
// mac, mult, sub, add, adj
|
// mac, mult, sub, add, adj
|
||||||
// Should do an AVX2 version with mac.
|
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
friend inline void mac (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ a,const Grid_simd *__restrict__ x){ *y = (*a)*(*x)+(*y); };
|
friend inline void mac (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ a,const Grid_simd *__restrict__ x){ *y = (*a)*(*x)+(*y); };
|
||||||
friend inline void mult(Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) * (*r); }
|
friend inline void mult(Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) * (*r); }
|
||||||
friend inline void sub (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); }
|
friend inline void sub (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); }
|
||||||
friend inline void add (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); }
|
friend inline void add (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); }
|
||||||
|
|
||||||
|
//not for integer types... FIXME
|
||||||
friend inline Grid_simd adj(const Grid_simd &in){ return conjugate(in); }
|
friend inline Grid_simd adj(const Grid_simd &in){ return conjugate(in); }
|
||||||
|
|
||||||
//////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
// Initialise to 1,0,i
|
// Initialise to 1,0,i for the correct types
|
||||||
//////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
// if not complex overload here
|
// if not complex overload here
|
||||||
|
template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 >
|
||||||
friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0); }
|
friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0); }
|
||||||
|
template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 >
|
||||||
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0); }
|
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0); }
|
||||||
|
|
||||||
// overload for complex type
|
// overload for complex type
|
||||||
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 >
|
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 >
|
||||||
friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0,0.0); }
|
friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0,0.0); }
|
||||||
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 >
|
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 >
|
||||||
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0,0.0); }
|
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0,0.0); }// use xor?
|
||||||
|
|
||||||
// For integral type
|
// For integral type
|
||||||
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
|
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
|
||||||
friend inline void vone(Grid_simd &ret) { vsplat(ret,1); }
|
friend inline void vone(Grid_simd &ret) { vsplat(ret,1); }
|
||||||
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
|
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
|
||||||
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0); }
|
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0); }
|
||||||
|
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
|
||||||
|
friend inline void vtrue (Grid_simd &ret){vsplat(ret,0xFFFFFFFF);}
|
||||||
|
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
|
||||||
|
friend inline void vfalse(vInteger &ret){vsplat(ret,0);}
|
||||||
|
|
||||||
|
|
||||||
// do not compile if real or integer, send an error message from the compiler
|
// do not compile if real or integer, send an error message from the compiler
|
||||||
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 >
|
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 >
|
||||||
friend inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,0.0,1.0);}
|
friend inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,0.0,1.0);}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
// Arithmetic operator overloads +,-,*
|
// Arithmetic operator overloads +,-,*
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
friend inline Grid_simd operator + (Grid_simd a, Grid_simd b)
|
friend inline Grid_simd operator + (Grid_simd a, Grid_simd b)
|
||||||
{
|
{
|
||||||
vComplexF ret;
|
Grid_simd ret;
|
||||||
// FIXME call the binary op
|
ret.v = binary<Vector_type>(a.v, b.v, SumSIMD());
|
||||||
return ret;
|
return ret;
|
||||||
};
|
};
|
||||||
|
|
||||||
friend inline Grid_simd operator - (Grid_simd a, Grid_simd b)
|
friend inline Grid_simd operator - (Grid_simd a, Grid_simd b)
|
||||||
{
|
{
|
||||||
vComplexF ret;
|
Grid_simd ret;
|
||||||
// FIXME call the binary op
|
ret.v = binary<Vector_type>(a.v, b.v, SubSIMD());
|
||||||
return ret;
|
return ret;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Distinguish between complex types and others
|
||||||
|
template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 >
|
||||||
friend inline Grid_simd operator * (Grid_simd a, Grid_simd b)
|
friend inline Grid_simd operator * (Grid_simd a, Grid_simd b)
|
||||||
{
|
{
|
||||||
vComplexF ret;
|
Grid_simd ret;
|
||||||
// FIXME call the binary op
|
ret.v = binary<Vector_type>(a.v,b.v, MultComplexSIMD());
|
||||||
return ret;
|
return ret;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Real/Integer types
|
||||||
|
template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 >
|
||||||
|
friend inline Grid_simd operator * (Grid_simd a, Grid_simd b)
|
||||||
|
{
|
||||||
|
Grid_simd ret;
|
||||||
|
ret.v = binary<Vector_type>(a.v,b.v, MultSIMD());
|
||||||
|
return ret;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// FIXME: gonna remove these load/store, get, set, prefetch
|
// FIXME: gonna remove these load/store, get, set, prefetch
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
friend inline void vset(Grid_simd &ret, Scalar_type *a){
|
friend inline void vset(Grid_simd &ret, Scalar_type *a){
|
||||||
// FIXME set
|
ret.v = unary<Vector_type>(a, VsetSIMD());
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////
|
///////////////////////
|
||||||
@ -155,26 +179,25 @@ namespace Grid {
|
|||||||
// this only for the complex version
|
// this only for the complex version
|
||||||
template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 >
|
template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 >
|
||||||
friend inline void vsplat(Grid_simd &ret,Real a, Real b){
|
friend inline void vsplat(Grid_simd &ret,Real a, Real b){
|
||||||
// FIXME add operator
|
ret.v = binary<Vector_type>(a, b, VsplatSIMD());
|
||||||
}
|
}
|
||||||
|
|
||||||
//if real fill with a, if complex fill with a in the real part
|
//if real fill with a, if complex fill with a in the real part (first function above)
|
||||||
friend inline void vsplat(Grid_simd &ret,Real a){
|
friend inline void vsplat(Grid_simd &ret,Real a){
|
||||||
// FIXME add operator
|
ret.v = unary<Vector_type>(a, VsplatSIMD());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
friend inline void vstore(const Grid_simd &ret, Scalar_type *a){
|
friend inline void vstore(const Grid_simd &ret, Scalar_type *a){
|
||||||
//FIXME
|
binary<void>(ret.v, (Real*)a, VstoreSIMD());
|
||||||
}
|
}
|
||||||
|
|
||||||
friend inline void vprefetch(const Grid_simd &v)
|
friend inline void vprefetch(const Grid_simd &v)
|
||||||
{
|
{
|
||||||
_mm_prefetch((const char*)&v.v,_MM_HINT_T0);
|
_mm_prefetch((const char*)&v.v,_MM_HINT_T0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
friend inline Scalar_type Reduce(const Grid_simd & in)
|
friend inline Scalar_type Reduce(const Grid_simd & in)
|
||||||
{
|
{
|
||||||
// FIXME add operator
|
// FIXME add operator
|
||||||
@ -221,6 +244,7 @@ namespace Grid {
|
|||||||
inline Grid_simd &operator *=(const Grid_simd &r) {
|
inline Grid_simd &operator *=(const Grid_simd &r) {
|
||||||
*this = (*this)*r;
|
*this = (*this)*r;
|
||||||
return *this;
|
return *this;
|
||||||
|
// return (*this)*r; ?
|
||||||
}
|
}
|
||||||
inline Grid_simd &operator +=(const Grid_simd &r) {
|
inline Grid_simd &operator +=(const Grid_simd &r) {
|
||||||
*this = *this+r;
|
*this = *this+r;
|
||||||
@ -233,6 +257,12 @@ namespace Grid {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
|
||||||
|
{
|
||||||
|
Gpermute<Grid_simd>(y,b,perm);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
|
friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
|
||||||
{
|
{
|
||||||
Gpermute<Grid_simd>(y,b,perm);
|
Gpermute<Grid_simd>(y,b,perm);
|
||||||
@ -253,7 +283,7 @@ namespace Grid {
|
|||||||
{
|
{
|
||||||
Gextract<Grid_simd,Scalar_type>(y,extracted);
|
Gextract<Grid_simd,Scalar_type>(y,extracted);
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
};// end of Grid_simd class definition
|
};// end of Grid_simd class definition
|
||||||
|
|
||||||
@ -286,11 +316,11 @@ namespace Grid {
|
|||||||
|
|
||||||
|
|
||||||
// Define available types (now change names to avoid clashing)
|
// Define available types (now change names to avoid clashing)
|
||||||
typedef __m128 SIMD_type;// decided at compilation time
|
|
||||||
typedef Grid_simd< float , SIMD_type > MyRealF;
|
typedef Grid_simd< float , SIMD_Ftype > MyRealF;
|
||||||
typedef Grid_simd< double , SIMD_type > MyRealD;
|
typedef Grid_simd< double , SIMD_Dtype > MyRealD;
|
||||||
typedef Grid_simd< std::complex< float > , SIMD_type > MyComplexF;
|
typedef Grid_simd< std::complex< float > , SIMD_Ftype > MyComplexF;
|
||||||
typedef Grid_simd< std::complex< double >, SIMD_type > MyComplexD;
|
typedef Grid_simd< std::complex< double >, SIMD_Dtype > MyComplexD;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,5 +1,9 @@
|
|||||||
#include "Grid.h"
|
#include "Grid.h"
|
||||||
|
|
||||||
|
//DEBUG
|
||||||
|
#include "simd/Grid_vector_types.h"
|
||||||
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
using namespace Grid::QCD;
|
using namespace Grid::QCD;
|
||||||
@ -151,6 +155,39 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
scMat = sMat*scMat; // LatticeSpinColourMatrix = LatticeSpinMatrix * LatticeSpinColourMatrix
|
scMat = sMat*scMat; // LatticeSpinColourMatrix = LatticeSpinMatrix * LatticeSpinColourMatrix
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef SSE4
|
||||||
|
///////// Tests the new class Grid_simd
|
||||||
|
std::complex<double> ctest(3.0,2.0);
|
||||||
|
std::complex<float> ctestf(3.0,2.0);
|
||||||
|
MyComplexF TestMe1(1.0); // fill real part
|
||||||
|
MyComplexD TestMe2(ctest);
|
||||||
|
MyComplexD TestMe3(ctest);// compiler generate conversion of basic types
|
||||||
|
//MyRealF TestMe5(ctest);// Must generate compiler error
|
||||||
|
MyRealD TestMe4(2.0);
|
||||||
|
|
||||||
|
MyComplexF TestMe6(ctestf);
|
||||||
|
MyComplexF TestMe7(ctestf);
|
||||||
|
|
||||||
|
MyComplexD TheSum= TestMe2*TestMe3;
|
||||||
|
MyComplexF TheSumF= TestMe6*TestMe7;
|
||||||
|
|
||||||
|
double dsum[2];
|
||||||
|
_mm_store_pd(dsum, TheSum.v);
|
||||||
|
for (int i =0; i< 2; i++)
|
||||||
|
printf("%f\n", dsum[i]);
|
||||||
|
|
||||||
|
float fsum[4];
|
||||||
|
_mm_store_ps(fsum, TheSumF.v);
|
||||||
|
for (int i =0; i< 4; i++)
|
||||||
|
printf("%f\n", fsum[i]);
|
||||||
|
|
||||||
|
vstore(TheSum, &ctest);
|
||||||
|
std::cout << ctest<< std::endl;
|
||||||
|
#endif
|
||||||
|
///////////////////////
|
||||||
|
|
||||||
// Non-lattice (const objects) * Lattice
|
// Non-lattice (const objects) * Lattice
|
||||||
ColourMatrix cm;
|
ColourMatrix cm;
|
||||||
SpinColourMatrix scm;
|
SpinColourMatrix scm;
|
||||||
|
Loading…
Reference in New Issue
Block a user