1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-11 11:56:56 +01:00

Merge remote-tracking branch 'upstream/master'

Conflicts:
	lib/math/Grid_math_tensors.h
	lib/simd/Grid_vector_types.h
This commit is contained in:
neo
2015-05-26 13:14:06 +09:00
39 changed files with 1091 additions and 439 deletions

View File

@ -283,6 +283,7 @@ namespace Optimization {
//////////////////////////////////////////////////////////////////////////////////////
// Here assign types
namespace Grid {
using SIMD_Ftype = __m128;   // Single precision type
using SIMD_Dtype = __m128d;  // Double precision type
using SIMD_Itype = __m128i;  // Integer type

View File

@ -2,7 +2,7 @@
/*! @file Grid_vector_types.h
@brief Defines templated class Grid_simd to deal with inner vector types
*/
// Time-stamp: <2015-05-22 17:08:19 neo>
// Time-stamp: <2015-05-26 12:05:39 neo>
//---------------------------------------------------------------------------
#ifndef GRID_VECTOR_TYPES
#define GRID_VECTOR_TYPES
@ -21,31 +21,24 @@
namespace Grid {
// RealPart<T>::type names the real (floating point) type behind T:
//   - primary template:        T itself
//   - std::complex<T> variant: the value type T of the complex number
template <typename T> struct RealPart {
  typedef T type;
};
template <typename T> struct RealPart< std::complex<T> > {
  typedef T type;
};
// Type aliases used to simplify the syntax of std::enable_if.
//   Invoke<T>          : shorthand for typename T::type
//   EnableIf<C,R>      : R when C::value is true, substitution failure otherwise
//   NotEnableIf<C,R>   : R when C::value is false, substitution failure otherwise
template <typename T> using Invoke = typename T::type;
template <typename Condition, typename ReturnType> using EnableIf    = Invoke<std::enable_if< Condition::value, ReturnType> >;
template <typename Condition, typename ReturnType> using NotEnableIf = Invoke<std::enable_if<!Condition::value, ReturnType> >;
////////////////////////////////////////////////////////
// Type trait that detects complex numbers:
// is_complex<T>::value is true only when T is a std::complex<U>, false otherwise.
template <typename T> struct is_complex : std::false_type {};
template <typename T> struct is_complex< std::complex<T> > : std::true_type {};
////////////////////////////////////////////////////////
// Define the operation templates functors
// general forms to allow for vsplat syntax
@ -102,8 +95,6 @@ namespace Grid {
Grid_simd(const Real a){
vsplat(*this,Scalar_type(a));
};
///////////////////////////////////////////////
// mac, mult, sub, add, adj
@ -145,10 +136,6 @@ namespace Grid {
friend inline void vtrue (Grid_simd &ret){vsplat(ret,0xFFFFFFFF);}
// vfalse: only participates in overload resolution when S (which defaults to
// Scalar_type) is an integral type; passes the literal 0 to vsplat for ret.
template <class S = Scalar_type, EnableIf<std::is_integral<S>, int> = 0>
friend inline void vfalse(Grid_simd &ret)
{
  vsplat(ret, 0);
}
////////////////////////////////////
// Arithmetic operator overloads +,-,*
@ -184,7 +171,6 @@ namespace Grid {
ret.v = binary<Vector_type>(a.v,b.v, MultSIMD());
return ret;
};
////////////////////////////////////////////////////////////////////////
// FIXME: these load/store, get, set and prefetch functions are going to be removed

View File

@ -345,20 +345,30 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
// REDUCE FIXME: there must be a cleaner implementation.
//
// Sums the complex lanes of a vComplexD and returns the total as a ComplexD.
// Each permute/add step combines the partial sums of two lane groups
// (assumes permute(dst,src,n) exchanges lane groups at level n -- confirm
// against its definition). After the last step the first two doubles of v1
// hold the real and imaginary parts of the result, which are read back
// through the union below.
friend inline ComplexD Reduce(const vComplexD & in)
{
  vComplexD v1,v2;
  // Overlay the vector type with an array of doubles so lanes can be read
  // without indexing the intrinsic type directly. Reading the other union
  // member relies on the compiler supporting union type punning.
  union {
    zvec   v;
    double f[sizeof(zvec)/sizeof(double)];
  } conv;
#ifdef SSE4
  v1=in;               // sse 128; a single complex double, nothing to add
#endif
#if defined(AVX1) || defined (AVX2)
  permute(v1,in,0);    // avx 256; paired complex double
  v1=v1+in;
#endif
#ifdef AVX512
  permute(v1,in,0);    // avx 512; quad complex double
  v1=v1+in;
  permute(v2,v1,1);
  v1=v1+v2;
#endif
#ifdef QPX
#error
#endif
  conv.v = v1.v;
  return ComplexD(conv.f[0],conv.f[1]);
}
// Unary negation

View File

@ -234,26 +234,34 @@ namespace Grid {
}
// Sums the complex lanes of a vComplexF and returns the total as a ComplexF.
// Each permute/add step combines the partial sums of two lane groups
// (assumes permute(dst,src,n) exchanges lane groups at level n -- confirm
// against its definition). After the last step the first two floats of v1
// hold the real and imaginary parts of the result, which are read back
// through the union below.
friend inline ComplexF Reduce(const vComplexF & in)
{
  vComplexF v1,v2;
  // Overlay the vector type with an array of floats so lanes can be read
  // without indexing the intrinsic type directly. Reading the other union
  // member relies on the compiler supporting union type punning.
  union {
    cvec  v;
    float f[sizeof(cvec)/sizeof(float)];
  } conv;
#ifdef SSE4
  permute(v1,in,0);    // sse 128; paired complex single
  v1=v1+in;
#endif
#if defined(AVX1) || defined (AVX2)
  permute(v1,in,0);    // avx 256; quad complex single
  v1=v1+in;
  permute(v2,v1,1);
  v1=v1+v2;
#endif
#ifdef AVX512
  permute(v1,in,0);    // avx512 octo-complex single
  v1=v1+in;
  permute(v2,v1,1);
  v1=v1+v2;
  permute(v2,v1,2);
  v1=v1+v2;
#endif
#ifdef QPX
#error
#endif
  conv.v = v1.v;
  return ComplexF(conv.f[0],conv.f[1]);
}
friend inline vComplexF operator * (const ComplexF &a, vComplexF b){

View File

@ -210,25 +210,33 @@ namespace Grid {
// Sums the lanes of a vRealD and returns the total as a RealD.
// Each permute/add step combines the partial sums of two lane groups
// (assumes permute(dst,src,n) exchanges lane groups at level n -- confirm
// against its definition). After the last step the first double of v1 holds
// the result, which is read back through the union below.
friend inline RealD Reduce(const vRealD & in)
{
  vRealD v1,v2;
  // Overlay the vector type with an array of doubles so lanes can be read
  // without indexing the intrinsic type directly. Reading the other union
  // member relies on the compiler supporting union type punning.
  union {
    dvec   v;
    double f[sizeof(dvec)/sizeof(double)];
  } conv;
#ifdef SSE4
  permute(v1,in,0);    // sse 128; paired real double
  v1=v1+in;
#endif
#if defined(AVX1) || defined (AVX2)
  permute(v1,in,0);    // avx 256; quad double
  v1=v1+in;
  permute(v2,v1,1);
  v1=v1+v2;
#endif
#ifdef AVX512
  permute(v1,in,0);    // avx 512; octo-double
  v1=v1+in;
  permute(v2,v1,1);
  v1=v1+v2;
  permute(v2,v1,2);
  v1=v1+v2;
#endif
#ifdef QPX
#endif
  conv.v=v1.v;
  return conv.f[0];
}
// *=,+=,-= operators

View File

@ -243,29 +243,39 @@ friend inline void vstore(const vRealF &ret, float *a){
}
// Sums the lanes of a vRealF and returns the total as a RealF.
// Each permute/add step combines the partial sums of two lane groups
// (assumes permute(dst,src,n) exchanges lane groups at level n -- confirm
// against its definition). After the last step the first float of v1 holds
// the result, which is read back through the union below.
friend inline RealF Reduce(const vRealF & in)
{
  vRealF v1,v2;
  // Overlay the vector type with an array of floats so lanes can be read
  // without indexing the intrinsic type directly. The lane count is derived
  // from sizeof(float) so the array covers every lane of fvec. Reading the
  // other union member relies on the compiler supporting union type punning.
  union {
    fvec  v;
    float f[sizeof(fvec)/sizeof(float)];
  } conv;
#ifdef SSE4
  permute(v1,in,0);    // sse 128; quad single
  v1=v1+in;
  permute(v2,v1,1);
  v1=v1+v2;
#endif
#if defined(AVX1) || defined (AVX2)
  permute(v1,in,0);    // avx 256; octo-single
  v1=v1+in;
  permute(v2,v1,1);
  v1=v1+v2;
  permute(v2,v1,2);
  v1=v1+v2;
#endif
#ifdef AVX512
  permute(v1,in,0);    // avx 512; sixteen singles
  v1=v1+in;
  permute(v2,v1,1);
  v1=v1+v2;
  permute(v2,v1,2);
  v1=v1+v2;
  permute(v2,v1,3);
  v1=v1+v2;
#endif
#ifdef QPX
#endif
  conv.v=v1.v;
  return conv.f[0];
}
// *=,+=,-= operators