mirror of
https://github.com/paboyle/Grid.git
synced 2024-12-23 19:35:26 +00:00
Implemented all SSE4 functions.
A test code Grid_simd_new.cc has been created to test the new class. Tests are all OK.
This commit is contained in:
parent
74e91cd925
commit
cf7be0e461
@ -1,8 +1,10 @@
|
||||
//----------------------------------------------------------------------
|
||||
/*! @file Grid_sse4.h
|
||||
@brief Optimization libraries
|
||||
@brief Optimization libraries for SSE4 instructions set
|
||||
|
||||
Using intrinsics
|
||||
*/
|
||||
// Time-stamp: <2015-05-19 17:06:51 neo>
|
||||
// Time-stamp: <2015-05-20 16:45:39 neo>
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
#include <pmmintrin.h>
|
||||
@ -49,6 +51,20 @@ namespace Optimization {
|
||||
};
|
||||
|
||||
|
||||
struct Vstream{
|
||||
//Float
|
||||
inline void operator()(__m128 a, __m128 b){
|
||||
_mm_stream_ps((float *)&a,b);
|
||||
}
|
||||
//Double
|
||||
inline void operator()(__m128d a, __m128d b){
|
||||
_mm_stream_pd((double *)&a,b);
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
struct Vset{
|
||||
// Complex float
|
||||
@ -75,27 +91,20 @@ namespace Optimization {
|
||||
|
||||
};
|
||||
|
||||
template <typename Out_type, typename In_type>
|
||||
struct Reduce{
|
||||
//Complex float
|
||||
inline Grid::ComplexF operator()(__m128 in){
|
||||
union {
|
||||
__m128 v1;
|
||||
float f[4];
|
||||
} u128;
|
||||
u128.v1 = _mm_add_ps(in, _mm_shuffle_ps(in,in, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros
|
||||
return Grid::ComplexF(u128.f[0], u128.f[1]);
|
||||
//Need templated class to overload output type
|
||||
//General form must generate error if compiled
|
||||
inline Out_type operator()(In_type in){
|
||||
printf("Error, using wrong Reduce function\n");
|
||||
exit(1);
|
||||
return 0;
|
||||
}
|
||||
//Complex double
|
||||
inline Grid::ComplexD operator()(__m128d in){
|
||||
printf("Missing complex double implementation -> FIX\n");
|
||||
return Grid::ComplexD(0,0); // FIXME wrong
|
||||
}
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Arithmetic operations
|
||||
/////////////////////////////////////////////////////
|
||||
@ -129,25 +138,26 @@ namespace Optimization {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct MultComplex{
|
||||
// Complex float
|
||||
inline __m128 operator()(__m128 a, __m128 b){
|
||||
__m128 ymm0,ymm1,ymm2;
|
||||
ymm0 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(2,2,0,0)); // ymm0 <- ar ar,
|
||||
ymm0 = _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br
|
||||
ymm0 = _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br
|
||||
ymm1 = _mm_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1)); // ymm1 <- br,bi
|
||||
ymm2 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(3,3,1,1)); // ymm2 <- ai,ai
|
||||
ymm1 = _mm_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
||||
ymm1 = _mm_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
||||
return _mm_addsub_ps(ymm0,ymm1);
|
||||
}
|
||||
// Complex double
|
||||
inline __m128d operator()(__m128d a, __m128d b){
|
||||
__m128d ymm0,ymm1,ymm2;
|
||||
ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar,
|
||||
ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar,
|
||||
ymm0 = _mm_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br
|
||||
ymm1 = _mm_shuffle_pd(b,b,0x1); // ymm1 <- br,bi b01
|
||||
ymm2 = _mm_shuffle_pd(a,a,0x3); // ymm2 <- ai,ai b11
|
||||
ymm1 = _mm_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
||||
ymm1 = _mm_shuffle_pd(b,b,0x1); // ymm1 <- br,bi b01
|
||||
ymm2 = _mm_shuffle_pd(a,a,0x3); // ymm2 <- ai,ai b11
|
||||
ymm1 = _mm_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
||||
return _mm_addsub_pd(ymm0,ymm1);
|
||||
}
|
||||
};
|
||||
@ -165,14 +175,112 @@ namespace Optimization {
|
||||
inline __m128i operator()(__m128i a, __m128i b){
|
||||
return _mm_mul_epi32(a,b);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct Conj{
|
||||
// Complex single
|
||||
inline __m128 operator()(__m128 in){
|
||||
return _mm_xor_ps(_mm_addsub_ps(_mm_setzero_ps(),in), _mm_set1_ps(-0.f));
|
||||
}
|
||||
// Complex double
|
||||
inline __m128d operator()(__m128d in){
|
||||
return _mm_xor_pd(_mm_addsub_pd(_mm_setzero_pd(),in), _mm_set1_pd(-0.f));//untested
|
||||
}
|
||||
// do not define for integer input
|
||||
};
|
||||
|
||||
struct TimesMinusI{
|
||||
//Complex single
|
||||
inline __m128 operator()(__m128 in, __m128 ret){
|
||||
__m128 tmp =_mm_addsub_ps(_mm_setzero_ps(),in); // r,-i
|
||||
return _mm_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
|
||||
}
|
||||
//Complex double
|
||||
inline __m128d operator()(__m128d in, __m128d ret){
|
||||
__m128d tmp =_mm_addsub_pd(_mm_setzero_pd(),in); // r,-i
|
||||
return _mm_shuffle_pd(tmp,tmp,0x1);
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct TimesI{
|
||||
//Complex single
|
||||
inline __m128 operator()(__m128 in, __m128 ret){
|
||||
__m128 tmp =_mm_shuffle_ps(in,in,_MM_SHUFFLE(2,3,0,1));
|
||||
return _mm_addsub_ps(_mm_setzero_ps(),tmp); // r,-i
|
||||
}
|
||||
//Complex double
|
||||
inline __m128d operator()(__m128d in, __m128d ret){
|
||||
__m128d tmp = _mm_shuffle_pd(in,in,0x1);
|
||||
return _mm_addsub_pd(_mm_setzero_pd(),tmp); // r,-i
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Some Template specialization
|
||||
|
||||
//Complex float Reduce
|
||||
template<>
|
||||
inline Grid::ComplexF Reduce<Grid::ComplexF, __m128>::operator()(__m128 in){
|
||||
union {
|
||||
__m128 v1;
|
||||
float f[4];
|
||||
} u128;
|
||||
u128.v1 = _mm_add_ps(in, _mm_shuffle_ps(in,in, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros
|
||||
return Grid::ComplexF(u128.f[0], u128.f[1]);
|
||||
}
|
||||
//Real float Reduce
|
||||
template<>
|
||||
inline Grid::RealF Reduce<Grid::RealF, __m128>::operator()(__m128 in){
|
||||
// FIXME Hack
|
||||
const Grid::RealF * ptr = (const Grid::RealF *) ∈
|
||||
Grid::RealF ret = 0;
|
||||
for(int i=0;i< 4 ;i++){ // 4 number of simd lanes for float
|
||||
ret = ret+ptr[i];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
//Complex double Reduce
|
||||
template<>
|
||||
inline Grid::ComplexD Reduce<Grid::ComplexD, __m128d>::operator()(__m128d in){
|
||||
printf("Reduce : Missing good complex double implementation -> FIX\n");
|
||||
return Grid::ComplexD(in[0], in[1]); // inefficient
|
||||
}
|
||||
|
||||
//Real double Reduce
|
||||
template<>
|
||||
inline Grid::RealD Reduce<Grid::RealD, __m128d>::operator()(__m128d in){
|
||||
// FIXME Hack
|
||||
const Grid::RealD * ptr =(const Grid::RealD *) ∈
|
||||
Grid::RealD ret = 0;
|
||||
for(int i=0;i< 2 ;i++){// 2 number of simd lanes for float
|
||||
ret = ret+ptr[i];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
//Integer Reduce
|
||||
template<>
|
||||
inline Integer Reduce<Integer, __m128i>::operator()(__m128i in){
|
||||
// FIXME unimplemented
|
||||
printf("Reduce : Missing integer implementation -> FIX\n");
|
||||
assert(0);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Here assign types
|
||||
namespace Grid {
|
||||
typedef __m128 SIMD_Ftype; // Single precision type
|
||||
@ -180,15 +288,21 @@ namespace Grid {
|
||||
typedef __m128i SIMD_Itype; // Integer type
|
||||
|
||||
|
||||
// Function names
|
||||
typedef Optimization::Vsplat VsplatSIMD;
|
||||
typedef Optimization::Vstore VstoreSIMD;
|
||||
// Function name aliases
|
||||
typedef Optimization::Vsplat VsplatSIMD;
|
||||
typedef Optimization::Vstore VstoreSIMD;
|
||||
typedef Optimization::Vset VsetSIMD;
|
||||
typedef Optimization::Vstream VstreamSIMD;
|
||||
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
|
||||
|
||||
|
||||
// Arithmetic operations
|
||||
typedef Optimization::Sum SumSIMD;
|
||||
typedef Optimization::Sub SubSIMD;
|
||||
typedef Optimization::Mult MultSIMD;
|
||||
typedef Optimization::MultComplex MultComplexSIMD;
|
||||
typedef Optimization::Vset VsetSIMD;
|
||||
typedef Optimization::Conj ConjSIMD;
|
||||
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
||||
typedef Optimization::TimesI TimesISIMD;
|
||||
|
||||
}
|
||||
|
@ -2,7 +2,7 @@
|
||||
/*! @file Grid_vector_types.h
|
||||
@brief Defines templated class Grid_simd to deal with inner vector types
|
||||
*/
|
||||
// Time-stamp: <2015-05-19 17:20:36 neo>
|
||||
// Time-stamp: <2015-05-20 17:21:52 neo>
|
||||
//---------------------------------------------------------------------------
|
||||
#ifndef GRID_VECTOR_TYPES
|
||||
#define GRID_VECTOR_TYPES
|
||||
@ -22,6 +22,16 @@ namespace Grid {
|
||||
typedef T type;
|
||||
};
|
||||
|
||||
// type alias used to simplify the syntax of std::enable_if
|
||||
template <typename T> using Invoke =
|
||||
typename T::type;
|
||||
template <typename Condition, typename ReturnType> using EnableIf =
|
||||
Invoke<std::enable_if<Condition::value, ReturnType>>;
|
||||
template <typename Condition, typename ReturnType> using NotEnableIf =
|
||||
Invoke<std::enable_if<!Condition::value, ReturnType>>;
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Check for complexity with type traits
|
||||
template <typename T>
|
||||
@ -93,31 +103,32 @@ namespace Grid {
|
||||
// Initialise to 1,0,i for the correct types
|
||||
///////////////////////////////////////////////
|
||||
// if not complex overload here
|
||||
template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 >
|
||||
template < class S = Scalar_type, NotEnableIf<is_complex < S >,int> = 0 >
|
||||
friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0); }
|
||||
template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 >
|
||||
template < class S = Scalar_type, NotEnableIf<is_complex < S >,int> = 0 >
|
||||
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0); }
|
||||
|
||||
// overload for complex type
|
||||
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 >
|
||||
// For complex types
|
||||
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||
friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0,0.0); }
|
||||
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 >
|
||||
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0,0.0); }// use xor?
|
||||
|
||||
// For integral type
|
||||
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
|
||||
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||
friend inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,0.0,1.0);}
|
||||
|
||||
// For integral types
|
||||
template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
|
||||
friend inline void vone(Grid_simd &ret) { vsplat(ret,1); }
|
||||
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
|
||||
template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
|
||||
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0); }
|
||||
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
|
||||
template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
|
||||
friend inline void vtrue (Grid_simd &ret){vsplat(ret,0xFFFFFFFF);}
|
||||
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
|
||||
template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
|
||||
friend inline void vfalse(vInteger &ret){vsplat(ret,0);}
|
||||
|
||||
|
||||
// do not compile if real or integer, send an error message from the compiler
|
||||
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 >
|
||||
friend inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,0.0,1.0);}
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////
|
||||
// Arithmetic operator overloads +,-,*
|
||||
@ -137,7 +148,7 @@ namespace Grid {
|
||||
};
|
||||
|
||||
// Distinguish between complex types and others
|
||||
template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 >
|
||||
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||
friend inline Grid_simd operator * (Grid_simd a, Grid_simd b)
|
||||
{
|
||||
Grid_simd ret;
|
||||
@ -146,7 +157,7 @@ namespace Grid {
|
||||
};
|
||||
|
||||
// Real/Integer types
|
||||
template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 >
|
||||
template < class S = Scalar_type, NotEnableIf<is_complex < S >, int> = 0 >
|
||||
friend inline Grid_simd operator * (Grid_simd a, Grid_simd b)
|
||||
{
|
||||
Grid_simd ret;
|
||||
@ -155,8 +166,6 @@ namespace Grid {
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// FIXME: gonna remove these load/store, get, set, prefetch
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
@ -169,14 +178,14 @@ namespace Grid {
|
||||
///////////////////////
|
||||
// overload if complex
|
||||
template < class S = Scalar_type >
|
||||
friend inline void vsplat(Grid_simd &ret, typename std::enable_if< is_complex < S >::value, S>::type c){
|
||||
friend inline void vsplat(Grid_simd &ret, EnableIf<is_complex < S >, S> c){
|
||||
Real a = real(c);
|
||||
Real b = imag(c);
|
||||
vsplat(ret,a,b);
|
||||
}
|
||||
|
||||
// this only for the complex version
|
||||
template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 >
|
||||
// this is only for the complex version
|
||||
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||
friend inline void vsplat(Grid_simd &ret,Real a, Real b){
|
||||
ret.v = binary<Vector_type>(a, b, VsplatSIMD());
|
||||
}
|
||||
@ -186,22 +195,45 @@ namespace Grid {
|
||||
ret.v = unary<Vector_type>(a, VsplatSIMD());
|
||||
}
|
||||
|
||||
|
||||
///////////////////////
|
||||
// Vstore
|
||||
///////////////////////
|
||||
friend inline void vstore(const Grid_simd &ret, Scalar_type *a){
|
||||
binary<void>(ret.v, (Real*)a, VstoreSIMD());
|
||||
}
|
||||
|
||||
///////////////////////
|
||||
// Vstream
|
||||
///////////////////////
|
||||
friend inline void vstream(Grid_simd &out,const Grid_simd &in){
|
||||
binary<void>(out.v, in.v, VstreamSIMD());
|
||||
}
|
||||
|
||||
template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
|
||||
friend inline void vstream(Grid_simd &out,const Grid_simd &in){
|
||||
out=in;
|
||||
}
|
||||
|
||||
///////////////////////
|
||||
// Vprefetch
|
||||
///////////////////////
|
||||
friend inline void vprefetch(const Grid_simd &v)
|
||||
{
|
||||
_mm_prefetch((const char*)&v.v,_MM_HINT_T0);
|
||||
}
|
||||
|
||||
|
||||
///////////////////////
|
||||
// Reduce
|
||||
///////////////////////
|
||||
friend inline Scalar_type Reduce(const Grid_simd & in)
|
||||
{
|
||||
// FIXME add operator
|
||||
return unary<Scalar_type>(in.v, ReduceSIMD<Scalar_type, Vector_type>());
|
||||
}
|
||||
|
||||
////////////////////////////
|
||||
// opreator scalar * simd
|
||||
////////////////////////////
|
||||
friend inline Grid_simd operator * (const Scalar_type &a, Grid_simd b){
|
||||
Grid_simd va;
|
||||
vsplat(va,a);
|
||||
@ -214,25 +246,63 @@ namespace Grid {
|
||||
///////////////////////
|
||||
// Conjugate
|
||||
///////////////////////
|
||||
|
||||
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||
friend inline Grid_simd conj(const Grid_simd &in){
|
||||
Grid_simd ret ; vzero(ret);
|
||||
// FIXME add operator
|
||||
Grid_simd ret ;
|
||||
ret.v = unary<Vector_type>(in.v, ConjSIMD());
|
||||
return ret;
|
||||
}
|
||||
template < class S = Scalar_type, NotEnableIf<is_complex < S >, int> = 0 >
|
||||
friend inline Grid_simd conj(const Grid_simd &in){
|
||||
return in; // for real objects
|
||||
}
|
||||
|
||||
|
||||
///////////////////////
|
||||
// timesMinusI
|
||||
///////////////////////
|
||||
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||
friend inline void timesMinusI( Grid_simd &ret,const Grid_simd &in){
|
||||
ret.v = binary<Vector_type>(in.v, ret.v, TimesMinusISIMD());
|
||||
}
|
||||
|
||||
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||
friend inline Grid_simd timesMinusI(const Grid_simd &in){
|
||||
Grid_simd ret;
|
||||
vzero(ret);
|
||||
// FIXME add operator
|
||||
timesMinusI(ret,in);
|
||||
return ret;
|
||||
}
|
||||
friend inline Grid_simd timesI(const Grid_simd &in){
|
||||
Grid_simd ret; vzero(ret);
|
||||
// FIXME add operator
|
||||
return ret;
|
||||
|
||||
template < class S = Scalar_type, NotEnableIf<is_complex < S >, int> = 0 >
|
||||
friend inline Grid_simd timesMinusI(const Grid_simd &in){
|
||||
return in;
|
||||
}
|
||||
|
||||
|
||||
///////////////////////
|
||||
// timesI
|
||||
///////////////////////
|
||||
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||
friend inline void timesI(Grid_simd &ret,const Grid_simd &in){
|
||||
ret.v = binary<Vector_type>(in.v, ret.v, TimesISIMD());
|
||||
}
|
||||
|
||||
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||
friend inline Grid_simd timesI(const Grid_simd &in){
|
||||
Grid_simd ret;
|
||||
timesI(ret,in);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template < class S = Scalar_type, NotEnableIf<is_complex < S >, int> = 0 >
|
||||
friend inline Grid_simd timesI(const Grid_simd &in){
|
||||
return in;
|
||||
}
|
||||
|
||||
|
||||
///////////////////////
|
||||
// Unary negation
|
||||
///////////////////////
|
||||
friend inline Grid_simd operator -(const Grid_simd &r) {
|
||||
vComplexF ret;
|
||||
vzero(ret);
|
||||
@ -256,41 +326,22 @@ namespace Grid {
|
||||
|
||||
|
||||
|
||||
friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
|
||||
{
|
||||
Gpermute<Grid_simd>(y,b,perm);
|
||||
}
|
||||
|
||||
/*
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// General permute; assumes vector length is same across
|
||||
// all subtypes; may not be a good assumption, but could
|
||||
// add the vector width as a template param for BG/Q for example
|
||||
////////////////////////////////////////////////////////////////////
|
||||
friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
|
||||
{
|
||||
Gpermute<Grid_simd>(y,b,perm);
|
||||
}
|
||||
friend inline void merge(Grid_simd &y,std::vector<Scalar_type *> &extracted)
|
||||
{
|
||||
Gmerge<Grid_simd,Scalar_type >(y,extracted);
|
||||
}
|
||||
friend inline void extract(const Grid_simd &y,std::vector<Scalar_type *> &extracted)
|
||||
{
|
||||
Gextract<Grid_simd,Scalar_type>(y,extracted);
|
||||
}
|
||||
friend inline void merge(Grid_simd &y,std::vector<Scalar_type > &extracted)
|
||||
{
|
||||
Gmerge<Grid_simd,Scalar_type >(y,extracted);
|
||||
}
|
||||
friend inline void extract(const Grid_simd &y,std::vector<Scalar_type > &extracted)
|
||||
{
|
||||
Gextract<Grid_simd,Scalar_type>(y,extracted);
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
|
||||
};// end of Grid_simd class definition
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
template<class scalar_type, class vector_type >
|
||||
inline Grid_simd< scalar_type, vector_type> innerProduct(const Grid_simd< scalar_type, vector_type> & l, const Grid_simd< scalar_type, vector_type> & r)
|
||||
{
|
||||
@ -314,7 +365,7 @@ namespace Grid {
|
||||
}
|
||||
|
||||
|
||||
// Define available types (now change names to avoid clashing)
|
||||
// Define available types (now change names to avoid clashing with the rest of the code)
|
||||
|
||||
typedef Grid_simd< float , SIMD_Ftype > MyRealF;
|
||||
typedef Grid_simd< double , SIMD_Dtype > MyRealD;
|
||||
@ -323,6 +374,29 @@ namespace Grid {
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Temporary hack to keep independent from the rest of the code
|
||||
template<> struct isGridTensor<MyRealD > {
|
||||
static const bool value = false;
|
||||
static const bool notvalue = true;
|
||||
};
|
||||
template<> struct isGridTensor<MyRealF > {
|
||||
static const bool value = false;
|
||||
static const bool notvalue = true;
|
||||
};
|
||||
template<> struct isGridTensor<MyComplexD > {
|
||||
static const bool value = false;
|
||||
static const bool notvalue = true;
|
||||
};
|
||||
template<> struct isGridTensor<MyComplexF > {
|
||||
static const bool value = false;
|
||||
static const bool notvalue = true;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -161,30 +161,40 @@ int main (int argc, char ** argv)
|
||||
///////// Tests the new class Grid_simd
|
||||
std::complex<double> ctest(3.0,2.0);
|
||||
std::complex<float> ctestf(3.0,2.0);
|
||||
MyComplexF TestMe1(1.0); // fill real part
|
||||
MyComplexF TestMe1(1.0); // fills only real part
|
||||
MyComplexD TestMe2(ctest);
|
||||
MyComplexD TestMe3(ctest);// compiler generate conversion of basic types
|
||||
//MyRealF TestMe5(ctest);// Must generate compiler error
|
||||
MyRealD TestMe4(2.0);
|
||||
MyRealD TestRe1(2.0);
|
||||
MyRealF TestRe2(3.0);
|
||||
|
||||
vone(TestRe2);
|
||||
|
||||
MyComplexF TestMe6(ctestf);
|
||||
MyComplexF TestMe7(ctestf);
|
||||
|
||||
MyComplexD TheSum= TestMe2*TestMe3;
|
||||
MyComplexF TheSumF= TestMe6*TestMe7;
|
||||
|
||||
|
||||
|
||||
double dsum[2];
|
||||
_mm_store_pd(dsum, TheSum.v);
|
||||
for (int i =0; i< 2; i++)
|
||||
printf("%f\n", dsum[i]);
|
||||
MyComplexD TheSumI = timesMinusI(TheSum);
|
||||
MyComplexF TheSumIF = timesMinusI(TheSumF);
|
||||
|
||||
float fsum[4];
|
||||
_mm_store_ps(fsum, TheSumF.v);
|
||||
for (int i =0; i< 4; i++)
|
||||
printf("%f\n", fsum[i]);
|
||||
|
||||
vstore(TheSum, &ctest);
|
||||
vstore(TheSumI, &ctest);
|
||||
std::complex<float> sum = Reduce(TheSumF);
|
||||
std::cout << ctest<< std::endl;
|
||||
std::cout << sum<< std::endl;
|
||||
|
||||
#endif
|
||||
///////////////////////
|
||||
|
||||
|
165
tests/Grid_simd_new.cc
Normal file
165
tests/Grid_simd_new.cc
Normal file
@ -0,0 +1,165 @@
|
||||
#include <Grid.h>
|
||||
#include "simd/Grid_vector_types.h"
|
||||
#include <parallelIO/GridNerscIO.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
class funcPlus {
|
||||
public:
|
||||
funcPlus() {};
|
||||
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1+i2;}
|
||||
std::string name(void) const { return std::string("Plus"); }
|
||||
};
|
||||
class funcMinus {
|
||||
public:
|
||||
funcMinus() {};
|
||||
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1-i2;}
|
||||
std::string name(void) const { return std::string("Minus"); }
|
||||
};
|
||||
class funcTimes {
|
||||
public:
|
||||
funcTimes() {};
|
||||
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1*i2;}
|
||||
std::string name(void) const { return std::string("Times"); }
|
||||
};
|
||||
class funcConj {
|
||||
public:
|
||||
funcConj() {};
|
||||
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = conj(i1);}
|
||||
std::string name(void) const { return std::string("Conj"); }
|
||||
};
|
||||
class funcAdj {
|
||||
public:
|
||||
funcAdj() {};
|
||||
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = adj(i1);}
|
||||
std::string name(void) const { return std::string("Adj"); }
|
||||
};
|
||||
|
||||
class funcTimesI {
|
||||
public:
|
||||
funcTimesI() {};
|
||||
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesI(i1);}
|
||||
std::string name(void) const { return std::string("timesI"); }
|
||||
};
|
||||
|
||||
class funcTimesMinusI {
|
||||
public:
|
||||
funcTimesMinusI() {};
|
||||
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesMinusI(i1);}
|
||||
std::string name(void) const { return std::string("timesMinusI"); }
|
||||
};
|
||||
|
||||
template<class scal, class vec,class functor >
|
||||
void Tester(const functor &func)
|
||||
{
|
||||
GridSerialRNG sRNG;
|
||||
sRNG.SeedRandomDevice();
|
||||
|
||||
int Nsimd = vec::Nsimd();
|
||||
|
||||
std::vector<scal> input1(Nsimd);
|
||||
std::vector<scal> input2(Nsimd);
|
||||
std::vector<scal> result(Nsimd);
|
||||
std::vector<scal> reference(Nsimd);
|
||||
|
||||
std::vector<vec,alignedAllocator<vec> > buf(3);
|
||||
vec & v_input1 = buf[0];
|
||||
vec & v_input2 = buf[1];
|
||||
vec & v_result = buf[2];
|
||||
|
||||
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
random(sRNG,input1[i]);
|
||||
random(sRNG,input2[i]);
|
||||
random(sRNG,result[i]);
|
||||
}
|
||||
|
||||
merge<vec,scal>(v_input1,input1);
|
||||
merge<vec,scal>(v_input2,input2);
|
||||
merge<vec,scal>(v_result,result);
|
||||
|
||||
func(v_result,v_input1,v_input2);
|
||||
|
||||
for(int i=0;i<Nsimd;i++) {
|
||||
func(reference[i],input1[i],input2[i]);
|
||||
}
|
||||
|
||||
extract<vec,scal>(v_result,result);
|
||||
std::cout << " " << func.name()<<std::endl;
|
||||
|
||||
int ok=0;
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
if ( abs(reference[i]-result[i])>0){
|
||||
std::cout<< "*****" << std::endl;
|
||||
std::cout<< "["<<i<<"] "<< abs(reference[i]-result[i]) << " " <<reference[i]<< " " << result[i]<<std::endl;
|
||||
ok++;
|
||||
}
|
||||
}
|
||||
if ( ok==0 ) std::cout << " OK!" <<std::endl;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout = GridDefaultSimd(4,MyComplexF::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
std::vector<int> seeds({1,2,3,4});
|
||||
|
||||
// Insist that operations on random scalars gives
|
||||
// identical results to on vectors.
|
||||
|
||||
std::cout << "==================================="<< std::endl;
|
||||
std::cout << "Testing MyComplexF "<<std::endl;
|
||||
std::cout << "==================================="<< std::endl;
|
||||
|
||||
Tester<ComplexF,MyComplexF>(funcTimesI());
|
||||
Tester<ComplexF,MyComplexF>(funcTimesMinusI());
|
||||
Tester<ComplexF,MyComplexF>(funcPlus());
|
||||
Tester<ComplexF,MyComplexF>(funcMinus());
|
||||
Tester<ComplexF,MyComplexF>(funcTimes());
|
||||
Tester<ComplexF,MyComplexF>(funcConj());
|
||||
Tester<ComplexF,MyComplexF>(funcAdj());
|
||||
|
||||
std::cout << "==================================="<< std::endl;
|
||||
std::cout << "Testing MyComplexD "<<std::endl;
|
||||
std::cout << "==================================="<< std::endl;
|
||||
|
||||
|
||||
Tester<ComplexD,MyComplexD>(funcTimesI());
|
||||
Tester<ComplexD,MyComplexD>(funcTimesMinusI());
|
||||
Tester<ComplexD,MyComplexD>(funcPlus());
|
||||
Tester<ComplexD,MyComplexD>(funcMinus());
|
||||
Tester<ComplexD,MyComplexD>(funcTimes());
|
||||
Tester<ComplexD,MyComplexD>(funcConj());
|
||||
Tester<ComplexD,MyComplexD>(funcAdj());
|
||||
|
||||
std::cout << "==================================="<< std::endl;
|
||||
std::cout << "Testing MyRealF "<<std::endl;
|
||||
std::cout << "==================================="<< std::endl;
|
||||
|
||||
|
||||
Tester<RealF,MyRealF>(funcPlus());
|
||||
Tester<RealF,MyRealF>(funcMinus());
|
||||
Tester<RealF,MyRealF>(funcTimes());
|
||||
Tester<RealF,MyRealF>(funcAdj());
|
||||
|
||||
std::cout << "==================================="<< std::endl;
|
||||
std::cout << "Testing MyRealD "<<std::endl;
|
||||
std::cout << "==================================="<< std::endl;
|
||||
|
||||
Tester<RealD,MyRealD>(funcPlus());
|
||||
Tester<RealD,MyRealD>(funcMinus());
|
||||
Tester<RealD,MyRealD>(funcTimes());
|
||||
Tester<RealD,MyRealD>(funcAdj());
|
||||
|
||||
Grid_finalize();
|
||||
}
|
@ -5,7 +5,7 @@ AM_LDFLAGS = -L$(top_builddir)/lib
|
||||
#
|
||||
# Test code
|
||||
#
|
||||
bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_simd Grid_rng Grid_remez
|
||||
bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_simd Grid_rng Grid_remez Grid_simd_new
|
||||
|
||||
Grid_main_SOURCES = Grid_main.cc
|
||||
Grid_main_LDADD = -lGrid
|
||||
@ -30,3 +30,6 @@ Grid_stencil_LDADD = -lGrid
|
||||
|
||||
Grid_simd_SOURCES = Grid_simd.cc
|
||||
Grid_simd_LDADD = -lGrid
|
||||
|
||||
Grid_simd_new_SOURCES = Grid_simd_new.cc
|
||||
Grid_simd_new_LDADD = -lGrid
|
||||
|
Loading…
Reference in New Issue
Block a user