1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

NAMESPACE

This commit is contained in:
paboyle 2018-01-12 18:24:16 +00:00
parent 6ab744c720
commit ec89714cce

View File

@ -25,8 +25,8 @@
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
/*
@ -45,29 +45,29 @@
#include "Grid_generic_types.h"
#include <arm_neon.h>
namespace Grid {
namespace Optimization {
NAMESPACE_BEGIN(Grid);
NAMESPACE_BEGIN(Optimization);
template<class vtype>
union uconv {
template<class vtype>
union uconv {
float32x4_t f;
vtype v;
};
union u128f {
};
union u128f {
float32x4_t v;
float f[4];
};
union u128d {
};
union u128d {
float64x2_t v;
double f[2];
};
// half precision
union u128h {
};
// half precision
union u128h {
float16x8_t v;
uint16_t f[8];
};
};
struct Vsplat{
struct Vsplat{
//Complex float
inline float32x4_t operator()(float a, float b){
float tmp[4]={a,b,a,b};
@ -90,9 +90,9 @@ namespace Optimization {
inline uint32x4_t operator()(Integer a){
return vdupq_n_u32(a);
}
};
};
struct Vstore{
struct Vstore{
//Float
inline void operator()(float32x4_t a, float* F){
vst1q_f32(F, a);
@ -106,9 +106,9 @@ namespace Optimization {
vst1q_u32(I, a);
}
};
};
struct Vstream{ // N:equivalents to _mm_stream_p* in NEON?
struct Vstream{ // N:equivalents to _mm_stream_p* in NEON?
//Float // N:generic
inline void operator()(float * a, float32x4_t b){
memcpy(a,&b,4*sizeof(float));
@ -117,13 +117,11 @@ namespace Optimization {
inline void operator()(double * a, float64x2_t b){
memcpy(a,&b,2*sizeof(double));
}
};
};
// Nils: Vset untested; not used currently in Grid at all;
// git commit 4a8c4ccfba1d05159348d21a9698028ea847e77b
struct Vset{
// Nils: Vset untested; not used currently in Grid at all;
// git commit 4a8c4ccfba1d05159348d21a9698028ea847e77b
struct Vset{
// Complex float
inline float32x4_t operator()(Grid::ComplexF *a){
float tmp[4]={a[1].imag(),a[1].real(),a[0].imag(),a[0].real()};
@ -148,10 +146,10 @@ namespace Optimization {
inline uint32x4_t operator()(Integer *a){
return vld1q_dup_u32(a);
}
};
};
template <typename Out_type, typename In_type>
struct Reduce{
template <typename Out_type, typename In_type>
struct Reduce{
//Need templated class to overload output type
//General form must generate error if compiled
inline Out_type operator()(In_type in){
@ -159,12 +157,12 @@ namespace Optimization {
exit(1);
return 0;
}
};
};
/////////////////////////////////////////////////////
// Arithmetic operations
/////////////////////////////////////////////////////
struct Sum{
/////////////////////////////////////////////////////
// Arithmetic operations
/////////////////////////////////////////////////////
struct Sum{
//Complex/Real float
inline float32x4_t operator()(float32x4_t a, float32x4_t b){
return vaddq_f32(a,b);
@ -177,9 +175,9 @@ namespace Optimization {
inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){
return vaddq_u32(a,b);
}
};
};
struct Sub{
struct Sub{
//Complex/Real float
inline float32x4_t operator()(float32x4_t a, float32x4_t b){
return vsubq_f32(a,b);
@ -192,9 +190,9 @@ namespace Optimization {
inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){
return vsubq_u32(a,b);
}
};
};
struct MultRealPart{
struct MultRealPart{
inline float32x4_t operator()(float32x4_t a, float32x4_t b){
float32x4_t re = vtrn1q_f32(a, a);
return vmulq_f32(re, b);
@ -203,9 +201,9 @@ namespace Optimization {
float64x2_t re = vzip1q_f64(a, a);
return vmulq_f64(re, b);
}
};
};
struct MaddRealPart{
struct MaddRealPart{
inline float32x4_t operator()(float32x4_t a, float32x4_t b, float32x4_t c){
float32x4_t re = vtrn1q_f32(a, a);
return vfmaq_f32(c, re, b);
@ -214,9 +212,9 @@ namespace Optimization {
float64x2_t re = vzip1q_f64(a, a);
return vfmaq_f64(c, re, b);
}
};
};
struct Div{
struct Div{
// Real float
inline float32x4_t operator()(float32x4_t a, float32x4_t b){
return vdivq_f32(a, b);
@ -225,9 +223,9 @@ namespace Optimization {
inline float64x2_t operator()(float64x2_t a, float64x2_t b){
return vdivq_f64(a, b);
}
};
};
struct MultComplex{
struct MultComplex{
// Complex float
inline float32x4_t operator()(float32x4_t a, float32x4_t b){
@ -275,9 +273,9 @@ namespace Optimization {
// r5 = vmulq_f64(r0, a);
// return vaddq_f64(r4, r5);
}
};
};
struct Mult{
struct Mult{
// Real float
inline float32x4_t mac(float32x4_t a, float32x4_t b, float32x4_t c){
//return vaddq_f32(vmulq_f32(b,c),a);
@ -298,9 +296,9 @@ namespace Optimization {
inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){
return vmulq_u32(a,b);
}
};
};
struct Conj{
struct Conj{
// Complex single
inline float32x4_t operator()(float32x4_t in){
// ar ai br bi -> ar -ai br -bi
@ -318,9 +316,9 @@ namespace Optimization {
return vextq_f64(r0, r1, 1); // ar -ai
}
// do not define for integer input
};
};
struct TimesMinusI{
struct TimesMinusI{
//Complex single
inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
// ar ai br bi -> ai -ar ai -br
@ -336,9 +334,9 @@ namespace Optimization {
tmp = vnegq_f64(in);
return vextq_f64(in, tmp, 1);
}
};
};
struct TimesI{
struct TimesI{
//Complex single
inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
// ar ai br bi -> -ai ar -bi br
@ -354,9 +352,9 @@ namespace Optimization {
tmp = vnegq_f64(in);
return vextq_f64(tmp, in, 1);
}
};
};
struct Permute{
struct Permute{
static inline float32x4_t Permute0(float32x4_t in){ // N:ok
// AB CD -> CD AB
@ -387,9 +385,9 @@ namespace Optimization {
return in;
};
};
};
struct Rotate{
struct Rotate{
static inline float32x4_t rotate(float32x4_t in,int n){ // N:ok
switch(n){
@ -423,9 +421,9 @@ namespace Optimization {
template<int n> static inline float32x4_t tRotate(float32x4_t in){ return vextq_f32(in,in,n%4); };
template<int n> static inline float64x2_t tRotate(float64x2_t in){ return vextq_f64(in,in,n%2); };
};
};
struct PrecisionChange {
struct PrecisionChange {
static inline float16x8_t StoH (const float32x4_t &a,const float32x4_t &b) {
float16x4_t h = vcvt_f16_f32(a);
@ -464,12 +462,12 @@ namespace Optimization {
StoD(s1, a, b);
StoD(s2, c, d);
}
};
};
//////////////////////////////////////////////
// Exchange support
//////////////////////////////////////////////
// Exchange support
struct Exchange{
struct Exchange{
static inline void Exchange0(float32x4_t &out1,float32x4_t &out2,float32x4_t in1,float32x4_t in2){
// in1: ABCD -> out1: ABEF
// in2: EFGH -> out2: CDGH
@ -518,82 +516,80 @@ namespace Optimization {
assert(0);
return;
};
};
};
//////////////////////////////////////////////
// Some Template specialization
//////////////////////////////////////////////
// Some Template specialization
//Complex float Reduce
template<>
inline Grid::ComplexF Reduce<Grid::ComplexF, float32x4_t>::operator()(float32x4_t in){
//Complex float Reduce
template<>
inline Grid::ComplexF Reduce<Grid::ComplexF, float32x4_t>::operator()(float32x4_t in){
float32x4_t v1; // two complex
v1 = Optimization::Permute::Permute0(in);
v1 = vaddq_f32(v1,in);
u128f conv; conv.v=v1;
return Grid::ComplexF(conv.f[0],conv.f[1]);
}
//Real float Reduce
template<>
inline Grid::RealF Reduce<Grid::RealF, float32x4_t>::operator()(float32x4_t in){
}
//Real float Reduce
template<>
inline Grid::RealF Reduce<Grid::RealF, float32x4_t>::operator()(float32x4_t in){
return vaddvq_f32(in);
}
}
//Complex double Reduce
template<>
inline Grid::ComplexD Reduce<Grid::ComplexD, float64x2_t>::operator()(float64x2_t in){
//Complex double Reduce
template<>
inline Grid::ComplexD Reduce<Grid::ComplexD, float64x2_t>::operator()(float64x2_t in){
u128d conv; conv.v = in;
return Grid::ComplexD(conv.f[0],conv.f[1]);
}
//Real double Reduce
template<>
inline Grid::RealD Reduce<Grid::RealD, float64x2_t>::operator()(float64x2_t in){
return vaddvq_f64(in);
}
//Integer Reduce
template<>
inline Integer Reduce<Integer, uint32x4_t>::operator()(uint32x4_t in){
return vaddvq_u32(in);
}
}
//Real double Reduce
template<>
inline Grid::RealD Reduce<Grid::RealD, float64x2_t>::operator()(float64x2_t in){
return vaddvq_f64(in);
}
//Integer Reduce
template<>
inline Integer Reduce<Integer, uint32x4_t>::operator()(uint32x4_t in){
return vaddvq_u32(in);
}
NAMESPACE_END(Optimization);
//////////////////////////////////////////////////////////////////////////////////////
// Here assign types
// typedef Optimization::vech SIMD_Htype; // Reduced precision type
typedef float16x8_t SIMD_Htype; // Half precision type
typedef float32x4_t SIMD_Ftype; // Single precision type
typedef float64x2_t SIMD_Dtype; // Double precision type
typedef uint32x4_t SIMD_Itype; // Integer type
typedef float16x8_t SIMD_Htype; // Half precision type
typedef float32x4_t SIMD_Ftype; // Single precision type
typedef float64x2_t SIMD_Dtype; // Double precision type
typedef uint32x4_t SIMD_Itype; // Integer type
inline void v_prefetch0(int size, const char *ptr){}; // prefetch utilities
inline void prefetch_HINT_T0(const char *ptr){};
inline void v_prefetch0(int size, const char *ptr){}; // prefetch utilities
inline void prefetch_HINT_T0(const char *ptr){};
// Function name aliases
typedef Optimization::Vsplat VsplatSIMD;
typedef Optimization::Vstore VstoreSIMD;
typedef Optimization::Vset VsetSIMD;
typedef Optimization::Vstream VstreamSIMD;
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
// Function name aliases
typedef Optimization::Vsplat VsplatSIMD;
typedef Optimization::Vstore VstoreSIMD;
typedef Optimization::Vset VsetSIMD;
typedef Optimization::Vstream VstreamSIMD;
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
// Arithmetic operations
typedef Optimization::Sum SumSIMD;
typedef Optimization::Sub SubSIMD;
typedef Optimization::Div DivSIMD;
typedef Optimization::Mult MultSIMD;
typedef Optimization::MultComplex MultComplexSIMD;
typedef Optimization::MultRealPart MultRealPartSIMD;
typedef Optimization::MaddRealPart MaddRealPartSIMD;
typedef Optimization::Conj ConjSIMD;
typedef Optimization::TimesMinusI TimesMinusISIMD;
typedef Optimization::TimesI TimesISIMD;
// Arithmetic operations
typedef Optimization::Sum SumSIMD;
typedef Optimization::Sub SubSIMD;
typedef Optimization::Div DivSIMD;
typedef Optimization::Mult MultSIMD;
typedef Optimization::MultComplex MultComplexSIMD;
typedef Optimization::MultRealPart MultRealPartSIMD;
typedef Optimization::MaddRealPart MaddRealPartSIMD;
typedef Optimization::Conj ConjSIMD;
typedef Optimization::TimesMinusI TimesMinusISIMD;
typedef Optimization::TimesI TimesISIMD;
}
NAMESPACE_END(Grid);