mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-10 14:10:46 +01:00
Added integer reduce functionality
This commit is contained in:
parent
0933aeefd4
commit
7a53dc3715
@ -27,7 +27,7 @@ Author: neo <cossu@post.kek.jp>
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
/*
|
||||
|
||||
ARMv8 NEON intrinsics layer by
|
||||
@ -37,9 +37,6 @@ Author: neo <cossu@post.kek.jp>
|
||||
SFB/TRR55
|
||||
|
||||
*/
|
||||
//----------------------------------------------------------------------
|
||||
//#ifndef ARM_NEON
|
||||
//#define ARM_NEON
|
||||
|
||||
#ifndef GEN_SIMD_WIDTH
|
||||
#define GEN_SIMD_WIDTH 16u
|
||||
@ -85,11 +82,11 @@ namespace Optimization {
|
||||
double tmp[2]={a,b};
|
||||
return vld1q_f64(tmp);
|
||||
}
|
||||
//Real double // N:tbc
|
||||
//Real double
|
||||
inline float64x2_t operator()(double a){
  // Splat: vdupq_n_f64 copies the scalar `a` into both 64-bit lanes.
  const float64x2_t splat = vdupq_n_f64(a);
  return splat;
}
|
||||
//Integer // N:tbc
|
||||
//Integer
|
||||
inline uint32x4_t operator()(Integer a){
  // Splat: vdupq_n_u32 copies the scalar `a` into all four 32-bit lanes.
  const uint32x4_t splat = vdupq_n_u32(a);
  return splat;
}
|
||||
@ -127,33 +124,32 @@ namespace Optimization {
|
||||
// Nils: Vset untested; not used currently in Grid at all;
|
||||
// git commit 4a8c4ccfba1d05159348d21a9698028ea847e77b
|
||||
struct Vset{
|
||||
// Complex float // N:ok
|
||||
// Complex float
|
||||
inline float32x4_t operator()(Grid::ComplexF *a){
|
||||
float tmp[4]={a[1].imag(),a[1].real(),a[0].imag(),a[0].real()};
|
||||
return vld1q_f32(tmp);
|
||||
}
|
||||
// Complex double // N:ok
|
||||
// Complex double
|
||||
inline float64x2_t operator()(Grid::ComplexD *a){
|
||||
double tmp[2]={a[0].imag(),a[0].real()};
|
||||
return vld1q_f64(tmp);
|
||||
}
|
||||
// Real float // N:ok
|
||||
// Real float
|
||||
inline float32x4_t operator()(float *a){
|
||||
float tmp[4]={a[3],a[2],a[1],a[0]};
|
||||
return vld1q_f32(tmp);
|
||||
}
|
||||
// Real double // N:ok
|
||||
// Real double
|
||||
inline float64x2_t operator()(double *a){
|
||||
double tmp[2]={a[1],a[0]};
|
||||
return vld1q_f64(tmp);
|
||||
}
|
||||
// Integer // N:ok
|
||||
// Integer
|
||||
inline uint32x4_t operator()(Integer *a){
|
||||
return vld1q_dup_u32(a);
|
||||
}
|
||||
};
|
||||
|
||||
// N:leaving as is
|
||||
template <typename Out_type, typename In_type>
|
||||
struct Reduce{
|
||||
//Need templated class to overload output type
|
||||
@ -424,11 +420,6 @@ namespace Optimization {
|
||||
}
|
||||
}
|
||||
|
||||
// working, but no restriction on n
|
||||
// template<int n> static inline float32x4_t tRotate(float32x4_t in){ return vextq_f32(in,in,n); };
|
||||
// template<int n> static inline float64x2_t tRotate(float64x2_t in){ return vextq_f64(in,in,n); };
|
||||
|
||||
// restriction on n
|
||||
// Rotates the four float lanes of `in` by n (reduced modulo 4) using
// vextq_f32 on the vector paired with itself.
template<int n>
static inline float32x4_t tRotate(float32x4_t in){
  // vextq_f32 needs a compile-time lane offset; n % 4 keeps it below 4.
  constexpr int shift = n % 4;
  return vextq_f32(in, in, shift);
}
|
||||
// Rotates the two double lanes of `in` by n (reduced modulo 2) using
// vextq_f64 on the vector paired with itself.
template<int n>
static inline float64x2_t tRotate(float64x2_t in){
  // vextq_f64 needs a compile-time lane offset; n % 2 keeps it below 2.
  constexpr int shift = n % 2;
  return vextq_f64(in, in, shift);
}
|
||||
|
||||
@ -550,7 +541,7 @@ namespace Optimization {
|
||||
|
||||
|
||||
//Complex double Reduce
|
||||
template<> // N:by Boyle
|
||||
template<>
|
||||
inline Grid::ComplexD Reduce<Grid::ComplexD, float64x2_t>::operator()(float64x2_t in){
|
||||
u128d conv; conv.v = in;
|
||||
return Grid::ComplexD(conv.f[0],conv.f[1]);
|
||||
@ -565,9 +556,7 @@ namespace Optimization {
|
||||
//Integer Reduce
|
||||
template<>
|
||||
inline Integer Reduce<Integer, uint32x4_t>::operator()(uint32x4_t in){
|
||||
// FIXME unimplemented
|
||||
printf("Reduce : Missing integer implementation -> FIX\n");
|
||||
assert(0);
|
||||
return vaddvq_u32(in);
|
||||
}
|
||||
}
|
||||
|
||||
@ -607,5 +596,3 @@ namespace Optimization {
|
||||
typedef Optimization::TimesI TimesISIMD;
|
||||
|
||||
}
|
||||
|
||||
//#endif // ARM_NEON
|
||||
|
Loading…
x
Reference in New Issue
Block a user