mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Generic for GPU needs accelerator markup of functions
This commit is contained in:
parent
1c797deb04
commit
408b868475
@ -36,7 +36,7 @@ NAMESPACE_BEGIN(Optimization);
|
|||||||
struct Vsplat{
|
struct Vsplat{
|
||||||
// Complex
|
// Complex
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(T a, T b){
|
accelerator_inline vec<T> operator()(T a, T b){
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
|
|
||||||
VECTOR_FOR(i, W<T>::r, 2)
|
VECTOR_FOR(i, W<T>::r, 2)
|
||||||
@ -50,7 +50,7 @@ struct Vsplat{
|
|||||||
|
|
||||||
// Real
|
// Real
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(T a){
|
accelerator_inline vec<T> operator()(T a){
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
|
|
||||||
VECTOR_FOR(i, W<T>::r, 1)
|
VECTOR_FOR(i, W<T>::r, 1)
|
||||||
@ -65,7 +65,7 @@ struct Vsplat{
|
|||||||
struct Vstore{
|
struct Vstore{
|
||||||
// Real
|
// Real
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline void operator()(vec<T> a, T *D){
|
accelerator_inline void operator()(vec<T> a, T *D){
|
||||||
*((vec<T> *)D) = a;
|
*((vec<T> *)D) = a;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -73,7 +73,7 @@ struct Vstore{
|
|||||||
struct Vstream{
|
struct Vstream{
|
||||||
// Real
|
// Real
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline void operator()(T * a, vec<T> b){
|
accelerator_inline void operator()(T * a, vec<T> b){
|
||||||
*((vec<T> *)a) = b;
|
*((vec<T> *)a) = b;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -81,7 +81,7 @@ struct Vstream{
|
|||||||
struct Vset{
|
struct Vset{
|
||||||
// Complex
|
// Complex
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(std::complex<T> *a){
|
accelerator_inline vec<T> operator()(std::complex<T> *a){
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
|
|
||||||
VECTOR_FOR(i, W<T>::c, 1)
|
VECTOR_FOR(i, W<T>::c, 1)
|
||||||
@ -95,7 +95,7 @@ struct Vset{
|
|||||||
|
|
||||||
// Real
|
// Real
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(T *a){
|
accelerator_inline vec<T> operator()(T *a){
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
|
|
||||||
out = *((vec<T> *)a);
|
out = *((vec<T> *)a);
|
||||||
@ -110,7 +110,7 @@ struct Vset{
|
|||||||
struct Sum{
|
struct Sum{
|
||||||
// Complex/Real
|
// Complex/Real
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
accelerator_inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
|
|
||||||
VECTOR_FOR(i, W<T>::r, 1)
|
VECTOR_FOR(i, W<T>::r, 1)
|
||||||
@ -125,7 +125,7 @@ struct Sum{
|
|||||||
struct Sub{
|
struct Sub{
|
||||||
// Complex/Real
|
// Complex/Real
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
accelerator_inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
|
|
||||||
VECTOR_FOR(i, W<T>::r, 1)
|
VECTOR_FOR(i, W<T>::r, 1)
|
||||||
@ -140,7 +140,7 @@ struct Sub{
|
|||||||
struct Mult{
|
struct Mult{
|
||||||
// Real
|
// Real
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
accelerator_inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
|
|
||||||
VECTOR_FOR(i, W<T>::r, 1)
|
VECTOR_FOR(i, W<T>::r, 1)
|
||||||
@ -158,7 +158,7 @@ struct Mult{
|
|||||||
|
|
||||||
struct MultRealPart{
|
struct MultRealPart{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
accelerator_inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
|
|
||||||
VECTOR_FOR(i, W<T>::c, 1)
|
VECTOR_FOR(i, W<T>::c, 1)
|
||||||
@ -172,7 +172,7 @@ struct MultRealPart{
|
|||||||
|
|
||||||
struct MaddRealPart{
|
struct MaddRealPart{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
|
accelerator_inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
|
|
||||||
VECTOR_FOR(i, W<T>::c, 1)
|
VECTOR_FOR(i, W<T>::c, 1)
|
||||||
@ -187,7 +187,7 @@ struct MaddRealPart{
|
|||||||
struct MultComplex{
|
struct MultComplex{
|
||||||
// Complex
|
// Complex
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
accelerator_inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
|
|
||||||
VECTOR_FOR(i, W<T>::c, 1)
|
VECTOR_FOR(i, W<T>::c, 1)
|
||||||
@ -204,7 +204,7 @@ struct MultComplex{
|
|||||||
struct Div{
|
struct Div{
|
||||||
// Real
|
// Real
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
accelerator_inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
|
|
||||||
VECTOR_FOR(i, W<T>::r, 1)
|
VECTOR_FOR(i, W<T>::r, 1)
|
||||||
@ -223,7 +223,7 @@ struct Div{
|
|||||||
struct Conj{
|
struct Conj{
|
||||||
// Complex
|
// Complex
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a){
|
accelerator_inline vec<T> operator()(vec<T> a){
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
|
|
||||||
VECTOR_FOR(i, W<T>::c, 1)
|
VECTOR_FOR(i, W<T>::c, 1)
|
||||||
@ -244,7 +244,7 @@ struct Conj{
|
|||||||
struct TimesMinusI{
|
struct TimesMinusI{
|
||||||
// Complex
|
// Complex
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
accelerator_inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
|
|
||||||
VECTOR_FOR(i, W<T>::c, 1)
|
VECTOR_FOR(i, W<T>::c, 1)
|
||||||
@ -265,7 +265,7 @@ struct TimesMinusI{
|
|||||||
struct TimesI{
|
struct TimesI{
|
||||||
// Complex
|
// Complex
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
accelerator_inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
|
|
||||||
VECTOR_FOR(i, W<T>::c, 1)
|
VECTOR_FOR(i, W<T>::c, 1)
|
||||||
@ -280,22 +280,23 @@ struct TimesI{
|
|||||||
#undef timesi
|
#undef timesi
|
||||||
|
|
||||||
struct PrecisionChange {
|
struct PrecisionChange {
|
||||||
static inline vech StoH (const vecf &a,const vecf &b) {
|
static accelerator_inline vech StoH (const vecf &a,const vecf &b) {
|
||||||
vech ret;
|
vech ret;
|
||||||
|
const int nf = W<float>::r;
|
||||||
#ifdef USE_FP16
|
#ifdef USE_FP16
|
||||||
vech *ha = (vech *)&a;
|
vech *ha = (vech *)&a;
|
||||||
vech *hb = (vech *)&b;
|
vech *hb = (vech *)&b;
|
||||||
const int nf = W<float>::r;
|
|
||||||
// VECTOR_FOR(i, nf,1){ ret.v[i] = ( (uint16_t *) &a.v[i])[1] ; }
|
// VECTOR_FOR(i, nf,1){ ret.v[i] = ( (uint16_t *) &a.v[i])[1] ; }
|
||||||
// VECTOR_FOR(i, nf,1){ ret.v[i+nf] = ( (uint16_t *) &b.v[i])[1] ; }
|
// VECTOR_FOR(i, nf,1){ ret.v[i+nf] = ( (uint16_t *) &b.v[i])[1] ; }
|
||||||
VECTOR_FOR(i, nf,1){ ret.v[i] = ha->v[2*i+1]; }
|
VECTOR_FOR(i, nf,1){ ret.v[i] = ha->v[2*i+1]; }
|
||||||
VECTOR_FOR(i, nf,1){ ret.v[i+nf] = hb->v[2*i+1]; }
|
VECTOR_FOR(i, nf,1){ ret.v[i+nf] = hb->v[2*i+1]; }
|
||||||
#else
|
#else
|
||||||
|
VECTOR_FOR(i, nf,1){ ret.v[i]=0; }
|
||||||
assert(0);
|
assert(0);
|
||||||
#endif
|
#endif
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
static inline void HtoS (vech h,vecf &sa,vecf &sb) {
|
static accelerator_inline void HtoS (vech h,vecf &sa,vecf &sb) {
|
||||||
#ifdef USE_FP16
|
#ifdef USE_FP16
|
||||||
const int nf = W<float>::r;
|
const int nf = W<float>::r;
|
||||||
const int nh = W<uint16_t>::r;
|
const int nh = W<uint16_t>::r;
|
||||||
@ -310,26 +311,25 @@ struct PrecisionChange {
|
|||||||
assert(0);
|
assert(0);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
static inline vecf DtoS (vecd a,vecd b) {
|
static accelerator_inline vecf DtoS (vecd a,vecd b) {
|
||||||
const int nd = W<double>::r;
|
const int nd = W<double>::r;
|
||||||
const int nf = W<float>::r;
|
|
||||||
vecf ret;
|
vecf ret;
|
||||||
VECTOR_FOR(i, nd,1){ ret.v[i] = a.v[i] ; }
|
VECTOR_FOR(i, nd,1){ ret.v[i] = a.v[i] ; }
|
||||||
VECTOR_FOR(i, nd,1){ ret.v[i+nd] = b.v[i] ; }
|
VECTOR_FOR(i, nd,1){ ret.v[i+nd] = b.v[i] ; }
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
static inline void StoD (vecf s,vecd &a,vecd &b) {
|
static accelerator_inline void StoD (vecf s,vecd &a,vecd &b) {
|
||||||
const int nd = W<double>::r;
|
const int nd = W<double>::r;
|
||||||
VECTOR_FOR(i, nd,1){ a.v[i] = s.v[i] ; }
|
VECTOR_FOR(i, nd,1){ a.v[i] = s.v[i] ; }
|
||||||
VECTOR_FOR(i, nd,1){ b.v[i] = s.v[i+nd] ; }
|
VECTOR_FOR(i, nd,1){ b.v[i] = s.v[i+nd] ; }
|
||||||
}
|
}
|
||||||
static inline vech DtoH (vecd a,vecd b,vecd c,vecd d) {
|
static accelerator_inline vech DtoH (vecd a,vecd b,vecd c,vecd d) {
|
||||||
vecf sa,sb;
|
vecf sa,sb;
|
||||||
sa = DtoS(a,b);
|
sa = DtoS(a,b);
|
||||||
sb = DtoS(c,d);
|
sb = DtoS(c,d);
|
||||||
return StoH(sa,sb);
|
return StoH(sa,sb);
|
||||||
}
|
}
|
||||||
static inline void HtoD (vech h,vecd &a,vecd &b,vecd &c,vecd &d) {
|
static accelerator_inline void HtoD (vech h,vecd &a,vecd &b,vecd &c,vecd &d) {
|
||||||
vecf sa,sb;
|
vecf sa,sb;
|
||||||
HtoS(h,sa,sb);
|
HtoS(h,sa,sb);
|
||||||
StoD(sa,a,b);
|
StoD(sa,a,b);
|
||||||
@ -342,7 +342,7 @@ struct PrecisionChange {
|
|||||||
struct Exchange{
|
struct Exchange{
|
||||||
|
|
||||||
template <typename T,int n>
|
template <typename T,int n>
|
||||||
static inline void ExchangeN(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
|
static accelerator_inline void ExchangeN(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
|
||||||
const int w = W<T>::r;
|
const int w = W<T>::r;
|
||||||
unsigned int mask = w >> (n + 1);
|
unsigned int mask = w >> (n + 1);
|
||||||
// std::cout << " Exchange "<<n<<" nsimd "<<w<<" mask 0x" <<std::hex<<mask<<std::dec<<std::endl;
|
// std::cout << " Exchange "<<n<<" nsimd "<<w<<" mask 0x" <<std::hex<<mask<<std::dec<<std::endl;
|
||||||
@ -356,19 +356,19 @@ struct Exchange{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static inline void Exchange0(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
|
static accelerator_inline void Exchange0(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
|
||||||
ExchangeN<T,0>(out1,out2,in1,in2);
|
ExchangeN<T,0>(out1,out2,in1,in2);
|
||||||
};
|
};
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static inline void Exchange1(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
|
static accelerator_inline void Exchange1(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
|
||||||
ExchangeN<T,1>(out1,out2,in1,in2);
|
ExchangeN<T,1>(out1,out2,in1,in2);
|
||||||
};
|
};
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static inline void Exchange2(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
|
static accelerator_inline void Exchange2(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
|
||||||
ExchangeN<T,2>(out1,out2,in1,in2);
|
ExchangeN<T,2>(out1,out2,in1,in2);
|
||||||
};
|
};
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static inline void Exchange3(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
|
static accelerator_inline void Exchange3(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
|
||||||
ExchangeN<T,3>(out1,out2,in1,in2);
|
ExchangeN<T,3>(out1,out2,in1,in2);
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
@ -385,7 +385,7 @@ struct Exchange{
|
|||||||
|
|
||||||
#define DECL_PERMUTE_N(n) \
|
#define DECL_PERMUTE_N(n) \
|
||||||
template <typename T> \
|
template <typename T> \
|
||||||
static inline vec<T> Permute##n(vec<T> in) { \
|
static accelerator_inline vec<T> Permute##n(vec<T> in) { \
|
||||||
vec<T> out; \
|
vec<T> out; \
|
||||||
perm(in.v, out.v, n, W<T>::r); \
|
perm(in.v, out.v, n, W<T>::r); \
|
||||||
return out; \
|
return out; \
|
||||||
@ -409,12 +409,12 @@ struct Permute{
|
|||||||
|
|
||||||
struct Rotate{
|
struct Rotate{
|
||||||
|
|
||||||
template <int n, typename T> static inline vec<T> tRotate(vec<T> in){
|
template <int n, typename T> static accelerator_inline vec<T> tRotate(vec<T> in){
|
||||||
return rotate(in, n);
|
return rotate(in, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static inline vec<T> rotate(vec<T> in, int n){
|
static accelerator_inline vec<T> rotate(vec<T> in, int n){
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
|
|
||||||
rot(in.v, out.v, n, W<T>::r);
|
rot(in.v, out.v, n, W<T>::r);
|
||||||
@ -435,7 +435,7 @@ template <typename Out_type, typename In_type>
|
|||||||
struct Reduce{
|
struct Reduce{
|
||||||
//Need templated class to overload output type
|
//Need templated class to overload output type
|
||||||
//General form must generate error if compiled
|
//General form must generate error if compiled
|
||||||
inline Out_type operator()(In_type in){
|
accelerator_inline Out_type operator()(In_type in){
|
||||||
printf("Error, using wrong Reduce function\n");
|
printf("Error, using wrong Reduce function\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
return 0;
|
return 0;
|
||||||
@ -444,7 +444,7 @@ struct Reduce{
|
|||||||
|
|
||||||
//Complex float Reduce
|
//Complex float Reduce
|
||||||
template <>
|
template <>
|
||||||
inline Grid::ComplexF Reduce<Grid::ComplexF, vecf>::operator()(vecf in){
|
accelerator_inline Grid::ComplexF Reduce<Grid::ComplexF, vecf>::operator()(vecf in){
|
||||||
float a = 0.f, b = 0.f;
|
float a = 0.f, b = 0.f;
|
||||||
|
|
||||||
acc(in.v, a, 0, 2, W<float>::r);
|
acc(in.v, a, 0, 2, W<float>::r);
|
||||||
@ -455,7 +455,7 @@ inline Grid::ComplexF Reduce<Grid::ComplexF, vecf>::operator()(vecf in){
|
|||||||
|
|
||||||
//Real float Reduce
|
//Real float Reduce
|
||||||
template<>
|
template<>
|
||||||
inline Grid::RealF Reduce<Grid::RealF, vecf>::operator()(vecf in){
|
accelerator_inline Grid::RealF Reduce<Grid::RealF, vecf>::operator()(vecf in){
|
||||||
float a = 0.;
|
float a = 0.;
|
||||||
|
|
||||||
acc(in.v, a, 0, 1, W<float>::r);
|
acc(in.v, a, 0, 1, W<float>::r);
|
||||||
@ -465,7 +465,7 @@ inline Grid::RealF Reduce<Grid::RealF, vecf>::operator()(vecf in){
|
|||||||
|
|
||||||
//Complex double Reduce
|
//Complex double Reduce
|
||||||
template<>
|
template<>
|
||||||
inline Grid::ComplexD Reduce<Grid::ComplexD, vecd>::operator()(vecd in){
|
accelerator_inline Grid::ComplexD Reduce<Grid::ComplexD, vecd>::operator()(vecd in){
|
||||||
double a = 0., b = 0.;
|
double a = 0., b = 0.;
|
||||||
|
|
||||||
acc(in.v, a, 0, 2, W<double>::r);
|
acc(in.v, a, 0, 2, W<double>::r);
|
||||||
@ -476,7 +476,7 @@ inline Grid::ComplexD Reduce<Grid::ComplexD, vecd>::operator()(vecd in){
|
|||||||
|
|
||||||
//Real double Reduce
|
//Real double Reduce
|
||||||
template<>
|
template<>
|
||||||
inline Grid::RealD Reduce<Grid::RealD, vecd>::operator()(vecd in){
|
accelerator_inline Grid::RealD Reduce<Grid::RealD, vecd>::operator()(vecd in){
|
||||||
double a = 0.f;
|
double a = 0.f;
|
||||||
|
|
||||||
acc(in.v, a, 0, 1, W<double>::r);
|
acc(in.v, a, 0, 1, W<double>::r);
|
||||||
@ -486,7 +486,7 @@ inline Grid::RealD Reduce<Grid::RealD, vecd>::operator()(vecd in){
|
|||||||
|
|
||||||
//Integer Reduce
|
//Integer Reduce
|
||||||
template<>
|
template<>
|
||||||
inline Integer Reduce<Integer, veci>::operator()(veci in){
|
accelerator_inline Integer Reduce<Integer, veci>::operator()(veci in){
|
||||||
Integer a = 0;
|
Integer a = 0;
|
||||||
|
|
||||||
acc(in.v, a, 0, 1, W<Integer>::r);
|
acc(in.v, a, 0, 1, W<Integer>::r);
|
||||||
@ -506,8 +506,8 @@ typedef Optimization::vecd SIMD_Dtype; // Double precision type
|
|||||||
typedef Optimization::veci SIMD_Itype; // Integer type
|
typedef Optimization::veci SIMD_Itype; // Integer type
|
||||||
|
|
||||||
// prefetch utilities
|
// prefetch utilities
|
||||||
inline void v_prefetch0(int size, const char *ptr){};
|
accelerator_inline void v_prefetch0(int size, const char *ptr){};
|
||||||
inline void prefetch_HINT_T0(const char *ptr){};
|
accelerator_inline void prefetch_HINT_T0(const char *ptr){};
|
||||||
|
|
||||||
// Function name aliases
|
// Function name aliases
|
||||||
typedef Optimization::Vsplat VsplatSIMD;
|
typedef Optimization::Vsplat VsplatSIMD;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user