1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 21:50:45 +01:00

Better extract merge for GPU. Let the SIMD header files define the pointer type for access. GPU redirects through builtin float2, double2 for complex.
This commit is contained in:
Peter Boyle 2018-07-05 07:05:13 -04:00
parent 4730d4692a
commit 19b527e83f

View File

@ -113,32 +113,37 @@ void merge(vobj &vec,ExtractBuffer<sobj> &extracted)
template<class vobj> accelerator_inline template<class vobj> accelerator_inline
typename vobj::scalar_object extractLane(int lane, const vobj & __restrict__ vec) typename vobj::scalar_object extractLane(int lane, const vobj & __restrict__ vec)
{ {
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::scalar_object scalar_object; typedef typename vobj::scalar_object scalar_object;
typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type;
typedef typename vobj::vector_type vector_type; typedef typename ExtractTypeMap<scalar_type>::extract_type extract_type;
typedef extract_type * pointer;
constexpr int words=sizeof(vobj)/sizeof(vector_type); constexpr int words=sizeof(vobj)/sizeof(vector_type);
constexpr int Nsimd=vector_type::Nsimd(); constexpr int Nsimd=vector_type::Nsimd();
scalar_object extracted; scalar_object extracted;
scalar_type * __restrict__ sp = (scalar_type *)&extracted; // Type pun pointer __restrict__ sp = (pointer)&extracted; // Type pun
scalar_type * __restrict__ vp = (scalar_type *)&vec; pointer __restrict__ vp = (pointer)&vec;
for(int w=0;w<words;w++){ for(int w=0;w<words;w++){
sp[w]=vp[w*Nsimd+lane]; sp[w]=vp[w*Nsimd+lane];
} }
return extracted; return extracted;
} }
template<class vobj> accelerator_inline template<class vobj> accelerator_inline
void insertLane(int lane, vobj & __restrict__ vec,const typename vobj::scalar_object & __restrict__ extracted) void insertLane(int lane, vobj & __restrict__ vec,const typename vobj::scalar_object & __restrict__ extracted)
{ {
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type; typedef typename vobj::vector_type vector_type;
typedef typename vector_type::scalar_type scalar_type;
typedef typename ExtractTypeMap<scalar_type>::extract_type extract_type;
typedef extract_type * pointer;
constexpr int words=sizeof(vobj)/sizeof(vector_type); constexpr int words=sizeof(vobj)/sizeof(vector_type);
constexpr int Nsimd=vector_type::Nsimd(); constexpr int Nsimd=vector_type::Nsimd();
scalar_type * __restrict__ sp = (scalar_type *)&extracted; pointer __restrict__ sp = (pointer)&extracted;
scalar_type * __restrict__ vp = (scalar_type *)&vec; pointer __restrict__ vp = (pointer)&vec;
for(int w=0;w<words;w++){ for(int w=0;w<words;w++){
vp[w*Nsimd+lane]=sp[w]; vp[w*Nsimd+lane]=sp[w];
} }