mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Better extract/merge for GPU. Let the SIMD header files define the pointer type used for
access. The GPU path redirects through the builtin float2 and double2 types for complex numbers.
This commit is contained in:
parent
4730d4692a
commit
19b527e83f
@ -113,32 +113,37 @@ void merge(vobj &vec,ExtractBuffer<sobj> &extracted)
|
|||||||
template<class vobj> accelerator_inline
|
template<class vobj> accelerator_inline
|
||||||
typename vobj::scalar_object extractLane(int lane, const vobj & __restrict__ vec)
|
typename vobj::scalar_object extractLane(int lane, const vobj & __restrict__ vec)
|
||||||
{
|
{
|
||||||
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
typedef typename vobj::scalar_object scalar_object;
|
typedef typename vobj::scalar_object scalar_object;
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
typedef typename vobj::vector_type vector_type;
|
||||||
typedef typename vobj::vector_type vector_type;
|
typedef typename ExtractTypeMap<scalar_type>::extract_type extract_type;
|
||||||
|
typedef extract_type * pointer;
|
||||||
|
|
||||||
constexpr int words=sizeof(vobj)/sizeof(vector_type);
|
constexpr int words=sizeof(vobj)/sizeof(vector_type);
|
||||||
constexpr int Nsimd=vector_type::Nsimd();
|
constexpr int Nsimd=vector_type::Nsimd();
|
||||||
|
|
||||||
scalar_object extracted;
|
scalar_object extracted;
|
||||||
scalar_type * __restrict__ sp = (scalar_type *)&extracted; // Type pun
|
pointer __restrict__ sp = (pointer)&extracted; // Type pun
|
||||||
scalar_type * __restrict__ vp = (scalar_type *)&vec;
|
pointer __restrict__ vp = (pointer)&vec;
|
||||||
for(int w=0;w<words;w++){
|
for(int w=0;w<words;w++){
|
||||||
sp[w]=vp[w*Nsimd+lane];
|
sp[w]=vp[w*Nsimd+lane];
|
||||||
}
|
}
|
||||||
return extracted;
|
return extracted;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class vobj> accelerator_inline
|
template<class vobj> accelerator_inline
|
||||||
void insertLane(int lane, vobj & __restrict__ vec,const typename vobj::scalar_object & __restrict__ extracted)
|
void insertLane(int lane, vobj & __restrict__ vec,const typename vobj::scalar_object & __restrict__ extracted)
|
||||||
{
|
{
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
|
||||||
typedef typename vobj::vector_type vector_type;
|
typedef typename vobj::vector_type vector_type;
|
||||||
|
typedef typename vector_type::scalar_type scalar_type;
|
||||||
|
typedef typename ExtractTypeMap<scalar_type>::extract_type extract_type;
|
||||||
|
typedef extract_type * pointer;
|
||||||
|
|
||||||
constexpr int words=sizeof(vobj)/sizeof(vector_type);
|
constexpr int words=sizeof(vobj)/sizeof(vector_type);
|
||||||
constexpr int Nsimd=vector_type::Nsimd();
|
constexpr int Nsimd=vector_type::Nsimd();
|
||||||
|
|
||||||
scalar_type * __restrict__ sp = (scalar_type *)&extracted;
|
pointer __restrict__ sp = (pointer)&extracted;
|
||||||
scalar_type * __restrict__ vp = (scalar_type *)&vec;
|
pointer __restrict__ vp = (pointer)&vec;
|
||||||
for(int w=0;w<words;w++){
|
for(int w=0;w<words;w++){
|
||||||
vp[w*Nsimd+lane]=sp[w];
|
vp[w*Nsimd+lane]=sp[w];
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user