mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-13 01:05:36 +00:00
Unrolled loops
This commit is contained in:
parent
ca4eadd4ab
commit
36f471e333
@ -64,6 +64,43 @@ void coalescedWriteNonTemporal(vobj & __restrict__ vec,const vobj & __restrict__
|
|||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
// coalescedRead
//
// Views the SIMD object `vec` as a flat array of vsimd::scalar_type and
// returns the element stored at index `lane`.
//
//   vec  : SIMD object to read from; it is never modified.
//   lane : index of the scalar to return; defaults to
//          acceleratorSIMTlane(vsimd::Nsimd()).
//          No bounds check is performed -- presumably callers guarantee
//          0 <= lane < vsimd::Nsimd(); TODO confirm.
//
// The element type is vsimd::scalar_type (not vsimd::vector_type::datum).
//////////////////////////////////////////////////////////////////////////
template<class vsimd,IfSimd<vsimd> = 0> accelerator_inline
typename vsimd::scalar_type
coalescedRead(const vsimd & __restrict__ vec,int lane=acceleratorSIMTlane(vsimd::Nsimd()))
{
  typedef typename vsimd::scalar_type S;
  // vec is a const reference and is only read, so keep the pointee const
  // instead of stripping the qualifier with a C-style cast.
  const S * __restrict__ p=reinterpret_cast<const S *>(&vec);
  return p[lane];
}
|
||||||
|
//////////////////////////////////////////////////////////////////////////
// coalescedReadPermute
//
// Like a single-lane read of `vec` viewed as an array of
// vsimd::scalar_type, but optionally redirected to a partner lane.
//
//   ptype  : compile-time selector; the partner distance is
//            mask = vsimd::Nsimd() >> (ptype + 1).
//   vec    : SIMD object to read from; it is never modified.
//   doperm : when non-zero the element at index (lane ^ mask) is returned,
//            otherwise the element at index `lane`.
//   lane   : base lane index; defaults to
//            acceleratorSIMTlane(vsimd::Nsimd()).
//            No bounds check is performed -- presumably callers guarantee
//            0 <= lane < vsimd::Nsimd(); TODO confirm.
//
// The element type is vsimd::scalar_type (not vsimd::vector_type::datum).
//////////////////////////////////////////////////////////////////////////
template<int ptype,class vsimd,IfSimd<vsimd> = 0> accelerator_inline
typename vsimd::scalar_type
coalescedReadPermute(const vsimd & __restrict__ vec,int doperm,int lane=acceleratorSIMTlane(vsimd::Nsimd()))
{
  typedef typename vsimd::scalar_type S;

  // vec is a const reference and is only read, so keep the pointee const
  // instead of stripping the qualifier with a C-style cast.
  const S * __restrict__ p=reinterpret_cast<const S *>(&vec);
  int mask = vsimd::Nsimd() >> (ptype + 1);
  // XOR with mask selects the partner lane when a permute is requested.
  int plane= doperm ? lane ^ mask : lane;
  return p[plane];
}
|
||||||
|
//////////////////////////////////////////////////////////////////////////
// coalescedWrite
//
// Stores one scalar into a single slot of the SIMD object `vec`, treating
// `vec` as a flat array of vsimd::scalar_type.
//
//   vec       : SIMD object that receives the value.
//   extracted : scalar value to store.
//   lane      : index of the slot to overwrite; defaults to
//               acceleratorSIMTlane(vsimd::Nsimd()).
//               No bounds check is performed -- presumably callers
//               guarantee 0 <= lane < vsimd::Nsimd(); TODO confirm.
//
// The element type is vsimd::scalar_type (not vsimd::vector_type::datum).
//////////////////////////////////////////////////////////////////////////
template<class vsimd,IfSimd<vsimd> = 0> accelerator_inline
void coalescedWrite(vsimd & __restrict__ vec,
		    const typename vsimd::scalar_type & __restrict__ extracted,
		    int lane=acceleratorSIMTlane(vsimd::Nsimd()))
{
  using scalar_t = typename vsimd::scalar_type;
  // Address the SIMD object slot by slot.
  scalar_t * __restrict__ slots = reinterpret_cast<scalar_t *>(&vec);
  slots[lane] = extracted;
}
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////
|
//////////////////////////////////////////
|
||||||
// Extract and insert slices on the GPU
|
// Extract and insert slices on the GPU
|
||||||
//////////////////////////////////////////
|
//////////////////////////////////////////
|
||||||
|
Loading…
Reference in New Issue
Block a user