1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

Unrolled loops

This commit is contained in:
u61464 2021-02-09 16:09:23 -08:00
parent ca4eadd4ab
commit 36f471e333

View File

@ -64,6 +64,43 @@ void coalescedWriteNonTemporal(vobj & __restrict__ vec,const vobj & __restrict__
}
#else
// Read one scalar element out of a SIMD vector object.
//
// The vector is reinterpreted as a flat array of vsimd::scalar_type and the
// element at index `lane` is returned by value.
//   vec  : vector object to read from (not modified).
//   lane : element index; when omitted it is taken from
//          acceleratorSIMTlane(vsimd::Nsimd()).
// NOTE(review): no bounds check is made here; presumably callers guarantee
// 0 <= lane < vsimd::Nsimd() -- confirm against call sites.
// (An alternative element type, vsimd::vector_type::datum, was left
// commented out in the previous revision of this function.)
template<class vsimd,IfSimd<vsimd> = 0> accelerator_inline
typename vsimd::scalar_type
coalescedRead(const vsimd & __restrict__ vec,int lane=acceleratorSIMTlane(vsimd::Nsimd()))
{
  using scalar_t = typename vsimd::scalar_type;
  // View the vector storage as consecutive scalars; read-only access.
  const scalar_t * __restrict__ lanes = reinterpret_cast<const scalar_t *>(&vec);
  return lanes[lane];
}
// Read one scalar element out of a SIMD vector object, optionally from a
// permuted lane.
//
// The vector is reinterpreted as a flat array of vsimd::scalar_type.
//   ptype  : compile-time permute level; selects the lane bit that is
//            flipped, namely vsimd::Nsimd() >> (ptype + 1).
//   vec    : vector object to read from (not modified).
//   doperm : when non-zero the element at (lane XOR flip-bit) is returned,
//            otherwise the element at `lane` itself.
//   lane   : element index; when omitted it is taken from
//            acceleratorSIMTlane(vsimd::Nsimd()).
// NOTE(review): no bounds check is made; presumably Nsimd() is a power of
// two so the XOR stays in range -- confirm.
// (An alternative element type, vsimd::vector_type::datum, was left
// commented out in the previous revision of this function.)
template<int ptype,class vsimd,IfSimd<vsimd> = 0> accelerator_inline
typename vsimd::scalar_type
coalescedReadPermute(const vsimd & __restrict__ vec,int doperm,int lane=acceleratorSIMTlane(vsimd::Nsimd()))
{
  using scalar_t = typename vsimd::scalar_type;
  // View the vector storage as consecutive scalars; read-only access.
  const scalar_t * __restrict__ lanes = reinterpret_cast<const scalar_t *>(&vec);
  // Bit that distinguishes a lane from its permute partner at this level.
  const int flip = vsimd::Nsimd() >> (ptype + 1);
  if (doperm) {
    return lanes[lane ^ flip];
  }
  return lanes[lane];
}
// Store one scalar element into a SIMD vector object.
//
// The vector is reinterpreted as a flat array of vsimd::scalar_type and the
// element at index `lane` is overwritten with `extracted`; other elements
// are not touched by this function.
//   vec       : vector object to write into.
//   extracted : scalar value to store.
//   lane      : element index; when omitted it is taken from
//               acceleratorSIMTlane(vsimd::Nsimd()).
// NOTE(review): no bounds check is made here; presumably callers guarantee
// 0 <= lane < vsimd::Nsimd() -- confirm against call sites.
// (An alternative element type, vsimd::vector_type::datum, was left
// commented out in the previous revision of this function.)
template<class vsimd,IfSimd<vsimd> = 0> accelerator_inline
void coalescedWrite(vsimd & __restrict__ vec,
		    const typename vsimd::scalar_type & __restrict__ extracted,
		    int lane=acceleratorSIMTlane(vsimd::Nsimd()))
{
  using scalar_t = typename vsimd::scalar_type;
  // View the vector storage as consecutive scalars for element-wise store.
  scalar_t * __restrict__ lanes = reinterpret_cast<scalar_t *>(&vec);
  lanes[lane] = extracted;
}
//////////////////////////////////////////
// Extract and insert slices on the GPU
//////////////////////////////////////////