1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Option to use GpuComplex in Wilson kernel

This commit is contained in:
u61464 2021-02-10 06:51:23 -08:00
parent 36f471e333
commit 9295eeadfe
2 changed files with 52 additions and 8 deletions

View File

@ -60,11 +60,25 @@ template<class pair>
class GpuComplex {
public:
pair z;
typedef decltype(z.x) real;
typedef decltype(z.x) Real;
public:
accelerator_inline GpuComplex() = default;
accelerator_inline GpuComplex(real re,real im) { z.x=re; z.y=im; };
accelerator_inline GpuComplex(Real re,Real im) { z.x=re; z.y=im; };
accelerator_inline GpuComplex(const GpuComplex &zz) { z = zz.z;};
accelerator_inline Real real(void) const { return z.x; };
accelerator_inline Real imag(void) const { return z.y; };
accelerator_inline GpuComplex &operator*=(const GpuComplex &r) {
*this = (*this) * r;
return *this;
}
accelerator_inline GpuComplex &operator+=(const GpuComplex &r) {
*this = (*this) + r;
return *this;
}
accelerator_inline GpuComplex &operator-=(const GpuComplex &r) {
*this = (*this) - r;
return *this;
}
friend accelerator_inline GpuComplex operator+(const GpuComplex &lhs,const GpuComplex &rhs) {
GpuComplex r ;
r.z.x = lhs.z.x + rhs.z.x;
@ -157,6 +171,11 @@ typedef GpuVector<NSIMD_RealD, double > GpuVectorRD;
typedef GpuVector<NSIMD_ComplexD, GpuComplexD > GpuVectorCD;
typedef GpuVector<NSIMD_Integer, Integer > GpuVectorI;
// Multiply a single-precision GpuComplex by +i:
// the result has real part -imag(r) and imaginary part real(r).
accelerator_inline GpuComplexF timesI(const GpuComplexF &r)
{
  const auto re = r.real();
  const auto im = r.imag();
  return GpuComplexF(-im, re);
}
// Multiply a double-precision GpuComplex by +i:
// the result has real part -imag(r) and imaginary part real(r).
accelerator_inline GpuComplexD timesI(const GpuComplexD &r)
{
  const auto re = r.real();
  const auto im = r.imag();
  return GpuComplexD(-im, re);
}
// Multiply a single-precision GpuComplex by -i:
// the result has real part imag(r) and imaginary part -real(r).
accelerator_inline GpuComplexF timesMinusI(const GpuComplexF &r)
{
  const auto re = r.real();
  const auto im = r.imag();
  return GpuComplexF(im, -re);
}
// Multiply a double-precision GpuComplex by -i:
// the result has real part imag(r) and imaginary part -real(r).
accelerator_inline GpuComplexD timesMinusI(const GpuComplexD &r)
{
  const auto re = r.real();
  const auto im = r.imag();
  return GpuComplexD(im, -re);
}
accelerator_inline float half2float(half h)
{
float f;

View File

@ -65,22 +65,20 @@ void coalescedWriteNonTemporal(vobj & __restrict__ vec,const vobj & __restrict__
#else
#if 0
// Use the scalar as our own complex on GPU
// Read one lane of a SIMD vector object, viewing the object's storage as an
// array of vsimd::scalar_type elements.
//   vec  : vector object to read from (never modified here)
//   lane : element index; defaults to acceleratorSIMTlane(vsimd::Nsimd())
// Returns a copy of element `lane`.
// (An alternative element type for this view is vsimd::vector_type::datum.)
template<class vsimd,IfSimd<vsimd> = 0> accelerator_inline
typename vsimd::scalar_type
coalescedRead(const vsimd & __restrict__ vec,int lane=acceleratorSIMTlane(vsimd::Nsimd()))
{
  typedef typename vsimd::scalar_type S;
  // Preserve the const qualifier of vec: a C-style cast to (S *) would
  // silently discard it even though this function only reads.
  const S * __restrict__ p = reinterpret_cast<const S *>(&vec);
  return p[lane];
}
template<int ptype,class vsimd,IfSimd<vsimd> = 0> accelerator_inline
//typename vsimd::vector_type::datum
typename vsimd::scalar_type
coalescedReadPermute(const vsimd & __restrict__ vec,int doperm,int lane=acceleratorSIMTlane(vsimd::Nsimd()))
{
// typedef typename vsimd::vector_type::datum S;
typedef typename vsimd::scalar_type S;
S * __restrict__ p=(S *)&vec;
@ -90,16 +88,43 @@ coalescedReadPermute(const vsimd & __restrict__ vec,int doperm,int lane=accelera
}
// Store one value into a single lane of a SIMD vector object, viewing the
// object's storage as an array of vsimd::scalar_type elements.
//   vec       : vector object to modify
//   extracted : value written into the selected element
//   lane      : element index; defaults to acceleratorSIMTlane(vsimd::Nsimd())
// (An alternative element type for this view is vsimd::vector_type::datum.)
template<class vsimd,IfSimd<vsimd> = 0> accelerator_inline
void coalescedWrite(vsimd & __restrict__ vec,
                    const typename vsimd::scalar_type & __restrict__ extracted,
                    int lane=acceleratorSIMTlane(vsimd::Nsimd()))
{
  using Element = typename vsimd::scalar_type;
  Element * __restrict__ slots = (Element *)&vec;
  slots[lane] = extracted;
}
#else
// Read one lane of a SIMD vector object, viewing the object's storage as an
// array of vsimd::vector_type::datum elements.
//   vec  : vector object to read from (never modified here)
//   lane : element index; defaults to acceleratorSIMTlane(vsimd::Nsimd())
// Returns a copy of element `lane`.
template<class vsimd,IfSimd<vsimd> = 0> accelerator_inline
typename vsimd::vector_type::datum
coalescedRead(const vsimd & __restrict__ vec,int lane=acceleratorSIMTlane(vsimd::Nsimd()))
{
  typedef typename vsimd::vector_type::datum S;
  // Preserve the const qualifier of vec: a C-style cast to (S *) would
  // silently discard it even though this function only reads.
  const S * __restrict__ p = reinterpret_cast<const S *>(&vec);
  return p[lane];
}
// Read one lane of a SIMD vector object, optionally from a permuted lane.
// The object's storage is viewed as an array of vsimd::vector_type::datum.
//   ptype  : template parameter selecting the shift used to build the mask
//   vec    : vector object to read from (never modified here)
//   doperm : when non-zero, the lane index is XOR-ed with the mask
//   lane   : element index; defaults to acceleratorSIMTlane(vsimd::Nsimd())
// Returns a copy of the selected element.
template<int ptype,class vsimd,IfSimd<vsimd> = 0> accelerator_inline
typename vsimd::vector_type::datum
coalescedReadPermute(const vsimd & __restrict__ vec,int doperm,int lane=acceleratorSIMTlane(vsimd::Nsimd()))
{
  typedef typename vsimd::vector_type::datum S;
  // Preserve the const qualifier of vec: a C-style cast to (S *) would
  // silently discard it even though this function only reads.
  const S * __restrict__ p = reinterpret_cast<const S *>(&vec);
  const int mask  = vsimd::Nsimd() >> (ptype + 1);
  const int plane = doperm ? (lane ^ mask) : lane;
  return p[plane];
}
// Store one value into a single lane of a SIMD vector object, viewing the
// object's storage as an array of vsimd::vector_type::datum elements.
//   vec       : vector object to modify
//   extracted : value written into the selected element
//   lane      : element index; defaults to acceleratorSIMTlane(vsimd::Nsimd())
template<class vsimd,IfSimd<vsimd> = 0> accelerator_inline
void coalescedWrite(vsimd & __restrict__ vec,
                    const typename vsimd::vector_type::datum & __restrict__ extracted,
                    int lane=acceleratorSIMTlane(vsimd::Nsimd()))
{
  using Element = typename vsimd::vector_type::datum;
  Element * __restrict__ slots = (Element *)&vec;
  slots[lane] = extracted;
}
#endif
//////////////////////////////////////////
// Extract and insert slices on the GPU