mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Option to use GpuComplex in Wilson kernel
This commit is contained in:
parent
36f471e333
commit
9295eeadfe
@ -60,11 +60,25 @@ template<class pair>
|
|||||||
class GpuComplex {
|
class GpuComplex {
|
||||||
public:
|
public:
|
||||||
pair z;
|
pair z;
|
||||||
typedef decltype(z.x) real;
|
typedef decltype(z.x) Real;
|
||||||
public:
|
public:
|
||||||
accelerator_inline GpuComplex() = default;
|
accelerator_inline GpuComplex() = default;
|
||||||
accelerator_inline GpuComplex(real re,real im) { z.x=re; z.y=im; };
|
accelerator_inline GpuComplex(Real re,Real im) { z.x=re; z.y=im; };
|
||||||
accelerator_inline GpuComplex(const GpuComplex &zz) { z = zz.z;};
|
accelerator_inline GpuComplex(const GpuComplex &zz) { z = zz.z;};
|
||||||
|
accelerator_inline Real real(void) const { return z.x; };
|
||||||
|
accelerator_inline Real imag(void) const { return z.y; };
|
||||||
|
accelerator_inline GpuComplex &operator*=(const GpuComplex &r) {
|
||||||
|
*this = (*this) * r;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
accelerator_inline GpuComplex &operator+=(const GpuComplex &r) {
|
||||||
|
*this = (*this) + r;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
accelerator_inline GpuComplex &operator-=(const GpuComplex &r) {
|
||||||
|
*this = (*this) - r;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
friend accelerator_inline GpuComplex operator+(const GpuComplex &lhs,const GpuComplex &rhs) {
|
friend accelerator_inline GpuComplex operator+(const GpuComplex &lhs,const GpuComplex &rhs) {
|
||||||
GpuComplex r ;
|
GpuComplex r ;
|
||||||
r.z.x = lhs.z.x + rhs.z.x;
|
r.z.x = lhs.z.x + rhs.z.x;
|
||||||
@ -157,6 +171,11 @@ typedef GpuVector<NSIMD_RealD, double > GpuVectorRD;
|
|||||||
typedef GpuVector<NSIMD_ComplexD, GpuComplexD > GpuVectorCD;
|
typedef GpuVector<NSIMD_ComplexD, GpuComplexD > GpuVectorCD;
|
||||||
typedef GpuVector<NSIMD_Integer, Integer > GpuVectorI;
|
typedef GpuVector<NSIMD_Integer, Integer > GpuVectorI;
|
||||||
|
|
||||||
|
accelerator_inline GpuComplexF timesI(const GpuComplexF &r) { return(GpuComplexF(-r.imag(),r.real()));}
|
||||||
|
accelerator_inline GpuComplexD timesI(const GpuComplexD &r) { return(GpuComplexD(-r.imag(),r.real()));}
|
||||||
|
accelerator_inline GpuComplexF timesMinusI(const GpuComplexF &r){ return(GpuComplexF(r.imag(),-r.real()));}
|
||||||
|
accelerator_inline GpuComplexD timesMinusI(const GpuComplexD &r){ return(GpuComplexD(r.imag(),-r.real()));}
|
||||||
|
|
||||||
accelerator_inline float half2float(half h)
|
accelerator_inline float half2float(half h)
|
||||||
{
|
{
|
||||||
float f;
|
float f;
|
||||||
|
@ -65,22 +65,20 @@ void coalescedWriteNonTemporal(vobj & __restrict__ vec,const vobj & __restrict__
|
|||||||
#else
|
#else
|
||||||
|
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
// Use the scalar as our own complex on GPU
|
||||||
template<class vsimd,IfSimd<vsimd> = 0> accelerator_inline
|
template<class vsimd,IfSimd<vsimd> = 0> accelerator_inline
|
||||||
//typename vsimd::vector_type::datum
|
|
||||||
typename vsimd::scalar_type
|
typename vsimd::scalar_type
|
||||||
coalescedRead(const vsimd & __restrict__ vec,int lane=acceleratorSIMTlane(vsimd::Nsimd()))
|
coalescedRead(const vsimd & __restrict__ vec,int lane=acceleratorSIMTlane(vsimd::Nsimd()))
|
||||||
{
|
{
|
||||||
// typedef typename vsimd::vector_type::datum S;
|
|
||||||
typedef typename vsimd::scalar_type S;
|
typedef typename vsimd::scalar_type S;
|
||||||
S * __restrict__ p=(S *)&vec;
|
S * __restrict__ p=(S *)&vec;
|
||||||
return p[lane];
|
return p[lane];
|
||||||
}
|
}
|
||||||
template<int ptype,class vsimd,IfSimd<vsimd> = 0> accelerator_inline
|
template<int ptype,class vsimd,IfSimd<vsimd> = 0> accelerator_inline
|
||||||
//typename vsimd::vector_type::datum
|
|
||||||
typename vsimd::scalar_type
|
typename vsimd::scalar_type
|
||||||
coalescedReadPermute(const vsimd & __restrict__ vec,int doperm,int lane=acceleratorSIMTlane(vsimd::Nsimd()))
|
coalescedReadPermute(const vsimd & __restrict__ vec,int doperm,int lane=acceleratorSIMTlane(vsimd::Nsimd()))
|
||||||
{
|
{
|
||||||
// typedef typename vsimd::vector_type::datum S;
|
|
||||||
typedef typename vsimd::scalar_type S;
|
typedef typename vsimd::scalar_type S;
|
||||||
|
|
||||||
S * __restrict__ p=(S *)&vec;
|
S * __restrict__ p=(S *)&vec;
|
||||||
@ -90,16 +88,43 @@ coalescedReadPermute(const vsimd & __restrict__ vec,int doperm,int lane=accelera
|
|||||||
}
|
}
|
||||||
template<class vsimd,IfSimd<vsimd> = 0> accelerator_inline
|
template<class vsimd,IfSimd<vsimd> = 0> accelerator_inline
|
||||||
void coalescedWrite(vsimd & __restrict__ vec,
|
void coalescedWrite(vsimd & __restrict__ vec,
|
||||||
// const typename vsimd::vector_type::datum & __restrict__ extracted,
|
|
||||||
const typename vsimd::scalar_type & __restrict__ extracted,
|
const typename vsimd::scalar_type & __restrict__ extracted,
|
||||||
int lane=acceleratorSIMTlane(vsimd::Nsimd()))
|
int lane=acceleratorSIMTlane(vsimd::Nsimd()))
|
||||||
{
|
{
|
||||||
// typedef typename vsimd::vector_type::datum S;
|
|
||||||
typedef typename vsimd::scalar_type S;
|
typedef typename vsimd::scalar_type S;
|
||||||
S * __restrict__ p=(S *)&vec;
|
S * __restrict__ p=(S *)&vec;
|
||||||
p[lane]=extracted;
|
p[lane]=extracted;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
template<class vsimd,IfSimd<vsimd> = 0> accelerator_inline
|
||||||
|
typename vsimd::vector_type::datum
|
||||||
|
coalescedRead(const vsimd & __restrict__ vec,int lane=acceleratorSIMTlane(vsimd::Nsimd()))
|
||||||
|
{
|
||||||
|
typedef typename vsimd::vector_type::datum S;
|
||||||
|
S * __restrict__ p=(S *)&vec;
|
||||||
|
return p[lane];
|
||||||
|
}
|
||||||
|
template<int ptype,class vsimd,IfSimd<vsimd> = 0> accelerator_inline
|
||||||
|
typename vsimd::vector_type::datum
|
||||||
|
coalescedReadPermute(const vsimd & __restrict__ vec,int doperm,int lane=acceleratorSIMTlane(vsimd::Nsimd()))
|
||||||
|
{
|
||||||
|
typedef typename vsimd::vector_type::datum S;
|
||||||
|
|
||||||
|
S * __restrict__ p=(S *)&vec;
|
||||||
|
int mask = vsimd::Nsimd() >> (ptype + 1);
|
||||||
|
int plane= doperm ? lane ^ mask : lane;
|
||||||
|
return p[plane];
|
||||||
|
}
|
||||||
|
template<class vsimd,IfSimd<vsimd> = 0> accelerator_inline
|
||||||
|
void coalescedWrite(vsimd & __restrict__ vec,
|
||||||
|
const typename vsimd::vector_type::datum & __restrict__ extracted,
|
||||||
|
int lane=acceleratorSIMTlane(vsimd::Nsimd()))
|
||||||
|
{
|
||||||
|
typedef typename vsimd::vector_type::datum S;
|
||||||
|
S * __restrict__ p=(S *)&vec;
|
||||||
|
p[lane]=extracted;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
//////////////////////////////////////////
|
//////////////////////////////////////////
|
||||||
// Extract and insert slices on the GPU
|
// Extract and insert slices on the GPU
|
||||||
|
Loading…
Reference in New Issue
Block a user