mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-11 03:46:55 +01:00
Threading support rework.
Placed parallel pragmas as macros; implemented deterministic thread reduction in style of BFM.
This commit is contained in:
@ -13,6 +13,9 @@ namespace Grid {
|
||||
vzero(*this);
|
||||
return (*this);
|
||||
}
|
||||
// Construct from the Zero tag: the new object is cleared with vzero().
// The tag argument itself is not read.
vComplexD(Zero &z)
{
  vzero(*this);
}
|
||||
// Default constructor, explicitly defaulted (compiler-generated).
vComplexD()=default;
|
||||
vComplexD(ComplexD a){
|
||||
vsplat(*this,a);
|
||||
@ -286,8 +289,8 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
|
||||
return ret;
|
||||
}
|
||||
|
||||
friend inline vComplexD timesMinusI(const vComplexD &in){
|
||||
vComplexD ret; vzero(ret);
|
||||
friend inline void timesMinusI(vComplexD &ret,const vComplexD &in){
|
||||
vzero(ret);
|
||||
vComplexD tmp;
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
tmp.v =_mm256_addsub_pd(ret.v,in.v); // r,-i
|
||||
@ -304,11 +307,10 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
|
||||
#ifdef QPX
|
||||
assert(0);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
friend inline vComplexD timesI(const vComplexD &in){
|
||||
vComplexD ret; vzero(ret);
|
||||
friend inline void timesI(vComplexD &ret, const vComplexD &in){
|
||||
vzero(ret);
|
||||
vComplexD tmp;
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
tmp.v =_mm256_shuffle_pd(in.v,in.v,0x5);
|
||||
@ -325,9 +327,21 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
|
||||
#ifdef QPX
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Value-returning convenience form of timesMinusI.
// Forwards to the two-argument overload timesMinusI(ret,in), which writes
// its answer into the first argument, and returns that temporary by value.
friend inline vComplexD timesMinusI(const vComplexD &in)
{
  vComplexD result;
  timesMinusI(result, in);  // out-parameter overload fills in result
  return result;
}
|
||||
|
||||
// Value-returning convenience form of timesI.
// Forwards to the two-argument overload timesI(ret,in), which writes its
// answer into the first argument, and returns that temporary by value.
friend inline vComplexD timesI(const vComplexD &in)
{
  vComplexD result;
  timesI(result, in);  // out-parameter overload fills in result
  return result;
}
|
||||
|
||||
|
||||
// REDUCE FIXME must be a cleaner implementation
|
||||
friend inline ComplexD Reduce(const vComplexD & in)
|
||||
{
|
||||
|
@ -28,6 +28,9 @@ namespace Grid {
|
||||
vzero(*this);
|
||||
return (*this);
|
||||
}
|
||||
// Construct from the Zero tag: the new object is cleared with vzero().
// The tag argument itself is not read.
vComplexF(Zero &z)
{
  vzero(*this);
}
|
||||
// Default constructor, explicitly defaulted (compiler-generated).
vComplexF()=default;
|
||||
vComplexF(ComplexF a){
|
||||
vsplat(*this,a);
|
||||
@ -363,8 +366,7 @@ namespace Grid {
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
friend inline vComplexF timesMinusI(const vComplexF &in){
|
||||
vComplexF ret;
|
||||
friend inline void timesMinusI( vComplexF &ret,const vComplexF &in){
|
||||
vzero(ret);
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
cvec tmp =_mm256_addsub_ps(ret.v,in.v); // r,-i
|
||||
@ -381,10 +383,9 @@ namespace Grid {
|
||||
#ifdef QPX
|
||||
assert(0);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
friend inline vComplexF timesI(const vComplexF &in){
|
||||
vComplexF ret; vzero(ret);
|
||||
friend inline void timesI(vComplexF &ret,const vComplexF &in){
|
||||
vzero(ret);
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
cvec tmp =_mm256_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1));//i,r
|
||||
ret.v =_mm256_addsub_ps(ret.v,tmp); //i,-r
|
||||
@ -400,8 +401,18 @@ namespace Grid {
|
||||
#ifdef QPX
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
// Value-returning convenience form of timesMinusI.
// Forwards to the two-argument overload timesMinusI(ret,in), which writes
// its answer into the first argument, and returns that temporary by value.
friend inline vComplexF timesMinusI(const vComplexF &in)
{
  vComplexF result;
  timesMinusI(result, in);  // out-parameter overload fills in result
  return result;
}
|
||||
// Value-returning convenience form of timesI.
// Forwards to the two-argument overload timesI(ret,in), which writes its
// answer into the first argument, and returns that temporary by value.
friend inline vComplexF timesI(const vComplexF &in)
{
  vComplexF result;
  timesI(result, in);  // out-parameter overload fills in result
  return result;
}
|
||||
|
||||
|
||||
// Unary negation
|
||||
friend inline vComplexF operator -(const vComplexF &r) {
|
||||
|
@ -17,6 +17,10 @@ namespace Grid {
|
||||
// Construct from the Zero tag: the new object is cleared through zeroit().
// The tag argument itself is not read.
vRealD(Zero &zero)
{
  zeroit(*this);
}
|
||||
// Assignment from the Zero tag: clears this object with vzero() and returns
// a reference to it so assignments can be chained. The tag is not read.
// NOTE(review): the Zero constructor clears via zeroit() whereas this uses
// vzero(); presumably equivalent -- confirm.
vRealD &operator=(Zero &z)
{
  vzero(*this);
  return *this;
}
|
||||
|
||||
// Pointer form of the product: stores (*l) * (*r) into *y.
// All three pointers are declared __restrict__.
friend inline void mult(vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r)
{
  const vRealD &lhs = *l;
  const vRealD &rhs = *r;
  *y = lhs * rhs;
}
|
||||
// Pointer form of the difference: stores (*l) - (*r) into *y.
// All three pointers are declared __restrict__.
friend inline void sub (vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r)
{
  const vRealD &lhs = *l;
  const vRealD &rhs = *r;
  *y = lhs - rhs;
}
|
||||
|
@ -18,6 +18,10 @@ namespace Grid {
|
||||
// Construct from the Zero tag: the new object is cleared through zeroit().
// The tag argument itself is not read.
vRealF(Zero &zero)
{
  zeroit(*this);
}
|
||||
// Assignment from the Zero tag: clears this object with vzero() and returns
// a reference to it so assignments can be chained. The tag is not read.
// NOTE(review): the Zero constructor clears via zeroit() whereas this uses
// vzero(); presumably equivalent -- confirm.
vRealF &operator=(Zero &z)
{
  vzero(*this);
  return *this;
}
|
||||
////////////////////////////////////
|
||||
// Arithmetic operator overloads +,-,*
|
||||
////////////////////////////////////
|
||||
|
Reference in New Issue
Block a user