mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	Threading support rework.
Placed the parallel pragmas behind macros; implemented a deterministic thread reduction in the style of BFM.
This commit is contained in:
		@@ -13,6 +13,9 @@ namespace Grid {
 | 
			
		||||
            vzero(*this);
 | 
			
		||||
            return (*this);
 | 
			
		||||
        }
 | 
			
		||||
        // Construct from the Zero tag type: clears this object via vzero(*this).
        // The argument z is only used for overload selection; its value is never read.
        vComplexD( Zero & z){
 | 
			
		||||
            vzero(*this);
 | 
			
		||||
        }
 | 
			
		||||
        // Compiler-generated default constructor (explicitly defaulted).
        vComplexD()=default;
 | 
			
		||||
        vComplexD(ComplexD a){
 | 
			
		||||
	  vsplat(*this,a);
 | 
			
		||||
@@ -286,8 +289,8 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
 | 
			
		||||
            return ret;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        friend inline vComplexD timesMinusI(const vComplexD &in){
 | 
			
		||||
	  vComplexD ret; vzero(ret);
 | 
			
		||||
        friend inline void timesMinusI(vComplexD &ret,const vComplexD &in){
 | 
			
		||||
	  vzero(ret);
 | 
			
		||||
	  vComplexD tmp;
 | 
			
		||||
#if defined (AVX1)|| defined (AVX2)
 | 
			
		||||
	  tmp.v    =_mm256_addsub_pd(ret.v,in.v); // r,-i
 | 
			
		||||
@@ -304,11 +307,10 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
 | 
			
		||||
#ifdef QPX
 | 
			
		||||
            assert(0);
 | 
			
		||||
#endif
 | 
			
		||||
	  return ret;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
        friend inline vComplexD timesI(const vComplexD &in){
 | 
			
		||||
	  vComplexD ret; vzero(ret);
 | 
			
		||||
        friend inline void timesI(vComplexD &ret, const vComplexD &in){
 | 
			
		||||
	  vzero(ret);
 | 
			
		||||
	  vComplexD tmp;
 | 
			
		||||
#if defined (AVX1)|| defined (AVX2)
 | 
			
		||||
	  tmp.v    =_mm256_shuffle_pd(in.v,in.v,0x5);
 | 
			
		||||
@@ -325,9 +327,21 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
 | 
			
		||||
#ifdef QPX
 | 
			
		||||
            assert(0);
 | 
			
		||||
#endif
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
        // Value-returning form of timesMinusI: forwards to the two-argument
        // overload timesMinusI(ret,in) and returns the result by value.
        // NOTE(review): by its name this presumably multiplies by -i — confirm against the two-argument overload.
        friend inline vComplexD timesMinusI(const vComplexD &in){
 | 
			
		||||
	  vComplexD ret; 
 | 
			
		||||
	  timesMinusI(ret,in);
 | 
			
		||||
	  return ret;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
        // Value-returning form of timesI: forwards to the two-argument
        // overload timesI(ret,in) and returns the result by value.
        // NOTE(review): by its name this presumably multiplies by +i — confirm against the two-argument overload.
        friend inline vComplexD timesI(const vComplexD &in){
 | 
			
		||||
	  vComplexD ret; 
 | 
			
		||||
	  timesI(ret,in);
 | 
			
		||||
	  return ret;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// REDUCE FIXME must be a cleaner implementation
 | 
			
		||||
       friend inline ComplexD Reduce(const vComplexD & in)
 | 
			
		||||
       { 
 | 
			
		||||
 
 | 
			
		||||
@@ -28,6 +28,9 @@ namespace Grid {
 | 
			
		||||
            vzero(*this);
 | 
			
		||||
            return (*this);
 | 
			
		||||
        }
 | 
			
		||||
        // Construct from the Zero tag type: clears this object via vzero(*this).
        // The argument z is only used for overload selection; its value is never read.
        vComplexF( Zero & z){
 | 
			
		||||
            vzero(*this);
 | 
			
		||||
        }
 | 
			
		||||
        // Compiler-generated default constructor (explicitly defaulted).
        vComplexF()=default;
 | 
			
		||||
        vComplexF(ComplexF a){
 | 
			
		||||
	  vsplat(*this,a);
 | 
			
		||||
@@ -363,8 +366,7 @@ namespace Grid {
 | 
			
		||||
#endif
 | 
			
		||||
            return ret;
 | 
			
		||||
        }
 | 
			
		||||
        friend inline vComplexF timesMinusI(const vComplexF &in){
 | 
			
		||||
	  vComplexF ret; 
 | 
			
		||||
        friend inline void timesMinusI( vComplexF &ret,const vComplexF &in){
 | 
			
		||||
	  vzero(ret);
 | 
			
		||||
#if defined (AVX1)|| defined (AVX2)
 | 
			
		||||
	  cvec tmp =_mm256_addsub_ps(ret.v,in.v); // r,-i
 | 
			
		||||
@@ -381,10 +383,9 @@ namespace Grid {
 | 
			
		||||
#ifdef QPX
 | 
			
		||||
            assert(0);
 | 
			
		||||
#endif
 | 
			
		||||
	  return ret;
 | 
			
		||||
	}
 | 
			
		||||
        friend inline vComplexF timesI(const vComplexF &in){
 | 
			
		||||
	  vComplexF ret; vzero(ret);
 | 
			
		||||
        friend inline void timesI(vComplexF &ret,const vComplexF &in){
 | 
			
		||||
	  vzero(ret);
 | 
			
		||||
#if defined (AVX1)|| defined (AVX2)
 | 
			
		||||
	  cvec tmp =_mm256_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1));//i,r
 | 
			
		||||
          ret.v    =_mm256_addsub_ps(ret.v,tmp);     //i,-r
 | 
			
		||||
@@ -400,8 +401,18 @@ namespace Grid {
 | 
			
		||||
#ifdef QPX
 | 
			
		||||
            assert(0);
 | 
			
		||||
#endif
 | 
			
		||||
	}
 | 
			
		||||
        // Value-returning form of timesMinusI: forwards to the two-argument
        // overload timesMinusI(ret,in) and returns the result by value.
        // NOTE(review): by its name this presumably multiplies by -i — confirm against the two-argument overload.
        friend inline vComplexF timesMinusI(const vComplexF &in){
 | 
			
		||||
	  vComplexF ret; 
 | 
			
		||||
	  timesMinusI(ret,in);
 | 
			
		||||
	  return ret;
 | 
			
		||||
	}
 | 
			
		||||
        // Value-returning form of timesI: forwards to the two-argument
        // overload timesI(ret,in) and returns the result by value.
        // NOTE(review): by its name this presumably multiplies by +i — confirm against the two-argument overload.
        friend inline vComplexF timesI(const vComplexF &in){
 | 
			
		||||
	  vComplexF ret; 
 | 
			
		||||
	  timesI(ret,in);
 | 
			
		||||
	  return ret;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
        
 | 
			
		||||
        // Unary negation
 | 
			
		||||
        friend inline vComplexF operator -(const vComplexF &r) {
 | 
			
		||||
 
 | 
			
		||||
@@ -17,6 +17,10 @@ namespace Grid {
 | 
			
		||||
        // Construct from the Zero tag type by calling zeroit(*this); the argument is never read.
        // NOTE(review): the Zero assignment operator of this class calls vzero instead of zeroit —
        // confirm the two helpers are equivalent or unify them.
        vRealD(Zero &zero){
 | 
			
		||||
	  zeroit(*this);
 | 
			
		||||
	}
 | 
			
		||||
        // Assignment from the Zero tag type: clears this object via vzero(*this)
        // and returns *this so assignments can be chained. The argument z is never read.
        vRealD & operator = ( Zero & z){
 | 
			
		||||
	  vzero(*this);
 | 
			
		||||
	  return (*this);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Pointer form of multiplication: stores (*l) * (*r) into *y. The __restrict__ qualifiers
        // tell the compiler that y, l and r do not alias one another.
        friend inline void mult(vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) * (*r);}
 | 
			
		||||
        // Pointer form of subtraction: stores (*l) - (*r) into *y. The __restrict__ qualifiers
        // tell the compiler that y, l and r do not alias one another.
        friend inline void sub (vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) - (*r);}
 | 
			
		||||
 
 | 
			
		||||
@@ -18,6 +18,10 @@ namespace Grid {
 | 
			
		||||
        // Construct from the Zero tag type by calling zeroit(*this); the argument is never read.
        // NOTE(review): the Zero assignment operator of this class calls vzero instead of zeroit —
        // confirm the two helpers are equivalent or unify them.
        vRealF(Zero &zero){
 | 
			
		||||
	  zeroit(*this);
 | 
			
		||||
	}
 | 
			
		||||
        // Assignment from the Zero tag type: clears this object via vzero(*this)
        // and returns *this so assignments can be chained. The argument z is never read.
        vRealF & operator = ( Zero & z){
 | 
			
		||||
	  vzero(*this);
 | 
			
		||||
	  return (*this);
 | 
			
		||||
        }
 | 
			
		||||
        ////////////////////////////////////
 | 
			
		||||
        // Arithmetic operator overloads +,-,*
 | 
			
		||||
        ////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user