mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	More cleanup of Grid_simd.h
This commit is contained in:
		
							
								
								
									
										128
									
								
								lib/Grid_simd.h
									
									
									
									
									
								
							
							
						
						
									
										128
									
								
								lib/Grid_simd.h
									
									
									
									
									
								
							@@ -44,49 +44,49 @@ namespace Grid {
 | 
				
			|||||||
  inline ComplexF innerProduct(const ComplexF & l, const ComplexF & r) { return conjugate(l)*r; }
 | 
					  inline ComplexF innerProduct(const ComplexF & l, const ComplexF & r) { return conjugate(l)*r; }
 | 
				
			||||||
  inline RealD innerProduct(const RealD & l, const RealD & r) { return l*r; }
 | 
					  inline RealD innerProduct(const RealD & l, const RealD & r) { return l*r; }
 | 
				
			||||||
  inline RealF innerProduct(const RealF & l, const RealF & r) { return l*r; }
 | 
					  inline RealF innerProduct(const RealF & l, const RealF & r) { return l*r; }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
    ////////////////////////////////////////////////////////////////////////////////
 | 
					  ////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
    //Provide support functions for basic real and complex data types required by Grid
 | 
					  //Provide support functions for basic real and complex data types required by Grid
 | 
				
			||||||
    //Single and double precision versions. Should be able to template this once only.
 | 
					  //Single and double precision versions. Should be able to template this once only.
 | 
				
			||||||
    ////////////////////////////////////////////////////////////////////////////////
 | 
					  ////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
    inline void mac (ComplexD * __restrict__ y,const ComplexD * __restrict__ a,const ComplexD *__restrict__ x){ *y = (*a) * (*x)+(*y); };
 | 
					  inline void mac (ComplexD * __restrict__ y,const ComplexD * __restrict__ a,const ComplexD *__restrict__ x){ *y = (*a) * (*x)+(*y); };
 | 
				
			||||||
    inline void mult(ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) * (*r);}
 | 
					  inline void mult(ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) * (*r);}
 | 
				
			||||||
    inline void sub (ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) - (*r);}
 | 
					  inline void sub (ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) - (*r);}
 | 
				
			||||||
    inline void add (ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) + (*r);}
 | 
					  inline void add (ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) + (*r);}
 | 
				
			||||||
    // conjugate already supported for complex
 | 
					  // conjugate already supported for complex
 | 
				
			||||||
    
 | 
					  
 | 
				
			||||||
    inline void mac (ComplexF * __restrict__ y,const ComplexF * __restrict__ a,const ComplexF *__restrict__ x){ *y = (*a) * (*x)+(*y); }
 | 
					  inline void mac (ComplexF * __restrict__ y,const ComplexF * __restrict__ a,const ComplexF *__restrict__ x){ *y = (*a) * (*x)+(*y); }
 | 
				
			||||||
    inline void mult(ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) * (*r); }
 | 
					  inline void mult(ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) * (*r); }
 | 
				
			||||||
    inline void sub (ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) - (*r); }
 | 
					  inline void sub (ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) - (*r); }
 | 
				
			||||||
    inline void add (ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) + (*r); }
 | 
					  inline void add (ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) + (*r); }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
    //conjugate already supported for complex
 | 
					  //conjugate already supported for complex
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
    inline ComplexF timesI(const ComplexF &r)     { return(r*ComplexF(0.0,1.0));}
 | 
					  inline ComplexF timesI(const ComplexF &r)     { return(r*ComplexF(0.0,1.0));}
 | 
				
			||||||
    inline ComplexD timesI(const ComplexD &r)     { return(r*ComplexD(0.0,1.0));}
 | 
					  inline ComplexD timesI(const ComplexD &r)     { return(r*ComplexD(0.0,1.0));}
 | 
				
			||||||
    inline ComplexF timesMinusI(const ComplexF &r){ return(r*ComplexF(0.0,-1.0));}
 | 
					  inline ComplexF timesMinusI(const ComplexF &r){ return(r*ComplexF(0.0,-1.0));}
 | 
				
			||||||
    inline ComplexD timesMinusI(const ComplexD &r){ return(r*ComplexD(0.0,-1.0));}
 | 
					  inline ComplexD timesMinusI(const ComplexD &r){ return(r*ComplexD(0.0,-1.0));}
 | 
				
			||||||
    inline void timesI(ComplexF &ret,const ComplexF &r)     { ret = timesI(r);}
 | 
					  inline void timesI(ComplexF &ret,const ComplexF &r)     { ret = timesI(r);}
 | 
				
			||||||
    inline void timesI(ComplexD &ret,const ComplexD &r)     { ret = timesI(r);}
 | 
					  inline void timesI(ComplexD &ret,const ComplexD &r)     { ret = timesI(r);}
 | 
				
			||||||
    inline void timesMinusI(ComplexF &ret,const ComplexF &r){ ret = timesMinusI(r);}
 | 
					  inline void timesMinusI(ComplexF &ret,const ComplexF &r){ ret = timesMinusI(r);}
 | 
				
			||||||
    inline void timesMinusI(ComplexD &ret,const ComplexD &r){ ret = timesMinusI(r);}
 | 
					  inline void timesMinusI(ComplexD &ret,const ComplexD &r){ ret = timesMinusI(r);}
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
    inline void mac (RealD * __restrict__ y,const RealD * __restrict__ a,const RealD *__restrict__ x){  *y = (*a) * (*x)+(*y);}
 | 
					  inline void mac (RealD * __restrict__ y,const RealD * __restrict__ a,const RealD *__restrict__ x){  *y = (*a) * (*x)+(*y);}
 | 
				
			||||||
    inline void mult(RealD * __restrict__ y,const RealD * __restrict__ l,const RealD *__restrict__ r){ *y = (*l) * (*r);}
 | 
					  inline void mult(RealD * __restrict__ y,const RealD * __restrict__ l,const RealD *__restrict__ r){ *y = (*l) * (*r);}
 | 
				
			||||||
    inline void sub (RealD * __restrict__ y,const RealD * __restrict__ l,const RealD *__restrict__ r){ *y = (*l) - (*r);}
 | 
					  inline void sub (RealD * __restrict__ y,const RealD * __restrict__ l,const RealD *__restrict__ r){ *y = (*l) - (*r);}
 | 
				
			||||||
    inline void add (RealD * __restrict__ y,const RealD * __restrict__ l,const RealD *__restrict__ r){ *y = (*l) + (*r);}
 | 
					  inline void add (RealD * __restrict__ y,const RealD * __restrict__ l,const RealD *__restrict__ r){ *y = (*l) + (*r);}
 | 
				
			||||||
    
 | 
					  
 | 
				
			||||||
    inline void mac (RealF * __restrict__ y,const RealF * __restrict__ a,const RealF *__restrict__ x){  *y = (*a) * (*x)+(*y); }
 | 
					  inline void mac (RealF * __restrict__ y,const RealF * __restrict__ a,const RealF *__restrict__ x){  *y = (*a) * (*x)+(*y); }
 | 
				
			||||||
    inline void mult(RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) * (*r); }
 | 
					  inline void mult(RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) * (*r); }
 | 
				
			||||||
    inline void sub (RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) - (*r); }
 | 
					  inline void sub (RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) - (*r); }
 | 
				
			||||||
    inline void add (RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) + (*r); }
 | 
					  inline void add (RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) + (*r); }
 | 
				
			||||||
    
 | 
					  
 | 
				
			||||||
    inline void vstream(ComplexF &l, const ComplexF &r){ l=r;}
 | 
					  inline void vstream(ComplexF &l, const ComplexF &r){ l=r;}
 | 
				
			||||||
    inline void vstream(ComplexD &l, const ComplexD &r){ l=r;}
 | 
					  inline void vstream(ComplexD &l, const ComplexD &r){ l=r;}
 | 
				
			||||||
    inline void vstream(RealF &l, const RealF &r){ l=r;}
 | 
					  inline void vstream(RealF &l, const RealF &r){ l=r;}
 | 
				
			||||||
    inline void vstream(RealD &l, const RealD &r){ l=r;}
 | 
					  inline void vstream(RealD &l, const RealD &r){ l=r;}
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
  class Zero{};
 | 
					  class Zero{};
 | 
				
			||||||
  static Zero zero;
 | 
					  static Zero zero;
 | 
				
			||||||
  template<class itype> inline void zeroit(itype &arg){ arg=zero;};
 | 
					  template<class itype> inline void zeroit(itype &arg){ arg=zero;};
 | 
				
			||||||
@@ -94,52 +94,12 @@ namespace Grid {
 | 
				
			|||||||
  template<>            inline void zeroit(ComplexD &arg){ arg=0; };
 | 
					  template<>            inline void zeroit(ComplexD &arg){ arg=0; };
 | 
				
			||||||
  template<>            inline void zeroit(RealF &arg){ arg=0; };
 | 
					  template<>            inline void zeroit(RealF &arg){ arg=0; };
 | 
				
			||||||
  template<>            inline void zeroit(RealD &arg){ arg=0; };
 | 
					  template<>            inline void zeroit(RealD &arg){ arg=0; };
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <simd/Grid_vector_types.h>
 | 
					#include <simd/Grid_vector_types.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Grid {
 | 
					namespace Grid {
 | 
				
			||||||
 | 
					 | 
				
			||||||
  // NB: Template the following on "type Complex" and then implement *,+,- for 
 | 
					 | 
				
			||||||
  // ComplexF, ComplexD, RealF, RealD above to
 | 
					 | 
				
			||||||
  // get full generality of binops with scalars.
 | 
					 | 
				
			||||||
   inline void mac (vComplexF *__restrict__ y,const ComplexF *__restrict__ a,const vComplexF *__restrict__ x){ *y = (*a)*(*x)+(*y); };
 | 
					 | 
				
			||||||
   inline void mult(vComplexF *__restrict__ y,const ComplexF *__restrict__ l,const vComplexF *__restrict__ r){ *y = (*l) * (*r); }
 | 
					 | 
				
			||||||
   inline void sub (vComplexF *__restrict__ y,const ComplexF *__restrict__ l,const vComplexF *__restrict__ r){ *y = (*l) - (*r); }
 | 
					 | 
				
			||||||
   inline void add (vComplexF *__restrict__ y,const ComplexF *__restrict__ l,const vComplexF *__restrict__ r){ *y = (*l) + (*r); }
 | 
					 | 
				
			||||||
   inline void mac (vComplexF *__restrict__ y,const vComplexF *__restrict__ a,const ComplexF *__restrict__ x){ *y = (*a)*(*x)+(*y); };
 | 
					 | 
				
			||||||
   inline void mult(vComplexF *__restrict__ y,const vComplexF *__restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) * (*r); }
 | 
					 | 
				
			||||||
   inline void sub (vComplexF *__restrict__ y,const vComplexF *__restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) - (*r); }
 | 
					 | 
				
			||||||
   inline void add (vComplexF *__restrict__ y,const vComplexF *__restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) + (*r); }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   inline void mac (vComplexD *__restrict__ y,const ComplexD *__restrict__ a,const vComplexD *__restrict__ x){ *y = (*a)*(*x)+(*y); };
 | 
					 | 
				
			||||||
   inline void mult(vComplexD *__restrict__ y,const ComplexD *__restrict__ l,const vComplexD *__restrict__ r){ *y = (*l) * (*r); }
 | 
					 | 
				
			||||||
   inline void sub (vComplexD *__restrict__ y,const ComplexD *__restrict__ l,const vComplexD *__restrict__ r){ *y = (*l) - (*r); }
 | 
					 | 
				
			||||||
   inline void add (vComplexD *__restrict__ y,const ComplexD *__restrict__ l,const vComplexD *__restrict__ r){ *y = (*l) + (*r); }
 | 
					 | 
				
			||||||
   inline void mac (vComplexD *__restrict__ y,const vComplexD *__restrict__ a,const ComplexD *__restrict__ x){ *y = (*a)*(*x)+(*y); };
 | 
					 | 
				
			||||||
   inline void mult(vComplexD *__restrict__ y,const vComplexD *__restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) * (*r); }
 | 
					 | 
				
			||||||
   inline void sub (vComplexD *__restrict__ y,const vComplexD *__restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) - (*r); }
 | 
					 | 
				
			||||||
   inline void add (vComplexD *__restrict__ y,const vComplexD *__restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) + (*r); }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   inline void mac (vRealF *__restrict__ y,const RealF *__restrict__ a,const vRealF *__restrict__ x){ *y = (*a)*(*x)+(*y); };
 | 
					 | 
				
			||||||
   inline void mult(vRealF *__restrict__ y,const RealF *__restrict__ l,const vRealF *__restrict__ r){ *y = (*l) * (*r); }
 | 
					 | 
				
			||||||
   inline void sub (vRealF *__restrict__ y,const RealF *__restrict__ l,const vRealF *__restrict__ r){ *y = (*l) - (*r); }
 | 
					 | 
				
			||||||
   inline void add (vRealF *__restrict__ y,const RealF *__restrict__ l,const vRealF *__restrict__ r){ *y = (*l) + (*r); }
 | 
					 | 
				
			||||||
   inline void mac (vRealF *__restrict__ y,const vRealF *__restrict__ a,const RealF *__restrict__ x){ *y = (*a)*(*x)+(*y); };
 | 
					 | 
				
			||||||
   inline void mult(vRealF *__restrict__ y,const vRealF *__restrict__ l,const RealF *__restrict__ r){ *y = (*l) * (*r); }
 | 
					 | 
				
			||||||
   inline void sub (vRealF *__restrict__ y,const vRealF *__restrict__ l,const RealF *__restrict__ r){ *y = (*l) - (*r); }
 | 
					 | 
				
			||||||
   inline void add (vRealF *__restrict__ y,const vRealF *__restrict__ l,const RealF *__restrict__ r){ *y = (*l) + (*r); }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   inline void mac (vRealD *__restrict__ y,const RealD *__restrict__ a,const vRealD *__restrict__ x){ *y = (*a)*(*x)+(*y); };
 | 
					 | 
				
			||||||
   inline void mult(vRealD *__restrict__ y,const RealD *__restrict__ l,const vRealD *__restrict__ r){ *y = (*l) * (*r); }
 | 
					 | 
				
			||||||
   inline void sub (vRealD *__restrict__ y,const RealD *__restrict__ l,const vRealD *__restrict__ r){ *y = (*l) - (*r); }
 | 
					 | 
				
			||||||
   inline void add (vRealD *__restrict__ y,const RealD *__restrict__ l,const vRealD *__restrict__ r){ *y = (*l) + (*r); }
 | 
					 | 
				
			||||||
   inline void mac (vRealD *__restrict__ y,const vRealD *__restrict__ a,const RealD *__restrict__ x){ *y = (*a)*(*x)+(*y); };
 | 
					 | 
				
			||||||
   inline void mult(vRealD *__restrict__ y,const vRealD *__restrict__ l,const RealD *__restrict__ r){ *y = (*l) * (*r); }
 | 
					 | 
				
			||||||
   inline void sub (vRealD *__restrict__ y,const vRealD *__restrict__ l,const RealD *__restrict__ r){ *y = (*l) - (*r); }
 | 
					 | 
				
			||||||
   inline void add (vRealD *__restrict__ y,const vRealD *__restrict__ l,const RealD *__restrict__ r){ *y = (*l) + (*r); }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  // Default precision
 | 
					  // Default precision
 | 
				
			||||||
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
 | 
					#ifdef GRID_DEFAULT_PRECISION_DOUBLE
 | 
				
			||||||
  typedef vRealD vReal;
 | 
					  typedef vRealD vReal;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -2,7 +2,7 @@
 | 
				
			|||||||
/*! @file Grid_vector_types.h
 | 
					/*! @file Grid_vector_types.h
 | 
				
			||||||
  @brief Defines templated class Grid_simd to deal with inner vector types
 | 
					  @brief Defines templated class Grid_simd to deal with inner vector types
 | 
				
			||||||
*/
 | 
					*/
 | 
				
			||||||
// Time-stamp: <2015-05-26 13:22:36 neo>
 | 
					// Time-stamp: <2015-05-26 13:44:54 neo>
 | 
				
			||||||
//---------------------------------------------------------------------------
 | 
					//---------------------------------------------------------------------------
 | 
				
			||||||
#ifndef GRID_VECTOR_TYPES
 | 
					#ifndef GRID_VECTOR_TYPES
 | 
				
			||||||
#define GRID_VECTOR_TYPES
 | 
					#define GRID_VECTOR_TYPES
 | 
				
			||||||
@@ -156,6 +156,18 @@ inline void Gpermute(vsimd &y,const vsimd &b,int perm){
 | 
				
			|||||||
    friend inline void sub (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); }
 | 
					    friend inline void sub (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); }
 | 
				
			||||||
    friend inline void add (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); }
 | 
					    friend inline void add (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    friend inline void mac (Grid_simd *__restrict__ y,const Scalar_type *__restrict__ a,const Grid_simd   *__restrict__ x){ *y = (*a)*(*x)+(*y); };
 | 
				
			||||||
 | 
					    friend inline void mult(Grid_simd *__restrict__ y,const Scalar_type *__restrict__ l,const Grid_simd   *__restrict__ r){ *y = (*l) * (*r); }
 | 
				
			||||||
 | 
					    friend inline void sub (Grid_simd *__restrict__ y,const Scalar_type *__restrict__ l,const Grid_simd   *__restrict__ r){ *y = (*l) - (*r); }
 | 
				
			||||||
 | 
					    friend inline void add (Grid_simd *__restrict__ y,const Scalar_type *__restrict__ l,const Grid_simd   *__restrict__ r){ *y = (*l) + (*r); }
 | 
				
			||||||
 | 
					    friend inline void mac (Grid_simd *__restrict__ y,const Grid_simd   *__restrict__ a,const Scalar_type *__restrict__ x){ *y = (*a)*(*x)+(*y); };
 | 
				
			||||||
 | 
					    friend inline void mult(Grid_simd *__restrict__ y,const Grid_simd   *__restrict__ l,const Scalar_type *__restrict__ r){ *y = (*l) * (*r); }
 | 
				
			||||||
 | 
					    friend inline void sub (Grid_simd *__restrict__ y,const Grid_simd   *__restrict__ l,const Scalar_type *__restrict__ r){ *y = (*l) - (*r); }
 | 
				
			||||||
 | 
					    friend inline void add (Grid_simd *__restrict__ y,const Grid_simd   *__restrict__ l,const Scalar_type *__restrict__ r){ *y = (*l) + (*r); }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    //not for integer types... 
 | 
					    //not for integer types... 
 | 
				
			||||||
    template <  class S = Scalar_type, NotEnableIf<std::is_integral < S >, int> = 0 > 
 | 
					    template <  class S = Scalar_type, NotEnableIf<std::is_integral < S >, int> = 0 > 
 | 
				
			||||||
    friend inline Grid_simd adj(const Grid_simd &in){ return conjugate(in); }
 | 
					    friend inline Grid_simd adj(const Grid_simd &in){ return conjugate(in); }
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user