mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Optimisation and syntax pretty
This commit is contained in:
		@@ -62,7 +62,7 @@ public:
 | 
				
			|||||||
  ////////////////////////////////////////////////////////////////////////////////
 | 
					  ////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
  // Expression Template closure support
 | 
					  // Expression Template closure support
 | 
				
			||||||
  ////////////////////////////////////////////////////////////////////////////////
 | 
					  ////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
  template <typename Op, typename T1>                         inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
 | 
					  template <typename Op, typename T1>                         strong_inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
PARALLEL_FOR_LOOP
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
    for(int ss=0;ss<_grid->oSites();ss++){
 | 
					    for(int ss=0;ss<_grid->oSites();ss++){
 | 
				
			||||||
@@ -71,7 +71,7 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
    return *this;
 | 
					    return *this;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  template <typename Op, typename T1,typename T2>             inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
 | 
					  template <typename Op, typename T1,typename T2>             strong_inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
PARALLEL_FOR_LOOP
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
    for(int ss=0;ss<_grid->oSites();ss++){
 | 
					    for(int ss=0;ss<_grid->oSites();ss++){
 | 
				
			||||||
@@ -80,7 +80,7 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
    return *this;
 | 
					    return *this;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  template <typename Op, typename T1,typename T2,typename T3> inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
 | 
					  template <typename Op, typename T1,typename T2,typename T3> strong_inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
PARALLEL_FOR_LOOP
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
    for(int ss=0;ss<_grid->oSites();ss++){
 | 
					    for(int ss=0;ss<_grid->oSites();ss++){
 | 
				
			||||||
@@ -132,14 +132,14 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
        checkerboard=0;
 | 
					        checkerboard=0;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
 | 
					    template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
 | 
				
			||||||
PARALLEL_FOR_LOOP
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
        for(int ss=0;ss<_grid->oSites();ss++){
 | 
					        for(int ss=0;ss<_grid->oSites();ss++){
 | 
				
			||||||
            this->_odata[ss]=r;
 | 
					            this->_odata[ss]=r;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        return *this;
 | 
					        return *this;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    template<class robj> inline Lattice<vobj> & operator = (const Lattice<robj> & r){
 | 
					    template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
 | 
				
			||||||
      conformable(*this,r);
 | 
					      conformable(*this,r);
 | 
				
			||||||
      std::cout<<"Lattice operator ="<<std::endl;
 | 
					      std::cout<<"Lattice operator ="<<std::endl;
 | 
				
			||||||
PARALLEL_FOR_LOOP
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
@@ -150,21 +150,21 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // *=,+=,-= operators inherit behaviour from the corresponding */+/- operation
 | 
					    // *=,+=,-= operators inherit behaviour from the corresponding */+/- operation
 | 
				
			||||||
    template<class T> inline Lattice<vobj> &operator *=(const T &r) {
 | 
					    template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
 | 
				
			||||||
        *this = (*this)*r;
 | 
					        *this = (*this)*r;
 | 
				
			||||||
        return *this;
 | 
					        return *this;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    template<class T> inline Lattice<vobj> &operator -=(const T &r) {
 | 
					    template<class T> strong_inline Lattice<vobj> &operator -=(const T &r) {
 | 
				
			||||||
        *this = (*this)-r;
 | 
					        *this = (*this)-r;
 | 
				
			||||||
        return *this;
 | 
					        return *this;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    template<class T> inline Lattice<vobj> &operator +=(const T &r) {
 | 
					    template<class T> strong_inline Lattice<vobj> &operator +=(const T &r) {
 | 
				
			||||||
        *this = (*this)+r;
 | 
					        *this = (*this)+r;
 | 
				
			||||||
        return *this;
 | 
					        return *this;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    inline friend Lattice<vobj> operator / (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
 | 
					    strong_inline friend Lattice<vobj> operator / (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
 | 
				
			||||||
        conformable(lhs,rhs);
 | 
					        conformable(lhs,rhs);
 | 
				
			||||||
        Lattice<vobj> ret(lhs._grid);
 | 
					        Lattice<vobj> ret(lhs._grid);
 | 
				
			||||||
PARALLEL_FOR_LOOP
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
@@ -176,7 +176,7 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 }; // class Lattice
 | 
					 }; // class Lattice
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  template<class vobj> inline std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
 | 
					  template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
 | 
				
			||||||
    std::vector<int> gcoor;
 | 
					    std::vector<int> gcoor;
 | 
				
			||||||
    typedef typename vobj::scalar_object sobj;
 | 
					    typedef typename vobj::scalar_object sobj;
 | 
				
			||||||
    sobj ss;
 | 
					    sobj ss;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -26,7 +26,7 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
    // Trace Index level dependent operation
 | 
					    // Trace Index level dependent operation
 | 
				
			||||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
    template<int Index,class vobj>
 | 
					    template<int Index,class vobj>
 | 
				
			||||||
    inline auto traceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
 | 
					    inline auto latTraceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
      Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
 | 
					      Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
 | 
				
			||||||
PARALLEL_FOR_LOOP
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -27,13 +27,14 @@ strong_inline  void mac(iScalar<rtype> * __restrict__ ret,const iScalar<vtype> *
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
template<class rrtype,class ltype,class rtype,int N>
 | 
					template<class rrtype,class ltype,class rtype,int N>
 | 
				
			||||||
strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
 | 
					strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
 | 
				
			||||||
    for(int c2=0;c2<N;c2++){
 | 
					 | 
				
			||||||
    for(int c1=0;c1<N;c1++){
 | 
					 | 
				
			||||||
    for(int c3=0;c3<N;c3++){
 | 
					    for(int c3=0;c3<N;c3++){
 | 
				
			||||||
 | 
					    for(int c1=0;c1<N;c1++){
 | 
				
			||||||
 | 
					    for(int c2=0;c2<N;c2++){
 | 
				
			||||||
        mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
 | 
					        mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
 | 
				
			||||||
    }}}
 | 
					    }}}
 | 
				
			||||||
    return;
 | 
					    return;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class rrtype,class ltype,class rtype,int N>
 | 
					template<class rrtype,class ltype,class rtype,int N>
 | 
				
			||||||
strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
 | 
					strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
 | 
				
			||||||
    for(int c1=0;c1<N;c1++){
 | 
					    for(int c1=0;c1<N;c1++){
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -15,15 +15,22 @@ strong_inline void mult(iScalar<rtype> * __restrict__ ret,const iScalar<mtype> *
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template<class rrtype,class ltype,class rtype,int N>
 | 
					template<class rrtype,class ltype,class rtype,int N>
 | 
				
			||||||
strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
 | 
					strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
 | 
				
			||||||
    for(int c2=0;c2<N;c2++){
 | 
					 | 
				
			||||||
  for(int c1=0;c1<N;c1++){
 | 
					  for(int c1=0;c1<N;c1++){
 | 
				
			||||||
        mult(&ret->_internal[c1][c2],&lhs->_internal[c1][0],&rhs->_internal[0][c2]);
 | 
					    int c3=0;
 | 
				
			||||||
 | 
					    for(int c2=0;c2<N;c2++){
 | 
				
			||||||
 | 
					      mult(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
  for(int c3=1;c3<N;c3++){
 | 
					  for(int c3=1;c3<N;c3++){
 | 
				
			||||||
 | 
					    for(int c1=0;c1<N;c1++){
 | 
				
			||||||
 | 
					      for(int c2=0;c2<N;c2++){
 | 
				
			||||||
	mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
 | 
						mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    }}
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
    return;
 | 
					    return;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class rrtype,class ltype,class rtype,int N>
 | 
					template<class rrtype,class ltype,class rtype,int N>
 | 
				
			||||||
strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
 | 
					strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
 | 
				
			||||||
    for(int c2=0;c2<N;c2++){
 | 
					    for(int c2=0;c2<N;c2++){
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user