mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Bringing expression templates for faster vector loops
This commit is contained in:
		@@ -20,18 +20,106 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
extern int GridCshiftPermuteMap[4][16];
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////
 | 
			
		||||
// Basic expressions used in Expression Template
 | 
			
		||||
////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
class LatticeBase {};
 | 
			
		||||
class LatticeExpressionBase {};
 | 
			
		||||
 | 
			
		||||
template <typename Op, typename T1>                           
 | 
			
		||||
class LatticeUnaryExpression  : public std::pair<Op,std::tuple<T1> > , public LatticeExpressionBase {
 | 
			
		||||
 public:
 | 
			
		||||
 LatticeUnaryExpression(const std::pair<Op,std::tuple<T1> > &arg): std::pair<Op,std::tuple<T1> >(arg) {};
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <typename Op, typename T1, typename T2>              
 | 
			
		||||
class LatticeBinaryExpression : public std::pair<Op,std::tuple<T1,T2> > , public LatticeExpressionBase {
 | 
			
		||||
 public:
 | 
			
		||||
 LatticeBinaryExpression(const std::pair<Op,std::tuple<T1,T2> > &arg): std::pair<Op,std::tuple<T1,T2> >(arg) {};
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <typename Op, typename T1, typename T2, typename T3> 
 | 
			
		||||
class LatticeTrinaryExpression :public std::pair<Op,std::tuple<T1,T2,T3> >, public LatticeExpressionBase {
 | 
			
		||||
 public:
 | 
			
		||||
 LatticeTrinaryExpression(const std::pair<Op,std::tuple<T1,T2,T3> > &arg): std::pair<Op,std::tuple<T1,T2,T3> >(arg) {};
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template<class vobj>
 | 
			
		||||
class Lattice
 | 
			
		||||
class Lattice : public LatticeBase
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
    GridBase *_grid;
 | 
			
		||||
    int checkerboard;
 | 
			
		||||
    std::vector<vobj,alignedAllocator<vobj> > _odata;
 | 
			
		||||
    //std::valarray<vobj> _odata;
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
    typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
    typedef typename vobj::vector_type vector_type;
 | 
			
		||||
    typedef vobj vector_object;
 | 
			
		||||
    
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Expression Template closure support
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  template <typename Op, typename T1>                         inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
 | 
			
		||||
  {
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<_grid->oSites();ss++){
 | 
			
		||||
      vobj tmp= eval(ss,expr);
 | 
			
		||||
      vstream(_odata[ss] ,tmp);
 | 
			
		||||
    }
 | 
			
		||||
    return *this;
 | 
			
		||||
  }
 | 
			
		||||
  template <typename Op, typename T1,typename T2>             inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
 | 
			
		||||
  {
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<_grid->oSites();ss++){
 | 
			
		||||
      vobj tmp= eval(ss,expr);
 | 
			
		||||
      vstream(_odata[ss] ,tmp);
 | 
			
		||||
    }
 | 
			
		||||
    return *this;
 | 
			
		||||
  }
 | 
			
		||||
  template <typename Op, typename T1,typename T2,typename T3> inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
 | 
			
		||||
  {
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<_grid->oSites();ss++){
 | 
			
		||||
      vobj tmp= eval(ss,expr);
 | 
			
		||||
      vstream(_odata[ss] ,tmp);
 | 
			
		||||
    }
 | 
			
		||||
    return *this;
 | 
			
		||||
  }
 | 
			
		||||
  //GridFromExpression is tricky to do
 | 
			
		||||
  template<class Op,class T1>
 | 
			
		||||
    Lattice(const LatticeUnaryExpression<Op,T1> & expr):    _grid(nullptr){
 | 
			
		||||
    GridFromExpression(_grid,expr);
 | 
			
		||||
    assert(_grid!=nullptr);
 | 
			
		||||
    _odata.resize(_grid->oSites());
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<_grid->oSites();ss++){
 | 
			
		||||
      _odata[ss] = eval(ss,expr);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
  template<class Op,class T1, class T2>
 | 
			
		||||
  Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr):    _grid(nullptr){
 | 
			
		||||
    GridFromExpression(_grid,expr);
 | 
			
		||||
    assert(_grid!=nullptr);
 | 
			
		||||
    _odata.resize(_grid->oSites());
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<_grid->oSites();ss++){
 | 
			
		||||
      _odata[ss] = eval(ss,expr);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
  template<class Op,class T1, class T2, class T3>
 | 
			
		||||
  Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr):    _grid(nullptr){
 | 
			
		||||
    GridFromExpression(_grid,expr);
 | 
			
		||||
    assert(_grid!=nullptr);
 | 
			
		||||
    _odata.resize(_grid->oSites());
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<_grid->oSites();ss++){
 | 
			
		||||
      _odata[ss] = eval(ss,expr);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Constructor requires "grid" passed.
 | 
			
		||||
@@ -54,7 +142,7 @@ public:
 | 
			
		||||
    template<class robj> inline Lattice<vobj> & operator = (const Lattice<robj> & r){
 | 
			
		||||
      conformable(*this,r);
 | 
			
		||||
      std::cout<<"Lattice operator ="<<std::endl;
 | 
			
		||||
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<_grid->oSites();ss++){
 | 
			
		||||
            this->_odata[ss]=r._odata[ss];
 | 
			
		||||
        }
 | 
			
		||||
@@ -88,8 +176,17 @@ public:
 | 
			
		||||
 }; // class Lattice
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#define GRID_LATTICE_EXPRESSION_TEMPLATES
 | 
			
		||||
 | 
			
		||||
#include <lattice/Grid_lattice_conformable.h>
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_LATTICE_EXPRESSION_TEMPLATES
 | 
			
		||||
#include <lattice/Grid_lattice_ET.h>
 | 
			
		||||
#else 
 | 
			
		||||
#include <lattice/Grid_lattice_overload.h>
 | 
			
		||||
#endif
 | 
			
		||||
#include <lattice/Grid_lattice_arith.h>
 | 
			
		||||
 | 
			
		||||
#include <lattice/Grid_lattice_trace.h>
 | 
			
		||||
#include <lattice/Grid_lattice_transpose.h>
 | 
			
		||||
#include <lattice/Grid_lattice_local.h>
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user