mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Bringing expression templates for faster vector loops
This commit is contained in:
		@@ -20,18 +20,106 @@ namespace Grid {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
extern int GridCshiftPermuteMap[4][16];
 | 
					extern int GridCshiftPermuteMap[4][16];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					////////////////////////////////////////////////
 | 
				
			||||||
 | 
					// Basic expressions used in Expression Template
 | 
				
			||||||
 | 
					////////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Empty tag base class; Lattice<vobj> publicly derives from it.
					// Presumably lets generic code recognise "is a lattice" via the base type
					// (e.g. std::is_base_of) -- confirm against the trait definitions.
					class LatticeBase {};
 | 
				
			||||||
 | 
					// Empty tag base class shared by the LatticeUnaryExpression,
					// LatticeBinaryExpression and LatticeTrinaryExpression templates.
					// Presumably lets generic code recognise expression-template nodes via the
					// base type -- confirm against the trait definitions.
					class LatticeExpressionBase {};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename Op, typename T1>                           
 | 
				
			||||||
 | 
					class LatticeUnaryExpression  : public std::pair<Op,std::tuple<T1> > , public LatticeExpressionBase {
 | 
				
			||||||
 | 
					 public:
 | 
				
			||||||
 | 
					 LatticeUnaryExpression(const std::pair<Op,std::tuple<T1> > &arg): std::pair<Op,std::tuple<T1> >(arg) {};
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename Op, typename T1, typename T2>              
 | 
				
			||||||
 | 
					class LatticeBinaryExpression : public std::pair<Op,std::tuple<T1,T2> > , public LatticeExpressionBase {
 | 
				
			||||||
 | 
					 public:
 | 
				
			||||||
 | 
					 LatticeBinaryExpression(const std::pair<Op,std::tuple<T1,T2> > &arg): std::pair<Op,std::tuple<T1,T2> >(arg) {};
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename Op, typename T1, typename T2, typename T3> 
 | 
				
			||||||
 | 
					class LatticeTrinaryExpression :public std::pair<Op,std::tuple<T1,T2,T3> >, public LatticeExpressionBase {
 | 
				
			||||||
 | 
					 public:
 | 
				
			||||||
 | 
					 LatticeTrinaryExpression(const std::pair<Op,std::tuple<T1,T2,T3> > &arg): std::pair<Op,std::tuple<T1,T2,T3> >(arg) {};
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class vobj>
 | 
					template<class vobj>
 | 
				
			||||||
class Lattice
 | 
					class Lattice : public LatticeBase
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    GridBase *_grid;
 | 
					    GridBase *_grid;
 | 
				
			||||||
    int checkerboard;
 | 
					    int checkerboard;
 | 
				
			||||||
    std::vector<vobj,alignedAllocator<vobj> > _odata;
 | 
					    std::vector<vobj,alignedAllocator<vobj> > _odata;
 | 
				
			||||||
    //std::valarray<vobj> _odata;
 | 
					 | 
				
			||||||
public:
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
    typedef typename vobj::scalar_type scalar_type;
 | 
					    typedef typename vobj::scalar_type scalar_type;
 | 
				
			||||||
    typedef typename vobj::vector_type vector_type;
 | 
					    typedef typename vobj::vector_type vector_type;
 | 
				
			||||||
 | 
					    typedef vobj vector_object;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Expression Template closure support
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Assign the result of a unary expression template to this lattice.
					  // Every outer site ss in [0, _grid->oSites()) is evaluated with
					  // eval(ss,expr) and written to _odata[ss] through vstream.
					  // Returns *this. The `checkerboard` member is left untouched.
					  template <typename Op, typename T1>
					  inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
					  {
					    // The loop below indexes the expression with this lattice's site range,
					    // so the expression must live on a grid with the same number of outer
					    // sites; otherwise eval() would read out of range.
					    // NOTE(review): a stricter "same grid object" check may be intended --
					    // confirm against conformable().
					    GridBase *egrid(nullptr);
					    GridFromExpression(egrid,expr);
					    assert(egrid!=nullptr);
					    assert(egrid->oSites()==_grid->oSites());
					#pragma omp parallel for
					    for(int ss=0;ss<_grid->oSites();ss++){
					      // Evaluate into a temporary first, then store it via vstream.
					      vobj tmp= eval(ss,expr);
					      vstream(_odata[ss] ,tmp);
					    }
					    return *this;
					  }
 | 
				
			||||||
 | 
					  // Assign the result of a binary expression template to this lattice.
					  // Every outer site ss in [0, _grid->oSites()) is evaluated with
					  // eval(ss,expr) and written to _odata[ss] through vstream.
					  // Returns *this. The `checkerboard` member is left untouched.
					  template <typename Op, typename T1,typename T2>
					  inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
					  {
					    // The loop below indexes the expression with this lattice's site range,
					    // so the expression must live on a grid with the same number of outer
					    // sites; otherwise eval() would read out of range.
					    // NOTE(review): a stricter "same grid object" check may be intended --
					    // confirm against conformable().
					    GridBase *egrid(nullptr);
					    GridFromExpression(egrid,expr);
					    assert(egrid!=nullptr);
					    assert(egrid->oSites()==_grid->oSites());
					#pragma omp parallel for
					    for(int ss=0;ss<_grid->oSites();ss++){
					      // Evaluate into a temporary first, then store it via vstream.
					      vobj tmp= eval(ss,expr);
					      vstream(_odata[ss] ,tmp);
					    }
					    return *this;
					  }
 | 
				
			||||||
 | 
					  // Assign the result of a trinary expression template to this lattice.
					  // Every outer site ss in [0, _grid->oSites()) is evaluated with
					  // eval(ss,expr) and written to _odata[ss] through vstream.
					  // Returns *this. The `checkerboard` member is left untouched.
					  template <typename Op, typename T1,typename T2,typename T3>
					  inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
					  {
					    // The loop below indexes the expression with this lattice's site range,
					    // so the expression must live on a grid with the same number of outer
					    // sites; otherwise eval() would read out of range.
					    // NOTE(review): a stricter "same grid object" check may be intended --
					    // confirm against conformable().
					    GridBase *egrid(nullptr);
					    GridFromExpression(egrid,expr);
					    assert(egrid!=nullptr);
					    assert(egrid->oSites()==_grid->oSites());
					#pragma omp parallel for
					    for(int ss=0;ss<_grid->oSites();ss++){
					      // Evaluate into a temporary first, then store it via vstream.
					      vobj tmp= eval(ss,expr);
					      vstream(_odata[ss] ,tmp);
					    }
					    return *this;
					  }
 | 
				
			||||||
 | 
					  //GridFromExpression is tricky to do
 | 
				
			||||||
 | 
					  template<class Op,class T1>
 | 
				
			||||||
 | 
					    Lattice(const LatticeUnaryExpression<Op,T1> & expr):    _grid(nullptr){
 | 
				
			||||||
 | 
					    GridFromExpression(_grid,expr);
 | 
				
			||||||
 | 
					    assert(_grid!=nullptr);
 | 
				
			||||||
 | 
					    _odata.resize(_grid->oSites());
 | 
				
			||||||
 | 
					#pragma omp parallel for
 | 
				
			||||||
 | 
					    for(int ss=0;ss<_grid->oSites();ss++){
 | 
				
			||||||
 | 
					      _odata[ss] = eval(ss,expr);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					  template<class Op,class T1, class T2>
 | 
				
			||||||
 | 
					  Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr):    _grid(nullptr){
 | 
				
			||||||
 | 
					    GridFromExpression(_grid,expr);
 | 
				
			||||||
 | 
					    assert(_grid!=nullptr);
 | 
				
			||||||
 | 
					    _odata.resize(_grid->oSites());
 | 
				
			||||||
 | 
					#pragma omp parallel for
 | 
				
			||||||
 | 
					    for(int ss=0;ss<_grid->oSites();ss++){
 | 
				
			||||||
 | 
					      _odata[ss] = eval(ss,expr);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					  template<class Op,class T1, class T2, class T3>
 | 
				
			||||||
 | 
					  Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr):    _grid(nullptr){
 | 
				
			||||||
 | 
					    GridFromExpression(_grid,expr);
 | 
				
			||||||
 | 
					    assert(_grid!=nullptr);
 | 
				
			||||||
 | 
					    _odata.resize(_grid->oSites());
 | 
				
			||||||
 | 
					#pragma omp parallel for
 | 
				
			||||||
 | 
					    for(int ss=0;ss<_grid->oSites();ss++){
 | 
				
			||||||
 | 
					      _odata[ss] = eval(ss,expr);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    //////////////////////////////////////////////////////////////////
 | 
					    //////////////////////////////////////////////////////////////////
 | 
				
			||||||
    // Constructor requires "grid" passed.
 | 
					    // Constructor requires "grid" passed.
 | 
				
			||||||
@@ -54,7 +142,7 @@ public:
 | 
				
			|||||||
    template<class robj> inline Lattice<vobj> & operator = (const Lattice<robj> & r){
 | 
					    template<class robj> inline Lattice<vobj> & operator = (const Lattice<robj> & r){
 | 
				
			||||||
      conformable(*this,r);
 | 
					      conformable(*this,r);
 | 
				
			||||||
      std::cout<<"Lattice operator ="<<std::endl;
 | 
					      std::cout<<"Lattice operator ="<<std::endl;
 | 
				
			||||||
 | 
					#pragma omp parallel for
 | 
				
			||||||
        for(int ss=0;ss<_grid->oSites();ss++){
 | 
					        for(int ss=0;ss<_grid->oSites();ss++){
 | 
				
			||||||
            this->_odata[ss]=r._odata[ss];
 | 
					            this->_odata[ss]=r._odata[ss];
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
@@ -88,8 +176,17 @@ public:
 | 
				
			|||||||
 }; // class Lattice
 | 
					 }; // class Lattice
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define GRID_LATTICE_EXPRESSION_TEMPLATES
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <lattice/Grid_lattice_conformable.h>
 | 
					#include <lattice/Grid_lattice_conformable.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef GRID_LATTICE_EXPRESSION_TEMPLATES
 | 
				
			||||||
 | 
					#include <lattice/Grid_lattice_ET.h>
 | 
				
			||||||
 | 
					#else 
 | 
				
			||||||
 | 
					#include <lattice/Grid_lattice_overload.h>
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
#include <lattice/Grid_lattice_arith.h>
 | 
					#include <lattice/Grid_lattice_arith.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <lattice/Grid_lattice_trace.h>
 | 
					#include <lattice/Grid_lattice_trace.h>
 | 
				
			||||||
#include <lattice/Grid_lattice_transpose.h>
 | 
					#include <lattice/Grid_lattice_transpose.h>
 | 
				
			||||||
#include <lattice/Grid_lattice_local.h>
 | 
					#include <lattice/Grid_lattice_local.h>
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user