1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Bringing expression templates for faster vector loops

This commit is contained in:
Peter Boyle 2015-05-10 15:22:31 +01:00
parent b2e0f72a7e
commit e3acb36de6

View File

@ -20,18 +20,106 @@ namespace Grid {
extern int GridCshiftPermuteMap[4][16]; extern int GridCshiftPermuteMap[4][16];
////////////////////////////////////////////////
// Basic expressions used in Expression Template
////////////////////////////////////////////////
// Empty tag type carrying no data or behaviour of its own.
// NOTE(review): presumably used to recognise lattice containers in template
// code (e.g. via std::is_base_of) -- confirm against the users of this type.
class LatticeBase
{
};
// Empty tag type carrying no data or behaviour of its own.
// NOTE(review): presumably the common marker base for expression-template
// nodes so they can be told apart from concrete lattices -- confirm.
class LatticeExpressionBase
{
};
// Expression node holding an operation object and one operand.
// The node *is* a std::pair: `first` is the Op, `second` is a one-element
// std::tuple holding the operand. It additionally derives from
// LatticeExpressionBase.
template <typename Op, typename T1>
class LatticeUnaryExpression
  : public std::pair<Op, std::tuple<T1> >,
    public LatticeExpressionBase
{
  // Shorthand for the pair base that stores (operation, operands).
  typedef std::pair<Op, std::tuple<T1> > OpAndOperands;

public:
  // Copies an already-built (operation, operands) pair into this node.
  // Intentionally left implicit, as in the original declaration.
  LatticeUnaryExpression(const OpAndOperands &opAndOperands)
    : OpAndOperands(opAndOperands)
  {
  }
};
// Expression node holding an operation object and two operands.
// The node *is* a std::pair: `first` is the Op, `second` is a two-element
// std::tuple holding the operands in order. It additionally derives from
// LatticeExpressionBase.
template <typename Op, typename T1, typename T2>
class LatticeBinaryExpression
  : public std::pair<Op, std::tuple<T1, T2> >,
    public LatticeExpressionBase
{
  // Shorthand for the pair base that stores (operation, operands).
  typedef std::pair<Op, std::tuple<T1, T2> > OpAndOperands;

public:
  // Copies an already-built (operation, operands) pair into this node.
  // Intentionally left implicit, as in the original declaration.
  LatticeBinaryExpression(const OpAndOperands &opAndOperands)
    : OpAndOperands(opAndOperands)
  {
  }
};
// Expression node holding an operation object and three operands.
// The node *is* a std::pair: `first` is the Op, `second` is a three-element
// std::tuple holding the operands in order. It additionally derives from
// LatticeExpressionBase.
template <typename Op, typename T1, typename T2, typename T3>
class LatticeTrinaryExpression
  : public std::pair<Op, std::tuple<T1, T2, T3> >,
    public LatticeExpressionBase
{
  // Shorthand for the pair base that stores (operation, operands).
  typedef std::pair<Op, std::tuple<T1, T2, T3> > OpAndOperands;

public:
  // Copies an already-built (operation, operands) pair into this node.
  // Intentionally left implicit, as in the original declaration.
  LatticeTrinaryExpression(const OpAndOperands &opAndOperands)
    : OpAndOperands(opAndOperands)
  {
  }
};
template<class vobj> template<class vobj>
class Lattice class Lattice : public LatticeBase
{ {
public: public:
GridBase *_grid; GridBase *_grid;
int checkerboard; int checkerboard;
std::vector<vobj,alignedAllocator<vobj> > _odata; std::vector<vobj,alignedAllocator<vobj> > _odata;
//std::valarray<vobj> _odata;
public:
public:
typedef typename vobj::scalar_type scalar_type; typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type; typedef typename vobj::vector_type vector_type;
typedef vobj vector_object;
////////////////////////////////////////////////////////////////////////////////
// Expression Template closure support
////////////////////////////////////////////////////////////////////////////////
// Assign the result of a unary expression to this lattice.
// For every index in [0, _grid->oSites()) the expression is evaluated with
// eval(index, expr) into a temporary vobj, which is then written to
// _odata[index] through vstream. The loop iterations are shared out with
// OpenMP. No conformability check or resize is performed here.
// Returns *this.
template <typename Op, typename T1>
inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
{
  #pragma omp parallel for
  for (int site = 0; site < _grid->oSites(); ++site) {
    vobj siteValue = eval(site, expr);
    vstream(_odata[site], siteValue);
  }
  return *this;
}
// Assign the result of a binary expression to this lattice.
// For every index in [0, _grid->oSites()) the expression is evaluated with
// eval(index, expr) into a temporary vobj, which is then written to
// _odata[index] through vstream. The loop iterations are shared out with
// OpenMP. No conformability check or resize is performed here.
// Returns *this.
template <typename Op, typename T1, typename T2>
inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
{
  #pragma omp parallel for
  for (int site = 0; site < _grid->oSites(); ++site) {
    vobj siteValue = eval(site, expr);
    vstream(_odata[site], siteValue);
  }
  return *this;
}
// Assign the result of a trinary expression to this lattice.
// For every index in [0, _grid->oSites()) the expression is evaluated with
// eval(index, expr) into a temporary vobj, which is then written to
// _odata[index] through vstream. The loop iterations are shared out with
// OpenMP. No conformability check or resize is performed here.
// Returns *this.
template <typename Op, typename T1, typename T2, typename T3>
inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
{
  #pragma omp parallel for
  for (int site = 0; site < _grid->oSites(); ++site) {
    vobj siteValue = eval(site, expr);
    vstream(_odata[site], siteValue);
  }
  return *this;
}
//GridFromExpression is tricky to do
// Construct a lattice directly from a unary expression.
// The grid is not passed in: GridFromExpression(_grid, expr) is called to
// set _grid, and the constructor asserts that it is non-null afterwards.
// _odata is then sized to _grid->oSites() and each element is filled with
// eval(index, expr); the fill loop is shared out with OpenMP.
//
// checkerboard is given a defined value here because nothing else in this
// constructor sets it, and reading an indeterminate int is undefined
// behaviour.
// NOTE(review): 0 is a placeholder default; if the operands of expr carry a
// checkerboard it should probably be propagated from them -- confirm.
template<class Op,class T1>
Lattice(const LatticeUnaryExpression<Op,T1> & expr): _grid(nullptr), checkerboard(0){
  GridFromExpression(_grid,expr);
  assert(_grid!=nullptr);
  _odata.resize(_grid->oSites());
#pragma omp parallel for
  for(int ss=0;ss<_grid->oSites();ss++){
    _odata[ss] = eval(ss,expr);
  }
}
// Construct a lattice directly from a binary expression.
// The grid is not passed in: GridFromExpression(_grid, expr) is called to
// set _grid, and the constructor asserts that it is non-null afterwards.
// _odata is then sized to _grid->oSites() and each element is filled with
// eval(index, expr); the fill loop is shared out with OpenMP.
//
// checkerboard is given a defined value here because nothing else in this
// constructor sets it, and reading an indeterminate int is undefined
// behaviour.
// NOTE(review): 0 is a placeholder default; if the operands of expr carry a
// checkerboard it should probably be propagated from them -- confirm.
template<class Op,class T1, class T2>
Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr): _grid(nullptr), checkerboard(0){
  GridFromExpression(_grid,expr);
  assert(_grid!=nullptr);
  _odata.resize(_grid->oSites());
#pragma omp parallel for
  for(int ss=0;ss<_grid->oSites();ss++){
    _odata[ss] = eval(ss,expr);
  }
}
// Construct a lattice directly from a trinary expression.
// The grid is not passed in: GridFromExpression(_grid, expr) is called to
// set _grid, and the constructor asserts that it is non-null afterwards.
// _odata is then sized to _grid->oSites() and each element is filled with
// eval(index, expr); the fill loop is shared out with OpenMP.
//
// checkerboard is given a defined value here because nothing else in this
// constructor sets it, and reading an indeterminate int is undefined
// behaviour.
// NOTE(review): 0 is a placeholder default; if the operands of expr carry a
// checkerboard it should probably be propagated from them -- confirm.
template<class Op,class T1, class T2, class T3>
Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr): _grid(nullptr), checkerboard(0){
  GridFromExpression(_grid,expr);
  assert(_grid!=nullptr);
  _odata.resize(_grid->oSites());
#pragma omp parallel for
  for(int ss=0;ss<_grid->oSites();ss++){
    _odata[ss] = eval(ss,expr);
  }
}
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
// Constructor requires "grid" passed. // Constructor requires "grid" passed.
@ -54,7 +142,7 @@ public:
template<class robj> inline Lattice<vobj> & operator = (const Lattice<robj> & r){ template<class robj> inline Lattice<vobj> & operator = (const Lattice<robj> & r){
conformable(*this,r); conformable(*this,r);
std::cout<<"Lattice operator ="<<std::endl; std::cout<<"Lattice operator ="<<std::endl;
#pragma omp parallel for
for(int ss=0;ss<_grid->oSites();ss++){ for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r._odata[ss]; this->_odata[ss]=r._odata[ss];
} }
@ -88,8 +176,17 @@ public:
}; // class Lattice }; // class Lattice
} }
#define GRID_LATTICE_EXPRESSION_TEMPLATES
#include <lattice/Grid_lattice_conformable.h> #include <lattice/Grid_lattice_conformable.h>
#ifdef GRID_LATTICE_EXPRESSION_TEMPLATES
#include <lattice/Grid_lattice_ET.h>
#else
#include <lattice/Grid_lattice_overload.h>
#endif
#include <lattice/Grid_lattice_arith.h> #include <lattice/Grid_lattice_arith.h>
#include <lattice/Grid_lattice_trace.h> #include <lattice/Grid_lattice_trace.h>
#include <lattice/Grid_lattice_transpose.h> #include <lattice/Grid_lattice_transpose.h>
#include <lattice/Grid_lattice_local.h> #include <lattice/Grid_lattice_local.h>