1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-12 20:27:06 +01:00

Hand unrolled version of dslash in a separate class.

Useful for comparison: raises the Intel compiler from 9 GFlop/s to 17.5 GFlop/s
                   on an Ivy Bridge core. Raises Clang from 14.5 to 17.5 GFlop/s.
This commit is contained in:
Peter Boyle
2015-05-26 19:54:03 +01:00
parent c2ffb1a098
commit 20100d0a40
9 changed files with 157 additions and 370 deletions

View File

@ -47,6 +47,11 @@ class LatticeTrinaryExpression :public std::pair<Op,std::tuple<T1,T2,T3> >, publ
LatticeTrinaryExpression(const std::pair<Op,std::tuple<T1,T2,T3> > &arg): std::pair<Op,std::tuple<T1,T2,T3> >(arg) {};
};
// Conformability check for two grid pointers: they are considered conformable
// only when both point at the very same GridBase object (pointer identity).
// The check is an assert(), so it is compiled out when NDEBUG is defined.
inline void conformable(GridBase *lhs, GridBase *rhs)
{
  assert(lhs == rhs);
}
template<class vobj>
class Lattice : public LatticeBase
{
@ -60,7 +65,8 @@ public:
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
typedef vobj vector_object;
////////////////////////////////////////////////////////////////////////////////
// Expression Template closure support
////////////////////////////////////////////////////////////////////////////////
@ -276,17 +282,15 @@ PARALLEL_FOR_LOOP
}
#include <lattice/Grid_lattice_conformable.h>
#include <lattice/Grid_lattice_conformable.h>
#define GRID_LATTICE_EXPRESSION_TEMPLATES
#ifdef GRID_LATTICE_EXPRESSION_TEMPLATES
#include <lattice/Grid_lattice_ET.h>
#else
#include <lattice/Grid_lattice_overload.h>
#endif
#include <lattice/Grid_lattice_arith.h>
#include <lattice/Grid_lattice_trace.h>
#include <lattice/Grid_lattice_transpose.h>
#include <lattice/Grid_lattice_local.h>

View File

@ -3,16 +3,11 @@
namespace Grid {
template<class obj1,class obj2>
void conformable(const Lattice<obj1> &lhs,const Lattice<obj2> &rhs)
// Conformability check for two lattices, which may hold different object
// types. Asserts that both lattices carry equal `_grid` members and equal
// `checkerboard` members; with NDEBUG defined the asserts compile away.
template<class obj1, class obj2>
void conformable(const Lattice<obj1> &lhs, const Lattice<obj2> &rhs)
{
  assert(lhs._grid == rhs._grid);
  assert(lhs.checkerboard == rhs.checkerboard);
}
// Conformability check for two grid pointers: asserts that both refer to the
// very same GridBase object (pointer identity).
//
// Both parameters are pointer-to-const: the function only compares the
// addresses and never touches the pointees, so callers may pass const or
// non-const grid pointers in either position.
// The check is an assert(), so it is compiled out when NDEBUG is defined.
inline void conformable(const GridBase *lhs, const GridBase *rhs)
{
  assert(lhs == rhs);
}
}
#endif