1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-12 20:27:06 +01:00

Enhanced SIMD interfacing

This commit is contained in:
Peter Boyle
2015-05-12 20:41:44 +01:00
parent c6baa3e657
commit 556befaaaa
15 changed files with 43 additions and 43 deletions

View File

@ -67,6 +67,10 @@ inline void GridFromExpression(GridBase * &grid,const T1& lat) // Lattice leaf
}
grid=lat._grid;
}
// GridFromExpression overload for operands that are not lattices.
// The enable_if constraint admits this overload only when
// is_lattice<T1>::value is false. The body is empty: neither `grid` nor
// `notlat` is read or written, so a non-lattice operand leaves the caller's
// grid pointer exactly as it was.
// NOTE(review): presumably declared ahead of the expression overloads so
// that their recursive GridFromExpression calls can find it for operand
// types with no associated namespace (e.g. built-in scalars) -- confirm.
template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
inline void GridFromExpression(GridBase * &grid,const T1& notlat) // non-lattice leaf
{
}
template <typename Op, typename T1>
inline void GridFromExpression(GridBase * &grid,const LatticeUnaryExpression<Op,T1 > &expr)
{
@ -86,10 +90,6 @@ inline void GridFromExpression( GridBase * &grid,const LatticeTrinaryExpression<
GridFromExpression(grid,std::get<1>(expr.second));
GridFromExpression(grid,std::get<2>(expr.second));
}
// GridFromExpression overload for operands that are not lattices.
// Participates in overload resolution only when is_lattice<T1>::value is
// false (see the enable_if constraint). The body is empty, so `grid` is
// left untouched and `notlat` is never inspected.
// NOTE(review): this definition sits after expression overloads that call
// GridFromExpression on their operands; for operand types without an
// associated namespace (e.g. built-in scalars) a declaration placed here may
// not be visible to those earlier calls -- confirm the intended ordering.
template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
inline void GridFromExpression(GridBase * &grid,const T1& notlat) // non-lattice leaf
{
}
////////////////////////////////////////////
// Unary operators and funcs

View File

@ -145,7 +145,7 @@ PARALLEL_FOR_LOOP
template<class sobj,class vobj>
inline void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
conformable(lhs,rhs);
PARALLEL_FOR_LOOP
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
vobj tmp = a*lhs._odata[ss];
vstream(ret._odata[ss],tmp+rhs._odata[ss]);

View File

@ -64,7 +64,8 @@ public:
////////////////////////////////////////////////////////////////////////////////
template <typename Op, typename T1> inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
{
PARALLEL_FOR_LOOP
//PARALLEL_FOR_LOOP
#pragma omp parallel for
for(int ss=0;ss<_grid->oSites();ss++){
vobj tmp= eval(ss,expr);
vstream(_odata[ss] ,tmp);
@ -73,7 +74,8 @@ PARALLEL_FOR_LOOP
}
template <typename Op, typename T1,typename T2> inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
{
PARALLEL_FOR_LOOP
// PARALLEL_FOR_LOOP
#pragma omp parallel for
for(int ss=0;ss<_grid->oSites();ss++){
vobj tmp= eval(ss,expr);
vstream(_odata[ss] ,tmp);
@ -82,7 +84,8 @@ PARALLEL_FOR_LOOP
}
template <typename Op, typename T1,typename T2,typename T3> inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
{
PARALLEL_FOR_LOOP
//PARALLEL_FOR_LOOP
#pragma omp parallel for
for(int ss=0;ss<_grid->oSites();ss++){
vobj tmp= eval(ss,expr);
vstream(_odata[ss] ,tmp);
@ -176,15 +179,16 @@ PARALLEL_FOR_LOOP
}; // class Lattice
}
#undef GRID_LATTICE_EXPRESSION_TEMPLATES
#include <lattice/Grid_lattice_conformable.h>
#ifdef GRID_LATTICE_EXPRESSION_TEMPLATES
#define GRID_LATTICE_EXPRESSION_TEMPLATES
#ifdef GRID_LATTICE_EXPRESSION_TEMPLATES
#include <lattice/Grid_lattice_ET.h>
#else
#include <lattice/Grid_lattice_overload.h>
#endif
#include <lattice/Grid_lattice_arith.h>
#include <lattice/Grid_lattice_trace.h>