Enhanced SIMD interfacing

2026-01-08 02:49:33 +00:00 · 2015-05-12 20:41:44 +01:00
parent c6baa3e657
commit 556befaaaa
15 changed files with 43 additions and 43 deletions
--- a/lib/lattice/Grid_lattice_ET.h
+++ b/lib/lattice/Grid_lattice_ET.h
@@ -67,6 +67,10 @@ inline void GridFromExpression(GridBase * &grid,const T1& lat)   // Lattice leaf
  } 
  grid=lat._grid;
 }
+template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >   // overload participates only when is_lattice<T1>::value is false
+inline void GridFromExpression(GridBase * &grid,const T1& notlat)   // non-lattice leaf: has no grid to report, so grid is left unchanged
+{   // intentionally empty: neither grid nor notlat is read or written
+}
 template <typename Op, typename T1>
 inline void GridFromExpression(GridBase * &grid,const LatticeUnaryExpression<Op,T1 > &expr)
 {
@@ -86,10 +90,6 @@ inline void GridFromExpression( GridBase * &grid,const LatticeTrinaryExpression<
  GridFromExpression(grid,std::get<1>(expr.second));
  GridFromExpression(grid,std::get<2>(expr.second));
 }
-template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
-inline void GridFromExpression(GridBase * &grid,const T1& notlat)   // non-lattice leaf
-{
-}

 ////////////////////////////////////////////
 // Unary operators and funcs
--- a/lib/lattice/Grid_lattice_arith.h
+++ b/lib/lattice/Grid_lattice_arith.h
@@ -145,7 +145,7 @@ PARALLEL_FOR_LOOP
  template<class sobj,class vobj>
  inline void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
    conformable(lhs,rhs);
-PARALLEL_FOR_LOOP
+#pragma omp parallel for
    for(int ss=0;ss<lhs._grid->oSites();ss++){
      vobj tmp = a*lhs._odata[ss];
      vstream(ret._odata[ss],tmp+rhs._odata[ss]);
--- a/lib/lattice/Grid_lattice_base.h
+++ b/lib/lattice/Grid_lattice_base.h
@@ -64,7 +64,8 @@ public:
  ////////////////////////////////////////////////////////////////////////////////
  template <typename Op, typename T1>                         inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
  {
-PARALLEL_FOR_LOOP
+    // PARALLEL_FOR_LOOP macro disabled here in favour of the explicit OpenMP pragma below
+#pragma omp parallel for
    for(int ss=0;ss<_grid->oSites();ss++){
      vobj tmp= eval(ss,expr);
      vstream(_odata[ss] ,tmp);
@@ -73,7 +74,8 @@ PARALLEL_FOR_LOOP
  }
  template <typename Op, typename T1,typename T2>             inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
  {
-PARALLEL_FOR_LOOP
+    // PARALLEL_FOR_LOOP macro disabled here in favour of the explicit OpenMP pragma below
+#pragma omp parallel for
    for(int ss=0;ss<_grid->oSites();ss++){
      vobj tmp= eval(ss,expr);
      vstream(_odata[ss] ,tmp);
@@ -82,7 +84,8 @@ PARALLEL_FOR_LOOP
  }
  template <typename Op, typename T1,typename T2,typename T3> inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
  {
-PARALLEL_FOR_LOOP
+    // PARALLEL_FOR_LOOP macro disabled here in favour of the explicit OpenMP pragma below
+#pragma omp parallel for
    for(int ss=0;ss<_grid->oSites();ss++){
      vobj tmp= eval(ss,expr);
      vstream(_odata[ss] ,tmp);
@@ -176,15 +179,16 @@ PARALLEL_FOR_LOOP
 }; // class Lattice
 }

-#undef GRID_LATTICE_EXPRESSION_TEMPLATES

 #include <lattice/Grid_lattice_conformable.h>

-#ifdef GRID_LATTICE_EXPRESSION_TEMPLATES
+#define GRID_LATTICE_EXPRESSION_TEMPLATES   // defined unconditionally, so the #ifdef below always selects Grid_lattice_ET.h
+#ifdef  GRID_LATTICE_EXPRESSION_TEMPLATES
 #include <lattice/Grid_lattice_ET.h>
 #else 
 #include <lattice/Grid_lattice_overload.h>
 #endif
+
 #include <lattice/Grid_lattice_arith.h>

 #include <lattice/Grid_lattice_trace.h>