Sizable improvement in multigrid for unsquared.

6000 matmuls CG unprec
2000 matmuls CG prec (4000 eo muls)
1050 matmuls PGCR on 16^3 x 32 x 8 m=.01

Substantial effort on timing and logging infrastructure
2026-06-25 13:03:30 +01:00 · 2015-07-24 01:31:13 +09:00
parent 11c99d5e66
commit d1afebf71e
67 changed files with 945 additions and 753 deletions
@@ -132,18 +132,18 @@ inline void CBFromExpression(int &cb,const T1& lat)   // Lattice leaf
    assert(cb==lat.checkerboard);
  } 
  cb=lat.checkerboard;
-  //  std::cout<<"Lattice leaf cb "<<cb<<std::endl;
+  //  std::cout<<GridLogMessage<<"Lattice leaf cb "<<cb<<std::endl;
 }
 template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
 inline void CBFromExpression(int &cb,const T1& notlat)   // non-lattice leaf
 {
-  //  std::cout<<"Non lattice leaf cb"<<cb<<std::endl;
+  //  std::cout<<GridLogMessage<<"Non lattice leaf cb"<<cb<<std::endl;
 }
 template <typename Op, typename T1>
 inline void CBFromExpression(int &cb,const LatticeUnaryExpression<Op,T1 > &expr)
 {
  CBFromExpression(cb,std::get<0>(expr.second));// recurse 
-  //  std::cout<<"Unary node cb "<<cb<<std::endl;
+  //  std::cout<<GridLogMessage<<"Unary node cb "<<cb<<std::endl;
 }

 template <typename Op, typename T1, typename T2>
@@ -151,7 +151,7 @@ inline void CBFromExpression(int &cb,const LatticeBinaryExpression<Op,T1,T2> &ex
 {
  CBFromExpression(cb,std::get<0>(expr.second));// recurse
  CBFromExpression(cb,std::get<1>(expr.second));
-  //  std::cout<<"Binary node cb "<<cb<<std::endl;
+  //  std::cout<<GridLogMessage<<"Binary node cb "<<cb<<std::endl;
 }
 template <typename Op, typename T1, typename T2, typename T3>
 inline void CBFromExpression( int &cb,const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr) 
@@ -159,7 +159,7 @@ inline void CBFromExpression( int &cb,const LatticeTrinaryExpression<Op,T1,T2,T3
  CBFromExpression(cb,std::get<0>(expr.second));// recurse
  CBFromExpression(cb,std::get<1>(expr.second));
  CBFromExpression(cb,std::get<2>(expr.second));
-  //  std::cout<<"Trinary node cb "<<cb<<std::endl;
+  //  std::cout<<GridLogMessage<<"Trinary node cb "<<cb<<std::endl;
 }

 ////////////////////////////////////////////
@@ -370,7 +370,7 @@ using namespace Grid;
   tmp.func(eval(0,v1),eval(0,v2));

   auto var = v1+v2;
-   std::cout<<typeid(var).name()<<std::endl;
+   std::cout<<GridLogMessage<<typeid(var).name()<<std::endl;

   v3=v1+v2;
   v3=v1+v2+v1*v2;
@@ -221,7 +221,7 @@ PARALLEL_FOR_LOOP
    template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
      this->checkerboard = r.checkerboard;
      conformable(*this,r);
-      std::cout<<"Lattice operator ="<<std::endl;
+      std::cout<<GridLogMessage<<"Lattice operator ="<<std::endl;
 PARALLEL_FOR_LOOP
        for(int ss=0;ss<_grid->oSites();ss++){
            this->_odata[ss]=r._odata[ss];
@@ -125,7 +125,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
  assert(grid!=NULL);

  // FIXME
-  std::cout<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl;
+  std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl;

  const int    Nd = grid->_ndimension;
  const int Nsimd = grid->Nsimd();
@@ -233,7 +233,8 @@ namespace Grid {
      int words=sizeof(scalar_object)/sizeof(scalar_type);

      std::vector<scalar_object> buf(Nsimd);
-      
+
+PARALLEL_FOR_LOOP
      for(int ss=0;ss<osites;ss++){
 	for(int si=0;si<Nsimd;si++){

@@ -23,7 +23,7 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
  template<class vobj> inline void pickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full){
    half.checkerboard = cb;
    int ssh=0;
-PARALLEL_FOR_LOOP
+    //PARALLEL_FOR_LOOP
    for(int ss=0;ss<full._grid->oSites();ss++){
      std::vector<int> coor;
      int cbos;
@@ -40,7 +40,7 @@ PARALLEL_FOR_LOOP
  template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){
    int cb = half.checkerboard;
    int ssh=0;
-PARALLEL_FOR_LOOP
+    //PARALLEL_FOR_LOOP
    for(int ss=0;ss<full._grid->oSites();ss++){
      std::vector<int> coor;
      int cbos;
@@ -158,6 +158,7 @@ template<class vobj,class CComplex>

  fine_inner = localInnerProduct(fineX,fineY);
  blockSum(coarse_inner,fine_inner);
+PARALLEL_FOR_LOOP
  for(int ss=0;ss<coarse->oSites();ss++){
    CoarseInner._odata[ss] = coarse_inner._odata[ss];
  }