Merge branch 'master' of https://github.com/paboyle/Grid

2026-05-15 06:34:31 +01:00 · 2015-12-09 12:48:44 +00:00
parent 967be91692 26161addd0
commit a32a59fc43
24 changed files with 406 additions and 221 deletions
@@ -266,11 +266,8 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
    if( this->HandOptDslash ) {
 #pragma omp parallel for schedule(static)
      for(int ss=0;ss<U._grid->oSites();ss++){
+	int sU=ss;
 	for(int s=0;s<Ls;s++){
-	  int sU=ss;
-	  if (    LebesgueOrder::UseLebesgueOrder ) {
-	    sU=lo.Reorder(ss);
-	  }
 	  int sF = s+Ls*sU;
 	  Kernels::DiracOptHandDhopSiteDag(st,U,comm_buf,sF,sU,in,out);
 	  }
@@ -323,52 +320,42 @@ PARALLEL_FOR_LOOP
      //      Counter.Report();
      //      }
    } else if( this->HandOptDslash ) {
+      /*

-#pragma omp parallel for 
+#pragma omp parallel for schedule(static)
      for(int t=0;t<threads;t++){

 	int hyperthread = t%HT;
 	int core        = t/HT;

-        int sswork, swork,soff, sU,sF;
-
-	sswork = (nwork + cores-1)/cores;
+        int sswork, swork,soff,ssoff,  sU,sF;
+	
+	GridThread::GetWork(nwork,core,sswork,ssoff,cores);
 	GridThread::GetWork(Ls   , hyperthread, swork, soff,HT);

 	for(int ss=0;ss<sswork;ss++){
-	  sU=ss+core*sswork; // max locality within an L2 slice
-	  if ( LebesgueOrder::UseLebesgueOrder ) {
-	    sU = lo.Reorder(sU);
+	  sU=ss+ ssoff;
+	  for(int s=soff;s<soff+swork;s++){
+	    sF = s+Ls*sU;
+	    Kernels::DiracOptHandDhopSite(st,U,comm_buf,sF,sU,in,out);
 	  }
-	  if ( sU < nwork ) {
-	    for(int s=soff;s<soff+swork;s++){
-	      sF = s+Ls*sU;
-	      Kernels::DiracOptHandDhopSite(st,U,comm_buf,sF,sU,in,out);
-	    }
-	  }
-	}
-      }
-
-      /*
-#pragma omp parallel for schedule(static)
-      for(int ss=0;ss<U._grid->oSites();ss++){
-	for(int s=0;s<Ls;s++){
-	  int sU=ss;
-	  if (    LebesgueOrder::UseLebesgueOrder ) {
-	    sU=lo.Reorder(ss);
-	  }
-	  int sF = s+Ls*sU;
-	  Kernels::DiracOptHandDhopSite(st,U,comm_buf,sF,sU,in,out);
 	}
      }
      */

+#pragma omp parallel for schedule(static)
+      for(int ss=0;ss<U._grid->oSites();ss++){
+	int sU=ss;
+	for(int s=0;s<Ls;s++){
+	  int sF = s+Ls*sU;
+	  Kernels::DiracOptHandDhopSite(st,U,comm_buf,sF,sU,in,out);
+	}
+      }
    } else { 
 PARALLEL_FOR_LOOP
      for(int ss=0;ss<U._grid->oSites();ss++){
+	int sU=ss;
 	for(int s=0;s<Ls;s++){
-	  //	  int sU=lo.Reorder(ss);
-	  int sU=ss;
 	  int sF = s+Ls*sU; 
 	  Kernels::DiracOptDhopSite(st,U,comm_buf,sF,sU,in,out);
 	}
@@ -29,7 +29,7 @@ namespace Grid {
 			  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 			  int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma);
 #if defined(AVX512) || defined(IMCI)
-     void DiracOptAsmDhopSite(CartesianStencil &st,DoubledGaugeField &U,
+     void DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
 			      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 			      int sF,int sU,const FermionField &in, FermionField &out,uint64_t *);
 #else