Merge branch 'develop' into feature/gparity_HMC

2025-12-20 12:44:30 +00:00 · 2021-02-08 09:31:49 -05:00
parent cee6a37639 cd99edcc5f
commit 6a824033f8
5 changed files with 81 additions and 3 deletions
--- a/Grid/communicator/Communicator_base.h
+++ b/Grid/communicator/Communicator_base.h
@@ -1,4 +1,3 @@
-
 /*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 
@@ -108,6 +107,8 @@ public:
  ////////////////////////////////////////////////////////////
  // Reduction
  ////////////////////////////////////////////////////////////
+  void GlobalMax(RealD &);
+  void GlobalMax(RealF &);
  void GlobalSum(RealF &);
  void GlobalSumVector(RealF *,int N);
  void GlobalSum(RealD &);
--- a/Grid/communicator/Communicator_mpi3.cc
+++ b/Grid/communicator/Communicator_mpi3.cc
@@ -275,6 +275,16 @@ void CartesianCommunicator::GlobalXOR(uint64_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator);
  assert(ierr==0);
 }
+void CartesianCommunicator::GlobalMax(float &f)
+{
+  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_MAX,communicator);
+  assert(ierr==0);
+}
+void CartesianCommunicator::GlobalMax(double &d)
+{
+  int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_MAX,communicator);
+  assert(ierr==0);
+}
 void CartesianCommunicator::GlobalSum(float &f){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
  assert(ierr==0);
--- a/Grid/communicator/Communicator_none.cc
+++ b/Grid/communicator/Communicator_none.cc
@@ -67,6 +67,8 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors)

 CartesianCommunicator::~CartesianCommunicator(){}

+void CartesianCommunicator::GlobalMax(float &){}
+void CartesianCommunicator::GlobalMax(double &){}
 void CartesianCommunicator::GlobalSum(float &){}
 void CartesianCommunicator::GlobalSumVector(float *,int N){}
 void CartesianCommunicator::GlobalSum(double &){}
--- a/Grid/lattice/Lattice_reduction.h
+++ b/Grid/lattice/Lattice_reduction.h
@@ -96,8 +96,34 @@ inline typename vobj::scalar_objectD sumD_cpu(const vobj *arg, Integer osites)
  ssobj ret = ssum;
  return ret;
 }
+/*
+Threaded max, don't use for now
+template<class Double>
+inline Double max(const Double *arg, Integer osites)
+{
+  //  const int Nsimd = vobj::Nsimd();
+  const int nthread = GridThread::GetThreads();

-
+  std::vector<Double> maxarray(nthread);
+  
+  thread_for(thr,nthread, {
+    int nwork, mywork, myoff;
+    nwork = osites;
+    GridThread::GetWork(nwork,thr,mywork,myoff);
+    Double max=arg[0];
+    for(int ss=myoff;ss<mywork+myoff; ss++){
+      if( arg[ss] > max ) max = arg[ss];
+    }
+    maxarray[thr]=max;
+  });
+  
+  Double tmax=maxarray[0];
+  for(int i=0;i<nthread;i++){
+    if (maxarray[i]>tmax) tmax = maxarray[i];
+  } 
+  return tmax;
+}
+*/
 template<class vobj>
 inline typename vobj::scalar_object sum(const vobj *arg, Integer osites)
 {
@@ -140,6 +166,31 @@ template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
  ComplexD nrm = innerProduct(arg,arg);
  return real(nrm); 
 }
+template<class vobj> inline RealD maxLocalNorm2(const Lattice<vobj> &arg)
+{
+  typedef typename vobj::tensor_reduced vscalar;
+  typedef typename vobj::scalar_object  scalar;
+  typedef typename getPrecision<vobj>::real_scalar_type rscalar;
+
+  Lattice<vscalar> inner = localNorm2(arg);
+
+  auto grid = arg.Grid();
+
+  RealD max;
+  for(int l=0;l<grid->lSites();l++){
+    Coordinate coor;
+    scalar val;
+    RealD r;
+    grid->LocalIndexToLocalCoor(l,coor);
+    peekLocalSite(val,inner,coor);
+    r=real(TensorRemove(val));
+    if( (l==0) || (r>max)){
+      max=r;
+    }
+  }
+  grid->GlobalMax(max);
+  return max;
+}

 // Double inner product
 template<class vobj>
--- a/tests/core/Test_main.cc
+++ b/tests/core/Test_main.cc
@@ -231,6 +231,19 @@ int main(int argc, char **argv) {
      scalar = localInnerProduct(cVec, cVec);
      scalar = localNorm2(cVec);

+      std::cout << "Testing maxLocalNorm2" <<std::endl;
+      for(Integer gsite=0;gsite<Fine.gSites();gsite++){
+
+	TComplex big(10.0);
+	Coordinate coor;
+
+	random(FineRNG, scalar);
+	Fine.GlobalIndexToGlobalCoor(gsite,coor);
+        pokeSite(big,scalar,coor);
+	
+	RealD Linfty = maxLocalNorm2(scalar);
+	assert(Linfty == 100.0);
+      }
      //     -=,+=,*=,()
      //     add,+,sub,-,mult,mac,*
      //     adj,conjugate
@@ -549,7 +562,8 @@ int main(int argc, char **argv) {

                  std::vector<int> shiftcoor = coor;
                  shiftcoor[dir] = (shiftcoor[dir] + shift + latt_size[dir]) %
-                                   (latt_size[dir] / mpi_layout[dir]);
+                                   (latt_size[dir]);
+		  //                                   (latt_size[dir] / mpi_layout[dir]);

                  std::vector<int> rl(4);
                  for (int dd = 0; dd < 4; dd++) {