diff --git a/Grid/communicator/Communicator_base.h b/Grid/communicator/Communicator_base.h index bb06d43f..a15f9789 100644 --- a/Grid/communicator/Communicator_base.h +++ b/Grid/communicator/Communicator_base.h @@ -1,4 +1,3 @@ - /************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -108,6 +107,8 @@ public: //////////////////////////////////////////////////////////// // Reduction //////////////////////////////////////////////////////////// + void GlobalMax(RealD &); + void GlobalMax(RealF &); void GlobalSum(RealF &); void GlobalSumVector(RealF *,int N); void GlobalSum(RealD &); diff --git a/Grid/communicator/Communicator_mpi3.cc b/Grid/communicator/Communicator_mpi3.cc index c6543851..5713fe35 100644 --- a/Grid/communicator/Communicator_mpi3.cc +++ b/Grid/communicator/Communicator_mpi3.cc @@ -275,6 +275,16 @@ void CartesianCommunicator::GlobalXOR(uint64_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); assert(ierr==0); } +void CartesianCommunicator::GlobalMax(float &f) +{ + int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_MAX,communicator); + assert(ierr==0); +} +void CartesianCommunicator::GlobalMax(double &d) +{ + int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_MAX,communicator); + assert(ierr==0); +} void CartesianCommunicator::GlobalSum(float &f){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); assert(ierr==0); diff --git a/Grid/communicator/Communicator_none.cc b/Grid/communicator/Communicator_none.cc index 6cb431a2..beb2cc97 100644 --- a/Grid/communicator/Communicator_none.cc +++ b/Grid/communicator/Communicator_none.cc @@ -67,6 +67,8 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors) CartesianCommunicator::~CartesianCommunicator(){} +void CartesianCommunicator::GlobalMax(float &){} +void CartesianCommunicator::GlobalMax(double &){} void CartesianCommunicator::GlobalSum(float &){} void CartesianCommunicator::GlobalSumVector(float *,int N){} void CartesianCommunicator::GlobalSum(double &){} diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index c2955485..7338fd41 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -96,8 +96,34 @@ inline typename vobj::scalar_objectD sumD_cpu(const vobj *arg, Integer osites) ssobj ret = ssum; return ret; } +/* +Threaded max, don't use for now +template +inline Double max(const Double *arg, Integer osites) +{ + // const int Nsimd = vobj::Nsimd(); + const int nthread = GridThread::GetThreads(); - + std::vector maxarray(nthread); + + thread_for(thr,nthread, { + int nwork, mywork, myoff; + nwork = osites; + GridThread::GetWork(nwork,thr,mywork,myoff); + Double max=arg[0]; + for(int ss=myoff;ss max ) max = arg[ss]; + } + maxarray[thr]=max; + }); + + Double tmax=maxarray[0]; + for(int i=0;itmax) tmax = maxarray[i]; + } + return tmax; +} +*/ template inline typename vobj::scalar_object sum(const vobj *arg, Integer osites) { @@ -140,6 +166,31 @@ template inline RealD norm2(const Lattice &arg){ ComplexD nrm = innerProduct(arg,arg); return real(nrm); } +template inline RealD maxLocalNorm2(const Lattice &arg) +{ + typedef typename vobj::tensor_reduced vscalar; + typedef typename vobj::scalar_object scalar; + typedef typename getPrecision::real_scalar_type rscalar; + + Lattice inner = localNorm2(arg); + + auto grid = arg.Grid(); + + RealD max; + for(int l=0;llSites();l++){ + Coordinate coor; + scalar val; + RealD r; + grid->LocalIndexToLocalCoor(l,coor); + peekLocalSite(val,inner,coor); + r=real(TensorRemove(val)); + if( (l==0) || (r>max)){ + max=r; + } + } + grid->GlobalMax(max); + return max; +} // Double inner product template diff --git a/tests/core/Test_main.cc b/tests/core/Test_main.cc index d7ed04ba..d3e6bfbd 100644 --- a/tests/core/Test_main.cc +++ b/tests/core/Test_main.cc @@ -231,6 +231,19 @@ int main(int argc, char **argv) { scalar = localInnerProduct(cVec, cVec); scalar = localNorm2(cVec); + std::cout << "Testing maxLocalNorm2" < shiftcoor = coor; shiftcoor[dir] = (shiftcoor[dir] + shift + latt_size[dir]) % - (latt_size[dir] / mpi_layout[dir]); + (latt_size[dir]); + // (latt_size[dir] / mpi_layout[dir]); std::vector rl(4); for (int dd = 0; dd < 4; dd++) {