Merge branch 'develop' into feature/mres_schur

* develop: (26 commits) Added the ability to apply a custom "filter" to the conjugate momentum in the Integrator classes, applied both after refresh and after applying the forces Added a conjugate momentum "filter" that applies a phase to each site. With sites set to 1.0 or 0.0 this acts as a mask and enables, for example, the freezing of inactive gauge links in DDHMC Added tests/forces/Test_momentum_filter demonstrating the use of the filter to freeze boundary links Correct misleading ac help string Enable performance counting in WilsonFermion like in others changed back A2AUtils warning changed if and accelerator_for - no runtime errors any more Mac OS (Darwin) sed -i flag for in-place editing differs from posix / gnu Seems the intention with AutoConf produced Grid/Config.h was to use sed to translate standard PACKAGE_ #defines into GRID_ however due to missing '' after -i this hasn't been working. Perhaps it is too late to fix this, since we don't know who/what is relying on this downstream? ... but if they are, and AutoConf is being used, then likely these #defines have just been redefined anyway. Seems reasonable to redefine PACKAGE and VERSION as well, as none of these macros are used throughout Grid or Hadrons. Fixed compile issues with maxLocalNorm2 for non-scalar lattices maxLocalNorm2 test now reuses the random field MADWF 5d source option for hadrons - look at Grid of source Abort on GPU error maxLocalNorm2() change back benchmark_ITT prettify Flop cout matches DiRAC-ITT-2020 revert changes merge develop fixes weird bug in 2pt function... revert changes final version, tested on CPU and GPU bugfix ...
2025-07-30 19:27:06 +01:00 · 2021-02-24 18:07:27 +00:00
parent 2983b6fdf6 157fd1428d
commit d620b303ff
14 changed files with 1225 additions and 787 deletions
--- a/Grid/communicator/Communicator_base.h
+++ b/Grid/communicator/Communicator_base.h
@@ -1,4 +1,3 @@
-
 /*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 
@@ -108,6 +107,8 @@ public:
  ////////////////////////////////////////////////////////////
  // Reduction
  ////////////////////////////////////////////////////////////
+  void GlobalMax(RealD &);
+  void GlobalMax(RealF &);
  void GlobalSum(RealF &);
  void GlobalSumVector(RealF *,int N);
  void GlobalSum(RealD &);
--- a/Grid/communicator/Communicator_mpi3.cc
+++ b/Grid/communicator/Communicator_mpi3.cc
@@ -275,6 +275,16 @@ void CartesianCommunicator::GlobalXOR(uint64_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator);
  assert(ierr==0);
 }
+void CartesianCommunicator::GlobalMax(float &f)
+{
+  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_MAX,communicator);
+  assert(ierr==0);
+}
+void CartesianCommunicator::GlobalMax(double &d)
+{
+  int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_MAX,communicator);
+  assert(ierr==0);
+}
 void CartesianCommunicator::GlobalSum(float &f){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
  assert(ierr==0);
--- a/Grid/communicator/Communicator_none.cc
+++ b/Grid/communicator/Communicator_none.cc
@@ -67,6 +67,8 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors)

 CartesianCommunicator::~CartesianCommunicator(){}

+void CartesianCommunicator::GlobalMax(float &){}
+void CartesianCommunicator::GlobalMax(double &){}
 void CartesianCommunicator::GlobalSum(float &){}
 void CartesianCommunicator::GlobalSumVector(float *,int N){}
 void CartesianCommunicator::GlobalSum(double &){}
--- a/Grid/lattice/Lattice_reduction.h
+++ b/Grid/lattice/Lattice_reduction.h
@@ -96,8 +96,34 @@ inline typename vobj::scalar_objectD sumD_cpu(const vobj *arg, Integer osites)
  ssobj ret = ssum;
  return ret;
 }
+/*
+Threaded max, don't use for now
+template<class Double>
+inline Double max(const Double *arg, Integer osites)
+{
+  //  const int Nsimd = vobj::Nsimd();
+  const int nthread = GridThread::GetThreads();

-
+  std::vector<Double> maxarray(nthread);
+  
+  thread_for(thr,nthread, {
+    int nwork, mywork, myoff;
+    nwork = osites;
+    GridThread::GetWork(nwork,thr,mywork,myoff);
+    Double max=arg[0];
+    for(int ss=myoff;ss<mywork+myoff; ss++){
+      if( arg[ss] > max ) max = arg[ss];
+    }
+    maxarray[thr]=max;
+  });
+  
+  Double tmax=maxarray[0];
+  for(int i=0;i<nthread;i++){
+    if (maxarray[i]>tmax) tmax = maxarray[i];
+  } 
+  return tmax;
+}
+*/
 template<class vobj>
 inline typename vobj::scalar_object sum(const vobj *arg, Integer osites)
 {
@@ -141,6 +167,32 @@ template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
  return real(nrm); 
 }

+//The global maximum of the site norm2
+template<class vobj> inline RealD maxLocalNorm2(const Lattice<vobj> &arg)
+{
+  typedef typename vobj::tensor_reduced vscalar;  //iScalar<iScalar<.... <vPODtype> > >
+  typedef typename vscalar::scalar_object  scalar;   //iScalar<iScalar<.... <PODtype> > >
+
+  Lattice<vscalar> inner = localNorm2(arg);
+
+  auto grid = arg.Grid();
+
+  RealD max;
+  for(int l=0;l<grid->lSites();l++){
+    Coordinate coor;
+    scalar val;
+    RealD r;
+    grid->LocalIndexToLocalCoor(l,coor);
+    peekLocalSite(val,inner,coor);
+    r=real(TensorRemove(val));
+    if( (l==0) || (r>max)){
+      max=r;
+    }
+  }
+  grid->GlobalMax(max);
+  return max;
+}
+
 // Double inner product
 template<class vobj>
 inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
--- a/Grid/qcd/action/fermion/MADWF.h
+++ b/Grid/qcd/action/fermion/MADWF.h
@@ -85,7 +85,7 @@ class MADWF
      maxiter     =_maxiter;
    };
   
-  void operator() (const FermionFieldo &src4,FermionFieldo &sol5)
+  void operator() (const FermionFieldo &src,FermionFieldo &sol5)
  {
    std::cout << GridLogMessage<< " ************************************************" << std::endl;
    std::cout << GridLogMessage<< "  MADWF-like algorithm                           " << std::endl;
@@ -114,8 +114,16 @@ class MADWF
    ///////////////////////////////////////
    //Import source, include Dminus factors
    ///////////////////////////////////////
-    Mato.ImportPhysicalFermionSource(src4,b); 
-    std::cout << GridLogMessage << " src4 " <<norm2(src4)<<std::endl;
+    GridBase *src_grid = src.Grid();
+
+    assert( (src_grid == Mato.GaugeGrid()) || (src_grid == Mato.FermionGrid()));
+
+    if ( src_grid == Mato.GaugeGrid() ) {
+      Mato.ImportPhysicalFermionSource(src,b);
+    } else {
+      b=src;
+    }
+    std::cout << GridLogMessage << " src " <<norm2(src)<<std::endl;
    std::cout << GridLogMessage << " b    " <<norm2(b)<<std::endl;

    defect = b;
--- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h
+++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h
@@ -397,6 +397,7 @@ void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, co
 template <class Impl>
 void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
 {
+  DhopCalls+=2;
  conformable(in.Grid(), _grid);  // verifies full grid
  conformable(in.Grid(), out.Grid());

@@ -408,6 +409,7 @@ void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int da
 template <class Impl>
 void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
 {
+  DhopCalls++;
  conformable(in.Grid(), _cbgrid);    // verifies half grid
  conformable(in.Grid(), out.Grid());  // drops the cb check

@@ -420,6 +422,7 @@ void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int
 template <class Impl>
 void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
 {
+  DhopCalls++;
  conformable(in.Grid(), _cbgrid);    // verifies half grid
  conformable(in.Grid(), out.Grid());  // drops the cb check

--- a/Grid/qcd/hmc/integrators/Integrator.h
+++ b/Grid/qcd/hmc/integrators/Integrator.h
@@ -33,6 +33,7 @@ directory
 #define INTEGRATOR_INCLUDED

 #include <memory>
+#include "MomentumFilter.h"

 NAMESPACE_BEGIN(Grid);

@@ -78,8 +79,19 @@ protected:
  RepresentationPolicy Representations;
  IntegratorParameters Params;

+  //Filters allow the user to manipulate the conjugate momentum, for example to freeze links in DDHMC
+  //It is applied whenever the momentum is updated / refreshed
+  //The default filter does nothing
+  MomentumFilterBase<MomentaField> const* MomFilter;
+
  const ActionSet<Field, RepresentationPolicy> as;

+  //Get a pointer to a shared static instance of the "do-nothing" momentum filter to serve as a default
+  static MomentumFilterBase<MomentaField> const* getDefaultMomFilter(){ 
+    static MomentumFilterNone<MomentaField> filter;
+    return &filter;
+  }
+
  void update_P(Field& U, int level, double ep) 
  {
    t_P[level] += ep;
@@ -135,6 +147,8 @@ protected:

    // Force from the other representations
    as[level].apply(update_P_hireps, Representations, Mom, U, ep);
+
+    MomFilter->applyFilter(Mom);
  }

  void update_U(Field& U, double ep) 
@@ -174,11 +188,23 @@ public:
    t_P.resize(levels, 0.0);
    t_U = 0.0;
    // initialization of smearer delegated outside of Integrator
+
+    //Default the momentum filter to "do-nothing"
+    MomFilter = getDefaultMomFilter();
  };

  virtual ~Integrator() {}

  virtual std::string integrator_name() = 0;
+  
+  //Set the momentum filter allowing for manipulation of the conjugate momentum
+  void setMomentumFilter(const MomentumFilterBase<MomentaField> &filter){
+    MomFilter = &filter;
+  }
+
+  //Access the conjugate momentum
+  const MomentaField & getMomentum() const{ return P; }
+  

  void print_parameters()
  {
@@ -249,6 +275,8 @@ public:
      // Refresh the higher representation actions
      as[level].apply(refresh_hireps, Representations, pRNG);
    }
+
+    MomFilter->applyFilter(P);
  }

  // to be used by the actionlevel class to iterate
--- a/Grid/qcd/hmc/integrators/MomentumFilter.h
+++ b/Grid/qcd/hmc/integrators/MomentumFilter.h
@@ -0,0 +1,94 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./lib/qcd/hmc/integrators/MomentumFilter.h
+
+Copyright (C) 2015
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+//--------------------------------------------------------------------
+#ifndef MOMENTUM_FILTER
+#define MOMENTUM_FILTER
+
+NAMESPACE_BEGIN(Grid);
+
+//These filter objects allow the user to manipulate the conjugate momentum as part of the update / refresh
+
+template<typename MomentaField>
+struct MomentumFilterBase{
+  virtual void applyFilter(MomentaField &P) const;
+};
+
+//Do nothing
+template<typename MomentaField>
+struct MomentumFilterNone: public MomentumFilterBase<MomentaField>{
+  void applyFilter(MomentaField &P) const override{}
+};
+
+//Multiply each site/direction by a Lorentz vector complex number field
+//Can be used to implement a mask, zeroing out sites
+template<typename MomentaField>
+struct MomentumFilterApplyPhase: public MomentumFilterBase<MomentaField>{
+  typedef typename MomentaField::vector_type vector_type; //SIMD-vectorized complex type
+  typedef typename MomentaField::scalar_type scalar_type; //scalar complex type
+  typedef iVector<iScalar<iScalar<vector_type> >, Nd > LorentzScalarType; //complex phase for each site/direction
+  typedef Lattice<LorentzScalarType> LatticeLorentzScalarType;
+  
+  LatticeLorentzScalarType phase;
+ 
+  MomentumFilterApplyPhase(const LatticeLorentzScalarType _phase): phase(_phase){}
+
+  //Default to uniform field of (1,0)
+  MomentumFilterApplyPhase(GridBase* _grid): phase(_grid){
+    LorentzScalarType one;
+    for(int mu=0;mu<Nd;mu++)
+      one(mu)()() = scalar_type(1.);
+    
+    phase = one;
+  }
+
+  void applyFilter(MomentaField &P) const override{
+    conformable(P,phase);
+    autoView( P_v , P, AcceleratorWrite);
+    autoView( phase_v , phase, AcceleratorRead);
+
+    accelerator_for(ss,P_v.size(),MomentaField::vector_type::Nsimd(),{
+    	auto site_mom = P_v(ss);
+    	auto site_phase = phase_v(ss);
+	for(int mu=0;mu<Nd;mu++)
+	  site_mom(mu) = site_mom(mu) * site_phase(mu);
+    	coalescedWrite(P_v[ss], site_mom);
+      });
+    
+  }
+
+
+};
+
+
+
+
+NAMESPACE_END(Grid);
+
+#endif
--- a/Grid/qcd/utils/BaryonUtils.h
+++ b/Grid/qcd/utils/BaryonUtils.h
--- a/Grid/threads/Accelerator.cc
+++ b/Grid/threads/Accelerator.cc
@@ -1,6 +1,7 @@
 #include <Grid/GridCore.h>

 NAMESPACE_BEGIN(Grid);
+int      acceleratorAbortOnGpuError=1;
 uint32_t accelerator_threads=2;
 uint32_t acceleratorThreads(void)       {return accelerator_threads;};
 void     acceleratorThreads(uint32_t t) {accelerator_threads = t;};
--- a/Grid/threads/Accelerator.h
+++ b/Grid/threads/Accelerator.h
@@ -100,6 +100,8 @@ void     acceleratorInit(void);
 #define accelerator        __host__ __device__
 #define accelerator_inline __host__ __device__ inline

+extern int acceleratorAbortOnGpuError;
+
 accelerator_inline int acceleratorSIMTlane(int Nsimd) {
 #ifdef GRID_SIMT
  return threadIdx.z; 
@@ -140,6 +142,7 @@ void LambdaApply(uint64_t num1, uint64_t num2, uint64_t num3, lambda Lambda)
      printf("Cuda error %s \n", cudaGetErrorString( err ));		\
      puts(__FILE__);							\
      printf("Line %d\n",__LINE__);					\
+      if (acceleratorAbortOnGpuError) assert(err==cudaSuccess);		\
    }									\
  }

--- a/configure.ac
+++ b/configure.ac
@@ -7,7 +7,12 @@ AM_INIT_AUTOMAKE([subdir-objects 1.13])
 AM_EXTRA_RECURSIVE_TARGETS([tests bench])
 AC_CONFIG_MACRO_DIR([m4])
 AC_CONFIG_SRCDIR([Grid/Grid.h])
-AC_CONFIG_HEADERS([Grid/Config.h],[sed -i 's|PACKAGE_|GRID_|' Grid/Config.h])
+AC_CONFIG_HEADERS([Grid/Config.h],[[$SED_INPLACE -e 's|PACKAGE_|GRID_|' -e 's|[[:space:]]PACKAGE[[:space:]]| GRID_PACKAGE |' -e 's|[[:space:]]VERSION[[:space:]]| GRID_PACKAGE_VERSION |' Grid/Config.h]],
+    [if test x"$host_os" == x"${host_os#darwin}" ; then]
+        [SED_INPLACE="sed -i"]
+    [else]
+        [SED_INPLACE="sed -i .bak"]
+    [fi])
 m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])

 ################ Get git info
@@ -125,7 +130,7 @@ esac

 ############### fermions
 AC_ARG_ENABLE([fermion-reps],
-     [AC_HELP_STRING([--fermion-reps=yes|no], [enable extra fermion representation support])],
+     [AC_HELP_STRING([--enable-fermion-reps=yes|no], [enable extra fermion representation support])],
     [ac_FERMION_REPS=${enable_fermion_reps}], [ac_FERMION_REPS=yes])

 AM_CONDITIONAL(BUILD_FERMION_REPS, [ test "${ac_FERMION_REPS}X" == "yesX" ])
--- a/tests/core/Test_main.cc
+++ b/tests/core/Test_main.cc
@@ -231,6 +231,20 @@ int main(int argc, char **argv) {
      scalar = localInnerProduct(cVec, cVec);
      scalar = localNorm2(cVec);

+      std::cout << "Testing maxLocalNorm2" <<std::endl;
+      
+      LatticeComplex rand_scalar(&Fine);
+      random(FineRNG, rand_scalar);  //uniform [0,1]
+      for(Integer gsite=0;gsite<Fine.gSites();gsite++){ //check on every site independently
+	scalar = rand_scalar;
+	TComplex big(10.0);
+	Coordinate coor;
+	Fine.GlobalIndexToGlobalCoor(gsite,coor);
+        pokeSite(big,scalar,coor);
+	
+	RealD Linfty = maxLocalNorm2(scalar);
+	assert(Linfty == 100.0);
+      }
      //     -=,+=,*=,()
      //     add,+,sub,-,mult,mac,*
      //     adj,conjugate
@@ -549,7 +563,8 @@ int main(int argc, char **argv) {

                  std::vector<int> shiftcoor = coor;
                  shiftcoor[dir] = (shiftcoor[dir] + shift + latt_size[dir]) %
-                                   (latt_size[dir] / mpi_layout[dir]);
+                                   (latt_size[dir]);
+		  //                                   (latt_size[dir] / mpi_layout[dir]);

                  std::vector<int> rl(4);
                  for (int dd = 0; dd < 4; dd++) {
--- a/tests/forces/Test_momentum_filter.cc
+++ b/tests/forces/Test_momentum_filter.cc
@@ -0,0 +1,154 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/Test_wilson_force.cc
+
+    Copyright (C) 2015
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace std;
+using namespace Grid;
+
+//Get the mu-directected links on the upper boundary and the bulk remainder
+template<typename Field>
+void getLinksBoundaryBulk(Field &bound, Field &bulk,  Field &from, const Coordinate &latt_size){
+  bound = Zero(); bulk = Zero();
+  for(int mu=0;mu<Nd;mu++){
+    LatticeInteger mucoor(bound.Grid());
+    LatticeCoordinate(mucoor, mu);
+
+    bound = where( mucoor == (Integer)(latt_size[mu] - 1), from, bound );
+    bulk = where( mucoor != (Integer)(latt_size[mu] - 1), from, bulk );
+  }
+}
+
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  Coordinate latt_size   = GridDefaultLatt();
+  Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  Coordinate mpi_layout  = GridDefaultMpi();
+
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  int threads = GridThread::GetThreads();
+  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
+
+  std::vector<int> seeds({1,2,3,4});
+
+  GridParallelRNG          pRNG(&Grid);
+  pRNG.SeedFixedIntegers(seeds);
+
+  typedef PeriodicGimplR Gimpl;
+  typedef WilsonGaugeAction<Gimpl> GaugeAction;
+  typedef NoHirep Representation; //fundamental
+  typedef NoSmearing<Gimpl> Smearing;
+  typedef MinimumNorm2<Gimpl, Smearing> Omelyan;
+  typedef Gimpl::Field Field;
+  typedef MomentumFilterApplyPhase<Field> Filter;
+  Filter filter(&Grid);
+  
+  //Setup a filter that disables link update on links passing through the global lattice boundary
+  typedef Filter::LatticeLorentzScalarType MaskType;
+  typedef Filter::LorentzScalarType MaskSiteType;
+
+  MaskSiteType zero, one;
+  for(int mu=0;mu<Nd;mu++){
+    zero(mu)()() = 0.;
+    one(mu)()() = 1.;
+  }
+  MaskType zeroField(&Grid), oneField(&Grid);
+  zeroField = zero;
+  oneField = one;
+
+  
+  filter.phase = oneField; //make every site 1.0
+
+  //Zero mu-directed links at upper boundary
+  for(int mu=0;mu<Nd;mu++){
+    LatticeInteger mucoor(&Grid);
+    LatticeCoordinate(mucoor, mu);
+    
+    filter.phase = where( mucoor == (Integer)(latt_size[mu] - 1) , zeroField, filter.phase );
+  }
+
+  //Start with a random gauge field
+  Field U(&Grid);
+  SU<Nc>::HotConfiguration(pRNG,U);
+
+  //Get the original links on the bulk and boundary for later use
+  Field Ubnd_orig(&Grid), Ubulk_orig(&Grid);
+  getLinksBoundaryBulk(Ubnd_orig, Ubulk_orig, U, latt_size);
+
+  ActionSet<Field,Representation> actions(1);  
+  double beta=6;
+  GaugeAction gauge_action(beta);
+  actions[0].push_back(&gauge_action);
+
+  Smearing smear;
+  IntegratorParameters params(1,1.); //1 MD step
+  Omelyan integrator(&Grid, params, actions, smear);
+  
+  integrator.setMomentumFilter(filter);
+
+  integrator.refresh(U, pRNG); //doesn't actually change the gauge field
+
+  //Check the momentum is zero on the boundary
+  const auto &P = integrator.getMomentum();
+  Field Pbnd(&Grid), Pbulk(&Grid);
+  getLinksBoundaryBulk(Pbnd, Pbulk, const_cast<Field&>(P), latt_size);
+
+  RealD Pbnd_nrm = norm2(Pbnd); //expect zero
+  std::cout << GridLogMessage << "After refresh, norm2 of mu-directed conjugate momentum on boundary is: " << Pbnd_nrm << " (expect 0)" << std::endl;
+  RealD Pbulk_nrm = norm2(Pbulk); //expect non-zero
+  std::cout << GridLogMessage << "After refresh, norm2 of bulk conjugate momentum is: " << Pbulk_nrm << " (expect non-zero)" << std::endl;
+
+  //Evolve the gauge field
+  integrator.integrate(U);
+
+  //Check momentum is still zero on boundary
+  getLinksBoundaryBulk(Pbnd, Pbulk, const_cast<Field&>(P), latt_size);
+  
+  Pbnd_nrm = norm2(Pbnd); //expect zero
+  std::cout << GridLogMessage << "After integrate, norm2 of mu-directed conjugate momentum on boundary is: " << Pbnd_nrm << " (expect 0)" << std::endl;
+  Pbulk_nrm = norm2(Pbulk); //expect non-zero
+  std::cout << GridLogMessage << "After integrate, norm2 of bulk conjugate momentum is: " << Pbulk_nrm << " (expect non-zero)" << std::endl;
+
+  //Get the new bulk and bound links
+  Field Ubnd_new(&Grid), Ubulk_new(&Grid);
+  getLinksBoundaryBulk(Ubnd_new, Ubulk_new, U, latt_size);
+
+  Field Ubnd_diff = Ubnd_new - Ubnd_orig;
+  Field Ubulk_diff = Ubulk_new - Ubulk_orig;
+
+  RealD Ubnd_change = norm2( Ubnd_diff );
+  RealD Ubulk_change = norm2( Ubulk_diff );
+  std::cout << GridLogMessage << "After integrate, norm2 of change in mu-directed boundary links is : " << Ubnd_change << " (expect 0)" << std::endl;
+  std::cout << GridLogMessage << "After integrate, norm2 of change in bulk links is : " << Ubulk_change << " (expect non-zero)" << std::endl;
+
+  Grid_finalize();
+}