Update

Updated for verbose on host vs. device side csum
Update on aurora
2026-03-14 08:16:10 +00:00 · 2025-12-02 23:22:42 +00:00 · 2025-12-02 23:15:32 +00:00 · 2025-12-02 22:24:54 +00:00 · 2025-12-02 16:01:37 -05:00 · 2025-12-02 15:51:29 -05:00
73 changed files with 255 additions and 3291 deletions
--- a/Grid/Makefile.am
+++ b/Grid/Makefile.am
@@ -54,24 +54,22 @@ Version.h: version-cache
 include Make.inc
 include Eigen.inc

-if BUILD_FERMION_INSTANTIATIONS
-  extra_sources+=$(WILS_FERMION_FILES)
-  extra_sources+=$(STAG_FERMION_FILES)
+extra_sources+=$(WILS_FERMION_FILES)
+extra_sources+=$(STAG_FERMION_FILES)
 if BUILD_ZMOBIUS
-    extra_sources+=$(ZWILS_FERMION_FILES)
+  extra_sources+=$(ZWILS_FERMION_FILES)
 endif
 if BUILD_GPARITY
-    extra_sources+=$(GP_FERMION_FILES)
+  extra_sources+=$(GP_FERMION_FILES)
 endif
 if BUILD_FERMION_REPS
-    extra_sources+=$(ADJ_FERMION_FILES)
-    extra_sources+=$(TWOIND_FERMION_FILES)
+  extra_sources+=$(ADJ_FERMION_FILES)
+  extra_sources+=$(TWOIND_FERMION_FILES)
 endif
 if BUILD_SP
    extra_sources+=$(SP_FERMION_FILES)
 if BUILD_FERMION_REPS
-      extra_sources+=$(SP_TWOIND_FERMION_FILES)
-endif
+    extra_sources+=$(SP_TWOIND_FERMION_FILES)
 endif
 endif

--- a/Grid/algorithms/blas/BatchedBlas.h
+++ b/Grid/algorithms/blas/BatchedBlas.h
@@ -28,7 +28,6 @@ Author: Peter Boyle <pboyle@bnl.gov>
 #pragma once

 #ifdef GRID_HIP
-#include <hip/hip_version.h>
 #include <hipblas/hipblas.h>
 #endif
 #ifdef GRID_CUDA
@@ -256,29 +255,16 @@ public:
    if ( OpB == GridBLAS_OP_N ) hOpB = HIPBLAS_OP_N;
    if ( OpB == GridBLAS_OP_T ) hOpB = HIPBLAS_OP_T;
    if ( OpB == GridBLAS_OP_C ) hOpB = HIPBLAS_OP_C;
-#if defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR >=7)
    auto err = hipblasZgemmBatched(gridblasHandle,
 				   hOpA,
 				   hOpB,
 				   m,n,k,
-				   (hipDoubleComplex *) &alpha_p[0],
-				   (hipDoubleComplex **)&Amk[0], lda,
-				   (hipDoubleComplex **)&Bkn[0], ldb,
-				   (hipDoubleComplex *) &beta_p[0],
-				   (hipDoubleComplex **)&Cmn[0], ldc,
+				   (hipblasDoubleComplex *) &alpha_p[0],
+				   (hipblasDoubleComplex **)&Amk[0], lda,
+				   (hipblasDoubleComplex **)&Bkn[0], ldb,
+				   (hipblasDoubleComplex *) &beta_p[0],
+				   (hipblasDoubleComplex **)&Cmn[0], ldc,
 				   batchCount);
-#else
-    auto err = hipblasZgemmBatched(gridblasHandle,
-                                   hOpA,
-                                   hOpB,
-                                   m,n,k,
-                                   (hipblasDoubleComplex *) &alpha_p[0],
-                                   (hipblasDoubleComplex **)&Amk[0], lda,
-                                   (hipblasDoubleComplex **)&Bkn[0], ldb,
-                                   (hipblasDoubleComplex *) &beta_p[0],
-                                   (hipblasDoubleComplex **)&Cmn[0], ldc,
-                                   batchCount);
-#endif
    //	 std::cout << " hipblas return code " <<(int)err<<std::endl;
    GRID_ASSERT(err==HIPBLAS_STATUS_SUCCESS);
 #endif
@@ -517,30 +503,17 @@ public:
    if ( OpB == GridBLAS_OP_N ) hOpB = HIPBLAS_OP_N;
    if ( OpB == GridBLAS_OP_T ) hOpB = HIPBLAS_OP_T;
    if ( OpB == GridBLAS_OP_C ) hOpB = HIPBLAS_OP_C;
-#if defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR >=7)
    auto err = hipblasCgemmBatched(gridblasHandle,
 				   hOpA,
 				   hOpB,
 				   m,n,k,
-				   (hipComplex *) &alpha_p[0],
-				   (hipComplex **)&Amk[0], lda,
-				   (hipComplex **)&Bkn[0], ldb,
-				   (hipComplex *) &beta_p[0],
-				   (hipComplex **)&Cmn[0], ldc,
+				   (hipblasComplex *) &alpha_p[0],
+				   (hipblasComplex **)&Amk[0], lda,
+				   (hipblasComplex **)&Bkn[0], ldb,
+				   (hipblasComplex *) &beta_p[0],
+				   (hipblasComplex **)&Cmn[0], ldc,
 				   batchCount);
-#else
-    auto err = hipblasCgemmBatched(gridblasHandle,
-                                   hOpA,
-                                   hOpB,
-                                   m,n,k,
-                                   (hipblasComplex *) &alpha_p[0],
-                                   (hipblasComplex **)&Amk[0], lda,
-                                   (hipblasComplex **)&Bkn[0], ldb,
-                                   (hipblasComplex *) &beta_p[0],
-                                   (hipblasComplex **)&Cmn[0], ldc,
-                                   batchCount);

-#endif
    GRID_ASSERT(err==HIPBLAS_STATUS_SUCCESS);
 #endif
 #ifdef GRID_CUDA
@@ -1121,19 +1094,11 @@ public:
    GRID_ASSERT(info.size()==batchCount);

 #ifdef GRID_HIP
-#if defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR >=7)
    auto err = hipblasZgetrfBatched(gridblasHandle,(int)n,
-				    (hipDoubleComplex **)&Ann[0], (int)n,
+				    (hipblasDoubleComplex **)&Ann[0], (int)n,
 				    (int*) &ipiv[0],
 				    (int*) &info[0],
 				    (int)batchCount);
-#else
-    auto err = hipblasZgetrfBatched(gridblasHandle,(int)n,
-                                    (hipblasDoubleComplex **)&Ann[0], (int)n,
-                                    (int*) &ipiv[0],
-                                    (int*) &info[0],
-                                    (int)batchCount);
-#endif
    GRID_ASSERT(err==HIPBLAS_STATUS_SUCCESS);
 #endif
 #ifdef GRID_CUDA
@@ -1159,20 +1124,11 @@ public:
    GRID_ASSERT(info.size()==batchCount);

 #ifdef GRID_HIP
-#if defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR >=7)
    auto err = hipblasCgetrfBatched(gridblasHandle,(int)n,
-				    (hipComplex **)&Ann[0], (int)n,
+				    (hipblasComplex **)&Ann[0], (int)n,
 				    (int*) &ipiv[0],
 				    (int*) &info[0],
 				    (int)batchCount);
-#else
-    auto err = hipblasCgetrfBatched(gridblasHandle,(int)n,
-                                    (hipblasComplex **)&Ann[0], (int)n,
-                                    (int*) &ipiv[0],
-                                    (int*) &info[0],
-                                    (int)batchCount);
-#endif
-
    GRID_ASSERT(err==HIPBLAS_STATUS_SUCCESS);
 #endif
 #ifdef GRID_CUDA
@@ -1245,22 +1201,12 @@ public:
    GRID_ASSERT(Cnn.size()==batchCount);

 #ifdef GRID_HIP
-#if defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR >=7)
    auto err = hipblasZgetriBatched(gridblasHandle,(int)n,
-				    (hipDoubleComplex **)&Ann[0], (int)n,
+				    (hipblasDoubleComplex **)&Ann[0], (int)n,
 				    (int*) &ipiv[0],
-				    (hipDoubleComplex **)&Cnn[0], (int)n,
+				    (hipblasDoubleComplex **)&Cnn[0], (int)n,
 				    (int*) &info[0],
 				    (int)batchCount);
-#else
-    auto err = hipblasZgetriBatched(gridblasHandle,(int)n,
-                                    (hipblasDoubleComplex **)&Ann[0], (int)n,
-                                    (int*) &ipiv[0],
-                                    (hipblasDoubleComplex **)&Cnn[0], (int)n,
-                                    (int*) &info[0],
-                                    (int)batchCount);
-
-#endif
    GRID_ASSERT(err==HIPBLAS_STATUS_SUCCESS);
 #endif
 #ifdef GRID_CUDA
@@ -1289,21 +1235,12 @@ public:
    GRID_ASSERT(Cnn.size()==batchCount);

 #ifdef GRID_HIP
-#if defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR >=7)
    auto err = hipblasCgetriBatched(gridblasHandle,(int)n,
-				    (hipComplex **)&Ann[0], (int)n,
+				    (hipblasComplex **)&Ann[0], (int)n,
 				    (int*) &ipiv[0],
-				    (hipComplex **)&Cnn[0], (int)n,
+				    (hipblasComplex **)&Cnn[0], (int)n,
 				    (int*) &info[0],
 				    (int)batchCount);
-#else
-    auto err = hipblasCgetriBatched(gridblasHandle,(int)n,
-                                    (hipblasComplex **)&Ann[0], (int)n,
-                                    (int*) &ipiv[0],
-                                    (hipblasComplex **)&Cnn[0], (int)n,
-                                    (int*) &info[0],
-                                    (int)batchCount);
-#endif
    GRID_ASSERT(err==HIPBLAS_STATUS_SUCCESS);
 #endif
 #ifdef GRID_CUDA
--- a/Grid/algorithms/iterative/AdefMrhs.h
+++ b/Grid/algorithms/iterative/AdefMrhs.h
@@ -92,8 +92,8 @@ class TwoLevelCGmrhs
  // Vector case
  virtual void operator() (std::vector<Field> &src, std::vector<Field> &x)
  {
-    SolveSingleSystem(src,x);
-	// SolvePrecBlockCG(src,x);
+    //    SolveSingleSystem(src,x);
+    SolvePrecBlockCG(src,x);
  }

 ////////////////////////////////////////////////////////////////////////////////////////////////////
--- a/Grid/algorithms/multigrid/Aggregates.h
+++ b/Grid/algorithms/multigrid/Aggregates.h
@@ -97,7 +97,7 @@ public:

    RealD scale;

-    ConjugateGradient<FineField> CG(1.0e-4,2000,false);
+    ConjugateGradient<FineField> CG(1.0e-3,400,false);
    FineField noise(FineGrid);
    FineField Mn(FineGrid);

@@ -131,10 +131,7 @@ public:
    RealD scale;

    TrivialPrecon<FineField> simple_fine;
-    //    PrecGeneralisedConjugateResidualNonHermitian<FineField> GCR(0.001,10,DiracOp,simple_fine,30,30);
-    //    PrecGeneralisedConjugateResidualNonHermitian<FineField> GCR(0.001,10,DiracOp,simple_fine,12,12);
-    //    PrecGeneralisedConjugateResidualNonHermitian<FineField> GCR(0.001,30,DiracOp,simple_fine,12,12);
-    PrecGeneralisedConjugateResidualNonHermitian<FineField> GCR(0.001,30,DiracOp,simple_fine,10,10);
+    PrecGeneralisedConjugateResidualNonHermitian<FineField> GCR(0.001,30,DiracOp,simple_fine,12,12);
    FineField noise(FineGrid);
    FineField src(FineGrid);
    FineField guess(FineGrid);
@@ -149,16 +146,16 @@ public:
      
      DiracOp.Op(noise,Mn); std::cout<<GridLogMessage << "noise   ["<<b<<"] <n|Op|n> "<<innerProduct(noise,Mn)<<std::endl;

-      for(int i=0;i<3;i++){
+      for(int i=0;i<2;i++){
 	//  void operator() (const Field &src, Field &psi){
 #if 1
-	if (i==0)std::cout << GridLogMessage << " inverting on noise "<<std::endl;
+	std::cout << GridLogMessage << " inverting on noise "<<std::endl;
 	src = noise;
 	guess=Zero();
 	GCR(src,guess);
 	subspace[b] = guess;
 #else
-	if (i==0)std::cout << GridLogMessage << " inverting on zero "<<std::endl;
+	std::cout << GridLogMessage << " inverting on zero "<<std::endl;
 	src=Zero();
 	guess = noise;
 	GCR(src,guess);
@@ -170,7 +167,7 @@ public:

      }

-      DiracOp.Op(noise,Mn); std::cout<<GridLogMessage << "filtered["<<b<<"] <f|Op|f> "<<innerProduct(noise,Mn)<<" <f|OpDagOp|f>"<<norm2(Mn)<<std::endl;
+      DiracOp.Op(noise,Mn); std::cout<<GridLogMessage << "filtered["<<b<<"] <f|Op|f> "<<innerProduct(noise,Mn)<<std::endl;
      subspace[b]   = noise;

    }
--- a/Grid/communicator/Communicator_mpi3.cc
+++ b/Grid/communicator/Communicator_mpi3.cc
@@ -589,6 +589,7 @@ double CartesianCommunicator::StencilSendToRecvFromPrepare(std::vector<CommsRequ
      srq.PacketType = InterNodeRecv;
      srq.bytes      = rbytes;
      srq.req        = rrq;
+      srq.dest       = from;
      srq.host_buf   = host_recv;
      srq.device_buf = recv;
      srq.tag        = tag;
@@ -765,7 +766,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
      srq.host_buf   = NULL;
      srq.device_buf = xmit;
      srq.tag        = -1;
-      srq.dest       = dest;
+      srq.dest       = from;
      srq.commdir    = dir;
      list.push_back(srq);
    }
@@ -817,12 +818,40 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque
    int ierr = MPI_Waitall(MpiRequests.size(),&MpiRequests[0],&status[0]); // Sends are guaranteed in order. No harm in not completing.
    GRID_ASSERT(ierr==0);
  }
-  
+
  //  for(int r=0;r<nreq;r++){
  //    if ( list[r].PacketType==InterNodeRecv ) {
  //      acceleratorCopyToDeviceAsynch(list[r].host_buf,list[r].device_buf,list[r].bytes);
  //    }
  //  }
+  // Get global error count in comms receive
+#define AUDIT_FLIGHT_RECORDER_ERRORS
+#ifdef  AUDIT_FLIGHT_RECORDER_ERRORS
+  uint64_t EC = FlightRecorder::CommsErrorCount();
+  if (EC) std::cerr << " global sum error count "<<EC<<std::endl;
+  this->GlobalSum(EC);
+  if (EC) {
+    for(int r=0;r<list.size();r++){
+#ifdef GRID_CHECKSUM_COMMS
+	uint64_t rbytes_data = list[r].bytes - 8;
+#else
+	uint64_t rbytes_data = list[r].bytes;
+#endif
+      if (list[r].PacketType == InterNodeReceiveHtoD) {
+	std::cerr << " calling xor reduce "<<std::endl;
+	uint64_t csg = gpu_xor((uint64_t*)list[r].device_buf,rbytes_data/8);
+	uint64_t csh = cpu_xor((uint64_t*)list[r].host_buf,rbytes_data/8);
+	std::cerr << " Packet "<<r<<" Receive from " <<list[r].dest<<" host csum "<<csh<<" gpu csum "<<csg<<std::endl;
+      } if (list[r].PacketType == InterNodeXmitISend ) {
+	std::cerr << " calling xor reduce "<<std::endl;
+	uint64_t csg = gpu_xor((uint64_t*)list[r].device_buf,rbytes_data/8);
+	uint64_t csh = cpu_xor((uint64_t*)list[r].host_buf,rbytes_data/8);
+	std::cerr << " Packet "<<r<<" Send to " <<list[r].dest<<" host csum "<<csh<<" gpu csum "<<csg<<std::endl;
+      }
+    }
+  }
+#endif
+
 #ifdef GRID_CHECKSUM_COMMS
  for(int r=0;r<list.size();r++){
    if ( list[r].PacketType == InterNodeReceiveHtoD ) {
--- a/Grid/lattice/Lattice_reduction_sycl.h
+++ b/Grid/lattice/Lattice_reduction_sycl.h
@@ -68,8 +68,7 @@ inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osite
  return result;
 }

-
-template<class Word> Word svm_xor(Word *vec,uint64_t L)
+template<class Word> Word gpu_xor(Word *vec,uint64_t L)
 {
  Word identity;  identity=0;
  Word ret = 0;
@@ -87,6 +86,18 @@ template<class Word> Word svm_xor(Word *vec,uint64_t L)
  theGridAccelerator->wait();
  return ret;
 }
+template<class Word> Word cpu_xor(Word *vec,uint64_t L)
+{
+  Word csum=0;
+  for(uint64_t w=0;w<L;w++){
+    csum = csum ^ vec[w];
+  }
+  return csum;
+}
+template<class Word> Word svm_xor(Word *vec,uint64_t L)
+{
+  gpu_xor(vec,L);
+}
 template<class Word> Word checksum_gpu(Word *vec,uint64_t L)
 {
  Word identity;  identity=0;
--- a/Grid/qcd/QCD.h
+++ b/Grid/qcd/QCD.h
@@ -596,32 +596,16 @@ template<int Index,class vobj> inline vobj transposeColour(const vobj &lhs){
 //////////////////////////////////////////
 // Trace lattice and non-lattice
 //////////////////////////////////////////
-#define GRID_UNOP(name)   name
-#define GRID_DEF_UNOP(op, name)						\
-  template <typename T1, typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value,T1>::type * = nullptr> \
-  inline auto op(const T1 &arg) ->decltype(LatticeUnaryExpression<GRID_UNOP(name),T1>(GRID_UNOP(name)(), arg)) \
-  {									\
-    return     LatticeUnaryExpression<GRID_UNOP(name),T1>(GRID_UNOP(name)(), arg); \
-  }
-
 template<int Index,class vobj>
 inline auto traceSpin(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<SpinIndex>(vobj()))>
 {
  return traceIndex<SpinIndex>(lhs);
 }
-
-GridUnopClass(UnaryTraceSpin, traceIndex<SpinIndex>(a));
-GRID_DEF_UNOP(traceSpin, UnaryTraceSpin);
-
 template<int Index,class vobj>
 inline auto traceColour(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<ColourIndex>(vobj()))>
 {
  return traceIndex<ColourIndex>(lhs);
 }
-
-GridUnopClass(UnaryTraceColour, traceIndex<ColourIndex>(a));
-GRID_DEF_UNOP(traceColour, UnaryTraceColour);
-
 template<int Index,class vobj>
 inline auto traceSpin(const vobj &lhs) -> Lattice<decltype(traceIndex<SpinIndex>(lhs))>
 {
@@ -633,8 +617,6 @@ inline auto traceColour(const vobj &lhs) -> Lattice<decltype(traceIndex<ColourIn
  return traceIndex<ColourIndex>(lhs);
 }

-#undef GRID_UNOP
-#undef GRID_DEF_UNOP
 //////////////////////////////////////////
 // Current types
 //////////////////////////////////////////
--- a/Grid/qcd/modules/ObservableModules.h
+++ b/Grid/qcd/modules/ObservableModules.h
@@ -103,18 +103,6 @@ class PolyakovMod: public ObservableModule<PolyakovLogger<Impl>, NoParameters>{
  PolyakovMod(): ObsBase(NoParameters()){}
 };

-template < class Impl >
-class SpatialPolyakovMod: public ObservableModule<SpatialPolyakovLogger<Impl>, NoParameters>{
-  typedef ObservableModule<SpatialPolyakovLogger<Impl>, NoParameters> ObsBase;
-  using ObsBase::ObsBase; // for constructors
-
-  // acquire resource
-  virtual void initialize(){
-    this->ObservablePtr.reset(new SpatialPolyakovLogger<Impl>());
-  }
-  public:
-  SpatialPolyakovMod(): ObsBase(NoParameters()){}
-};

 template < class Impl >
 class TopologicalChargeMod: public ObservableModule<TopologicalCharge<Impl>, TopologyObsParameters>{
--- a/Grid/qcd/observables/polyakov_loop.h
+++ b/Grid/qcd/observables/polyakov_loop.h
@@ -2,12 +2,11 @@

 Grid physics library, www.github.com/paboyle/Grid

-Source file: ./Grid/qcd/observables/polyakov_loop.h
+Source file: ./lib/qcd/modules/polyakov_line.h

-Copyright (C) 2025
+Copyright (C) 2017

 Author: David Preti <david.preti@csic.es>
-Author: Alexis Verney-Provatas <2414441@swansea.ac.uk>

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -61,43 +60,4 @@ class PolyakovLogger : public HmcObservable<typename Impl::Field> {
  }
 };

-template <class Impl>
-class SpatialPolyakovLogger : public HmcObservable<typename Impl::Field> {
- public:
-    // here forces the Impl to be of gauge fields
-    // if not the compiler will complain
-    INHERIT_GIMPL_TYPES(Impl);
-
-     // necessary for HmcObservable compatibility
-    typedef typename Impl::Field Field;
-
-    void TrajectoryComplete(int traj,
-                            Field &U,
-                            GridSerialRNG &sRNG,
-                            GridParallelRNG &pRNG) {
-
-    // Save current numerical output precision
-    int def_prec = std::cout.precision();
-
-    // Assume that the dimensions are D=3+1
-    int Ndim = 3;
-    ComplexD polyakov;
-   
-    // Iterate over the spatial directions and print the average spatial polyakov loop
-    // over them 
-    for (int idx=0; idx<Ndim; idx++) {
-        polyakov = WilsonLoops<Impl>::avgPolyakovLoop(U, idx);
-    
-        std::cout << GridLogMessage
-            << std::setprecision(std::numeric_limits<Real>::digits10 + 1)
-            << "Polyakov Loop in the " << idx << " spatial direction : [ " << traj << " ] "<< polyakov << std::endl;
-
-    }
-
-    // Return to original output precision
-    std::cout.precision(def_prec);
-
-  }
-};
-
 NAMESPACE_END(Grid);
--- a/Grid/qcd/utils/Sp2n.impl.h
+++ b/Grid/qcd/utils/Sp2n.impl.h
@@ -254,9 +254,9 @@ static void testGenerators(GroupName::Sp) {
  }
 }

-template <class vtype, int N>
-static Lattice<iScalar<iScalar<iMatrix<vtype, N> > > >
-ProjectOnGeneralGroup(const Lattice<iScalar<iScalar<iMatrix<vtype, N> > > > &Umu, GroupName::Sp) {
+template <int N>
+static Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > >
+ProjectOnGeneralGroup(const Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu, GroupName::Sp) {
  return ProjectOnSpGroup(Umu);
 }

--- a/Grid/qcd/utils/WilsonLoops.h
+++ b/Grid/qcd/utils/WilsonLoops.h
@@ -177,43 +177,25 @@ public:
  }

  //////////////////////////////////////////////////
-  // average Polyakov loop in mu direction over all directions != mu
+  // average over all x,y,z the temporal loop
  //////////////////////////////////////////////////
-  static ComplexD avgPolyakovLoop(const GaugeField &Umu, const int mu) {  //assume Nd=4
-    
-    // Protect against bad value of mu [0, 3]
-    if ((mu < 0 ) || (mu > 3)) {
-      std::cout << GridLogError << "Index is not an integer inclusively between 0 and 3." << std::endl;
-      exit(1);
-    }
-
-    // U_loop is U_{mu}
-    GaugeMat U_loop(Umu.Grid()), P(Umu.Grid());
+  static ComplexD avgPolyakovLoop(const GaugeField &Umu) {  //assume Nd=4
+    GaugeMat Ut(Umu.Grid()), P(Umu.Grid());
    ComplexD out;
    int T = Umu.Grid()->GlobalDimensions()[3];
    int X = Umu.Grid()->GlobalDimensions()[0];
    int Y = Umu.Grid()->GlobalDimensions()[1];
    int Z = Umu.Grid()->GlobalDimensions()[2];

-    // Number of sites in mu direction
-    int N_mu = Umu.Grid()->GlobalDimensions()[mu];
-
-    U_loop = peekLorentz(Umu, mu); //Select direction
-    P = U_loop;
-    for (int t=1;t<N_mu;t++){ 
-      P = Gimpl::CovShiftForward(U_loop,mu,P);
+    Ut = peekLorentz(Umu,3); //Select temporal direction
+    P = Ut;
+    for (int t=1;t<T;t++){ 
+      P = Gimpl::CovShiftForward(Ut,3,P);
    }
   RealD norm = 1.0/(Nc*X*Y*Z*T);
   out = sum(trace(P))*norm;
   return out;   
-  }  
-
-  /////////////////////////////////////////////////
-  // overload for temporal Polyakov loop
-  /////////////////////////////////////////////////
-  static ComplexD avgPolyakovLoop(const GaugeField &Umu) { 
-    return avgPolyakovLoop(Umu, 3);
-  }
+}

  //////////////////////////////////////////////////
  // average over traced single links
--- a/Grid/util/FlightRecorder.cc
+++ b/Grid/util/FlightRecorder.cc
@@ -47,6 +47,7 @@ int32_t  FlightRecorder::CsumLoggingCounter;
 int32_t  FlightRecorder::NormLoggingCounter;
 int32_t  FlightRecorder::ReductionLoggingCounter;
 uint64_t FlightRecorder::ErrorCounter;
+uint64_t FlightRecorder::CommsErrorCounter;

 std::vector<double> FlightRecorder::NormLogVector;
 std::vector<double> FlightRecorder::ReductionLogVector;
@@ -118,6 +119,10 @@ void FlightRecorder::SetLoggingModeVerify(void)
  ResetCounters();
  LoggingMode = LoggingModeVerify;
 }
+uint64_t FlightRecorder::CommsErrorCount(void)
+{
+  return CommsErrorCounter;
+}
 uint64_t FlightRecorder::ErrorCount(void)
 {
  return ErrorCounter;
@@ -312,6 +317,7 @@ void FlightRecorder::xmitLog(void *buf,uint64_t bytes)
 	if ( !ContinueOnFail ) GRID_ASSERT(0);

 	ErrorCounter++;
+
      } else {
 	if ( PrintEntireLog ) { 
 	  std::cerr<<"FlightRecorder::XmitLog : VALID "<< XmitLoggingCounter <<" "<< std::hexfloat << _xor << " "<<  XmitLogVector[XmitLoggingCounter] <<std::endl;
@@ -357,7 +363,9 @@ void FlightRecorder::recvLog(void *buf,uint64_t bytes,int rank)
 	
 	if ( !ContinueOnFail ) GRID_ASSERT(0);

+	CommsErrorCounter++;
 	ErrorCounter++;
+	
      } else {
 	if ( PrintEntireLog ) { 
 	  std::cerr<<"FlightRecorder::RecvLog : VALID "<< RecvLoggingCounter <<" "<< std::hexfloat << _xor << " "<<  RecvLogVector[RecvLoggingCounter] <<std::endl;
--- a/Grid/util/FlightRecorder.h
+++ b/Grid/util/FlightRecorder.h
@@ -9,8 +9,8 @@ class FlightRecorder {
    LoggingModeRecord,
    LoggingModeVerify
  };
-  
  static int                   LoggingMode;
+  static uint64_t              CommsErrorCounter;
  static uint64_t              ErrorCounter;
  static const char *                StepName;
  static int32_t               StepLoggingCounter;
@@ -39,6 +39,7 @@ class FlightRecorder {
  static void Truncate(void);
  static void ResetCounters(void);
  static uint64_t ErrorCount(void);
+  static uint64_t CommsErrorCount(void);
  static void xmitLog(void *,uint64_t bytes);
  static void recvLog(void *,uint64_t bytes,int rank);
 };
--- a/HMC/FTHMC2p1f.cc
+++ b/HMC/FTHMC2p1f.cc
@@ -24,11 +24,7 @@ See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 /*  END LEGAL */
-
-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-#include<Grid/Grid.h>
+#include <Grid/Grid.h>

 #if Nc == 3
 #include <Grid/qcd/smearing/GaugeConfigurationMasked.h>
@@ -234,4 +230,3 @@ int main(int argc, char **argv)
 #endif
 } // main

-#endif
--- a/HMC/FTHMC2p1f_3GeV.cc
+++ b/HMC/FTHMC2p1f_3GeV.cc
@@ -25,11 +25,7 @@ directory
 *************************************************************************************/
 /*  END LEGAL */

-
-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-#include<Grid/Grid.h>
+#include <Grid/Grid.h>

 #if Nc == 3
 #include <Grid/qcd/smearing/GaugeConfigurationMasked.h>
@@ -235,4 +231,5 @@ int main(int argc, char **argv)
 #endif
 } // main

-#endif
+
+
--- a/HMC/HMC2p1f_3GeV.cc
+++ b/HMC/HMC2p1f_3GeV.cc
@@ -24,11 +24,7 @@ See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 /*  END LEGAL */
-
-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-#include<Grid/Grid.h>
+#include <Grid/Grid.h>

 #if Nc == 3
 #include <Grid/qcd/smearing/GaugeConfigurationMasked.h>
@@ -234,4 +230,5 @@ int main(int argc, char **argv)
 #endif
 } // main

-#endif
+
+
--- a/HMC/Mobius2p1f.cc
+++ b/HMC/Mobius2p1f.cc
@@ -27,11 +27,7 @@ See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 /*  END LEGAL */
-
-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-#include<Grid/Grid.h>
+#include <Grid/Grid.h>

 int main(int argc, char **argv) {
  using namespace Grid;
@@ -199,4 +195,5 @@ int main(int argc, char **argv) {
  Grid_finalize();
 } // main

-#endif
+
+
--- a/HMC/Mobius2p1fEOFA.cc
+++ b/HMC/Mobius2p1fEOFA.cc
@@ -28,11 +28,7 @@ See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 /*  END LEGAL */
-
-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-#include<Grid/Grid.h>
+#include <Grid/Grid.h>

 #ifdef GRID_DEFAULT_PRECISION_DOUBLE
 #define MIXED_PRECISION
@@ -453,4 +449,5 @@ int main(int argc, char **argv) {
  Grid_finalize();
 } // main

-#endif
+
+
--- a/HMC/Mobius2p1fEOFA_F1.cc
+++ b/HMC/Mobius2p1fEOFA_F1.cc
@@ -28,11 +28,7 @@ See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 /*  END LEGAL */
-
-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-#include<Grid/Grid.h>
+#include <Grid/Grid.h>

 #ifdef GRID_DEFAULT_PRECISION_DOUBLE
 #define MIXED_PRECISION
@@ -446,4 +442,5 @@ int main(int argc, char **argv) {
  Grid_finalize();
 } // main

-#endif
+
+
--- a/HMC/Mobius2p1fIDSDRGparityEOFA_40ID.cc
+++ b/HMC/Mobius2p1fIDSDRGparityEOFA_40ID.cc
@@ -28,11 +28,7 @@ See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 /*  END LEGAL */
-
-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-#include<Grid/Grid.h>
+#include <Grid/Grid.h>

 using namespace Grid;

@@ -922,5 +918,3 @@ int main(int argc, char **argv) {
  return 0;
 #endif
 } // main
-
-#endif
--- a/HMC/Mobius2p1fIDSDRGparityEOFA_48ID.cc
+++ b/HMC/Mobius2p1fIDSDRGparityEOFA_48ID.cc
@@ -28,11 +28,7 @@ See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 /*  END LEGAL */
-
-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-#include<Grid/Grid.h>
+#include <Grid/Grid.h>

 using namespace Grid;

@@ -877,5 +873,3 @@ int main(int argc, char **argv) {
  return 0;
 #endif
 } // main
-
-#endif
--- a/HMC/Mobius2p1fRHMC.cc
+++ b/HMC/Mobius2p1fRHMC.cc
@@ -27,11 +27,7 @@ See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 /*  END LEGAL */
-
-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-#include<Grid/Grid.h>
+#include <Grid/Grid.h>

 int main(int argc, char **argv) {
  using namespace Grid;
@@ -197,4 +193,5 @@ int main(int argc, char **argv) {
  Grid_finalize();
 } // main

-#endif
+
+
--- a/HMC/Mobius2p1f_DD_EOFA_96I_3level.cc
+++ b/HMC/Mobius2p1f_DD_EOFA_96I_3level.cc
@@ -27,11 +27,7 @@ See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 /*  END LEGAL */
-
-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-#include<Grid/Grid.h>
+#include <Grid/Grid.h>

 NAMESPACE_BEGIN(Grid);

@@ -516,4 +512,5 @@ int main(int argc, char **argv) {
  Grid_finalize();
 } // main

-#endif
+
+
--- a/HMC/Mobius2p1f_DD_EOFA_96I_double.cc
+++ b/HMC/Mobius2p1f_DD_EOFA_96I_double.cc
@@ -27,11 +27,7 @@ See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 /*  END LEGAL */
-
-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-#include<Grid/Grid.h>
+#include <Grid/Grid.h>

 int main(int argc, char **argv) {
  using namespace Grid;
@@ -349,4 +345,5 @@ int main(int argc, char **argv) {
  Grid_finalize();
 } // main

-#endif
+
+
--- a/HMC/Mobius2p1f_DD_EOFA_96I_mixed.cc
+++ b/HMC/Mobius2p1f_DD_EOFA_96I_mixed.cc
@@ -27,11 +27,7 @@ See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 /*  END LEGAL */
-
-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-#include<Grid/Grid.h>
+#include <Grid/Grid.h>

 NAMESPACE_BEGIN(Grid);

@@ -520,4 +516,5 @@ int main(int argc, char **argv) {
  Grid_finalize();
 } // main

-#endif
+
+
--- a/HMC/Mobius2p1f_DD_EOFA_96I_mshift.cc
+++ b/HMC/Mobius2p1f_DD_EOFA_96I_mshift.cc
@@ -27,11 +27,7 @@ See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 /*  END LEGAL */
-
-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-#include<Grid/Grid.h>
+#include <Grid/Grid.h>

 NAMESPACE_BEGIN(Grid);

@@ -571,4 +567,5 @@ int main(int argc, char **argv) {
  Grid_finalize();
 } // main

-#endif
+
+
--- a/HMC/Mobius2p1f_DD_RHMC.cc
+++ b/HMC/Mobius2p1f_DD_RHMC.cc
@@ -27,11 +27,7 @@ See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 /*  END LEGAL */
-
-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-#include<Grid/Grid.h>
+#include <Grid/Grid.h>

 int main(int argc, char **argv) {
  using namespace Grid;
@@ -267,4 +263,5 @@ int main(int argc, char **argv) {
  Grid_finalize();
 } // main

-#endif
+
+
--- a/HMC/Mobius2p1f_DD_RHMC_96I.cc
+++ b/HMC/Mobius2p1f_DD_RHMC_96I.cc
@@ -27,11 +27,7 @@ See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 /*  END LEGAL */
-
-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-#include<Grid/Grid.h>
+#include <Grid/Grid.h>

 int main(int argc, char **argv) {
  using namespace Grid;
@@ -421,4 +417,5 @@ int main(int argc, char **argv) {
  Grid_finalize();
 } // main

-#endif
+
+
--- a/HMC/Mobius2p1f_DD_RHMC_96I_mixed.cc
+++ b/HMC/Mobius2p1f_DD_RHMC_96I_mixed.cc
@@ -27,11 +27,7 @@ See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 /*  END LEGAL */
-
-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-#include<Grid/Grid.h>
+#include <Grid/Grid.h>

 NAMESPACE_BEGIN(Grid);

@@ -456,4 +452,5 @@ int main(int argc, char **argv) {
  Grid_finalize();
 } // main

-#endif
+
+
--- a/HMC/Mobius2p1f_EOFA_96I_hmc.cc
+++ b/HMC/Mobius2p1f_EOFA_96I_hmc.cc
@@ -27,11 +27,7 @@ See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 /*  END LEGAL */
-
-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-#include<Grid/Grid.h>
+#include <Grid/Grid.h>

 NAMESPACE_BEGIN(Grid);

@@ -466,4 +462,5 @@ int main(int argc, char **argv) {
  Grid_finalize();
 } // main

-#endif
+
+
--- a/HMC/Mobius2p1f_EOFA_96I_hmc_double.cc
+++ b/HMC/Mobius2p1f_EOFA_96I_hmc_double.cc
@@ -27,11 +27,7 @@ See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 /*  END LEGAL */
-
-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-#include<Grid/Grid.h>
+#include <Grid/Grid.h>



@@ -268,4 +264,5 @@ int main(int argc, char **argv) {
  Grid_finalize();
 } // main

-#endif
+
+
--- a/HMC/disable_examples_without_instantiations.h
+++ b/HMC/disable_examples_without_instantiations.h
@@ -1,16 +0,0 @@
-#include <Grid/Grid.h>
-#pragma once
-
-
-#ifndef ENABLE_FERMION_INSTANTIATIONS
-#include <iostream>
-
-int main(void) {
-  std::cout << "This build of Grid was configured to exclude fermion instantiations, "
-	    << "which this example relies on. "
-	    << "Please reconfigure and rebuild Grid with --enable-fermion-instantiations"
-	    << "to run this example."
-	    << std::endl;
-  return 1;
-}
-#endif
--- a/benchmarks/Benchmark_ITT.cc
+++ b/benchmarks/Benchmark_ITT.cc
@@ -26,9 +26,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
-#include "disable_benchmarks_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>

 using namespace Grid;
@@ -734,5 +731,3 @@ int main (int argc, char ** argv)

  Grid_finalize();
 }
-
-#endif
--- a/benchmarks/Benchmark_dwf.cc
+++ b/benchmarks/Benchmark_dwf.cc
@@ -20,9 +20,6 @@
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
-#include "disable_benchmarks_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>
 #ifdef GRID_CUDA
 #define CUDA_PROFILE
@@ -442,4 +439,3 @@ void Benchmark(int Ls, Coordinate Dirichlet,bool sloppy)
  GRID_ASSERT(norm2(src_e)<1.0e-4);
  GRID_ASSERT(norm2(src_o)<1.0e-4);
 }
-#endif
--- a/benchmarks/Benchmark_dwf_fp32.cc
+++ b/benchmarks/Benchmark_dwf_fp32.cc
@@ -20,10 +20,6 @@
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
-#include "disable_benchmarks_without_instantiations.h"
-
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>
 #ifdef GRID_CUDA
 #define CUDA_PROFILE
@@ -443,5 +439,3 @@ void Benchmark(int Ls, Coordinate Dirichlet,bool sloppy)
  GRID_ASSERT(norm2(src_e)<1.0e-4);
  GRID_ASSERT(norm2(src_o)<1.0e-4);
 }
-
-#endif
--- a/benchmarks/Benchmark_dwf_fp32_paranoid.cc
+++ b/benchmarks/Benchmark_dwf_fp32_paranoid.cc
@@ -20,9 +20,6 @@
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
-#include "disable_benchmarks_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>
 #ifdef GRID_CUDA
 #define CUDA_PROFILE
@@ -388,5 +385,3 @@ int main (int argc, char ** argv)
  Grid_finalize();
  exit(0);
 }
-
-#endif
--- a/benchmarks/Benchmark_dwf_sweep.cc
+++ b/benchmarks/Benchmark_dwf_sweep.cc
@@ -26,9 +26,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
-#include "disable_benchmarks_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>

 using namespace std;
@@ -241,4 +238,5 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
  }
 }

-#endif
+
+
--- a/benchmarks/Benchmark_gparity.cc
+++ b/benchmarks/Benchmark_gparity.cc
@@ -1,7 +1,3 @@
-#include "disable_benchmarks_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-
 #include <Grid/Grid.h>
 #include <sstream>
 using namespace std;
@@ -159,4 +155,3 @@ int main (int argc, char ** argv)
  Grid_finalize();
 }

-#endif
--- a/benchmarks/Benchmark_halo.cc
+++ b/benchmarks/Benchmark_halo.cc
@@ -20,9 +20,6 @@
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
-#include "disable_benchmarks_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>
 #ifdef GRID_CUDA
 #define CUDA_PROFILE
@@ -132,5 +129,3 @@ int main (int argc, char ** argv)
  Grid_finalize();
  exit(0);
 }
-
-#endif
--- a/benchmarks/Benchmark_mooee.cc
+++ b/benchmarks/Benchmark_mooee.cc
@@ -26,9 +26,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
-#include "disable_benchmarks_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>

 using namespace std;
@@ -152,5 +149,3 @@ int main (int argc, char ** argv)

  Grid_finalize();
 }
-
-#endif
--- a/benchmarks/Benchmark_schur.cc
+++ b/benchmarks/Benchmark_schur.cc
@@ -26,9 +26,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
-#include "disable_benchmarks_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>

 using namespace std;
@@ -175,4 +172,5 @@ void benchDw(std::vector<int> & latt4, int Ls)
  //  Dw.Report();
 }

-#endif
+
+
--- a/benchmarks/Benchmark_staggered.cc
+++ b/benchmarks/Benchmark_staggered.cc
@@ -26,9 +26,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
-#include "disable_benchmarks_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>

 using namespace std;
@@ -113,5 +110,3 @@ int main (int argc, char ** argv)

  Grid_finalize();
 }
-
-#endif
--- a/benchmarks/Benchmark_staggeredF.cc
+++ b/benchmarks/Benchmark_staggeredF.cc
@@ -26,9 +26,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
-#include "disable_benchmarks_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>

 using namespace std;
@@ -115,5 +112,3 @@ int main (int argc, char ** argv)

  Grid_finalize();
 }
-
-#endif
--- a/benchmarks/Benchmark_usqcd.cc
+++ b/benchmarks/Benchmark_usqcd.cc
@@ -26,10 +26,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
-#include "disable_benchmarks_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
-
 #include <Grid/Grid.h>
 #include <Grid/algorithms/blas/BatchedBlas.h>

@@ -982,5 +978,3 @@ int main (int argc, char ** argv)
  Grid_finalize();
  fclose(FP);
 }
-
-#endif
--- a/benchmarks/Benchmark_wilson.cc
+++ b/benchmarks/Benchmark_wilson.cc
@@ -26,9 +26,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
-#include "disable_benchmarks_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>

 using namespace std;
@@ -261,5 +258,3 @@ int main (int argc, char ** argv)

  Grid_finalize();
 }
-
-#endif
--- a/benchmarks/Benchmark_wilson_sweep.cc
+++ b/benchmarks/Benchmark_wilson_sweep.cc
@@ -19,9 +19,6 @@ Author: Richard Rollins <rprollins@users.noreply.github.com>
    See the full license in the file "LICENSE" in the top level distribution directory
 *************************************************************************************/
 /*  END LEGAL */
-#include "disable_benchmarks_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>

 using namespace std;
@@ -164,5 +161,3 @@ void bench_wilson_eo (
  double flops = (single_site_flops * volume * ncall)/2.0;
  std::cout << flops/(t1-t0) << "\t\t";
 }
-
-#endif
--- a/benchmarks/disable_benchmarks_without_instantiations.h
+++ b/benchmarks/disable_benchmarks_without_instantiations.h
@@ -1,16 +0,0 @@
-#include <Grid/Grid.h>
-
-#pragma once
-
-#ifndef ENABLE_FERMION_INSTANTIATIONS
-#include <iostream>
-
-int main(void) {
-  std::cout << "This build of Grid was configured to exclude fermion instantiations, "
-	    << "which this benchmark relies on. "
-	    << "Please reconfigure and rebuild Grid with --enable-fermion-instantiations"
-	    << "to run this benchmark."
-	    << std::endl;
-  return 1;
-}
-#endif
--- a/configure.ac
+++ b/configure.ac
@@ -172,12 +172,6 @@ case ${ac_TRACING} in
 esac

 ############### fermions
-AC_ARG_ENABLE([fermion-instantiations],
-     [AS_HELP_STRING([--enable-fermion-instantiations=yes|no],[enable fermion instantiations])],
-     [ac_FERMION_REPS=${enable_fermion_instantiations}], [ac_FERMION_INSTANTIATIONS=yes])
-
-AM_CONDITIONAL(BUILD_FERMION_INSTANTIATIONS, [ test "${ac_FERMION_INSTANTIATIONS}X" == "yesX" ])
-
 AC_ARG_ENABLE([fermion-reps],
     [AS_HELP_STRING([--enable-fermion-reps=yes|no],[enable extra fermion representation support])],
     [ac_FERMION_REPS=${enable_fermion_reps}], [ac_FERMION_REPS=yes])
@@ -200,9 +194,6 @@ AM_CONDITIONAL(BUILD_ZMOBIUS, [ test "${ac_ZMOBIUS}X" == "yesX" ])
 case ${ac_FERMION_REPS} in
   yes) AC_DEFINE([ENABLE_FERMION_REPS],[1],[non QCD fermion reps]);;
 esac
-case ${ac_FERMION_INSTANTIATIONS} in
-   yes) AC_DEFINE([ENABLE_FERMION_INSTANTIATIONS],[1],[enable fermions]);;
-esac
 case ${ac_GPARITY} in
   yes) AC_DEFINE([ENABLE_GPARITY],[1],[fermion actions with GPARITY BCs]);;
 esac
--- a/examples/Example_Mobius_spectrum.cc
+++ b/examples/Example_Mobius_spectrum.cc
@@ -3,9 +3,6 @@
 * without regression / tests being applied
 */

-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>

 using namespace std;
@@ -313,4 +310,5 @@ int main (int argc, char ** argv)
  Grid_finalize();
 }

-#endif
+
+
--- a/examples/Example_christoph.cc
+++ b/examples/Example_christoph.cc
@@ -3,9 +3,6 @@
 * without regression / tests being applied
 */

-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>

 using namespace std;
@@ -435,4 +432,5 @@ int main (int argc, char ** argv)
  Grid_finalize();
 }

-#endif
+
+
--- a/examples/Example_wall_wall_3pt.cc
+++ b/examples/Example_wall_wall_3pt.cc
@@ -3,9 +3,6 @@
 * without regression / tests being applied
 */

-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>

 using namespace std;
@@ -538,4 +535,5 @@ int main (int argc, char ** argv)
  Grid_finalize();
 }

-#endif
+
+
--- a/examples/Example_wall_wall_spectrum.cc
+++ b/examples/Example_wall_wall_spectrum.cc
@@ -3,9 +3,6 @@
 * without regression / tests being applied
 */

-#include "disable_examples_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>

 using namespace std;
@@ -432,4 +429,5 @@ int main (int argc, char ** argv)
  Grid_finalize();
 }

-#endif
+
+
--- a/examples/disable_examples_without_instantiations.h
+++ b/examples/disable_examples_without_instantiations.h
@@ -1,15 +0,0 @@
-#include <Grid/Grid.h>
-#pragma once
-
-#ifndef ENABLE_FERMION_INSTANTIATIONS
-#include <iostream>
-
-int main(void) {
-  std::cout << "This build of Grid was configured to exclude fermion instantiations, "
-	    << "which this example relies on. "
-	    << "Please reconfigure and rebuild Grid with --enable-fermion-instantiations"
-	    << "to run this example."
-	    << std::endl;
-  return 1;
-}
-#endif
--- a/systems/Aurora/tests/reproBigJob.pbs
+++ b/systems/Aurora/tests/reproBigJob.pbs
@@ -51,7 +51,7 @@ EOF
 CMD="mpiexec -np 384 -ppn 12  -envall --hostfile nodefile \
 	     ../gpu_tile.sh \
 	     $BINARY --mpi 4.4.4.6 --grid 64.64.64.96  \
-		--shm-mpi 0 --comms-overlap --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 6000 --log Message "
+		--shm-mpi 0 --comms-overlap --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 6000 --log Message --debug-stdout "

 echo $CMD > command-line
 env > environment
--- a/systems/Aurora/tests/reproBigJob256.pbs
+++ b/systems/Aurora/tests/reproBigJob256.pbs
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+#PBS -l select=256
+#PBS -q run_next
+##PBS -A LatticeQCD_aesp_CNDA
+#PBS -A LatticeFlavor
+#PBS -l walltime=06:00:00
+#PBS -N reproBigJob
+#PBS -k doe
+#PBS -l filesystems=flare
+#PBS -l filesystems=home
+
+#export OMP_PROC_BIND=spread
+#unset OMP_PLACES
+
+
+# 56 cores / 6 threads ~9
+export OMP_NUM_THREADS=6
+
+export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=1
+export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY=1
+export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"
+
+export GRID_PRINT_ENTIRE_LOG=0
+export GRID_CHECKSUM_RECV_BUF=1
+export GRID_CHECKSUM_SEND_BUF=1
+
+export MPIR_CVAR_CH4_IPC_GPU_HANDLE_CACHE=generic
+export MPIR_CVAR_NOLOCAL=1
+
+cd $PBS_O_WORKDIR
+
+source ../sourceme.sh
+
+cp $PBS_NODEFILE nodefile
+
+DIR=reproBigJob.$PBS_JOBID
+
+mkdir -p $DIR
+cd $DIR
+
+cp $PBS_NODEFILE nodefile
+
+BINARY=../Test_dwf_mixedcg_prec
+
+echo > pingjob <<EOF
+while read node ; 
+do
+	echo ssh $node killall -HUP Test_dwf_mixedcg_prec
+done < nodefile
+EOF
+
+CMD="mpiexec -np 3072 -ppn 12  -envall --hostfile nodefile \
+	     ../gpu_tile.sh \
+	     $BINARY --mpi 8.8.8.6 --grid 128.128.128.288  \
+		--shm-mpi 0 --comms-overlap --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 18000 --log Message --debug-stdout "
+
+echo $CMD > command-line
+env > environment
+$CMD
+grep Oops */Grid.stderr.* > failures.$PBS_JOBID
+
--- a/systems/mac-arm/config-command
+++ b/systems/mac-arm/config-command
@@ -0,0 +1,12 @@
+CXX=mpicxx CXXFLAGS=-I/opt/local/include LDFLAGS=-L/opt/local/lib/ ../../configure \
+	      --enable-simd=GEN \
+	      --enable-comms=mpi3 \
+	      --enable-debug \
+	      --enable-unified=yes \
+	      --prefix /Users/peterboyle/QCD/BugHunt/install \
+	      --with-lime=/Users/peterboyle/QCD/SciDAC/install/ \
+	      --with-openssl=$BREW \
+	      --disable-fermion-reps \
+	      --disable-gparity \
+	      --enable-debug
+
--- a/tests/Test_cayley_even_odd_vec.cc
+++ b/tests/Test_cayley_even_odd_vec.cc
@@ -25,9 +25,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
-#include "disable_tests_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>

 using namespace std;
@@ -276,6 +273,8 @@ void  TestWhat(What & Ddwf,

  err = phi-chi;
  std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<< std::endl;
+
+  
 }

-#endif
+
--- a/tests/Test_compressed_lanczos_hot_start.cc
+++ b/tests/Test_compressed_lanczos_hot_start.cc
@@ -30,9 +30,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 *  Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features 
 *  in Grid that were intended to be used to support blocked Aggregates, from
 */
-#include "disable_tests_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>
 #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
 #include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>
@@ -259,4 +256,3 @@ int main (int argc, char ** argv) {
  Grid_finalize();
 }

-#endif
--- a/tests/Test_dwf_dslash_repro.cc
+++ b/tests/Test_dwf_dslash_repro.cc
@@ -25,9 +25,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
-#include "disable_tests_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>

 using namespace std;
@@ -240,5 +237,3 @@ int main (int argc, char ** argv)
  
  Grid_finalize();
 }
-
-#endif
--- a/tests/Test_dwf_mixedcg_prec.cc
+++ b/tests/Test_dwf_mixedcg_prec.cc
@@ -25,9 +25,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
-#include "disable_tests_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>

 using namespace std;
@@ -85,6 +82,7 @@ int main (int argc, char ** argv)
  
  Grid_init(&argc,&argv);

+  std::cout << GridLogMessage<<" in main(): Grid is initialised"<<std::endl;
  const int Ls=12;

  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
@@ -97,6 +95,7 @@ int main (int argc, char ** argv)
  GridCartesian         * FGrid_f   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_f);
  GridRedBlackCartesian * FrbGrid_f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_f);
  
+  std::cout << GridLogMessage<<" in main(): making RNGs"<<std::endl;
  std::vector<int> seeds4({1,2,3,4});
  std::vector<int> seeds5({5,6,7,8});
  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
@@ -155,7 +154,7 @@ int main (int argc, char ** argv)
  time_t start = time(NULL);
  UGrid->Broadcast(0,(void *)&start,sizeof(start));

-  FlightRecorder::ContinueOnFail = 0;
+  FlightRecorder::ContinueOnFail = 1;
  FlightRecorder::PrintEntireLog = 0;
  FlightRecorder::ChecksumComms  = 0;
  FlightRecorder::ChecksumCommsSend=0;
@@ -225,5 +224,3 @@ int main (int argc, char ** argv)
  
  Grid_finalize();
 }
-
-#endif
--- a/tests/Test_dwf_mixedcg_prec_halfcomms.cc
+++ b/tests/Test_dwf_mixedcg_prec_halfcomms.cc
@@ -25,9 +25,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
-#include "disable_tests_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS
-
 #include <Grid/Grid.h>

 using namespace std;
@@ -121,4 +118,3 @@ int main (int argc, char ** argv)
  Grid_finalize();
 }
 #endif
-#endif
--- a/tests/Test_meson_field.cc
+++ b/tests/Test_meson_field.cc
@@ -24,8 +24,6 @@ with this program; if not, write to the Free Software Foundation, Inc.,

 See the full license in the file "LICENSE" in the top level distribution directory
 *************************************************************************************/
-#include "disable_tests_without_instantiations.h"
-#ifdef ENABLE_FERMION_INSTANTIATIONS

 #include <Grid/Grid.h>
 #include <Grid/qcd/utils/A2Autils.h>
@@ -159,5 +157,3 @@ int main(int argc, char *argv[])
  
  return EXIT_SUCCESS;
 }
-
-#endif
--- a/tests/debug/Test_general_coarse_hdcg.cc
+++ b/tests/debug/Test_general_coarse_hdcg.cc
@@ -128,10 +128,6 @@ int main (int argc, char ** argv)
  typedef HermOpAdaptor<LatticeFermionD> HermFineMatrix;
  HermFineMatrix FineHermOp(HermOpEO);

-  LatticeFermionD src(FrbGrid); 
-  src = ComplexD(1.0);
-  PowerMethod<LatticeFermionD>       PM;   PM(HermOpEO,src);
-
  ////////////////////////////////////////////////////////////
  ///////////// Coarse basis and Little Dirac Operator ///////
  ////////////////////////////////////////////////////////////
@@ -154,7 +150,7 @@ int main (int argc, char ** argv)
  std::cout << "**************************************"<<std::endl;
  std::cout << "Create Subspace"<<std::endl;
  std::cout << "**************************************"<<std::endl;
-  Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,35.,0.01,500);// <== last run
+  Aggregates.CreateSubspaceChebyshevNew(RNG5,HermOpEO,95.); 

  std::cout << "**************************************"<<std::endl;
  std::cout << "Refine Subspace"<<std::endl;
@@ -189,7 +185,7 @@ int main (int argc, char ** argv)
  std::cout << "**************************************"<<std::endl;

  typedef HermitianLinearOperator<MultiGeneralCoarsenedMatrix_t,CoarseVector> MrhsHermMatrix;
-  Chebyshev<CoarseVector>      IRLCheby(0.01,16.0,201);  // 1 iter
+  Chebyshev<CoarseVector>      IRLCheby(0.05,40.0,101);  // 1 iter
  MrhsHermMatrix MrhsCoarseOp     (mrhs);

  CoarseVector pm_src(CoarseMrhs);
@@ -197,10 +193,10 @@ int main (int argc, char ** argv)
  PowerMethod<CoarseVector>       cPM;
  cPM(MrhsCoarseOp,pm_src);

-  //  int Nk=16;
-  //  int Nm=Nk*3;
-  int Nk=32;
-  int Nm=128;
+  int Nk=nrhs;
+  int Nm=Nk*3;
+  //  int Nk=36;
+  //  int Nm=144;
  int Nstop=Nk;
  int Nconv_test_interval=1;
  
@@ -214,7 +210,7 @@ int main (int argc, char ** argv)
 							  nrhs,
 							  Nk,
 							  Nm,
-							  1e-4,100);
+							  1e-4,10);

  int Nconv;
  std::vector<RealD>            eval(Nm);
@@ -235,6 +231,8 @@ int main (int argc, char ** argv)
  std::cout << "**************************************"<<std::endl;
  std::cout << " Recompute coarse evecs  "<<std::endl;
  std::cout << "**************************************"<<std::endl;
+  evec.resize(Nm,Coarse5d);
+  eval.resize(Nm);
  for(int r=0;r<nrhs;r++){
    random(CRNG,c_src[r]);
  }
@@ -245,7 +243,7 @@ int main (int argc, char ** argv)
  // Deflation guesser object
  ///////////////////////
  std::cout << "**************************************"<<std::endl;
-  std::cout << " Reimport coarse evecs "<<evec.size()<<" "<<eval.size()<<std::endl;
+  std::cout << " Reimport coarse evecs  "<<std::endl;
  std::cout << "**************************************"<<std::endl;
  MultiRHSDeflation<CoarseVector> MrhsGuesser;
  MrhsGuesser.ImportEigenBasis(evec,eval);
@@ -254,11 +252,9 @@ int main (int argc, char ** argv)
  // Extra HDCG parameters
  //////////////////////////
  int maxit=3000;
-  //  ConjugateGradient<CoarseVector>  CG(2.0e-1,maxit,false);
-  //  ConjugateGradient<CoarseVector>  CG(1.0e-2,maxit,false);
-  ConjugateGradient<CoarseVector>  CG(5.0e-2,maxit,false);
-  RealD lo=0.2;
-  int ord = 7;
+  ConjugateGradient<CoarseVector>  CG(2.0e-1,maxit,false);
+  RealD lo=2.0;
+  int ord = 9;

  DoNothingGuesser<CoarseVector> DoNothing;
  HPDSolver<CoarseVector> HPDSolveMrhs(MrhsCoarseOp,CG,DoNothing);
@@ -304,19 +300,6 @@ int main (int argc, char ** argv)
    ConjugateGradient<LatticeFermionD>  CGfine(1.0e-8,30000,false);
    CGfine(HermOpEO, src, result);
  }
-  {
-    std::cout << "**************************************"<<std::endl;
-    std::cout << "Calling MdagM CG"<<std::endl;
-    std::cout << "**************************************"<<std::endl;
-      
-    LatticeFermion result(FGrid); result=Zero();
-    LatticeFermion    src(FGrid); random(RNG5,src);
-    result=Zero();
-
-    MdagMLinearOperator<MobiusFermionD, LatticeFermionD> HermOp(Ddwf);
-    ConjugateGradient<LatticeFermionD>  CGfine(1.0e-8,30000,false);
-    CGfine(HermOp, src, result);
-  }
 #endif  
  Grid_finalize();
  return 0;
--- a/tests/debug/Test_general_coarse_pvdagm.cc
+++ b/tests/debug/Test_general_coarse_pvdagm.cc
@@ -368,10 +368,7 @@ int main (int argc, char ** argv)
  TrivialPrecon<CoarseVector> simple;
  NonHermitianLinearOperator<LittleDiracOperator,CoarseVector> LinOpCoarse(LittleDiracOpPV);
  //  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(1.0e-4, 100, LinOpCoarse,simple,10,10); 
-  //  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(3.0e-2, 100, LinOpCoarse,simple,12,12);  // 35 outer
-  //  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(5.0e-2, 100, LinOpCoarse,simple,12,12);  // 36 outer, 12s
-  //  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(1.0e-1, 100, LinOpCoarse,simple,12,12);  // 36 ; 11s   
-  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(3.0e-1, 100, LinOpCoarse,simple,12,12);     
+  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(3.0e-2, 100, LinOpCoarse,simple,10,10); 
  L2PGCR.Level(3);
  c_res=Zero();
  L2PGCR(c_src,c_res);
@@ -403,7 +400,7 @@ int main (int argc, char ** argv)
 			    LinOpCoarse,
 			    L2PGCR);
  
-  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermion> L1PGCR(1.0e-8,100,PVdagM,TwoLevelPrecon,10,10);
+  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermion> L1PGCR(1.0e-8,1000,PVdagM,TwoLevelPrecon,16,16);
  L1PGCR.Level(1);

  f_res=Zero();
--- a/tests/debug/Test_general_coarse_pvdagm_svd.cc
+++ b/tests/debug/Test_general_coarse_pvdagm_svd.cc
@@ -1,493 +0,0 @@
-/*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./tests/Test_padded_cell.cc
-
-    Copyright (C) 2023
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#include <Grid/Grid.h>
-#include <Grid/lattice/PaddedCell.h>
-#include <Grid/stencil/GeneralLocalStencil.h>
-
-#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
-#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
-#include <Grid/algorithms/iterative/BiCGSTAB.h>
-
-using namespace std;
-using namespace Grid;
-
-template<class Matrix,class Field>
-class PVdagMLinearOperator : public LinearOperatorBase<Field> {
-  Matrix &_Mat;
-  Matrix &_PV;
-public:
-  PVdagMLinearOperator(Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV){};
-
-  void OpDiag (const Field &in, Field &out) {    assert(0);  }
-  void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  }
-  void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  };
-  void Op     (const Field &in, Field &out){
-    //    std::cout << GridLogMessage<< "Op: PVdag M "<<std::endl;
-    Field tmp(in.Grid());
-    _Mat.M(in,tmp);
-    _PV.Mdag(tmp,out);
-  }
-  void AdjOp     (const Field &in, Field &out){
-    //    std::cout << GridLogMessage<<"AdjOp: Mdag PV "<<std::endl;
-    Field tmp(in.Grid());
-    _PV.M(in,tmp);
-    _Mat.Mdag(tmp,out);
-  }
-  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
-    assert(0);
-  }
-  void HermOp(const Field &in, Field &out){
-    //    std::cout <<GridLogMessage<< "HermOp: Mdag PV PVdag M"<<std::endl;
-    Field tmp(in.Grid());
-    Op(in,tmp);
-    AdjOp(tmp,out);
-    //    std::cout << "HermOp done "<<norm2(out)<<std::endl;
-  }
-};
-template<class Matrix,class Field>
-class MdagPVLinearOperator : public LinearOperatorBase<Field> {
-  Matrix &_Mat;
-  Matrix &_PV;
-public:
-  MdagPVLinearOperator(Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV){};
-
-  void OpDiag (const Field &in, Field &out) {    assert(0);  }
-  void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  }
-  void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  };
-  void Op     (const Field &in, Field &out){
-    Field tmp(in.Grid());
-    //    std::cout <<GridLogMessage<< "Op: PVdag M "<<std::endl;
-    _PV.M(in,tmp);
-    _Mat.Mdag(tmp,out);
-  }
-  void AdjOp     (const Field &in, Field &out){
-    //    std::cout <<GridLogMessage<< "AdjOp: Mdag PV "<<std::endl;
-    Field tmp(in.Grid());
-    _Mat.M(in,tmp);
-    _PV.Mdag(tmp,out);
-  }
-  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
-    assert(0);
-  }
-  void HermOp(const Field &in, Field &out){
-    //    std::cout << GridLogMessage<<"HermOp: PVdag M Mdag PV "<<std::endl;
-    Field tmp(in.Grid());
-    Op(in,tmp);
-    AdjOp(tmp,out);
-    //    std::cout << "HermOp done "<<norm2(out)<<std::endl;
-  }
-};
-template<class Matrix,class Field>
-class ShiftedPVdagMLinearOperator : public LinearOperatorBase<Field> {
-  Matrix &_Mat;
-  Matrix &_PV;
-  RealD shift;
-public:
-  ShiftedPVdagMLinearOperator(RealD _shift,Matrix &Mat,Matrix &PV): shift(_shift),_Mat(Mat),_PV(PV){};
-
-  void OpDiag (const Field &in, Field &out) {    assert(0);  }
-  void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  }
-  void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  };
-  void Op     (const Field &in, Field &out){
-    //    std::cout << "Op: PVdag M "<<std::endl;
-    Field tmp(in.Grid());
-    _Mat.M(in,tmp);
-    _PV.Mdag(tmp,out);
-    out = out + shift * in;
-  }
-  void AdjOp     (const Field &in, Field &out){
-    //    std::cout << "AdjOp: Mdag PV "<<std::endl;
-    Field tmp(in.Grid());
-    _PV.M(tmp,out);
-    _Mat.Mdag(in,tmp);
-    out = out + shift * in;
-  }
-  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  }
-  void HermOp(const Field &in, Field &out){
-    //    std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
-    Field tmp(in.Grid());
-    Op(in,tmp);
-    AdjOp(tmp,out);
-  }
-};
-template<class Fobj,class CComplex,int nbasis>
-class MGPreconditionerSVD : public LinearFunction< Lattice<Fobj> > {
-public:
-  using LinearFunction<Lattice<Fobj> >::operator();
-
-  typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField    FineField;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
-  typedef LinearOperatorBase<FineField>                            FineOperator;
-  typedef LinearFunction    <FineField>                            FineSmoother;
-  typedef LinearOperatorBase<CoarseVector>                         CoarseOperator;
-  typedef LinearFunction    <CoarseVector>                         CoarseSolver;
-  Aggregates     & _FineToCoarse;
-  Aggregates     & _CoarseToFine;
-  FineOperator   & _FineOperator;
-  FineSmoother   & _PreSmoother;
-  FineSmoother   & _PostSmoother;
-  CoarseOperator & _CoarseOperator;
-  CoarseSolver   & _CoarseSolve;
-
-  int    level;  void Level(int lv) {level = lv; };
-
-  MGPreconditionerSVD(Aggregates &FtoC,
-		      Aggregates &CtoF,
-		      FineOperator &Fine,
-		      FineSmoother &PreSmoother,
-		      FineSmoother &PostSmoother,
-		      CoarseOperator &CoarseOperator_,
-		      CoarseSolver &CoarseSolve_)
-    : _FineToCoarse(FtoC),
-      _CoarseToFine(CtoF),
-      _FineOperator(Fine),
-      _PreSmoother(PreSmoother),
-      _PostSmoother(PostSmoother),
-      _CoarseOperator(CoarseOperator_),
-      _CoarseSolve(CoarseSolve_),
-      level(1)  {  }
-
-  virtual void operator()(const FineField &in, FineField & out) 
-  {
-    GridBase *CoarseGrid = _FineToCoarse.CoarseGrid;
-    //    auto CoarseGrid = _CoarseOperator.Grid();
-    CoarseVector Csrc(CoarseGrid);
-    CoarseVector Csol(CoarseGrid);
-    FineField vec1(in.Grid());
-    FineField vec2(in.Grid());
-
-    std::cout<<GridLogMessage << "Calling PreSmoother " <<std::endl;
-
-    //    std::cout<<GridLogMessage << "Calling PreSmoother input residual "<<norm2(in) <<std::endl;
-    double t;
-    // Fine Smoother
-    //    out = in;
-    out = Zero();
-    t=-usecond();
-    _PreSmoother(in,out);
-    t+=usecond();
-
-    std::cout<<GridLogMessage << "PreSmoother took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Update the residual
-    _FineOperator.Op(out,vec1);  sub(vec1, in ,vec1);   
-    //    std::cout<<GridLogMessage <<"Residual-1 now " <<norm2(vec1)<<std::endl;
-
-    // Fine to Coarse 
-    t=-usecond();
-    _FineToCoarse.ProjectToSubspace  (Csrc,vec1);
-    t+=usecond();
-    std::cout<<GridLogMessage << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Coarse correction
-    t=-usecond();
-    Csol = Zero();
-    _CoarseSolve(Csrc,Csol);
-    //Csol=Zero();
-    t+=usecond();
-    std::cout<<GridLogMessage << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Coarse to Fine
-    t=-usecond();  
-    //    _CoarseOperator.PromoteFromSubspace(_Aggregates,Csol,vec1);
-    _CoarseToFine.PromoteFromSubspace(Csol,vec1); 
-    add(out,out,vec1);
-    t+=usecond();
-    std::cout<<GridLogMessage << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Residual
-    _FineOperator.Op(out,vec1);  sub(vec1 ,in , vec1);  
-    //    std::cout<<GridLogMessage <<"Residual-2 now " <<norm2(vec1)<<std::endl;
-
-    // Fine Smoother
-    t=-usecond();
-    //    vec2=vec1;
-    vec2=Zero();
-    _PostSmoother(vec1,vec2);
-    t+=usecond();
-    std::cout<<GridLogMessage << "PostSmoother took "<< t/1000.0<< "ms" <<std::endl;
-
-    add( out,out,vec2);
-    std::cout<<GridLogMessage << "Done " <<std::endl;
-  }
-};
-
-int main (int argc, char ** argv)
-{
-  Grid_init(&argc,&argv);
-
-  const int Ls=16;
-
-  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
-  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
-
-  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
-  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
-
-  // Construct a coarsened grid
-  Coordinate clatt = GridDefaultLatt();
-  for(int d=0;d<clatt.size();d++){
-    clatt[d] = clatt[d]/2;
-    //    clatt[d] = clatt[d]/4;
-  }
-  GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
-  GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
-
-  std::vector<int> seeds4({1,2,3,4});
-  std::vector<int> seeds5({5,6,7,8});
-  std::vector<int> cseeds({5,6,7,8});
-  GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5);
-  GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4);
-  GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
-
-  LatticeFermion    src(FGrid); random(RNG5,src);
-  LatticeFermion result(FGrid); result=Zero();
-  LatticeFermion    ref(FGrid); ref=Zero();
-  LatticeFermion    tmp(FGrid);
-  LatticeFermion    err(FGrid);
-  LatticeGaugeField Umu(UGrid);
-
-  FieldMetaData header;
-  std::string file("ckpoint_lat.4000");
-  NerscIO::readConfiguration(Umu,header,file);
-  
-  RealD mass=0.01;
-  RealD M5=1.8;
-
-  DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
-  DomainWallFermionD Dpv(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,1.0,M5);
-
-  const int nbasis = 30;
-  const int cb = 0 ;
-
-
-  NextToNearestStencilGeometry5D geom(Coarse5d);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
-
-  typedef PVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> PVdagM_t;
-  typedef MdagPVLinearOperator<DomainWallFermionD,LatticeFermionD> MdagPV_t;
-  typedef ShiftedPVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> ShiftedPVdagM_t;
-  PVdagM_t PVdagM(Ddwf,Dpv);
-  MdagPV_t MdagPV(Ddwf,Dpv);
-  //  ShiftedPVdagM_t ShiftedPVdagM(2.0,Ddwf,Dpv); // 355
-  //  ShiftedPVdagM_t ShiftedPVdagM(1.0,Ddwf,Dpv); // 246
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.5,Ddwf,Dpv); // 183
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // 145
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 134
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 127 -- NULL space via inverse iteration
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 57 -- NULL space via inverse iteration; 3 iterations
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // 57 , tighter inversion
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // nbasis 20 -- 49 iters
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // nbasis 20 -- 70 iters; asymmetric 
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // 58; Loosen coarse, tighten fine
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 56 ... 
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 51 ...  with 24 vecs
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 31 ...  with 24 vecs and 2^4 blocking
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 43 ...  with 16 vecs and 2^4 blocking, sloppier
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 35  ...  with 20 vecs and 2^4 blocking
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 35  ...  with 20 vecs and 2^4 blocking, looser coarse
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 64  ...  with 20 vecs, Christoph setup, and 2^4 blocking, looser coarse
-  ShiftedPVdagM_t ShiftedPVdagM(0.01,Ddwf,Dpv); // 
-
-
-  // Run power method on HOA??
-  PowerMethod<LatticeFermion>       PM;
-  //  PM(PVdagM,src);
-  //  PM(MdagPV,src);
- 
-  // Warning: This routine calls PVdagM.Op, not PVdagM.HermOp
-  typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
-  Subspace V(Coarse5d,FGrid,cb);
-  Subspace U(Coarse5d,FGrid,cb);
-
-  // Breeds right singular vectors with call to HermOp (V)
-  V.CreateSubspaceChebyshev(RNG5,PVdagM,
-			    nbasis,
-			    4000.0,0.003,
-			    500);
-
-  // Breeds left singular vectors with call to HermOp (U)
-  //  U.CreateSubspaceChebyshev(RNG5,PVdagM,
-  U.CreateSubspaceChebyshev(RNG5,MdagPV,
-			    nbasis,
-			    4000.0,0.003,
-			    500);
-  
-  typedef Aggregation<vSpinColourVector,vTComplex,2*nbasis> CombinedSubspace;
-  CombinedSubspace CombinedUV(Coarse5d,FGrid,cb);
-  for(int b=0;b<nbasis;b++){
-    CombinedUV.subspace[b]        = V.subspace[b];
-    CombinedUV.subspace[b+nbasis] = U.subspace[b];
-  }
-
-  int bl, br;
-  std::cout <<" <V| PVdagM| V> " <<std::endl;
-  for(bl=0;bl<nbasis;bl++){
-  for(br=0;br<nbasis;br++){
-    PVdagM.Op(V.subspace[br],src);
-    std::cout <<bl<<" "<<br<<"\t"<<innerProduct(V.subspace[bl],src)<<std::endl;
-  }}
-  std::cout <<" <V| PVdagM| U> " <<std::endl;
-  for(bl=0;bl<nbasis;bl++){
-  for(br=0;br<nbasis;br++){
-    PVdagM.Op(U.subspace[br],src);
-    std::cout <<bl<<" "<<br<<"\t"<<innerProduct(V.subspace[bl],src)<<std::endl;
-  }}
-  std::cout <<" <U| PVdagM| V> " <<std::endl;
-  for(bl=0;bl<nbasis;bl++){
-  for(br=0;br<nbasis;br++){
-    PVdagM.Op(V.subspace[br],src);
-    std::cout <<bl<<" "<<br<<"\t"<<innerProduct(U.subspace[bl],src)<<std::endl;
-  }}
-  std::cout <<" <U| PVdagM| U> " <<std::endl;
-  for(bl=0;bl<nbasis;bl++){
-  for(br=0;br<nbasis;br++){
-    PVdagM.Op(U.subspace[br],src);
-    std::cout <<bl<<" "<<br<<"\t"<<innerProduct(U.subspace[bl],src)<<std::endl;
-  }}
-
-  typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperatorV;
-  typedef LittleDiracOperatorV::CoarseVector CoarseVectorV;
-  
-  typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,2*nbasis> LittleDiracOperator;
-  typedef LittleDiracOperator::CoarseVector CoarseVector;
-
-  V.Orthogonalise();
-  for(int b =0 ; b<nbasis;b++){
-    CoarseVectorV c_src (Coarse5d);
-    V.ProjectToSubspace  (c_src,U.subspace[b]);
-    V.PromoteFromSubspace(c_src,src);
-    std::cout << " Completeness of U in V ["<< b<<"] "<< std::sqrt(norm2(src)/norm2(U.subspace[b]))<<std::endl;
-  }
-  
-  CoarseVector c_src (Coarse5d);
-  CoarseVector c_res (Coarse5d);
-  CoarseVector c_proj(Coarse5d);
-  LittleDiracOperator LittleDiracOpPV(geom,FGrid,Coarse5d);
-  LittleDiracOpPV.CoarsenOperator(PVdagM,CombinedUV,CombinedUV);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"Testing coarsened operator "<<std::endl;
-  
-  Complex one(1.0);
-  c_src = one;  // 1 in every element for vector 1.
-
-  blockPromote(c_src,err,CombinedUV.subspace);
-
-  LatticeFermion prom(FGrid);
-  prom=Zero();
-  for(int b=0;b<nbasis*2;b++){
-    prom=prom+CombinedUV.subspace[b];
-  }
-
-  std::cout<<GridLogMessage<<"c_src "<<norm2(c_src)<<std::endl;
-  std::cout<<GridLogMessage<<"prom  "<<norm2(prom)<<std::endl;
-
-  PVdagM.Op(prom,tmp);
-  blockProject(c_proj,tmp,CombinedUV.subspace);
-  std::cout<<GridLogMessage<<" Called Big Dirac Op "<<norm2(tmp)<<std::endl;
-
-  LittleDiracOpPV.M(c_src,c_res);
-  std::cout<<GridLogMessage<<" Called Little Dirac Op c_src "<< norm2(c_src) << "  c_res "<< norm2(c_res) <<std::endl;
-
-  std::cout<<GridLogMessage<<"Little dop : "<<norm2(c_res)<<std::endl;
-
-  std::cout<<GridLogMessage<<"Big dop in subspace : "<<norm2(c_proj)<<std::endl;
-
-  c_proj = c_proj - c_res;
-  std::cout<<GridLogMessage<<" ldop error: "<<norm2(c_proj)<<std::endl;
-
-
-  /**********
-   * Some solvers
-   **********
-   */
-
-  ///////////////////////////////////////
-  // Coarse grid solver test
-  ///////////////////////////////////////
-
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  std::cout<<GridLogMessage<<" Coarse Grid Solve -- Level 3 "<<std::endl;
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  TrivialPrecon<CoarseVector> simple;
-  NonHermitianLinearOperator<LittleDiracOperator,CoarseVector> LinOpCoarse(LittleDiracOpPV);
-  //  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(1.0e-4, 100, LinOpCoarse,simple,10,10); 
-  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(1.0e-2, 10, LinOpCoarse,simple,20,20); 
-  L2PGCR.Level(3);
-  c_res=Zero();
-  L2PGCR(c_src,c_res);
-
-  ////////////////////////////////////////
-  // Fine grid smoother
-  ////////////////////////////////////////
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  std::cout<<GridLogMessage<<" Fine Grid Smoother -- Level 2 "<<std::endl;
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  TrivialPrecon<LatticeFermionD> simple_fine;
-  //  NonHermitianLinearOperator<PVdagM_t,LatticeFermionD> LinOpSmooth(PVdagM);
-  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermionD> SmootherGCR(0.01,1,ShiftedPVdagM,simple_fine,16,16);
-  SmootherGCR.Level(2);
-  
-  LatticeFermionD f_src(FGrid);
-  LatticeFermionD f_res(FGrid);
-
-  f_src = one;  // 1 in every element for vector 1.
-  f_res=Zero();
-  SmootherGCR(f_src,f_res);
-
-  typedef MGPreconditionerSVD<vSpinColourVector,  vTComplex,nbasis*2> TwoLevelMG;
-
-  TwoLevelMG TwoLevelPrecon(CombinedUV,CombinedUV,
-			    PVdagM,
-			    simple_fine,
-			    SmootherGCR,
-			    LinOpCoarse,
-			    L2PGCR);
-  
-  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermion> L1PGCR(1.0e-8,1000,PVdagM,TwoLevelPrecon,20,20);
-  L1PGCR.Level(1);
-
-  f_res=Zero();
-  L1PGCR(f_src,f_res);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage << "Done "<< std::endl;
-
-  Grid_finalize();
-  return 0;
-}
--- a/tests/debug/Test_general_coarse_pvdagm_svd_cg.cc
+++ b/tests/debug/Test_general_coarse_pvdagm_svd_cg.cc
@@ -1,492 +0,0 @@
-/*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./tests/Test_padded_cell.cc
-
-    Copyright (C) 2023
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#include <Grid/Grid.h>
-#include <Grid/lattice/PaddedCell.h>
-#include <Grid/stencil/GeneralLocalStencil.h>
-
-#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
-#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
-#include <Grid/algorithms/iterative/BiCGSTAB.h>
-
-using namespace std;
-using namespace Grid;
-
-template<class Matrix,class Field>
-class PVdagMLinearOperator : public LinearOperatorBase<Field> {
-  Matrix &_Mat;
-  Matrix &_PV;
-public:
-  PVdagMLinearOperator(Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV){};
-
-  void OpDiag (const Field &in, Field &out) {    assert(0);  }
-  void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  }
-  void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  };
-  void Op     (const Field &in, Field &out){
-    //    std::cout << GridLogMessage<< "Op: PVdag M "<<std::endl;
-    Field tmp(in.Grid());
-    _Mat.M(in,tmp);
-    _PV.Mdag(tmp,out);
-  }
-  void AdjOp     (const Field &in, Field &out){
-    //    std::cout << GridLogMessage<<"AdjOp: Mdag PV "<<std::endl;
-    Field tmp(in.Grid());
-    _PV.M(in,tmp);
-    _Mat.Mdag(tmp,out);
-  }
-  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
-    HermOp(in,out);
-    ComplexD dot = innerProduct(in,out);
-    n1=real(dot);
-    n2=norm2(out);
-  }
-  void HermOp(const Field &in, Field &out){
-    //    std::cout <<GridLogMessage<< "HermOp: Mdag PV PVdag M"<<std::endl;
-    Field tmp(in.Grid());
-    Op(in,tmp);
-    AdjOp(tmp,out);
-    //    std::cout << "HermOp done "<<norm2(out)<<std::endl;
-  }
-};
-template<class Matrix,class Field>
-class MdagPVLinearOperator : public LinearOperatorBase<Field> {
-  Matrix &_Mat;
-  Matrix &_PV;
-public:
-  MdagPVLinearOperator(Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV){};
-
-  void OpDiag (const Field &in, Field &out) {    assert(0);  }
-  void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  }
-  void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  };
-  void Op     (const Field &in, Field &out){
-    Field tmp(in.Grid());
-    //    std::cout <<GridLogMessage<< "Op: PVdag M "<<std::endl;
-    _PV.M(in,tmp);
-    _Mat.Mdag(tmp,out);
-  }
-  void AdjOp     (const Field &in, Field &out){
-    //    std::cout <<GridLogMessage<< "AdjOp: Mdag PV "<<std::endl;
-    Field tmp(in.Grid());
-    _Mat.M(in,tmp);
-    _PV.Mdag(tmp,out);
-  }
-  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
-    ComplexD dot = innerProduct(in,out);
-    n1=real(dot);
-    n2=norm2(out);
-  }
-  void HermOp(const Field &in, Field &out){
-    //    std::cout << GridLogMessage<<"HermOp: PVdag M Mdag PV "<<std::endl;
-    Field tmp(in.Grid());
-    Op(in,tmp);
-    AdjOp(tmp,out);
-    //    std::cout << "HermOp done "<<norm2(out)<<std::endl;
-  }
-};
-template<class Matrix,class Field>
-class ShiftedPVdagMLinearOperator : public LinearOperatorBase<Field> {
-  Matrix &_Mat;
-  Matrix &_PV;
-  RealD shift;
-public:
-  ShiftedPVdagMLinearOperator(RealD _shift,Matrix &Mat,Matrix &PV): shift(_shift),_Mat(Mat),_PV(PV){};
-
-  void OpDiag (const Field &in, Field &out) {    assert(0);  }
-  void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  }
-  void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  };
-  void Op     (const Field &in, Field &out){
-    //    std::cout << "Op: PVdag M "<<std::endl;
-    Field tmp(in.Grid());
-    _Mat.M(in,tmp);
-    _PV.Mdag(tmp,out);
-    out = out + shift * in;
-  }
-  void AdjOp     (const Field &in, Field &out){
-    //    std::cout << "AdjOp: Mdag PV "<<std::endl;
-    Field tmp(in.Grid());
-    _PV.M(tmp,out);
-    _Mat.Mdag(in,tmp);
-    out = out + shift * in;
-  }
-  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  }
-  void HermOp(const Field &in, Field &out){
-    //    std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
-    Field tmp(in.Grid());
-    Op(in,tmp);
-    AdjOp(tmp,out);
-  }
-};
-template<class Fobj,class CComplex,int nbasis>
-class MGPreconditionerSVD : public LinearFunction< Lattice<Fobj> > {
-public:
-  using LinearFunction<Lattice<Fobj> >::operator();
-
-  typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField    FineField;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
-  typedef LinearOperatorBase<FineField>                            FineOperator;
-  typedef LinearFunction    <FineField>                            FineSmoother;
-  typedef LinearOperatorBase<CoarseVector>                         CoarseOperator;
-  typedef LinearFunction    <CoarseVector>                         CoarseSolver;
-  Aggregates     & _FineToCoarse;
-  Aggregates     & _CoarseToFine;
-  FineOperator   & _FineOperator;
-  FineSmoother   & _PreSmoother;
-  FineSmoother   & _PostSmoother;
-  CoarseOperator & _CoarseOperator;
-  CoarseSolver   & _CoarseSolve;
-
-  int    level;  void Level(int lv) {level = lv; };
-
-  MGPreconditionerSVD(Aggregates &FtoC,
-		      Aggregates &CtoF,
-		      FineOperator &Fine,
-		      FineSmoother &PreSmoother,
-		      FineSmoother &PostSmoother,
-		      CoarseOperator &CoarseOperator_,
-		      CoarseSolver &CoarseSolve_)
-    : _FineToCoarse(FtoC),
-      _CoarseToFine(CtoF),
-      _FineOperator(Fine),
-      _PreSmoother(PreSmoother),
-      _PostSmoother(PostSmoother),
-      _CoarseOperator(CoarseOperator_),
-      _CoarseSolve(CoarseSolve_),
-      level(1)  {  }
-
-  virtual void operator()(const FineField &in, FineField & out) 
-  {
-    GridBase *CoarseGrid = _FineToCoarse.CoarseGrid;
-    //    auto CoarseGrid = _CoarseOperator.Grid();
-    CoarseVector Csrc(CoarseGrid);
-    CoarseVector Csol(CoarseGrid);
-    FineField vec1(in.Grid());
-    FineField vec2(in.Grid());
-
-    std::cout<<GridLogMessage << "Calling PreSmoother " <<std::endl;
-
-    //    std::cout<<GridLogMessage << "Calling PreSmoother input residual "<<norm2(in) <<std::endl;
-    double t;
-    // Fine Smoother
-    //    out = in;
-    out = Zero();
-    t=-usecond();
-    _PreSmoother(in,out);
-    t+=usecond();
-
-    std::cout<<GridLogMessage << "PreSmoother took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Update the residual
-    _FineOperator.Op(out,vec1);  sub(vec1, in ,vec1);   
-    //    std::cout<<GridLogMessage <<"Residual-1 now " <<norm2(vec1)<<std::endl;
-
-    // Fine to Coarse 
-    t=-usecond();
-    _FineToCoarse.ProjectToSubspace  (Csrc,vec1);
-    t+=usecond();
-    std::cout<<GridLogMessage << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Coarse correction
-    t=-usecond();
-    Csol = Zero();
-    _CoarseSolve(Csrc,Csol);
-    //Csol=Zero();
-    t+=usecond();
-    std::cout<<GridLogMessage << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Coarse to Fine
-    t=-usecond();  
-    //    _CoarseOperator.PromoteFromSubspace(_Aggregates,Csol,vec1);
-    _CoarseToFine.PromoteFromSubspace(Csol,vec1); 
-    add(out,out,vec1);
-    t+=usecond();
-    std::cout<<GridLogMessage << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Residual
-    _FineOperator.Op(out,vec1);  sub(vec1 ,in , vec1);  
-    //    std::cout<<GridLogMessage <<"Residual-2 now " <<norm2(vec1)<<std::endl;
-
-    // Fine Smoother
-    t=-usecond();
-    //    vec2=vec1;
-    vec2=Zero();
-    _PostSmoother(vec1,vec2);
-    t+=usecond();
-    std::cout<<GridLogMessage << "PostSmoother took "<< t/1000.0<< "ms" <<std::endl;
-
-    add( out,out,vec2);
-    std::cout<<GridLogMessage << "Done " <<std::endl;
-  }
-};
-
-int main (int argc, char ** argv)
-{
-  Grid_init(&argc,&argv);
-
-  const int Ls=16;
-
-  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
-  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
-
-  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
-  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
-
-  // Construct a coarsened grid
-  Coordinate clatt = GridDefaultLatt();
-  for(int d=0;d<clatt.size();d++){
-    clatt[d] = clatt[d]/2;
-    //    clatt[d] = clatt[d]/4;
-  }
-  GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
-  GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
-
-  std::vector<int> seeds4({1,2,3,4});
-  std::vector<int> seeds5({5,6,7,8});
-  std::vector<int> cseeds({5,6,7,8});
-  GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5);
-  GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4);
-  GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
-
-  LatticeFermion    src(FGrid); random(RNG5,src);
-  LatticeFermion result(FGrid); result=Zero();
-  LatticeFermion    ref(FGrid); ref=Zero();
-  LatticeFermion    tmp(FGrid);
-  LatticeFermion    err(FGrid);
-  LatticeGaugeField Umu(UGrid);
-
-  FieldMetaData header;
-  std::string file("ckpoint_lat.4000");
-  NerscIO::readConfiguration(Umu,header,file);
-  
-  RealD mass=0.01;
-  RealD M5=1.8;
-
-  DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
-  DomainWallFermionD Dpv(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,1.0,M5);
-
-  const int nbasis = 20;
-  const int cb = 0 ;
-
-
-  NextToNearestStencilGeometry5D geom(Coarse5d);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
-
-  typedef PVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> PVdagM_t;
-  typedef MdagPVLinearOperator<DomainWallFermionD,LatticeFermionD> MdagPV_t;
-  typedef ShiftedPVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> ShiftedPVdagM_t;
-  PVdagM_t PVdagM(Ddwf,Dpv);
-  MdagPV_t MdagPV(Ddwf,Dpv);
-  //  ShiftedPVdagM_t ShiftedPVdagM(2.0,Ddwf,Dpv); // 355
-  //  ShiftedPVdagM_t ShiftedPVdagM(1.0,Ddwf,Dpv); // 246
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.5,Ddwf,Dpv); // 183
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // 145
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 134
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 127 -- NULL space via inverse iteration
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 57 -- NULL space via inverse iteration; 3 iterations
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // 57 , tighter inversion
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // nbasis 20 -- 49 iters
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // nbasis 20 -- 70 iters; asymmetric 
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // 58; Loosen coarse, tighten fine
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 56 ... 
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 51 ...  with 24 vecs
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 31 ...  with 24 vecs and 2^4 blocking
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 43 ...  with 16 vecs and 2^4 blocking, sloppier
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 35  ...  with 20 vecs and 2^4 blocking
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 35  ...  with 20 vecs and 2^4 blocking, looser coarse
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 64  ...  with 20 vecs, Christoph setup, and 2^4 blocking, looser coarse
-  ShiftedPVdagM_t ShiftedPVdagM(0.01,Ddwf,Dpv); // 
-
-
-  // Run power method on HOA??
-  PowerMethod<LatticeFermion>       PM;
-  //  PM(PVdagM,src);
-  //  PM(MdagPV,src);
- 
-  // Warning: This routine calls PVdagM.Op, not PVdagM.HermOp
-  typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
-  Subspace V(Coarse5d,FGrid,cb);
-  Subspace U(Coarse5d,FGrid,cb);
-
-  // Breeds right singular vectors with call to HermOp (V)
-  V.CreateSubspace(RNG5,PVdagM,nbasis);
-
-  // Breeds left singular vectors with call to HermOp (U)
-  //  U.CreateSubspaceChebyshev(RNG5,MdagPV,
-  U.CreateSubspace(RNG5,PVdagM,nbasis);
-  
-  typedef Aggregation<vSpinColourVector,vTComplex,2*nbasis> CombinedSubspace;
-  CombinedSubspace CombinedUV(Coarse5d,FGrid,cb);
-  for(int b=0;b<nbasis;b++){
-    CombinedUV.subspace[b]        = V.subspace[b];
-    CombinedUV.subspace[b+nbasis] = U.subspace[b];
-  }
-
-  int bl, br;
-  std::cout <<" <V| PVdagM| V> " <<std::endl;
-  for(bl=0;bl<nbasis;bl++){
-  for(br=0;br<nbasis;br++){
-    PVdagM.Op(V.subspace[br],src);
-    std::cout <<bl<<" "<<br<<"\t"<<innerProduct(V.subspace[bl],src)<<std::endl;
-  }}
-  std::cout <<" <V| PVdagM| U> " <<std::endl;
-  for(bl=0;bl<nbasis;bl++){
-  for(br=0;br<nbasis;br++){
-    PVdagM.Op(U.subspace[br],src);
-    std::cout <<bl<<" "<<br<<"\t"<<innerProduct(V.subspace[bl],src)<<std::endl;
-  }}
-  std::cout <<" <U| PVdagM| V> " <<std::endl;
-  for(bl=0;bl<nbasis;bl++){
-  for(br=0;br<nbasis;br++){
-    PVdagM.Op(V.subspace[br],src);
-    std::cout <<bl<<" "<<br<<"\t"<<innerProduct(U.subspace[bl],src)<<std::endl;
-  }}
-  std::cout <<" <U| PVdagM| U> " <<std::endl;
-  for(bl=0;bl<nbasis;bl++){
-  for(br=0;br<nbasis;br++){
-    PVdagM.Op(U.subspace[br],src);
-    std::cout <<bl<<" "<<br<<"\t"<<innerProduct(U.subspace[bl],src)<<std::endl;
-  }}
-
-  typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperatorV;
-  typedef LittleDiracOperatorV::CoarseVector CoarseVectorV;
-  
-  typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,2*nbasis> LittleDiracOperator;
-  typedef LittleDiracOperator::CoarseVector CoarseVector;
-
-  V.Orthogonalise();
-  for(int b =0 ; b<nbasis;b++){
-    CoarseVectorV c_src (Coarse5d);
-    V.ProjectToSubspace  (c_src,U.subspace[b]);
-    V.PromoteFromSubspace(c_src,src);
-    std::cout << " Completeness of U in V ["<< b<<"] "<< std::sqrt(norm2(src)/norm2(U.subspace[b]))<<std::endl;
-  }
-  
-  CoarseVector c_src (Coarse5d);
-  CoarseVector c_res (Coarse5d);
-  CoarseVector c_proj(Coarse5d);
-  LittleDiracOperator LittleDiracOpPV(geom,FGrid,Coarse5d);
-  LittleDiracOpPV.CoarsenOperator(PVdagM,CombinedUV,CombinedUV);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"Testing coarsened operator "<<std::endl;
-  
-  Complex one(1.0);
-  c_src = one;  // 1 in every element for vector 1.
-
-  blockPromote(c_src,err,CombinedUV.subspace);
-
-  LatticeFermion prom(FGrid);
-  prom=Zero();
-  for(int b=0;b<nbasis*2;b++){
-    prom=prom+CombinedUV.subspace[b];
-  }
-
-  std::cout<<GridLogMessage<<"c_src "<<norm2(c_src)<<std::endl;
-  std::cout<<GridLogMessage<<"prom  "<<norm2(prom)<<std::endl;
-
-  PVdagM.Op(prom,tmp);
-  blockProject(c_proj,tmp,CombinedUV.subspace);
-  std::cout<<GridLogMessage<<" Called Big Dirac Op "<<norm2(tmp)<<std::endl;
-
-  LittleDiracOpPV.M(c_src,c_res);
-  std::cout<<GridLogMessage<<" Called Little Dirac Op c_src "<< norm2(c_src) << "  c_res "<< norm2(c_res) <<std::endl;
-
-  std::cout<<GridLogMessage<<"Little dop : "<<norm2(c_res)<<std::endl;
-
-  std::cout<<GridLogMessage<<"Big dop in subspace : "<<norm2(c_proj)<<std::endl;
-
-  c_proj = c_proj - c_res;
-  std::cout<<GridLogMessage<<" ldop error: "<<norm2(c_proj)<<std::endl;
-
-
-  /**********
-   * Some solvers
-   **********
-   */
-
-  ///////////////////////////////////////
-  // Coarse grid solver test
-  ///////////////////////////////////////
-
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  std::cout<<GridLogMessage<<" Coarse Grid Solve -- Level 3 "<<std::endl;
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  TrivialPrecon<CoarseVector> simple;
-  NonHermitianLinearOperator<LittleDiracOperator,CoarseVector> LinOpCoarse(LittleDiracOpPV);
-  //  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(1.0e-4, 100, LinOpCoarse,simple,10,10); 
-  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(1.0e-2, 10, LinOpCoarse,simple,20,20); 
-  L2PGCR.Level(3);
-  c_res=Zero();
-  L2PGCR(c_src,c_res);
-
-  ////////////////////////////////////////
-  // Fine grid smoother
-  ////////////////////////////////////////
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  std::cout<<GridLogMessage<<" Fine Grid Smoother -- Level 2 "<<std::endl;
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  TrivialPrecon<LatticeFermionD> simple_fine;
-  //  NonHermitianLinearOperator<PVdagM_t,LatticeFermionD> LinOpSmooth(PVdagM);
-  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermionD> SmootherGCR(0.01,1,ShiftedPVdagM,simple_fine,16,16);
-  SmootherGCR.Level(2);
-  
-  LatticeFermionD f_src(FGrid);
-  LatticeFermionD f_res(FGrid);
-
-  f_src = one;  // 1 in every element for vector 1.
-  f_res=Zero();
-  SmootherGCR(f_src,f_res);
-
-  typedef MGPreconditionerSVD<vSpinColourVector,  vTComplex,nbasis*2> TwoLevelMG;
-
-  TwoLevelMG TwoLevelPrecon(CombinedUV,CombinedUV,
-			    PVdagM,
-			    simple_fine,
-			    SmootherGCR,
-			    LinOpCoarse,
-			    L2PGCR);
-  
-  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermion> L1PGCR(1.0e-8,1000,PVdagM,TwoLevelPrecon,20,20);
-  L1PGCR.Level(1);
-
-  f_res=Zero();
-  L1PGCR(f_src,f_res);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage << "Done "<< std::endl;
-
-  Grid_finalize();
-  return 0;
-}
--- a/tests/debug/Test_general_coarse_pvdagm_svd_uv.cc
+++ b/tests/debug/Test_general_coarse_pvdagm_svd_uv.cc
@@ -1,479 +0,0 @@
-/*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./tests/Test_padded_cell.cc
-
-    Copyright (C) 2023
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#include <Grid/Grid.h>
-#include <Grid/lattice/PaddedCell.h>
-#include <Grid/stencil/GeneralLocalStencil.h>
-
-#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
-#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
-#include <Grid/algorithms/iterative/BiCGSTAB.h>
-
-using namespace std;
-using namespace Grid;
-
-template<class Matrix,class Field>
-class PVdagMLinearOperator : public LinearOperatorBase<Field> {
-  Matrix &_Mat;
-  Matrix &_PV;
-public:
-  PVdagMLinearOperator(Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV){};
-
-  void OpDiag (const Field &in, Field &out) {    assert(0);  }
-  void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  }
-  void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  };
-  void Op     (const Field &in, Field &out){
-    //    std::cout << GridLogMessage<< "Op: PVdag M "<<std::endl;
-    Field tmp(in.Grid());
-    _Mat.M(in,tmp);
-    _PV.Mdag(tmp,out);
-  }
-  void AdjOp     (const Field &in, Field &out){
-    //    std::cout << GridLogMessage<<"AdjOp: Mdag PV "<<std::endl;
-    Field tmp(in.Grid());
-    _PV.M(in,tmp);
-    _Mat.Mdag(tmp,out);
-  }
-  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
-    assert(0);
-  }
-  void HermOp(const Field &in, Field &out){
-    //    std::cout <<GridLogMessage<< "HermOp: Mdag PV PVdag M"<<std::endl;
-    Field tmp(in.Grid());
-    Op(in,tmp);
-    AdjOp(tmp,out);
-    //    std::cout << "HermOp done "<<norm2(out)<<std::endl;
-  }
-};
-template<class Matrix,class Field>
-class MdagPVLinearOperator : public LinearOperatorBase<Field> {
-  Matrix &_Mat;
-  Matrix &_PV;
-public:
-  MdagPVLinearOperator(Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV){};
-
-  void OpDiag (const Field &in, Field &out) {    assert(0);  }
-  void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  }
-  void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  };
-  void Op     (const Field &in, Field &out){
-    Field tmp(in.Grid());
-    //    std::cout <<GridLogMessage<< "Op: PVdag M "<<std::endl;
-    _PV.M(in,tmp);
-    _Mat.Mdag(tmp,out);
-  }
-  void AdjOp     (const Field &in, Field &out){
-    //    std::cout <<GridLogMessage<< "AdjOp: Mdag PV "<<std::endl;
-    Field tmp(in.Grid());
-    _Mat.M(in,tmp);
-    _PV.Mdag(tmp,out);
-  }
-  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
-    assert(0);
-  }
-  void HermOp(const Field &in, Field &out){
-    //    std::cout << GridLogMessage<<"HermOp: PVdag M Mdag PV "<<std::endl;
-    Field tmp(in.Grid());
-    Op(in,tmp);
-    AdjOp(tmp,out);
-    //    std::cout << "HermOp done "<<norm2(out)<<std::endl;
-  }
-};
-template<class Matrix,class Field>
-class ShiftedPVdagMLinearOperator : public LinearOperatorBase<Field> {
-  Matrix &_Mat;
-  Matrix &_PV;
-  RealD shift;
-public:
-  ShiftedPVdagMLinearOperator(RealD _shift,Matrix &Mat,Matrix &PV): shift(_shift),_Mat(Mat),_PV(PV){};
-
-  void OpDiag (const Field &in, Field &out) {    assert(0);  }
-  void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  }
-  void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  };
-  void Op     (const Field &in, Field &out){
-    //    std::cout << "Op: PVdag M "<<std::endl;
-    Field tmp(in.Grid());
-    _Mat.M(in,tmp);
-    _PV.Mdag(tmp,out);
-    out = out + shift * in;
-  }
-  void AdjOp     (const Field &in, Field &out){
-    //    std::cout << "AdjOp: Mdag PV "<<std::endl;
-    Field tmp(in.Grid());
-    _PV.M(tmp,out);
-    _Mat.Mdag(in,tmp);
-    out = out + shift * in;
-  }
-  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  }
-  void HermOp(const Field &in, Field &out){
-    //    std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
-    Field tmp(in.Grid());
-    Op(in,tmp);
-    AdjOp(tmp,out);
-  }
-};
-template<class Fobj,class CComplex,int nbasis>
-class MGPreconditionerSVD : public LinearFunction< Lattice<Fobj> > {
-public:
-  using LinearFunction<Lattice<Fobj> >::operator();
-
-  typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField    FineField;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
-  typedef LinearOperatorBase<FineField>                            FineOperator;
-  typedef LinearFunction    <FineField>                            FineSmoother;
-  typedef LinearOperatorBase<CoarseVector>                         CoarseOperator;
-  typedef LinearFunction    <CoarseVector>                         CoarseSolver;
-  ///////////////////////////////
-  // SVD is M = U S Vdag
-  //
-  // Define a subset of Vc and Uc in Complex_f,c  matrix
-  // - these are the coarsening, non-square matrices
-  //
-  // Solve a coarse approx to
-  //
-  //      M psi = eta
-  //
-  //  via
-  //
-  //  Uc^dag U S Vdag Vc Vc^dag psi = Uc^dag eta
-  //
-  //  M_coarse Vc^dag psi = M_coarse psi_c = eta_c
-  //  
-  ///////////////////////////////
-  Aggregates     & _U;
-  Aggregates     & _V;
-  FineOperator   & _FineOperator;
-  FineSmoother   & _PreSmoother;
-  FineSmoother   & _PostSmoother;
-  CoarseOperator & _CoarseOperator;
-  CoarseSolver   & _CoarseSolve;
-
-  int    level;  void Level(int lv) {level = lv; };
-
-  MGPreconditionerSVD(Aggregates &U,
-		      Aggregates &V,
-		      FineOperator &Fine,
-		      FineSmoother &PreSmoother,
-		      FineSmoother &PostSmoother,
-		      CoarseOperator &CoarseOperator_,
-		      CoarseSolver &CoarseSolve_)
-    : _U(U),
-      _V(V),
-      _FineOperator(Fine),
-      _PreSmoother(PreSmoother),
-      _PostSmoother(PostSmoother),
-      _CoarseOperator(CoarseOperator_),
-      _CoarseSolve(CoarseSolve_),
-      level(1)  {  }
-
-  virtual void operator()(const FineField &in, FineField & out) 
-  {
-    GridBase *CoarseGrid = _U.CoarseGrid;
-    //    auto CoarseGrid = _CoarseOperator.Grid();
-    CoarseVector Csrc(CoarseGrid);
-    CoarseVector Csol(CoarseGrid);
-    FineField vec1(in.Grid());
-    FineField vec2(in.Grid());
-
-    std::cout<<GridLogMessage << "Calling PreSmoother " <<std::endl;
-
-    //    std::cout<<GridLogMessage << "Calling PreSmoother input residual "<<norm2(in) <<std::endl;
-    double t;
-    // Fine Smoother
-    //    out = in;
-    out = Zero();
-    t=-usecond();
-    _PreSmoother(in,out);
-    t+=usecond();
-
-    std::cout<<GridLogMessage << "PreSmoother took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Update the residual
-    _FineOperator.Op(out,vec1);  sub(vec1, in ,vec1);   
-    //    std::cout<<GridLogMessage <<"Residual-1 now " <<norm2(vec1)<<std::endl;
-
-    //  Uc^dag U S Vdag Vc Vc^dag psi = Uc^dag eta
-    // Fine to Coarse 
-    t=-usecond();
-    _U.ProjectToSubspace  (Csrc,vec1);
-    t+=usecond();
-    std::cout<<GridLogMessage << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Coarse correction
-    t=-usecond();
-    Csol = Zero();
-    _CoarseSolve(Csrc,Csol);
-    //Csol=Zero();
-    t+=usecond();
-    std::cout<<GridLogMessage << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Coarse to Fine
-    t=-usecond();  
-    //    _CoarseOperator.PromoteFromSubspace(_Aggregates,Csol,vec1);
-    _V.PromoteFromSubspace(Csol,vec1); 
-    add(out,out,vec1);
-    t+=usecond();
-    std::cout<<GridLogMessage << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Residual
-    _FineOperator.Op(out,vec1);  sub(vec1 ,in , vec1);  
-    //    std::cout<<GridLogMessage <<"Residual-2 now " <<norm2(vec1)<<std::endl;
-
-    // Fine Smoother
-    t=-usecond();
-    //    vec2=vec1;
-    vec2=Zero();
-    _PostSmoother(vec1,vec2);
-    t+=usecond();
-    std::cout<<GridLogMessage << "PostSmoother took "<< t/1000.0<< "ms" <<std::endl;
-
-    add( out,out,vec2);
-    std::cout<<GridLogMessage << "Done " <<std::endl;
-  }
-};
-
-int main (int argc, char ** argv)
-{
-  Grid_init(&argc,&argv);
-
-  const int Ls=16;
-
-  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
-  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
-
-  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
-  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
-
-  // Construct a coarsened grid
-  Coordinate clatt = GridDefaultLatt();
-  for(int d=0;d<clatt.size();d++){
-    clatt[d] = clatt[d]/2;
-    //    clatt[d] = clatt[d]/4;
-  }
-  GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
-  GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
-
-  std::vector<int> seeds4({1,2,3,4});
-  std::vector<int> seeds5({5,6,7,8});
-  std::vector<int> cseeds({5,6,7,8});
-  GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5);
-  GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4);
-  GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
-
-  LatticeFermion    src(FGrid); random(RNG5,src);
-  LatticeFermion result(FGrid); result=Zero();
-  LatticeFermion    ref(FGrid); ref=Zero();
-  LatticeFermion    tmp(FGrid);
-  LatticeFermion    err(FGrid);
-  LatticeGaugeField Umu(UGrid);
-
-  FieldMetaData header;
-  std::string file("ckpoint_lat.4000");
-  NerscIO::readConfiguration(Umu,header,file);
-  
-  RealD mass=0.01;
-  RealD M5=1.8;
-
-  DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
-  DomainWallFermionD Dpv(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,1.0,M5);
-
-  const int nbasis = 60;
-  const int cb = 0 ;
-
-  NextToNearestStencilGeometry5D geom(Coarse5d);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
-
-  typedef PVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> PVdagM_t;
-  typedef MdagPVLinearOperator<DomainWallFermionD,LatticeFermionD> MdagPV_t;
-  typedef ShiftedPVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> ShiftedPVdagM_t;
-  PVdagM_t PVdagM(Ddwf,Dpv);
-  MdagPV_t MdagPV(Ddwf,Dpv);
-  //  ShiftedPVdagM_t ShiftedPVdagM(2.0,Ddwf,Dpv); // 355
-  //  ShiftedPVdagM_t ShiftedPVdagM(1.0,Ddwf,Dpv); // 246
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.5,Ddwf,Dpv); // 183
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // 145
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 134
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 127 -- NULL space via inverse iteration
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 57 -- NULL space via inverse iteration; 3 iterations
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // 57 , tighter inversion
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // nbasis 20 -- 49 iters
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // nbasis 20 -- 70 iters; asymmetric 
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // 58; Loosen coarse, tighten fine
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 56 ... 
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 51 ...  with 24 vecs
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 31 ...  with 24 vecs and 2^4 blocking
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 43 ...  with 16 vecs and 2^4 blocking, sloppier
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 35  ...  with 20 vecs and 2^4 blocking
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 35  ...  with 20 vecs and 2^4 blocking, looser coarse
-  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 64  ...  with 20 vecs, Christoph setup, and 2^4 blocking, looser coarse
-  ShiftedPVdagM_t ShiftedPVdagM(0.01,Ddwf,Dpv); // 
-
-
-  // Run power method on HOA??
-  PowerMethod<LatticeFermion>       PM;
-  PM(PVdagM,src);
-  PM(MdagPV,src);
-
- 
-  // Warning: This routine calls PVdagM.Op, not PVdagM.HermOp
-  typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
-  Subspace V(Coarse5d,FGrid,cb);
-  //  Subspace U(Coarse5d,FGrid,cb);
-
-  // Breeds right singular vectors with call to HermOp
-  V.CreateSubspaceChebyshev(RNG5,PVdagM,
-			    nbasis,
-			    4000.0,0.003,
-			    300);
-
-  // Breeds left singular vectors with call to HermOp
-  //  U.CreateSubspaceChebyshev(RNG5,MdagPV,
-  //			    nbasis,
-  //			    4000.0,0.003,
-  //			    300);
-  //  U.subspace=V.subspace;
-  
-  //  typedef Aggregation<vSpinColourVector,vTComplex,2*nbasis> CombinedSubspace;
-  //  CombinedSubspace CombinedUV(Coarse5d,FGrid,cb);
-  //  for(int b=0;b<nbasis;b++){
-  //    CombinedUV.subspace[b]        = V.subspace[b];
-  //    CombinedUV.subspace[b+nbasis] = U.subspace[b];
-  //  }
-  
-  
-  //  typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,2*nbasis> LittleDiracOperator;
-  typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator;
-  typedef LittleDiracOperator::CoarseVector CoarseVector;
-  LittleDiracOperator LittleDiracOpPV(geom,FGrid,Coarse5d);
-  LittleDiracOpPV.CoarsenOperator(PVdagM,V,V);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"Testing coarsened operator "<<std::endl;
-  
-  CoarseVector c_src (Coarse5d);
-  CoarseVector c_res (Coarse5d);
-  CoarseVector c_proj(Coarse5d);
-
-
-  Complex one(1.0);
-  c_src = one;  // 1 in every element for vector 1.
-
-  //  blockPromote(c_src,err,CoarseToFine.subspace);
-
-  LatticeFermion prom(FGrid);
-  prom=Zero();
-  for(int b=0;b<nbasis;b++){
-    prom=prom+V.subspace[b];
-  }
-
-  std::cout<<GridLogMessage<<"c_src "<<norm2(c_src)<<std::endl;
-  std::cout<<GridLogMessage<<"prom  "<<norm2(prom)<<std::endl;
-
-  PVdagM.Op(prom,tmp);
-  blockProject(c_proj,tmp,V.subspace);
-  std::cout<<GridLogMessage<<" Called Big Dirac Op "<<norm2(tmp)<<std::endl;
-
-  LittleDiracOpPV.M(c_src,c_res);
-  std::cout<<GridLogMessage<<" Called Little Dirac Op c_src "<< norm2(c_src) << "  c_res "<< norm2(c_res) <<std::endl;
-
-  std::cout<<GridLogMessage<<"Little dop : "<<norm2(c_res)<<std::endl;
-
-  std::cout<<GridLogMessage<<"Big dop in subspace : "<<norm2(c_proj)<<std::endl;
-
-  c_proj = c_proj - c_res;
-  std::cout<<GridLogMessage<<" ldop error: "<<norm2(c_proj)<<std::endl;
-
-
-  /**********
-   * Some solvers
-   **********
-   */
-
-  ///////////////////////////////////////
-  // Coarse grid solver test
-  ///////////////////////////////////////
-
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  std::cout<<GridLogMessage<<" Coarse Grid Solve -- Level 3 "<<std::endl;
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  TrivialPrecon<CoarseVector> simple;
-  NonHermitianLinearOperator<LittleDiracOperator,CoarseVector> LinOpCoarse(LittleDiracOpPV);
-  //  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(1.0e-4, 100, LinOpCoarse,simple,10,10); 
-  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L3PGCR(1.0e-4, 10, LinOpCoarse,simple,20,20); 
-  L3PGCR.Level(3);
-  c_res=Zero();
-  L3PGCR(c_src,c_res);
-
-  ////////////////////////////////////////
-  // Fine grid smoother
-  ////////////////////////////////////////
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  std::cout<<GridLogMessage<<" Fine Grid Smoother -- Level 2 "<<std::endl;
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  TrivialPrecon<LatticeFermionD> simple_fine;
-  //  NonHermitianLinearOperator<PVdagM_t,LatticeFermionD> LinOpSmooth(PVdagM);
-  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermionD> SmootherGCR(0.01,1,ShiftedPVdagM,simple_fine,16,16);
-  SmootherGCR.Level(2);
-  
-  LatticeFermionD f_src(FGrid);
-  LatticeFermionD f_res(FGrid);
-
-  f_src = one;  // 1 in every element for vector 1.
-  f_res=Zero();
-  SmootherGCR(f_src,f_res);
-
-  //  typedef MGPreconditionerSVD<vSpinColourVector,  vTComplex,nbasis*2> TwoLevelMG;
-  typedef MGPreconditionerSVD<vSpinColourVector,  vTComplex,nbasis> TwoLevelMG;
-
-  //  TwoLevelMG TwoLevelPrecon(CombinedUV,CombinedUV,
-  TwoLevelMG TwoLevelPrecon(V,V,
-			    PVdagM,
-			    simple_fine,
-			    SmootherGCR,
-			    LinOpCoarse,
-			    L3PGCR);
-  
-  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermion> L1PGCR(1.0e-8,1000,PVdagM,TwoLevelPrecon,16,16);
-  L1PGCR.Level(1);
-
-  f_res=Zero();
-  L1PGCR(f_src,f_res);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage << "Done "<< std::endl;
-
-  Grid_finalize();
-  return 0;
-}
--- a/tests/debug/Test_general_coarse_wilson.cc
+++ b/tests/debug/Test_general_coarse_wilson.cc
@@ -1,333 +0,0 @@
-/*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./tests/Test_padded_cell.cc
-
-    Copyright (C) 2023
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#include <Grid/Grid.h>
-#include <Grid/lattice/PaddedCell.h>
-#include <Grid/stencil/GeneralLocalStencil.h>
-
-#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
-#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
-#include <Grid/algorithms/iterative/BiCGSTAB.h>
-
-using namespace std;
-using namespace Grid;
-
-template<class Fobj,class CComplex,int nbasis>
-class MGPreconditioner : public LinearFunction< Lattice<Fobj> > {
-public:
-  using LinearFunction<Lattice<Fobj> >::operator();
-
-  typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField    FineField;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
-  typedef LinearOperatorBase<FineField>                            FineOperator;
-  typedef LinearFunction    <FineField>                            FineSmoother;
-  typedef LinearOperatorBase<CoarseVector>                         CoarseOperator;
-  typedef LinearFunction    <CoarseVector>                         CoarseSolver;
-  Aggregates     & _Aggregates;
-  FineOperator   & _FineOperator;
-  FineSmoother   & _PreSmoother;
-  FineSmoother   & _PostSmoother;
-  CoarseOperator & _CoarseOperator;
-  CoarseSolver   & _CoarseSolve;
-
-  int    level;  void Level(int lv) {level = lv; };
-
-  MGPreconditioner(Aggregates &Agg,
-		   FineOperator &Fine,
-		   FineSmoother &PreSmoother,
-		   FineSmoother &PostSmoother,
-		   CoarseOperator &CoarseOperator_,
-		   CoarseSolver &CoarseSolve_)
-    : _Aggregates(Agg),
-      _FineOperator(Fine),
-      _PreSmoother(PreSmoother),
-      _PostSmoother(PostSmoother),
-      _CoarseOperator(CoarseOperator_),
-      _CoarseSolve(CoarseSolve_),
-      level(1)  {  }
-
-  virtual void operator()(const FineField &in, FineField & out) 
-  {
-    GridBase *CoarseGrid = _Aggregates.CoarseGrid;
-    //    auto CoarseGrid = _CoarseOperator.Grid();
-    CoarseVector Csrc(CoarseGrid);
-    CoarseVector Csol(CoarseGrid);
-    FineField vec1(in.Grid());
-    FineField vec2(in.Grid());
-
-    std::cout<<GridLogMessage << "Calling PreSmoother " <<std::endl;
-
-    //    std::cout<<GridLogMessage << "Calling PreSmoother input residual "<<norm2(in) <<std::endl;
-    double t;
-    // Fine Smoother
-    //    out = in;
-    out = Zero();
-    t=-usecond();
-    _PreSmoother(in,out);
-    t+=usecond();
-
-    std::cout<<GridLogMessage << "PreSmoother took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Update the residual
-    _FineOperator.Op(out,vec1);  sub(vec1, in ,vec1);   
-    //    std::cout<<GridLogMessage <<"Residual-1 now " <<norm2(vec1)<<std::endl;
-
-    // Fine to Coarse 
-    t=-usecond();
-    _Aggregates.ProjectToSubspace  (Csrc,vec1);
-    t+=usecond();
-    std::cout<<GridLogMessage << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Coarse correction
-    t=-usecond();
-    Csol = Zero();
-    _CoarseSolve(Csrc,Csol);
-    //Csol=Zero();
-    t+=usecond();
-    std::cout<<GridLogMessage << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Coarse to Fine
-    t=-usecond();  
-    //    _CoarseOperator.PromoteFromSubspace(_Aggregates,Csol,vec1);
-    _Aggregates.PromoteFromSubspace(Csol,vec1); 
-    add(out,out,vec1);
-    t+=usecond();
-    std::cout<<GridLogMessage << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Residual
-    _FineOperator.Op(out,vec1);  sub(vec1 ,in , vec1);  
-    //    std::cout<<GridLogMessage <<"Residual-2 now " <<norm2(vec1)<<std::endl;
-
-    // Fine Smoother
-    t=-usecond();
-    //    vec2=vec1;
-    vec2=Zero();
-    _PostSmoother(vec1,vec2);
-    t+=usecond();
-    std::cout<<GridLogMessage << "PostSmoother took "<< t/1000.0<< "ms" <<std::endl;
-
-    add( out,out,vec2);
-    std::cout<<GridLogMessage << "Done " <<std::endl;
-  }
-};
-
-int main (int argc, char ** argv)
-{
-  Grid_init(&argc,&argv);
-
-  const int Ls=16;
-
-  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
-  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
-
-  GridCartesian         * FGrid   = UGrid;
-  GridRedBlackCartesian * FrbGrid = UrbGrid;
-
-  // Construct a coarsened grid
-  Coordinate clatt = GridDefaultLatt();
-  for(int d=0;d<clatt.size();d++){
-    clatt[d] = clatt[d]/2;
-    //clatt[d] = clatt[d]/4;
-  }
-  GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
-
-  std::vector<int> seeds4({1,2,3,4});
-  std::vector<int> cseeds({5,6,7,8});
-  GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4);
-  GridParallelRNG          CRNG(Coarse4d);CRNG.SeedFixedIntegers(cseeds);
-
-  Complex one(1.0);
-
-  LatticeFermion    src(FGrid); src=one;
-  LatticeFermion result(FGrid); result=Zero();
-  LatticeFermion    ref(FGrid); ref=Zero();
-  LatticeFermion    tmp(FGrid);
-  LatticeFermion    err(FGrid);
-  LatticeFermion    precsrc(FGrid);
-  LatticeGaugeField Umu(UGrid);
-
-  FieldMetaData header;
-  std::string file("ckpoint_lat");
-  NerscIO::readConfiguration(Umu,header,file);
-
-  RealD csw =0.0;
-  RealD mass=-0.92;
-
-  WilsonCloverFermionD Dw(Umu,*UGrid,*UrbGrid,mass,csw,csw);
-
-  const int nbasis = 20;
-  const int cb = 0 ;
-  LatticeFermion prom(FGrid);
-
-  typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,2*nbasis> LittleDiracOperator;
-  typedef LittleDiracOperator::CoarseVector CoarseVector;
-
-  NearestStencilGeometry4D geom(Coarse4d);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
- 
-  // Warning: This routine calls Linop.Op, not LinOpo.HermOp
-  typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
-  Subspace Aggregates(Coarse4d,FGrid,cb);
-
-  NonHermitianLinearOperator<WilsonCloverFermionD,LatticeFermion> LinOpDw(Dw);
-  ShiftedNonHermitianLinearOperator<WilsonCloverFermionD,LatticeFermion> ShiftedLinOpDw(Dw,0.01);
-
-  Aggregates.CreateSubspaceGCR(RNG4,
-			       LinOpDw,
-			       nbasis);
-  
-  typedef Aggregation<vSpinColourVector,vTComplex,2*nbasis> CombinedSubspace;
-  CombinedSubspace CombinedUV(Coarse4d,UGrid,cb);
-  for(int b=0;b<nbasis;b++){
-    Gamma G5(Gamma::Algebra::Gamma5);
-    CombinedUV.subspace[b]        = Aggregates.subspace[b];
-    CombinedUV.subspace[b+nbasis] = G5*Aggregates.subspace[b];
-  }
-
-  LittleDiracOperator LittleDiracOp(geom,FGrid,Coarse4d);
-  LittleDiracOp.CoarsenOperator(LinOpDw,CombinedUV);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"Testing coarsened operator "<<std::endl;
-  
-  CoarseVector c_src (Coarse4d);
-  CoarseVector c_res (Coarse4d);
-  CoarseVector c_proj(Coarse4d);
-
-  std::vector<LatticeFermion> subspace(2*nbasis,FGrid);
-  subspace=CombinedUV.subspace;
-
-  c_src = one;  // 1 in every element for vector 1.
-  blockPromote(c_src,err,subspace);
-
-  prom=Zero();
-  for(int b=0;b<2*nbasis;b++){
-    prom=prom+subspace[b];
-  }
-  err=err-prom; 
-  std::cout<<GridLogMessage<<"Promoted back from subspace: err "<<norm2(err)<<std::endl;
-  std::cout<<GridLogMessage<<"c_src "<<norm2(c_src)<<std::endl;
-  std::cout<<GridLogMessage<<"prom  "<<norm2(prom)<<std::endl;
-
-  LinOpDw.Op(prom,tmp);
-  blockProject(c_proj,tmp,subspace);
-  std::cout<<GridLogMessage<<" Called Big Dirac Op "<<norm2(tmp)<<std::endl;
-
-  LittleDiracOp.M(c_src,c_res);
-  std::cout<<GridLogMessage<<" Called Little Dirac Op c_src "<< norm2(c_src) << "  c_res "<< norm2(c_res) <<std::endl;
-
-  std::cout<<GridLogMessage<<"Little dop : "<<norm2(c_res)<<std::endl;
-  //  std::cout<<GridLogMessage<<" Little "<< c_res<<std::endl;
-
-  std::cout<<GridLogMessage<<"Big dop in subspace : "<<norm2(c_proj)<<std::endl;
-  //  std::cout<<GridLogMessage<<" Big "<< c_proj<<std::endl;
-  c_proj = c_proj - c_res;
-  std::cout<<GridLogMessage<<" ldop error: "<<norm2(c_proj)<<std::endl;
-  //  std::cout<<GridLogMessage<<" error "<< c_proj<<std::endl;
-
-
-  /**********
-   * Some solvers
-   **********
-   */
-
-  // CG
-  {
-    MdagMLinearOperator<WilsonFermionD,LatticeFermion> HermOp(Dw);
-    ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
-    Dw.Mdag(src,precsrc);
-    CG(HermOp,precsrc,result);
-    result=Zero();
-  }
- 
-  ///////////////////////////////////////
-  // Coarse grid solver test
-  ///////////////////////////////////////
-
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  std::cout<<GridLogMessage<<" Coarse Grid Solve -- Level 3 "<<std::endl;
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  TrivialPrecon<CoarseVector> simple;
-  NonHermitianLinearOperator<LittleDiracOperator,CoarseVector> LinOpCoarse(LittleDiracOp);
-  ShiftedNonHermitianLinearOperator<LittleDiracOperator,CoarseVector> ShiftedLinOpCoarse(LittleDiracOp,0.001);
-  //  ShiftedNonHermitianLinearOperator<LittleDiracOperator,CoarseVector> ShiftedLinOpCoarse(LittleDiracOp,0.01);
-  //  ShiftedNonHermitianLinearOperator<LittleDiracOperator,CoarseVector> ShiftedLinOpCoarse(LinOpCoarse,0.001);
-  //  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(1.0e-4, 100, LinOpCoarse,simple,10,10); 
-  //  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(1.0e-1, 100, LinOpCoarse,simple,30,30); 
-  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(2.0e-1, 50, ShiftedLinOpCoarse,simple,50,50); 
-  L2PGCR.Level(3);
-  c_res=Zero();
-  L2PGCR(c_src,c_res);
-
-  ////////////////////////////////////////
-  // Fine grid smoother
-  ////////////////////////////////////////
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  std::cout<<GridLogMessage<<" Fine Grid Smoother -- Level 2 "<<std::endl;
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  TrivialPrecon<LatticeFermionD> simple_fine;
-
-  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermionD> SmootherGCR(0.1,1,ShiftedLinOpDw,simple_fine,4,4);
-  SmootherGCR.Level(2);
-  
-  LatticeFermionD f_src(FGrid);
-  LatticeFermionD f_res(FGrid);
-
-  f_src = one;  // 1 in every element for vector 1.
-  f_res=Zero();
-  SmootherGCR(f_src,f_res);
-
-  typedef MGPreconditioner<vSpinColourVector,  vTComplex,2*nbasis> TwoLevelMG;
-
-  TwoLevelMG TwoLevelPrecon(CombinedUV,
-			    LinOpDw,
-			    simple_fine,
-			    SmootherGCR,
-			    LinOpCoarse,
-			    L2PGCR);
-  
-  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermion> L1PGCR(1.0e-8,1000,LinOpDw,TwoLevelPrecon,16,16);
-  L1PGCR.Level(1);
-
-  f_res=Zero();
-  L1PGCR(f_src,f_res);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage << "Done "<< std::endl;
-
-  Grid_finalize();
-  return 0;
-}
--- a/tests/debug/Test_general_coarse_wilson_nog5.cc
+++ b/tests/debug/Test_general_coarse_wilson_nog5.cc
@@ -1,326 +0,0 @@
-/*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./tests/Test_padded_cell.cc
-
-    Copyright (C) 2023
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#include <Grid/Grid.h>
-#include <Grid/lattice/PaddedCell.h>
-#include <Grid/stencil/GeneralLocalStencil.h>
-
-#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
-#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
-#include <Grid/algorithms/iterative/BiCGSTAB.h>
-
-using namespace std;
-using namespace Grid;
-
-template<class Fobj,class CComplex,int nbasis>
-class MGPreconditioner : public LinearFunction< Lattice<Fobj> > {
-public:
-  using LinearFunction<Lattice<Fobj> >::operator();
-
-  typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField    FineField;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
-  typedef LinearOperatorBase<FineField>                            FineOperator;
-  typedef LinearFunction    <FineField>                            FineSmoother;
-  typedef LinearOperatorBase<CoarseVector>                         CoarseOperator;
-  typedef LinearFunction    <CoarseVector>                         CoarseSolver;
-  Aggregates     & _Aggregates;
-  FineOperator   & _FineOperator;
-  FineSmoother   & _PreSmoother;
-  FineSmoother   & _PostSmoother;
-  CoarseOperator & _CoarseOperator;
-  CoarseSolver   & _CoarseSolve;
-
-  int    level;  void Level(int lv) {level = lv; };
-
-  MGPreconditioner(Aggregates &Agg,
-		   FineOperator &Fine,
-		   FineSmoother &PreSmoother,
-		   FineSmoother &PostSmoother,
-		   CoarseOperator &CoarseOperator_,
-		   CoarseSolver &CoarseSolve_)
-    : _Aggregates(Agg),
-      _FineOperator(Fine),
-      _PreSmoother(PreSmoother),
-      _PostSmoother(PostSmoother),
-      _CoarseOperator(CoarseOperator_),
-      _CoarseSolve(CoarseSolve_),
-      level(1)  {  }
-
-  virtual void operator()(const FineField &in, FineField & out) 
-  {
-    GridBase *CoarseGrid = _Aggregates.CoarseGrid;
-    //    auto CoarseGrid = _CoarseOperator.Grid();
-    CoarseVector Csrc(CoarseGrid);
-    CoarseVector Csol(CoarseGrid);
-    FineField vec1(in.Grid());
-    FineField vec2(in.Grid());
-
-    std::cout<<GridLogMessage << "Calling PreSmoother " <<std::endl;
-
-    //    std::cout<<GridLogMessage << "Calling PreSmoother input residual "<<norm2(in) <<std::endl;
-    double t;
-    // Fine Smoother
-    //    out = in;
-    out = Zero();
-    t=-usecond();
-    _PreSmoother(in,out);
-    t+=usecond();
-
-    std::cout<<GridLogMessage << "PreSmoother took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Update the residual
-    _FineOperator.Op(out,vec1);  sub(vec1, in ,vec1);   
-    //    std::cout<<GridLogMessage <<"Residual-1 now " <<norm2(vec1)<<std::endl;
-
-    // Fine to Coarse 
-    t=-usecond();
-    _Aggregates.ProjectToSubspace  (Csrc,vec1);
-    t+=usecond();
-    std::cout<<GridLogMessage << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Coarse correction
-    t=-usecond();
-    Csol = Zero();
-    _CoarseSolve(Csrc,Csol);
-    //Csol=Zero();
-    t+=usecond();
-    std::cout<<GridLogMessage << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Coarse to Fine
-    t=-usecond();  
-    //    _CoarseOperator.PromoteFromSubspace(_Aggregates,Csol,vec1);
-    _Aggregates.PromoteFromSubspace(Csol,vec1); 
-    add(out,out,vec1);
-    t+=usecond();
-    std::cout<<GridLogMessage << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Residual
-    _FineOperator.Op(out,vec1);  sub(vec1 ,in , vec1);  
-    //    std::cout<<GridLogMessage <<"Residual-2 now " <<norm2(vec1)<<std::endl;
-
-    // Fine Smoother
-    t=-usecond();
-    //    vec2=vec1;
-    vec2=Zero();
-    _PostSmoother(vec1,vec2);
-    t+=usecond();
-    std::cout<<GridLogMessage << "PostSmoother took "<< t/1000.0<< "ms" <<std::endl;
-
-    add( out,out,vec2);
-    std::cout<<GridLogMessage << "Done " <<std::endl;
-  }
-};
-
-int main (int argc, char ** argv)
-{
-  Grid_init(&argc,&argv);
-
-  const int Ls=16;
-
-  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
-  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
-
-  GridCartesian         * FGrid   = UGrid;
-  GridRedBlackCartesian * FrbGrid = UrbGrid;
-
-  // Construct a coarsened grid
-  Coordinate clatt = GridDefaultLatt();
-  for(int d=0;d<clatt.size();d++){
-    clatt[d] = clatt[d]/2;
-    //    clatt[d] = clatt[d]/4;
-  }
-  GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
-
-  std::vector<int> seeds4({1,2,3,4});
-  std::vector<int> cseeds({5,6,7,8});
-  GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4);
-  GridParallelRNG          CRNG(Coarse4d);CRNG.SeedFixedIntegers(cseeds);
-
-  Complex one(1.0);
-
-  LatticeFermion    src(FGrid); src=one;
-  LatticeFermion result(FGrid); result=Zero();
-  LatticeFermion    ref(FGrid); ref=Zero();
-  LatticeFermion    tmp(FGrid);
-  LatticeFermion    err(FGrid);
-  LatticeFermion    precsrc(FGrid);
-  LatticeGaugeField Umu(UGrid);
-
-  FieldMetaData header;
-  std::string file("ckpoint_lat");
-  NerscIO::readConfiguration(Umu,header,file);
-
-  RealD csw =0.0;
-  RealD mass=-0.92;
-
-  WilsonCloverFermionD Dw(Umu,*UGrid,*UrbGrid,mass,csw,csw);
-
-  const int nbasis = 40;
-  const int cb = 0 ;
-  LatticeFermion prom(FGrid);
-
-  typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator;
-  typedef LittleDiracOperator::CoarseVector CoarseVector;
-
-  NearestStencilGeometry4D geom(Coarse4d);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
- 
-  // Warning: This routine calls Linop.Op, not LinOpo.HermOp
-  typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
-  Subspace Aggregates(Coarse4d,FGrid,cb);
-
-  NonHermitianLinearOperator<WilsonCloverFermionD,LatticeFermion> LinOpDw(Dw);
-  ShiftedNonHermitianLinearOperator<WilsonCloverFermionD,LatticeFermion> ShiftedLinOpDw(Dw,0.01);
-
-  Aggregates.CreateSubspaceGCR(RNG4,
-			       LinOpDw,
-			       nbasis);
-  
-
-  LittleDiracOperator LittleDiracOp(geom,FGrid,Coarse4d);
-  LittleDiracOp.CoarsenOperator(LinOpDw,Aggregates);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"Testing coarsened operator "<<std::endl;
-  
-  CoarseVector c_src (Coarse4d);
-  CoarseVector c_res (Coarse4d);
-  CoarseVector c_proj(Coarse4d);
-
-  std::vector<LatticeFermion> subspace(nbasis,FGrid);
-  subspace=Aggregates.subspace;
-
-  c_src = one;  // 1 in every element for vector 1.
-  blockPromote(c_src,err,subspace);
-
-  prom=Zero();
-  for(int b=0;b<nbasis;b++){
-    prom=prom+subspace[b];
-  }
-  err=err-prom; 
-  std::cout<<GridLogMessage<<"Promoted back from subspace: err "<<norm2(err)<<std::endl;
-  std::cout<<GridLogMessage<<"c_src "<<norm2(c_src)<<std::endl;
-  std::cout<<GridLogMessage<<"prom  "<<norm2(prom)<<std::endl;
-
-  LinOpDw.Op(prom,tmp);
-  blockProject(c_proj,tmp,subspace);
-  std::cout<<GridLogMessage<<" Called Big Dirac Op "<<norm2(tmp)<<std::endl;
-
-  LittleDiracOp.M(c_src,c_res);
-  std::cout<<GridLogMessage<<" Called Little Dirac Op c_src "<< norm2(c_src) << "  c_res "<< norm2(c_res) <<std::endl;
-
-  std::cout<<GridLogMessage<<"Little dop : "<<norm2(c_res)<<std::endl;
-  //  std::cout<<GridLogMessage<<" Little "<< c_res<<std::endl;
-
-  std::cout<<GridLogMessage<<"Big dop in subspace : "<<norm2(c_proj)<<std::endl;
-  //  std::cout<<GridLogMessage<<" Big "<< c_proj<<std::endl;
-  c_proj = c_proj - c_res;
-  std::cout<<GridLogMessage<<" ldop error: "<<norm2(c_proj)<<std::endl;
-  //  std::cout<<GridLogMessage<<" error "<< c_proj<<std::endl;
-
-
-  /**********
-   * Some solvers
-   **********
-   */
-
-  // CG
-  {
-    MdagMLinearOperator<WilsonFermionD,LatticeFermion> HermOp(Dw);
-    ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
-    Dw.Mdag(src,precsrc);
-    CG(HermOp,precsrc,result);
-    result=Zero();
-  }
- 
-  ///////////////////////////////////////
-  // Coarse grid solver test
-  ///////////////////////////////////////
-
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  std::cout<<GridLogMessage<<" Coarse Grid Solve -- Level 3 "<<std::endl;
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  TrivialPrecon<CoarseVector> simple;
-  NonHermitianLinearOperator<LittleDiracOperator,CoarseVector> LinOpCoarse(LittleDiracOp);
-  ShiftedNonHermitianLinearOperator<LittleDiracOperator,CoarseVector> ShiftedLinOpCoarse(LittleDiracOp,0.001);
-  //  ShiftedNonHermitianLinearOperator<LittleDiracOperator,CoarseVector> ShiftedLinOpCoarse(LittleDiracOp,0.01);
-  //  ShiftedNonHermitianLinearOperator<LittleDiracOperator,CoarseVector> ShiftedLinOpCoarse(LinOpCoarse,0.001);
-  //  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(1.0e-4, 100, LinOpCoarse,simple,10,10); 
-  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(1.0e-1, 100, LinOpCoarse,simple,30,30); 
-  //  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(2.0e-1, 50, ShiftedLinOpCoarse,simple,50,50); 
-  L2PGCR.Level(3);
-  c_res=Zero();
-  L2PGCR(c_src,c_res);
-
-  ////////////////////////////////////////
-  // Fine grid smoother
-  ////////////////////////////////////////
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  std::cout<<GridLogMessage<<" Fine Grid Smoother -- Level 2 "<<std::endl;
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  TrivialPrecon<LatticeFermionD> simple_fine;
-
-  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermionD> SmootherGCR(0.1,1,ShiftedLinOpDw,simple_fine,6,6);
-  SmootherGCR.Level(2);
-  
-  LatticeFermionD f_src(FGrid);
-  LatticeFermionD f_res(FGrid);
-
-  f_src = one;  // 1 in every element for vector 1.
-  f_res=Zero();
-  SmootherGCR(f_src,f_res);
-
-  typedef MGPreconditioner<vSpinColourVector,  vTComplex,nbasis> TwoLevelMG;
-
-  TwoLevelMG TwoLevelPrecon(Aggregates,
-			    LinOpDw,
-			    simple_fine,
-			    SmootherGCR,
-			    LinOpCoarse,
-			    L2PGCR);
-  
-  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermion> L1PGCR(1.0e-8,1000,LinOpDw,TwoLevelPrecon,16,16);
-  L1PGCR.Level(1);
-
-  f_res=Zero();
-  L1PGCR(f_src,f_res);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage << "Done "<< std::endl;
-
-  Grid_finalize();
-  return 0;
-}
--- a/tests/debug/Test_general_coarse_wilson_svd.cc
+++ b/tests/debug/Test_general_coarse_wilson_svd.cc
@@ -1,320 +0,0 @@
-/*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./tests/Test_padded_cell.cc
-
-    Copyright (C) 2023
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#include <Grid/Grid.h>
-#include <Grid/lattice/PaddedCell.h>
-#include <Grid/stencil/GeneralLocalStencil.h>
-
-#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
-#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
-#include <Grid/algorithms/iterative/BiCGSTAB.h>
-
-using namespace std;
-using namespace Grid;
-
-template<class Fobj,class CComplex,int nbasis>
-class MGPreconditioner : public LinearFunction< Lattice<Fobj> > {
-public:
-  using LinearFunction<Lattice<Fobj> >::operator();
-
-  typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField    FineField;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
-  typedef LinearOperatorBase<FineField>                            FineOperator;
-  typedef LinearFunction    <FineField>                            FineSmoother;
-  typedef LinearOperatorBase<CoarseVector>                         CoarseOperator;
-  typedef LinearFunction    <CoarseVector>                         CoarseSolver;
-  Aggregates     & _Aggregates;
-  FineOperator   & _FineOperator;
-  FineSmoother   & _PreSmoother;
-  FineSmoother   & _PostSmoother;
-  CoarseOperator & _CoarseOperator;
-  CoarseSolver   & _CoarseSolve;
-
-  int    level;  void Level(int lv) {level = lv; };
-
-  MGPreconditioner(Aggregates &Agg,
-		   FineOperator &Fine,
-		   FineSmoother &PreSmoother,
-		   FineSmoother &PostSmoother,
-		   CoarseOperator &CoarseOperator_,
-		   CoarseSolver &CoarseSolve_)
-    : _Aggregates(Agg),
-      _FineOperator(Fine),
-      _PreSmoother(PreSmoother),
-      _PostSmoother(PostSmoother),
-      _CoarseOperator(CoarseOperator_),
-      _CoarseSolve(CoarseSolve_),
-      level(1)  {  }
-
-  virtual void operator()(const FineField &in, FineField & out) 
-  {
-    GridBase *CoarseGrid = _Aggregates.CoarseGrid;
-    //    auto CoarseGrid = _CoarseOperator.Grid();
-    CoarseVector Csrc(CoarseGrid);
-    CoarseVector Csol(CoarseGrid);
-    FineField vec1(in.Grid());
-    FineField vec2(in.Grid());
-
-    std::cout<<GridLogMessage << "Calling PreSmoother " <<std::endl;
-
-    //    std::cout<<GridLogMessage << "Calling PreSmoother input residual "<<norm2(in) <<std::endl;
-    double t;
-    // Fine Smoother
-    //    out = in;
-    out = Zero();
-    t=-usecond();
-    _PreSmoother(in,out);
-    t+=usecond();
-
-    std::cout<<GridLogMessage << "PreSmoother took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Update the residual
-    _FineOperator.Op(out,vec1);  sub(vec1, in ,vec1);   
-    //    std::cout<<GridLogMessage <<"Residual-1 now " <<norm2(vec1)<<std::endl;
-
-    // Fine to Coarse 
-    t=-usecond();
-    _Aggregates.ProjectToSubspace  (Csrc,vec1);
-    t+=usecond();
-    std::cout<<GridLogMessage << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Coarse correction
-    t=-usecond();
-    Csol = Zero();
-    _CoarseSolve(Csrc,Csol);
-    //Csol=Zero();
-    t+=usecond();
-    std::cout<<GridLogMessage << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Coarse to Fine
-    t=-usecond();  
-    //    _CoarseOperator.PromoteFromSubspace(_Aggregates,Csol,vec1);
-    _Aggregates.PromoteFromSubspace(Csol,vec1); 
-    add(out,out,vec1);
-    t+=usecond();
-    std::cout<<GridLogMessage << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Residual
-    _FineOperator.Op(out,vec1);  sub(vec1 ,in , vec1);  
-    //    std::cout<<GridLogMessage <<"Residual-2 now " <<norm2(vec1)<<std::endl;
-
-    // Fine Smoother
-    t=-usecond();
-    //    vec2=vec1;
-    vec2=Zero();
-    _PostSmoother(vec1,vec2);
-    t+=usecond();
-    std::cout<<GridLogMessage << "PostSmoother took "<< t/1000.0<< "ms" <<std::endl;
-
-    add( out,out,vec2);
-    std::cout<<GridLogMessage << "Done " <<std::endl;
-  }
-};
-
-int main (int argc, char ** argv)
-{
-  Grid_init(&argc,&argv);
-
-  const int Ls=16;
-
-  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
-  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
-
-  GridCartesian         * FGrid   = UGrid;
-  GridRedBlackCartesian * FrbGrid = UrbGrid;
-
-  // Construct a coarsened grid
-  Coordinate clatt = GridDefaultLatt();
-  for(int d=0;d<clatt.size();d++){
-    clatt[d] = clatt[d]/2;
-    //    clatt[d] = clatt[d]/4;
-  }
-  GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
-
-  std::vector<int> seeds4({1,2,3,4});
-  std::vector<int> cseeds({5,6,7,8});
-  GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4);
-  GridParallelRNG          CRNG(Coarse4d);CRNG.SeedFixedIntegers(cseeds);
-
-  LatticeFermion    src(FGrid); random(RNG4,src);
-  LatticeFermion result(FGrid); result=Zero();
-  LatticeFermion    ref(FGrid); ref=Zero();
-  LatticeFermion    tmp(FGrid);
-  LatticeFermion    err(FGrid);
-  LatticeGaugeField Umu(UGrid);
-
-  FieldMetaData header;
-  std::string file("ckpoint_lat");
-  NerscIO::readConfiguration(Umu,header,file);
-  
-  RealD csw =0.0;
-  RealD mass=-0.92;
-
-  WilsonCloverFermionD Dw(Umu,*UGrid,*UrbGrid,mass,csw,csw);
-
-  const int nbasis = 20;
-  const int cb = 0 ;
-  LatticeFermion prom(FGrid);
-
-  typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,2*nbasis> LittleDiracOperator;
-  typedef LittleDiracOperator::CoarseVector CoarseVector;
-
-  NearestStencilGeometry4D geom(Coarse4d);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
- 
-  // Warning: This routine calls Linop.Op, not LinOpo.HermOp
-  typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
-  Subspace Aggregates(Coarse4d,FGrid,cb);
-
-  MdagMLinearOperator<WilsonCloverFermionD,LatticeFermion> MdagMOpDw(Dw);
-  NonHermitianLinearOperator<WilsonCloverFermionD,LatticeFermion> LinOpDw(Dw);
-  ShiftedNonHermitianLinearOperator<WilsonCloverFermionD,LatticeFermion> ShiftedLinOpDw(Dw,0.5);
-
-  //  Aggregates.CreateSubspaceGCR(RNG4,
-  //			       LinOpDw,
-  //			       nbasis);
-  Aggregates.CreateSubspace(RNG4,MdagMOpDw,nbasis);
-  
-  typedef Aggregation<vSpinColourVector,vTComplex,2*nbasis> CombinedSubspace;
-  CombinedSubspace CombinedUV(Coarse4d,UGrid,cb);
-  for(int b=0;b<nbasis;b++){
-    Gamma G5(Gamma::Algebra::Gamma5);
-    CombinedUV.subspace[b]        = Aggregates.subspace[b];
-    CombinedUV.subspace[b+nbasis] = G5*Aggregates.subspace[b];
-  }
-
-  LittleDiracOperator LittleDiracOp(geom,FGrid,Coarse4d);
-  LittleDiracOp.CoarsenOperator(LinOpDw,CombinedUV);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"Testing coarsened operator "<<std::endl;
-  
-  CoarseVector c_src (Coarse4d);
-  CoarseVector c_res (Coarse4d);
-  CoarseVector c_proj(Coarse4d);
-
-  std::vector<LatticeFermion> subspace(2*nbasis,FGrid);
-  subspace=CombinedUV.subspace;
-
-  Complex one(1.0);
-  c_src = one;  // 1 in every element for vector 1.
-  blockPromote(c_src,err,subspace);
-
-  prom=Zero();
-  for(int b=0;b<2*nbasis;b++){
-    prom=prom+subspace[b];
-  }
-  err=err-prom; 
-  std::cout<<GridLogMessage<<"Promoted back from subspace: err "<<norm2(err)<<std::endl;
-  std::cout<<GridLogMessage<<"c_src "<<norm2(c_src)<<std::endl;
-  std::cout<<GridLogMessage<<"prom  "<<norm2(prom)<<std::endl;
-
-  LinOpDw.Op(prom,tmp);
-  blockProject(c_proj,tmp,subspace);
-  std::cout<<GridLogMessage<<" Called Big Dirac Op "<<norm2(tmp)<<std::endl;
-
-  LittleDiracOp.M(c_src,c_res);
-  std::cout<<GridLogMessage<<" Called Little Dirac Op c_src "<< norm2(c_src) << "  c_res "<< norm2(c_res) <<std::endl;
-
-  std::cout<<GridLogMessage<<"Little dop : "<<norm2(c_res)<<std::endl;
-  //  std::cout<<GridLogMessage<<" Little "<< c_res<<std::endl;
-
-  std::cout<<GridLogMessage<<"Big dop in subspace : "<<norm2(c_proj)<<std::endl;
-  //  std::cout<<GridLogMessage<<" Big "<< c_proj<<std::endl;
-  c_proj = c_proj - c_res;
-  std::cout<<GridLogMessage<<" ldop error: "<<norm2(c_proj)<<std::endl;
-  //  std::cout<<GridLogMessage<<" error "<< c_proj<<std::endl;
-
-
-  /**********
-   * Some solvers
-   **********
-   */
-
-  ///////////////////////////////////////
-  // Coarse grid solver test
-  ///////////////////////////////////////
-
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  std::cout<<GridLogMessage<<" Coarse Grid Solve -- Level 3 "<<std::endl;
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  TrivialPrecon<CoarseVector> simple;
-  NonHermitianLinearOperator<LittleDiracOperator,CoarseVector> LinOpCoarse(LittleDiracOp);
-  //  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(1.0e-4, 100, LinOpCoarse,simple,10,10); 
-  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(1.0e-2, 100, LinOpCoarse,simple,30,30); 
-  L2PGCR.Level(3);
-  c_res=Zero();
-  L2PGCR(c_src,c_res);
-
-  ////////////////////////////////////////
-  // Fine grid smoother
-  ////////////////////////////////////////
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  std::cout<<GridLogMessage<<" Fine Grid Smoother -- Level 2 "<<std::endl;
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  TrivialPrecon<LatticeFermionD> simple_fine;
-
-  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermionD> SmootherGCR(0.01,1,ShiftedLinOpDw,simple_fine,4,4);
-  SmootherGCR.Level(2);
-  
-  LatticeFermionD f_src(FGrid);
-  LatticeFermionD f_res(FGrid);
-
-  f_src = one;  // 1 in every element for vector 1.
-  f_res=Zero();
-  SmootherGCR(f_src,f_res);
-
-  typedef MGPreconditioner<vSpinColourVector,  vTComplex,2*nbasis> TwoLevelMG;
-
-  TwoLevelMG TwoLevelPrecon(CombinedUV,
-			    LinOpDw,
-			    simple_fine,
-			    SmootherGCR,
-			    LinOpCoarse,
-			    L2PGCR);
-  
-  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermion> L1PGCR(1.0e-8,1000,LinOpDw,TwoLevelPrecon,32,32);
-  L1PGCR.Level(1);
-
-  f_res=Zero();
-  L1PGCR(f_src,f_res);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage << "Done "<< std::endl;
-
-  Grid_finalize();
-  return 0;
-}
--- a/tests/debug/Test_general_coarse_wilson_svd_no5g.cc
+++ b/tests/debug/Test_general_coarse_wilson_svd_no5g.cc
@@ -1,312 +0,0 @@
-/*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./tests/Test_padded_cell.cc
-
-    Copyright (C) 2023
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#include <Grid/Grid.h>
-#include <Grid/lattice/PaddedCell.h>
-#include <Grid/stencil/GeneralLocalStencil.h>
-
-#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
-#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
-#include <Grid/algorithms/iterative/BiCGSTAB.h>
-
-using namespace std;
-using namespace Grid;
-
-template<class Fobj,class CComplex,int nbasis>
-class MGPreconditioner : public LinearFunction< Lattice<Fobj> > {
-public:
-  using LinearFunction<Lattice<Fobj> >::operator();
-
-  typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField    FineField;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
-  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
-  typedef LinearOperatorBase<FineField>                            FineOperator;
-  typedef LinearFunction    <FineField>                            FineSmoother;
-  typedef LinearOperatorBase<CoarseVector>                         CoarseOperator;
-  typedef LinearFunction    <CoarseVector>                         CoarseSolver;
-  Aggregates     & _Aggregates;
-  FineOperator   & _FineOperator;
-  FineSmoother   & _PreSmoother;
-  FineSmoother   & _PostSmoother;
-  CoarseOperator & _CoarseOperator;
-  CoarseSolver   & _CoarseSolve;
-
-  int    level;  void Level(int lv) {level = lv; };
-
-  MGPreconditioner(Aggregates &Agg,
-		   FineOperator &Fine,
-		   FineSmoother &PreSmoother,
-		   FineSmoother &PostSmoother,
-		   CoarseOperator &CoarseOperator_,
-		   CoarseSolver &CoarseSolve_)
-    : _Aggregates(Agg),
-      _FineOperator(Fine),
-      _PreSmoother(PreSmoother),
-      _PostSmoother(PostSmoother),
-      _CoarseOperator(CoarseOperator_),
-      _CoarseSolve(CoarseSolve_),
-      level(1)  {  }
-
-  virtual void operator()(const FineField &in, FineField & out) 
-  {
-    GridBase *CoarseGrid = _Aggregates.CoarseGrid;
-    //    auto CoarseGrid = _CoarseOperator.Grid();
-    CoarseVector Csrc(CoarseGrid);
-    CoarseVector Csol(CoarseGrid);
-    FineField vec1(in.Grid());
-    FineField vec2(in.Grid());
-
-    std::cout<<GridLogMessage << "Calling PreSmoother " <<std::endl;
-
-    //    std::cout<<GridLogMessage << "Calling PreSmoother input residual "<<norm2(in) <<std::endl;
-    double t;
-    // Fine Smoother
-    //    out = in;
-    out = Zero();
-    t=-usecond();
-    _PreSmoother(in,out);
-    t+=usecond();
-
-    std::cout<<GridLogMessage << "PreSmoother took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Update the residual
-    _FineOperator.Op(out,vec1);  sub(vec1, in ,vec1);   
-    //    std::cout<<GridLogMessage <<"Residual-1 now " <<norm2(vec1)<<std::endl;
-
-    // Fine to Coarse 
-    t=-usecond();
-    _Aggregates.ProjectToSubspace  (Csrc,vec1);
-    t+=usecond();
-    std::cout<<GridLogMessage << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Coarse correction
-    t=-usecond();
-    Csol = Zero();
-    _CoarseSolve(Csrc,Csol);
-    //Csol=Zero();
-    t+=usecond();
-    std::cout<<GridLogMessage << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Coarse to Fine
-    t=-usecond();  
-    //    _CoarseOperator.PromoteFromSubspace(_Aggregates,Csol,vec1);
-    _Aggregates.PromoteFromSubspace(Csol,vec1); 
-    add(out,out,vec1);
-    t+=usecond();
-    std::cout<<GridLogMessage << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
-
-    // Residual
-    _FineOperator.Op(out,vec1);  sub(vec1 ,in , vec1);  
-    //    std::cout<<GridLogMessage <<"Residual-2 now " <<norm2(vec1)<<std::endl;
-
-    // Fine Smoother
-    t=-usecond();
-    //    vec2=vec1;
-    vec2=Zero();
-    _PostSmoother(vec1,vec2);
-    t+=usecond();
-    std::cout<<GridLogMessage << "PostSmoother took "<< t/1000.0<< "ms" <<std::endl;
-
-    add( out,out,vec2);
-    std::cout<<GridLogMessage << "Done " <<std::endl;
-  }
-};
-
-int main (int argc, char ** argv)
-{
-  Grid_init(&argc,&argv);
-
-  const int Ls=16;
-
-  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
-  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
-
-  GridCartesian         * FGrid   = UGrid;
-  GridRedBlackCartesian * FrbGrid = UrbGrid;
-
-  // Construct a coarsened grid
-  Coordinate clatt = GridDefaultLatt();
-  for(int d=0;d<clatt.size();d++){
-    clatt[d] = clatt[d]/2;
-    //    clatt[d] = clatt[d]/4;
-  }
-  GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
-
-  std::vector<int> seeds4({1,2,3,4});
-  std::vector<int> cseeds({5,6,7,8});
-  GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4);
-  GridParallelRNG          CRNG(Coarse4d);CRNG.SeedFixedIntegers(cseeds);
-
-  LatticeFermion    src(FGrid); random(RNG4,src);
-  LatticeFermion result(FGrid); result=Zero();
-  LatticeFermion    ref(FGrid); ref=Zero();
-  LatticeFermion    tmp(FGrid);
-  LatticeFermion    err(FGrid);
-  LatticeGaugeField Umu(UGrid);
-
-  FieldMetaData header;
-  std::string file("ckpoint_lat");
-  NerscIO::readConfiguration(Umu,header,file);
-  
-  RealD csw =0.0;
-  RealD mass=-0.92;
-
-  WilsonCloverFermionD Dw(Umu,*UGrid,*UrbGrid,mass,csw,csw);
-
-  const int nbasis = 40;
-  const int cb = 0 ;
-  LatticeFermion prom(FGrid);
-
-  typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator;
-  typedef LittleDiracOperator::CoarseVector CoarseVector;
-
-  NearestStencilGeometry4D geom(Coarse4d);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
- 
-  // Warning: This routine calls Linop.Op, not LinOpo.HermOp
-  typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
-  Subspace Aggregates(Coarse4d,FGrid,cb);
-
-  MdagMLinearOperator<WilsonCloverFermionD,LatticeFermion> MdagMOpDw(Dw);
-  NonHermitianLinearOperator<WilsonCloverFermionD,LatticeFermion> LinOpDw(Dw);
-  ShiftedNonHermitianLinearOperator<WilsonCloverFermionD,LatticeFermion> ShiftedLinOpDw(Dw,0.5);
-
-  //  Aggregates.CreateSubspaceGCR(RNG4,
-  //			       LinOpDw,
-  //			       nbasis);
-  Aggregates.CreateSubspace(RNG4,MdagMOpDw,nbasis);
-
-  LittleDiracOperator LittleDiracOp(geom,FGrid,Coarse4d);
-  LittleDiracOp.CoarsenOperator(LinOpDw,Aggregates);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"Testing coarsened operator "<<std::endl;
-  
-  CoarseVector c_src (Coarse4d);
-  CoarseVector c_res (Coarse4d);
-  CoarseVector c_proj(Coarse4d);
-
-  std::vector<LatticeFermion> subspace(nbasis,FGrid);
-  subspace=Aggregates.subspace;
-
-  Complex one(1.0);
-  c_src = one;  // 1 in every element for vector 1.
-  blockPromote(c_src,err,subspace);
-
-  prom=Zero();
-  for(int b=0;b<nbasis;b++){
-    prom=prom+subspace[b];
-  }
-  err=err-prom; 
-  std::cout<<GridLogMessage<<"Promoted back from subspace: err "<<norm2(err)<<std::endl;
-  std::cout<<GridLogMessage<<"c_src "<<norm2(c_src)<<std::endl;
-  std::cout<<GridLogMessage<<"prom  "<<norm2(prom)<<std::endl;
-
-  LinOpDw.Op(prom,tmp);
-  blockProject(c_proj,tmp,subspace);
-  std::cout<<GridLogMessage<<" Called Big Dirac Op "<<norm2(tmp)<<std::endl;
-
-  LittleDiracOp.M(c_src,c_res);
-  std::cout<<GridLogMessage<<" Called Little Dirac Op c_src "<< norm2(c_src) << "  c_res "<< norm2(c_res) <<std::endl;
-
-  std::cout<<GridLogMessage<<"Little dop : "<<norm2(c_res)<<std::endl;
-  //  std::cout<<GridLogMessage<<" Little "<< c_res<<std::endl;
-
-  std::cout<<GridLogMessage<<"Big dop in subspace : "<<norm2(c_proj)<<std::endl;
-  //  std::cout<<GridLogMessage<<" Big "<< c_proj<<std::endl;
-  c_proj = c_proj - c_res;
-  std::cout<<GridLogMessage<<" ldop error: "<<norm2(c_proj)<<std::endl;
-  //  std::cout<<GridLogMessage<<" error "<< c_proj<<std::endl;
-
-
-  /**********
-   * Some solvers
-   **********
-   */
-
-  ///////////////////////////////////////
-  // Coarse grid solver test
-  ///////////////////////////////////////
-
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  std::cout<<GridLogMessage<<" Coarse Grid Solve -- Level 3 "<<std::endl;
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  TrivialPrecon<CoarseVector> simple;
-  NonHermitianLinearOperator<LittleDiracOperator,CoarseVector> LinOpCoarse(LittleDiracOp);
-  //  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(1.0e-4, 100, LinOpCoarse,simple,10,10); 
-  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector>  L2PGCR(1.0e-2, 100, LinOpCoarse,simple,30,30); 
-  L2PGCR.Level(3);
-  c_res=Zero();
-  L2PGCR(c_src,c_res);
-
-  ////////////////////////////////////////
-  // Fine grid smoother
-  ////////////////////////////////////////
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  std::cout<<GridLogMessage<<" Fine Grid Smoother -- Level 2 "<<std::endl;
-  std::cout<<GridLogMessage<<"******************* "<<std::endl;
-  TrivialPrecon<LatticeFermionD> simple_fine;
-
-  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermionD> SmootherGCR(0.01,1,ShiftedLinOpDw,simple_fine,6,6);
-  SmootherGCR.Level(2);
-  
-  LatticeFermionD f_src(FGrid);
-  LatticeFermionD f_res(FGrid);
-
-  f_src = one;  // 1 in every element for vector 1.
-  f_res=Zero();
-  SmootherGCR(f_src,f_res);
-
-  typedef MGPreconditioner<vSpinColourVector,  vTComplex,nbasis> TwoLevelMG;
-
-  TwoLevelMG TwoLevelPrecon(Aggregates,
-			    LinOpDw,
-			    simple_fine,
-			    SmootherGCR,
-			    LinOpCoarse,
-			    L2PGCR);
-  
-  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermion> L1PGCR(1.0e-8,1000,LinOpDw,TwoLevelPrecon,32,32);
-  L1PGCR.Level(1);
-
-  f_res=Zero();
-  L1PGCR(f_src,f_res);
-
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
-  std::cout<<GridLogMessage<<std::endl;
-  std::cout<<GridLogMessage << "Done "<< std::endl;
-
-  Grid_finalize();
-  return 0;
-}
--- a/tests/debug/Test_padded_cell_staple.cc
+++ b/tests/debug/Test_padded_cell_staple.cc
@@ -490,7 +490,7 @@ public:

 	    }
 	  }
-	  assert(s==nshift);
+	  GRID_ASSERT(s==nshift);
 	  coalescedWrite(gStaple_v[ss],stencil_ss);
 	}
 	);
--- a/tests/disable_tests_without_instantiations.h
+++ b/tests/disable_tests_without_instantiations.h
@@ -1,16 +0,0 @@
-#include <Grid/Grid.h>
-
-#pragma once
-
-#ifndef ENABLE_FERMION_INSTANTIATIONS
-#include <iostream>
-
-int main(void) {
-  std::cout << "This build of Grid was configured to exclude fermion instantiations, "
-	    << "which this test relies on. "
-	    << "Please reconfigure and rebuild Grid with --enable-fermion-instantiations"
-	    << "to run this test."
-	    << std::endl;
-  return 1;
-}
-#endif
Author	SHA1	Message	Date
Peter Boyle	7132a4fd28	Update	2025-12-02 23:22:42 +00:00
Peter Boyle	e8057d6b4a	Updated for verbose on host vs. device side csum	2025-12-02 23:15:32 +00:00
Peter Boyle	973584e039	Update on aurora	2025-12-02 22:24:54 +00:00
Peter Boyle	ea46c2dc3c	config command	2025-12-02 16:01:37 -05:00
Peter Boyle	50bcd76fc1	Changes to help with error logging on aurora -- triage MPI / Slingshot vs. host-device / SYCL on checksum error	2025-12-02 15:51:29 -05:00