mirror of https://github.com/paboyle/Grid.git synced 2025-06-22 17:52:02 +01:00

Compare commits


9 Commits

SHA1 Message Date
09146cfc43 Profiling temporary code until optimised 2023-06-15 06:54:10 -04:00
a450e96827 Optional checkpoint smeared configs for FTHMC 2023-06-14 04:54:29 -04:00
0f3678b9be Additional tests 2023-06-13 11:57:11 -04:00
8dd8338e14 Hot start should be properly Hot 2023-06-13 11:56:37 -04:00
11e0dc9851 Ta project 2023-06-13 11:56:11 -04:00
f4ef6dae43 Keep methods virtual 2023-06-13 11:55:05 -04:00
b6e147372b Clean up 2023-06-13 11:54:11 -04:00
3a4a662dc6 Integrator over to smeared force structure 2023-06-13 11:53:38 -04:00
8d06bda6fb Smeared action virtual class 2023-06-13 11:49:09 -04:00
21 changed files with 41 additions and 840 deletions

View File

@@ -419,15 +419,14 @@ until convergence
}
}
if ( Nconv < Nstop ) {
if ( Nconv < Nstop )
std::cout << GridLogIRL << "Nconv ("<<Nconv<<") < Nstop ("<<Nstop<<")"<<std::endl;
std::cout << GridLogIRL << "returning Nstop vectors, the last "<< Nstop-Nconv << "of which might meet convergence criterion only approximately" <<std::endl;
}
eval=eval2;
//Keep only converged
eval.resize(Nstop);// was Nconv
evec.resize(Nstop,grid);// was Nconv
eval.resize(Nconv);// Nstop?
evec.resize(Nconv,grid);// Nstop?
basisSortInPlace(evec,eval,reverse);
}
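For orientation, the hunk above is the point where the implicitly restarted Lanczos hands back its eigenpairs. A minimal sketch of that bookkeeping, with std::vector standing in for Grid's containers and every name illustrative:

// Sketch of the convergence bookkeeping in the hunk above (illustrative).
#include <algorithm>
#include <iostream>
#include <vector>

void trimToConverged(std::vector<double> &eval,
                     std::vector<std::vector<double>> &evec,
                     int Nconv, int Nstop) {
  if (Nconv < Nstop)
    std::cout << "Nconv (" << Nconv << ") < Nstop (" << Nstop << ")\n";
  // The diff toggles between resizing to Nstop (always return the requested
  // number of vectors) and Nconv (return only those that converged).
  eval.resize(Nconv);
  evec.resize(Nconv);
  // Grid's basisSortInPlace then sorts eval while keeping evec paired with
  // it; a plain sort of eval alone stands in for that here.
  std::sort(eval.begin(), eval.end());
}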

View File

@@ -604,8 +604,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
typedef struct { int fd; pid_t pid ; ze_ipc_mem_handle_t ze; } clone_mem_t;
auto zeDevice = cl::sycl::get_native<cl::sycl::backend::ext_oneapi_level_zero>(theGridAccelerator->get_device());
auto zeContext = cl::sycl::get_native<cl::sycl::backend::ext_oneapi_level_zero>(theGridAccelerator->get_context());
auto zeDevice = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_device());
auto zeContext = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_context());
ze_ipc_mem_handle_t ihandle;
clone_mem_t handle;
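This hunk tracks a rename of the SYCL interop backend enum between oneAPI releases; the handles it fetches feed the Level Zero IPC path. A minimal sketch of the same query, assuming a recent DPC++ compiler and a plain sycl::queue in place of theGridAccelerator:

// Sketch: native Level Zero handles from SYCL (assumes recent oneAPI DPC++).
#include <sycl/sycl.hpp>
#include <level_zero/ze_api.h>

void show_native_handles(sycl::queue &q) {
  ze_device_handle_t zeDevice =
      sycl::get_native<sycl::backend::ext_oneapi_level_zero>(q.get_device());
  ze_context_handle_t zeContext =
      sycl::get_native<sycl::backend::ext_oneapi_level_zero>(q.get_context());
  (void)zeDevice;  // in the Grid code these feed Level Zero IPC calls
  (void)zeContext; // such as zeMemGetIpcHandle
}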

View File

@@ -423,6 +423,7 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
#define KERNEL_CALL_EXT(A) \
const uint64_t NN = Nsite*Ls; \
const uint64_t sz = st.surface_list.size(); \
auto ptr = &st.surface_list[0]; \
accelerator_forNB( ss, sz, Simd::Nsimd(), { \
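KERNEL_CALL_EXT launches the kernel only over the stencil's precomputed surface list instead of all Nsite*Ls sites. The same pattern reduced to a plain C++ loop, with an ordinary callable standing in for accelerator_forNB and all names illustrative:

// Sketch: run a kernel body only on surface sites from a precomputed list.
#include <cstdint>
#include <vector>

template <class Kernel>
void call_on_surface(const std::vector<uint64_t> &surface_list, Kernel body) {
  const uint64_t sz = surface_list.size();
  const uint64_t *ptr = surface_list.data();
  for (uint64_t ss = 0; ss < sz; ss++) {
    body(ptr[ss]); // apply the site kernel at surface site ptr[ss] only
  }
}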

View File

@@ -40,20 +40,18 @@ Lattice<iScalar<iScalar<iScalar<Vec> > > > Determinant(const Lattice<iScalar<iSc
GridBase *grid=Umu.Grid();
auto lvol = grid->lSites();
Lattice<iScalar<iScalar<iScalar<Vec> > > > ret(grid);
typedef typename Vec::scalar_type scalar;
autoView(Umu_v,Umu,CpuRead);
autoView(ret_v,ret,CpuWrite);
thread_for(site,lvol,{
Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N);
Coordinate lcoor;
grid->LocalIndexToLocalCoor(site, lcoor);
iScalar<iScalar<iMatrix<scalar, N> > > Us;
iScalar<iScalar<iMatrix<ComplexD, N> > > Us;
peekLocalSite(Us, Umu_v, lcoor);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
scalar tmp= Us()()(i,j);
ComplexD ztmp(real(tmp),imag(tmp));
EigenU(i,j)=ztmp;
EigenU(i,j) = Us()()(i,j);
}}
ComplexD detD = EigenU.determinant();
typename Vec::scalar_type det(detD.real(),detD.imag());
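The hunk fixes the scalar type to ComplexD so each colour-matrix entry assigns straight into the Eigen matrix. A self-contained sketch of the determinant step (the matrix contents here are made up):

// Sketch: per-site determinant via Eigen, as in the loop above.
#include <Eigen/Dense>
#include <complex>
#include <iostream>

int main() {
  const int N = 3;
  Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Identity(N, N);
  EigenU(0, 1) = std::complex<double>(0.5, -0.25); // stand-in for Us()()(i,j)
  std::complex<double> detD = EigenU.determinant();
  std::cout << "det = " << detD << "\n";
  return 0;
}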

View File

@@ -705,7 +705,7 @@ public:
}
}
}
std::cout << GridLogDebug << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
std::cout << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
}
/// Introduce a block structure and switch off comms on boundaries
void DirichletBlock(const Coordinate &dirichlet_block)

View File

@@ -90,12 +90,10 @@ template<class vtype,int N> accelerator_inline iVector<vtype,N> ProjectOnGroup(c
template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>
accelerator_inline iMatrix<vtype,N> ProjectOnGroup(const iMatrix<vtype,N> &arg)
{
typedef typename iMatrix<vtype,N>::scalar_type scalar;
// need a check for the group type?
iMatrix<vtype,N> ret(arg);
vtype nrm;
vtype inner;
scalar one(1.0);
for(int c1=0;c1<N;c1++){
// Normalises row c1
@@ -104,7 +102,7 @@ accelerator_inline iMatrix<vtype,N> ProjectOnGroup(const iMatrix<vtype,N> &arg)
inner += innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]);
nrm = sqrt(inner);
nrm = one/nrm;
nrm = 1.0/nrm;
for(int c2=0;c2<N;c2++)
ret._internal[c1][c2]*= nrm;
@@ -129,7 +127,7 @@ accelerator_inline iMatrix<vtype,N> ProjectOnGroup(const iMatrix<vtype,N> &arg)
inner += innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]);
nrm = sqrt(inner);
nrm = one/nrm;
nrm = 1.0/nrm;
for(int c2=0;c2<N;c2++)
ret._internal[c1][c2]*= nrm;
}
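Both hunks replace the typed scalar one(1.0) with the literal 1.0 in the row rescaling. The rescaling itself, sketched with std::complex; the full ProjectOnGroup also orthogonalises each row against the earlier ones, which is omitted here:

// Sketch: normalise each row by 1/sqrt(<row,row>), as in the hunks above.
#include <cmath>
#include <complex>

template <int N>
void normalise_rows(std::complex<double> (&m)[N][N]) {
  for (int c1 = 0; c1 < N; c1++) {
    double inner = 0.0;
    for (int c2 = 0; c2 < N; c2++)
      inner += std::norm(m[c1][c2]); // innerProduct of row c1 with itself
    double nrm = 1.0 / std::sqrt(inner); // the "nrm = 1.0/nrm" step
    for (int c2 = 0; c2 < N; c2++)
      m[c1][c2] *= nrm;
  }
}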

View File

@@ -55,7 +55,7 @@ template<class vtype, int N> accelerator_inline iVector<vtype, N> Exponentiate(c
// Specialisation: Cayley-Hamilton exponential for SU(3)
#if 0
#ifndef GRID_ACCELERATED
template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr>
accelerator_inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP )
{
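With the specialisation guarded out by #if 0 / #ifndef GRID_ACCELERATED, the generic truncated-Taylor Exponentiate is used instead. A sketch of that fallback in Horner form, written for Eigen matrices rather than Grid's iMatrix, with Nexp = 12 assumed as a stand-in for DEFAULT_MAT_EXP:

// Sketch: exp(alpha*arg) by truncated Taylor series in Horner form.
#include <Eigen/Dense>

Eigen::Matrix3cd Exponentiate(const Eigen::Matrix3cd &arg, double alpha,
                              int Nexp = 12) { // DEFAULT_MAT_EXP stand-in
  Eigen::Matrix3cd unit = Eigen::Matrix3cd::Identity();
  Eigen::Matrix3cd temp = unit;
  for (int i = Nexp; i >= 1; --i)
    temp = unit + temp * arg * (alpha / double(i)); // temp = 1 + (x/i)*temp
  return temp;
}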

View File

@@ -1,224 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Copyright (C) 2023
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/qcd/smearing/GaugeConfigurationMasked.h>
#include <Grid/qcd/smearing/JacobianAction.h>
using namespace Grid;
int main(int argc, char **argv)
{
std::cout << std::setprecision(12);
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
// here make a routine to print all the relevant information on the run
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
// Typedefs to simplify notation
typedef WilsonImplR FermionImplPolicy;
typedef MobiusFermionD FermionAction;
typedef typename FermionAction::FermionField FermionField;
typedef Grid::XmlReader Serialiser;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
IntegratorParameters MD;
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
// MD.name = std::string("Leap Frog");
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
// MD.name = std::string("Force Gradient");
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
MD.name = std::string("MinimumNorm2");
MD.MDsteps = 12;
MD.trajL = 1.0;
HMCparameters HMCparams;
HMCparams.StartTrajectory = 0;
HMCparams.Trajectories = 200;
HMCparams.NoMetropolisUntil= 20;
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
HMCparams.StartingType =std::string("HotStart");
HMCparams.MD = MD;
HMCWrapper TheHMC(HMCparams);
// Grid from the command line arguments --grid and --mpi
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
CheckpointerParameters CPparams;
CPparams.config_prefix = "ckpoint_EODWF_lat";
CPparams.smeared_prefix = "ckpoint_EODWF_lat_smr";
CPparams.rng_prefix = "ckpoint_EODWF_rng";
CPparams.saveInterval = 1;
CPparams.saveSmeared = true;
CPparams.format = "IEEE64BIG";
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
RNGModuleParameters RNGpar;
RNGpar.serial_seeds = "1 2 3 4 5";
RNGpar.parallel_seeds = "6 7 8 9 10";
TheHMC.Resources.SetRNGSeeds(RNGpar);
// Construct observables
// here there is too much indirection
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
TheHMC.Resources.AddObservable<PlaqObs>();
//////////////////////////////////////////////
const int Ls = 16;
Real beta = 2.13;
Real light_mass = 0.01;
Real strange_mass = 0.04;
Real pv_mass = 1.0;
RealD M5 = 1.8;
RealD b = 1.0; // Scale factor two
RealD c = 0.0;
OneFlavourRationalParams OFRp;
OFRp.lo = 1.0e-2;
OFRp.hi = 64;
OFRp.MaxIter = 10000;
OFRp.tolerance= 1.0e-10;
OFRp.degree = 14;
OFRp.precision= 40;
std::vector<Real> hasenbusch({ 0.1 });
auto GridPtr = TheHMC.Resources.GetCartesian();
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
IwasakiGaugeActionR GaugeAction(beta);
// temporarily need a gauge field
LatticeGaugeField U(GridPtr);
LatticeGaugeField Uhot(GridPtr);
// These lines are unnecessary if BC are all periodic
std::vector<Complex> boundary = {1,1,1,-1};
FermionAction::ImplParams Params(boundary);
double StoppingCondition = 1e-10;
double MaxCGIterations = 30000;
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
bool ApplySmearing = true;
////////////////////////////////////
// Collect actions
////////////////////////////////////
ActionLevel<HMCWrapper::Field> Level1(1);
ActionLevel<HMCWrapper::Field> Level2(2);
ActionLevel<HMCWrapper::Field> Level3(4);
////////////////////////////////////
// Strange action
////////////////////////////////////
MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
MobiusEOFAFermionD Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c);
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy>
EOFA(Strange_Op_L, Strange_Op_R,
CG,
CG, CG,
CG, CG,
OFRp, false);
EOFA.is_smeared = ApplySmearing;
Level1.push_back(&EOFA);
////////////////////////////////////
// up down action
////////////////////////////////////
std::vector<Real> light_den;
std::vector<Real> light_num;
int n_hasenbusch = hasenbusch.size();
light_den.push_back(light_mass);
for(int h=0;h<n_hasenbusch;h++){
light_den.push_back(hasenbusch[h]);
light_num.push_back(hasenbusch[h]);
}
light_num.push_back(pv_mass);
std::vector<FermionAction *> Numerators;
std::vector<FermionAction *> Denominators;
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
for(int h=0;h<n_hasenbusch+1;h++){
std::cout << GridLogMessage << " 2f quotient Action "<< light_num[h] << " / " << light_den[h]<< std::endl;
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params));
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params));
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],CG,CG));
}
for(int h=0;h<n_hasenbusch+1;h++){
Quotients[h]->is_smeared = ApplySmearing;
Level1.push_back(Quotients[h]);
}
/////////////////////////////////////////////////////////////
// lnDetJacobianAction
/////////////////////////////////////////////////////////////
double rho = 0.1; // smearing parameter
int Nsmear = 1; // number of smearing levels - must be multiple of 2Nd
int Nstep = 8*Nsmear; // number of smearing levels - must be multiple of 2Nd
Smear_Stout<HMCWrapper::ImplPolicy> Stout(rho);
SmearedConfigurationMasked<HMCWrapper::ImplPolicy> SmearingPolicy(GridPtr, Nstep, Stout);
JacobianAction<HMCWrapper::ImplPolicy> Jacobian(&SmearingPolicy);
if( ApplySmearing ) Level2.push_back(&Jacobian);
std::cout << GridLogMessage << " Built the Jacobian "<< std::endl;
/////////////////////////////////////////////////////////////
// Gauge action
/////////////////////////////////////////////////////////////
// GaugeAction.is_smeared = ApplySmearing;
GaugeAction.is_smeared = true;
Level3.push_back(&GaugeAction);
std::cout << GridLogMessage << " ************************************************"<< std::endl;
std::cout << GridLogMessage << " Action complete -- NO FERMIONS FOR NOW -- FIXME"<< std::endl;
std::cout << GridLogMessage << " ************************************************"<< std::endl;
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage << " Running the FT HMC "<< std::endl;
TheHMC.TheAction.push_back(Level1);
TheHMC.TheAction.push_back(Level2);
TheHMC.TheAction.push_back(Level3);
TheHMC.Run(SmearingPolicy); // for smearing
Grid_finalize();
} // main
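The lattice volume and MPI decomposition come from the command line, per the AddFourDimGrid comment above. A hypothetical invocation, assuming the file builds to a binary named FTHMC2p1f and using the standard Grid flags that also appear in the job scripts below:

./FTHMC2p1f --grid 16.16.16.32 --mpi 1.1.1.1 --accelerator-threads 8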

View File

@@ -146,8 +146,6 @@ NAMESPACE_END(Grid);
int main(int argc, char **argv) {
using namespace Grid;
std::cout << " Grid Initialise "<<std::endl;
Grid_init(&argc, &argv);
CartesianCommunicator::BarrierWorld();
@@ -172,24 +170,24 @@ int main(int argc, char **argv) {
IntegratorParameters MD;
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
// MD.name = std::string("Leap Frog");
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
// MD.name = std::string("Force Gradient");
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
MD.name = std::string("MinimumNorm2");
typedef GenericHMCRunner<ForceGradient> HMCWrapper;
MD.name = std::string("Force Gradient");
//typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
// MD.name = std::string("MinimumNorm2");
// TrajL = 2
// 4/2 => 0.6 dH
// 3/3 => 0.8 dH .. depth 3, slower
//MD.MDsteps = 4;
MD.MDsteps = 14;
MD.MDsteps = 12;
MD.trajL = 0.5;
HMCparameters HMCparams;
HMCparams.StartTrajectory = 1077;
HMCparams.Trajectories = 20;
HMCparams.Trajectories = 1;
HMCparams.NoMetropolisUntil= 0;
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
HMCparams.StartingType =std::string("ColdStart");
// HMCparams.StartingType =std::string("CheckpointStart");
// HMCparams.StartingType =std::string("ColdStart");
HMCparams.StartingType =std::string("CheckpointStart");
HMCparams.MD = MD;
HMCWrapper TheHMC(HMCparams);
@@ -225,7 +223,7 @@ int main(int argc, char **argv) {
Real pv_mass = 1.0;
// std::vector<Real> hasenbusch({ 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
// std::vector<Real> hasenbusch({ light_mass, 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
std::vector<Real> hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 }); // Updated
std::vector<Real> hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
// std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
auto GridPtr = TheHMC.Resources.GetCartesian();
@@ -277,10 +275,10 @@ int main(int argc, char **argv) {
// double StoppingCondition = 1e-14;
// double MDStoppingCondition = 1e-9;
double StoppingCondition = 1e-9;
double MDStoppingCondition = 1e-8;
double MDStoppingConditionLoose = 1e-8;
double MDStoppingConditionStrange = 1e-8;
double StoppingCondition = 1e-8;
double MDStoppingCondition = 1e-7;
double MDStoppingConditionLoose = 1e-7;
double MDStoppingConditionStrange = 1e-7;
double MaxCGIterations = 300000;
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations);

View File

@@ -1,44 +0,0 @@
#!/bin/bash -l
#SBATCH --job-name=bench_lehner
#SBATCH --partition=small-g
#SBATCH --nodes=2
#SBATCH --ntasks-per-node=8
#SBATCH --cpus-per-task=7
#SBATCH --gpus-per-node=8
#SBATCH --time=00:10:00
#SBATCH --account=project_465000546
#SBATCH --gpu-bind=none
#SBATCH --exclusive
#SBATCH --mem=0
CPU_BIND="map_cpu:48,56,32,40,16,24,1,8"
echo $CPU_BIND
cat << EOF > select_gpu
#!/bin/bash
export GPU_MAP=(0 1 2 3 4 5 6 7)
export GPU=\${GPU_MAP[\$SLURM_LOCALID]}
export HIP_VISIBLE_DEVICES=\$GPU
unset ROCR_VISIBLE_DEVICES
echo RANK \$SLURM_LOCALID using GPU \$GPU
exec \$*
EOF
chmod +x ./select_gpu
root=/scratch/project_465000546/boylepet/Grid/systems/Lumi
source ${root}/sourceme.sh
export OMP_NUM_THREADS=7
export MPICH_GPU_SUPPORT_ENABLED=1
export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
for vol in 16.16.16.64 32.32.32.64 32.32.32.128
do
srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.ov.$vol
#srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.ov.$vol
srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.seq.$vol
#srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.seq.$vol
done

View File

@@ -3,28 +3,30 @@ spack load gmp
spack load mpfr
CLIME=`spack find --paths c-lime | grep c-lime| cut -c 15-`
GMP=`spack find --paths gmp | grep gmp | cut -c 12-`
MPFR=`spack find --paths mpfr | grep mpfr | cut -c 13-`
echo clime X$CLIME
echo gmp X$GMP
echo mpfr X$MPFR
MPFR=`spack find --paths mpfr | grep mpfr | cut -c 12-`
echo clime $CLIME
echo gmp $GMP
echo mpfr $MPFR
../../configure \
--enable-comms=mpi-auto \
../../configure --enable-comms=mpi-auto \
--with-lime=$CLIME \
--enable-unified=no \
--enable-shm=nvlink \
--enable-tracing=timer \
--enable-accelerator=hip \
--enable-gen-simd-width=64 \
--enable-simd=GPU \
--enable-accelerator-cshift \
--with-gmp=$GMP \
--with-mpfr=$MPFR \
--disable-accelerator-cshift \
--with-gmp=$OLCF_GMP_ROOT \
--with-fftw=$FFTW_DIR/.. \
--with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \
--disable-fermion-reps \
--disable-gparity \
CXX=hipcc MPICXX=mpicxx \
CXXFLAGS="-fPIC --offload-arch=gfx90a -I/opt/rocm/include/ -std=c++14 -I/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/include" \
LDFLAGS="-L/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/lib -lmpi -L/opt/cray/pe/mpich/8.1.23/gtl/lib -lmpi_gtl_hsa -lamdhip64 -fopenmp"
CXXFLAGS="-fPIC -I{$ROCM_PATH}/include/ -std=c++14 -I${MPICH_DIR}/include -L/lib64 --amdgpu-target=gfx90a" \
LDFLAGS="-L/lib64 -L/opt/rocm-5.2.0/lib/ -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lamdhip64 --amdgpu-target=gfx90a "
#--enable-simd=GPU-RRII \

View File

@@ -1,5 +1 @@
source ~/spack/share/spack/setup-env.sh
module load CrayEnv LUMI/22.12 partition/G cray-fftw/3.3.10.1 rocm
spack load c-lime
spack load gmp
spack load mpfr
module load CrayEnv LUMI/22.12 partition/G cray-fftw/3.3.10.1

View File

@@ -1,53 +0,0 @@
1. Prerequisites:
===================
Make sure you have the latest Intel icpx release loaded (via modules or similar)
Make sure you have SYCL aware MPICH or Intel MPI loaded (assumed as mpicxx)
2. Obtain Grid:
===================
bash$
git clone https://github.com/paboyle/Grid
cd Grid
./bootstrap.sh
cd systems/PVC
3. Build Grid:
===================
Here, configure command is stored in file config-command:
bash$
../../configure \
--enable-simd=GPU \
--enable-gen-simd-width=64 \
--enable-comms=mpi-auto \
--enable-accelerator-cshift \
--disable-gparity \
--disable-fermion-reps \
--enable-shm=nvlink \
--enable-accelerator=sycl \
--enable-unified=no \
MPICXX=mpicxx \
CXX=icpx \
LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader " \
CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare "
make all
4. Run a benchmark:
===================
*** Assumes interactive access to node. ***
run Benchmark_dwf_fp32 using benchmarks/bench.sh
bash$
cd benchmarks
./bench.sh

View File

@@ -1,18 +0,0 @@
#!/bin/bash
export EnableImplicitScaling=0
export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
export ZE_AFFINITY_MASK=$gpu_id.$tile_id
export ONEAPI_DEVICE_FILTER=gpu,level_zero
export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=0
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:2
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY=1
mpiexec -launcher ssh -n 1 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 32.32.32.32 --accelerator-threads 16 --shm-mpi 1 --shm 2048 --device-mem 32768 | tee 1tile.log
mpiexec -launcher ssh -n 2 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads 16 --shm-mpi 1 --shm 2048 --device-mem 32768 | tee 2tile.log
#mpiexec -launcher ssh -n 4 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.2.2 --grid 16.16.64.64 --accelerator-threads 16 --shm-mpi 0 --shm 2048 --device-mem 32768 | tee 4tile.log
#mpiexec -launcher ssh -n 8 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.2.4 --grid 16.16.64.128 --accelerator-threads 16 --shm-mpi 0 --shm 2048 --device-mem 32768 | tee 8tile.log

View File

@@ -1,13 +0,0 @@
#!/bin/bash
num_tile=2
gpu_id=$(( (MPI_LOCAL_RANKID % num_tile ) ))
tile_id=$((MPI_LOCAL_RANKID / num_tile))
export ZE_AFFINITY_MASK=$gpu_id.$tile_id
echo "local rank $MPI_LOCALRANKID ; ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK"
"$@"

View File

@@ -1,15 +0,0 @@
../../configure \
--enable-simd=GPU \
--enable-gen-simd-width=64 \
--enable-comms=mpi-auto \
--enable-accelerator-cshift \
--disable-gparity \
--disable-fermion-reps \
--enable-shm=nvlink \
--enable-accelerator=sycl \
--enable-unified=no \
MPICXX=mpicxx \
CXX=icpx \
LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader " \
CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare "

View File

@@ -1,3 +0,0 @@
export https_proxy=http://proxy-chain.intel.com:911
module load intel-release
module load intel/mpich

View File

@@ -1,46 +0,0 @@
#!/bin/bash
#PBS -l select=1:system=sunspot,place=scatter
#PBS -A LatticeQCD_aesp_CNDA
#PBS -l walltime=01:00:00
#PBS -N dwf
#PBS -k doe
HDIR=/home/paboyle/
module use /soft/testing/modulefiles/
module load intel-UMD23.05.25593.11/23.05.25593.11
module load tools/pti-gpu
export LD_LIBRARY_PATH=$HDIR/tools/lib64:$LD_LIBRARY_PATH
export PATH=$HDIR/tools/bin:$PATH
export TZ='/usr/share/zoneinfo/US/Central'
export OMP_PROC_BIND=spread
export OMP_NUM_THREADS=3
unset OMP_PLACES
cd $PBS_O_WORKDIR
qsub jobscript.pbs
echo Jobid: $PBS_JOBID
echo Running on host `hostname`
echo Running on nodes `cat $PBS_NODEFILE`
echo NODES
cat $PBS_NODEFILE
NNODES=`wc -l < $PBS_NODEFILE`
NRANKS=12 # Number of MPI ranks per node
NDEPTH=4 # Number of hardware threads per rank, spacing between MPI ranks on a node
NTHREADS=$OMP_NUM_THREADS # Number of OMP threads per rank, given to OMP_NUM_THREADS
NTOTRANKS=$(( NNODES * NRANKS ))
echo "NUM_NODES=${NNODES} TOTAL_RANKS=${NTOTRANKS} RANKS_PER_NODE=${NRANKS} THREADS_PER_RANK=${OMP_NUM_THREADS}"
echo "OMP_PROC_BIND=$OMP_PROC_BIND OMP_PLACES=$OMP_PLACES"
CMD="mpiexec -np ${NTOTRANKS} -ppn ${NRANKS} -d ${NDEPTH} --cpu-bind=depth -envall \
./gpu_tile_compact.sh \
./Benchmark_dwf_fp32 --mpi 1.1.2.6 --grid 16.32.64.192 --comms-overlap \
--shm-mpi 0 --shm 2048 --device-mem 32000 --accelerator-threads 32"

View File

@@ -1,52 +0,0 @@
#!/bin/bash
display_help() {
echo " Will map gpu tile to rank in compact and then round-robin fashion"
echo " Usage (only work for one node of ATS/PVC):"
echo " mpiexec --np N gpu_tile_compact.sh ./a.out"
echo
echo " Example 3 GPU of 2 Tiles with 7 Ranks:"
echo " 0 Rank 0.0"
echo " 1 Rank 0.1"
echo " 2 Rank 1.0"
echo " 3 Rank 1.1"
echo " 4 Rank 2.0"
echo " 5 Rank 2.1"
echo " 6 Rank 0.0"
echo
echo " Hacked together by apl@anl.gov, please contact if bug found"
exit 1
}
#This give the exact GPU count i915 knows about and I use udev to only enumerate the devices with physical presence.
#works? num_gpu=$(/usr/bin/udevadm info /sys/module/i915/drivers/pci\:i915/* |& grep -v Unknown | grep -c "P: /devices")
num_gpu=6
num_tile=2
if [ "$#" -eq 0 ] || [ "$1" == "--help" ] || [ "$1" == "-h" ] || [ "$num_gpu" = 0 ]; then
display_help
fi
gpu_id=$(( (PALS_LOCAL_RANKID / num_tile ) % num_gpu ))
tile_id=$((PALS_LOCAL_RANKID % num_tile))
unset EnableWalkerPartition
export EnableImplicitScaling=0
export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
export ZE_AFFINITY_MASK=$gpu_id.$tile_id
export ONEAPI_DEVICE_FILTER=gpu,level_zero
export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=0
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:2
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY=1
#export SYCL_PI_LEVEL_ZERO_USM_RESIDENT=1
echo "rank $PALS_RANKID ; local rank $PALS_LOCAL_RANKID ; ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK"
if [ $PALS_LOCAL_RANKID = 0 ]
then
onetrace --chrome-device-timeline "$@"
# "$@"
else
"$@"
fi

View File

@@ -1,16 +0,0 @@
TOOLS=$HOME/tools
../../configure \
--enable-simd=GPU \
--enable-gen-simd-width=64 \
--enable-comms=mpi-auto \
--enable-accelerator-cshift \
--disable-gparity \
--disable-fermion-reps \
--enable-shm=nvlink \
--enable-accelerator=sycl \
--enable-unified=no \
MPICXX=mpicxx \
CXX=icpx \
LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -lapmidg -L$TOOLS/lib64/" \
CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -I$INSTALL/include -Wno-tautological-compare -I$HOME/ -I$TOOLS/include"

View File

@@ -1,307 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_cshift.cc
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
Coordinate latt_size = GridDefaultLatt();
Coordinate simd_layout( { vComplexD::Nsimd(),1,1,1});
Coordinate mpi_layout = GridDefaultMpi();
int vol = 1;
for(int d=0;d<latt_size.size();d++){
vol = vol * latt_size[d];
}
GridCartesian GRID(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGRID(&GRID);
ComplexD ci(0.0,1.0);
std::vector<int> seeds({1,2,3,4});
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(seeds); // naughty seeding
GridParallelRNG pRNG(&GRID);
pRNG.SeedFixedIntegers(seeds);
LatticeGaugeFieldD Umu(&GRID);
SU<Nc>::ColdConfiguration(pRNG,Umu); // Unit gauge
////////////////////////////////////////////////////
// PF prop
////////////////////////////////////////////////////
LatticeFermionD src(&GRID);
gaussian(pRNG,src);
#if 1
Coordinate point(4,0);
src=Zero();
SpinColourVectorD ferm; gaussian(sRNG,ferm);
pokeSite(ferm,src,point);
#endif
{
std::cout<<"****************************************"<<std::endl;
std::cout << "Testing PartialFraction Hw kernel Mom space 4d propagator \n";
std::cout<<"****************************************"<<std::endl;
// LatticeFermionD src(&GRID); gaussian(pRNG,src);
LatticeFermionD tmp(&GRID);
LatticeFermionD ref(&GRID);
LatticeFermionD diff(&GRID);
const int Ls=48+1;
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,&GRID);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,&GRID);
RealD mass=0.1;
RealD M5 =0.8;
OverlapWilsonPartialFractionZolotarevFermionD Dov(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5,0.001,8.0);
// Momentum space prop
std::cout << " Solving by FFT and Feynman rules" <<std::endl;
bool fiveD = false; //calculate 4d free propagator
std::cout << " Free propagator " <<std::endl;
Dov.FreePropagator(src,ref,mass) ;
std::cout << " Free propagator norm "<< norm2(ref) <<std::endl;
Gamma G5(Gamma::Algebra::Gamma5);
LatticeFermionD src5(FGrid); src5=Zero();
LatticeFermionD tmp5(FGrid);
LatticeFermionD result5(FGrid); result5=Zero();
LatticeFermionD result4(&GRID);
const int sdir=0;
////////////////////////////////////////////////////////////////////////
// Import
////////////////////////////////////////////////////////////////////////
std::cout << " Free propagator Import "<< norm2(src) <<std::endl;
Dov.ImportPhysicalFermionSource (src,src5);
std::cout << " Free propagator Imported "<< norm2(src5) <<std::endl;
////////////////////////////////////////////////////////////////////////
// Conjugate gradient on normal equations system
////////////////////////////////////////////////////////////////////////
std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
Dov.Mdag(src5,tmp5);
src5=tmp5;
MdagMLinearOperator<OverlapWilsonPartialFractionZolotarevFermionD,LatticeFermionD> HermOp(Dov);
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
CG(HermOp,src5,result5);
////////////////////////////////////////////////////////////////////////
// Domain wall physical field propagator
////////////////////////////////////////////////////////////////////////
Dov.ExportPhysicalFermionSolution(result5,result4);
// From DWF4d.pdf :
//
// Dov_pf = 2/(1-m) D_cayley_ovlap [ Page 43 ]
// Dinv_cayley_ovlap = 2/(1-m) Dinv_pf
// Dinv_cayley_surface =1/(1-m) ( Dinv_cayley_ovlap - 1 ) => 2/(1-m)^2 Dinv_pf - 1/(1-m) * src [ Eq.2.67 ]
RealD scale = 2.0/(1.0-mass)/(1.0-mass);
result4 = result4 * scale;
result4 = result4 - src*(1.0/(1.0-mass)); // Subtract contact term
DumpSliceNorm("Src",src);
DumpSliceNorm("Grid",result4);
DumpSliceNorm("Fourier",ref);
std::cout << "Dov result4 "<<norm2(result4)<<std::endl;
std::cout << "Dov ref "<<norm2(ref)<<std::endl;
diff = result4- ref;
DumpSliceNorm("diff ",diff);
}
////////////////////////////////////////////////////
// Dwf prop
////////////////////////////////////////////////////
{
std::cout<<"****************************************"<<std::endl;
std::cout << "Testing Dov(Hw) Mom space 4d propagator \n";
std::cout<<"****************************************"<<std::endl;
LatticeFermionD tmp(&GRID);
LatticeFermionD ref(&GRID);
LatticeFermionD diff(&GRID);
const int Ls=48;
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,&GRID);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,&GRID);
RealD mass=0.1;
RealD M5 =0.8;
OverlapWilsonCayleyTanhFermionD Dov(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5,1.0);
// Momentum space prop
std::cout << " Solving by FFT and Feynman rules" <<std::endl;
Dov.FreePropagator(src,ref,mass) ;
Gamma G5(Gamma::Algebra::Gamma5);
LatticeFermionD src5(FGrid); src5=Zero();
LatticeFermionD tmp5(FGrid);
LatticeFermionD result5(FGrid); result5=Zero();
LatticeFermionD result4(&GRID);
const int sdir=0;
////////////////////////////////////////////////////////////////////////
// Domain wall physical field source; need D_minus
////////////////////////////////////////////////////////////////////////
/*
chi_5[0] = chiralProjectPlus(chi);
chi_5[Ls-1]= chiralProjectMinus(chi);
*/
tmp = (src + G5*src)*0.5; InsertSlice(tmp,src5, 0,sdir);
tmp = (src - G5*src)*0.5; InsertSlice(tmp,src5,Ls-1,sdir);
////////////////////////////////////////////////////////////////////////
// Conjugate gradient on normal equations system
////////////////////////////////////////////////////////////////////////
std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
Dov.Dminus(src5,tmp5);
src5=tmp5;
Dov.Mdag(src5,tmp5);
src5=tmp5;
MdagMLinearOperator<OverlapWilsonCayleyTanhFermionD,LatticeFermionD> HermOp(Dov);
ConjugateGradient<LatticeFermionD> CG(1.0e-16,10000);
CG(HermOp,src5,result5);
////////////////////////////////////////////////////////////////////////
// Domain wall physical field propagator
////////////////////////////////////////////////////////////////////////
/*
psi = chiralProjectMinus(psi_5[0]);
psi += chiralProjectPlus(psi_5[Ls-1]);
*/
ExtractSlice(tmp,result5,0 ,sdir); result4 = (tmp-G5*tmp)*0.5;
ExtractSlice(tmp,result5,Ls-1,sdir); result4 = result4+(tmp+G5*tmp)*0.5;
std::cout << " Taking difference" <<std::endl;
std::cout << "Dov result4 "<<norm2(result4)<<std::endl;
std::cout << "Dov ref "<<norm2(ref)<<std::endl;
DumpSliceNorm("Grid",result4);
DumpSliceNorm("Fourier",ref);
diff = ref - result4;
std::cout << "result - ref "<<norm2(diff)<<std::endl;
DumpSliceNorm("diff",diff);
}
{
std::cout<<"****************************************"<<std::endl;
std::cout << "Testing PartialFraction Hw kernel Mom space 4d propagator with q\n";
std::cout<<"****************************************"<<std::endl;
// LatticeFermionD src(&GRID); gaussian(pRNG,src);
LatticeFermionD tmp(&GRID);
LatticeFermionD ref(&GRID);
LatticeFermionD diff(&GRID);
const int Ls=48+1;
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,&GRID);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,&GRID);
RealD mass=0.1;
RealD M5 =0.8;
OverlapWilsonPartialFractionZolotarevFermionD Dov(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5,0.001,8.0);
// Momentum space prop
std::cout << " Solving by FFT and Feynman rules" <<std::endl;
bool fiveD = false; //calculate 4d free propagator
std::cout << " Free propagator " <<std::endl;
Dov.FreePropagator(src,ref,mass) ;
std::cout << " Free propagator norm "<< norm2(ref) <<std::endl;
Gamma G5(Gamma::Algebra::Gamma5);
LatticeFermionD src5(FGrid); src5=Zero();
LatticeFermionD tmp5(FGrid);
LatticeFermionD result5(FGrid); result5=Zero();
LatticeFermionD result4(&GRID);
const int sdir=0;
////////////////////////////////////////////////////////////////////////
// Import
////////////////////////////////////////////////////////////////////////
std::cout << " Free propagator Import "<< norm2(src) <<std::endl;
Dov.ImportPhysicalFermionSource (src,src5);
std::cout << " Free propagator Imported "<< norm2(src5) <<std::endl;
////////////////////////////////////////////////////////////////////////
// Conjugate gradient on normal equations system
////////////////////////////////////////////////////////////////////////
std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
Dov.Mdag(src5,tmp5);
src5=tmp5;
MdagMLinearOperator<OverlapWilsonPartialFractionZolotarevFermionD,LatticeFermionD> HermOp(Dov);
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
CG(HermOp,src5,result5);
////////////////////////////////////////////////////////////////////////
// Domain wall physical field propagator
////////////////////////////////////////////////////////////////////////
Dov.ExportPhysicalFermionSolution(result5,result4);
// From DWF4d.pdf :
//
// Dov_pf = 2/(1-m) D_cayley_ovlap [ Page 43 ]
// Dinv_cayley_ovlap = 2/(1-m) Dinv_pf
// Dinv_cayley_surface =1/(1-m) ( Dinv_cayley_ovlap - 1 ) => 2/(1-m)^2 Dinv_pf - 1/(1-m) * src [ Eq.2.67 ]
RealD scale = 2.0/(1.0-mass)/(1.0-mass);
result4 = result4 * scale;
result4 = result4 - src*(1.0/(1.0-mass)); // Subtract contact term
DumpSliceNorm("Src",src);
DumpSliceNorm("Grid",result4);
DumpSliceNorm("Fourier",ref);
std::cout << "Dov result4 "<<norm2(result4)<<std::endl;
std::cout << "Dov ref "<<norm2(ref)<<std::endl;
diff = result4- ref;
DumpSliceNorm("diff ",diff);
}
Grid_finalize();
}