Profiling temporary code until optimised

Optional checkpoint smeared configs for FTHMC
Additional tests
2026-01-26 19:34:44 +00:00 · 2023-06-15 06:54:10 -04:00 · 2023-06-14 04:54:29 -04:00 · 2023-06-13 11:57:11 -04:00 · 2023-06-13 11:56:37 -04:00 · 2023-06-13 11:56:11 -04:00
4 changed files with 0 additions and 338 deletions
--- a/HMC/FTHMC2p1f.cc
+++ b/HMC/FTHMC2p1f.cc
@@ -1,224 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid
-
-Copyright (C) 2023
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution
-directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Grid/Grid.h>
-#include <Grid/qcd/smearing/GaugeConfigurationMasked.h>
-#include <Grid/qcd/smearing/JacobianAction.h>
-
-using namespace Grid;
-
-int main(int argc, char **argv)
-{
-  std::cout << std::setprecision(12);
-  
-  Grid_init(&argc, &argv);
-  int threads = GridThread::GetThreads();
-  // here make a routine to print all the relevant information on the run
-  std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
-
-   // Typedefs to simplify notation
-  typedef WilsonImplR FermionImplPolicy;
-  typedef MobiusFermionD FermionAction;
-  typedef typename FermionAction::FermionField FermionField;
-
-  typedef Grid::XmlReader       Serialiser;
-
-  //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
-  IntegratorParameters MD;
-  //  typedef GenericHMCRunner<LeapFrog> HMCWrapper;
-  //  MD.name    = std::string("Leap Frog");
-  //  typedef GenericHMCRunner<ForceGradient> HMCWrapper;
-  //  MD.name    = std::string("Force Gradient");
-  typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
-  MD.name    = std::string("MinimumNorm2");
-  MD.MDsteps = 12;
-  MD.trajL   = 1.0;
-
-  HMCparameters HMCparams;
-  HMCparams.StartTrajectory  = 0;
-  HMCparams.Trajectories     = 200;
-  HMCparams.NoMetropolisUntil=  20;
-  // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
-  HMCparams.StartingType     =std::string("HotStart");
-  HMCparams.MD = MD;
-  HMCWrapper TheHMC(HMCparams);
-
-  // Grid from the command line arguments --grid and --mpi
-  TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
-
-  CheckpointerParameters CPparams;
-  CPparams.config_prefix = "ckpoint_EODWF_lat";
-  CPparams.smeared_prefix = "ckpoint_EODWF_lat_smr";
-  CPparams.rng_prefix    = "ckpoint_EODWF_rng";
-  CPparams.saveInterval  = 1;
-  CPparams.saveSmeared   = true;
-  CPparams.format        = "IEEE64BIG";
-  TheHMC.Resources.LoadNerscCheckpointer(CPparams);
-
-  RNGModuleParameters RNGpar;
-  RNGpar.serial_seeds = "1 2 3 4 5";
-  RNGpar.parallel_seeds = "6 7 8 9 10";
-  TheHMC.Resources.SetRNGSeeds(RNGpar);
-
-  // Construct observables
-  // here there is too much indirection
-  typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
-  TheHMC.Resources.AddObservable<PlaqObs>();
-  //////////////////////////////////////////////
-
-  const int Ls      = 16;
-  Real beta         = 2.13;
-  Real light_mass   = 0.01;
-  Real strange_mass = 0.04;
-  Real pv_mass      = 1.0;
-  RealD M5  = 1.8;
-  RealD b   = 1.0; // NOTE(review): comment said "Scale factor two", but b+c = 1.0 with these values - confirm intent
-  RealD c   = 0.0;
-
-  OneFlavourRationalParams OFRp;
-  OFRp.lo       = 1.0e-2;
-  OFRp.hi       = 64;
-  OFRp.MaxIter  = 10000;
-  OFRp.tolerance= 1.0e-10;
-  OFRp.degree   = 14;
-  OFRp.precision= 40;
-
-  std::vector<Real> hasenbusch({ 0.1 });
-
-  auto GridPtr   = TheHMC.Resources.GetCartesian();
-  auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
-  auto FGrid     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
-  auto FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
-
-  IwasakiGaugeActionR GaugeAction(beta);
-
-  // temporarily need a gauge field
-  LatticeGaugeField U(GridPtr);
-  LatticeGaugeField Uhot(GridPtr);
-
-  // These lines are unnecessary if BC are all periodic
-  std::vector<Complex> boundary = {1,1,1,-1};
-  FermionAction::ImplParams Params(boundary);
-
-  double StoppingCondition = 1e-10;
-  double MaxCGIterations = 30000;
-  ConjugateGradient<FermionField>  CG(StoppingCondition,MaxCGIterations);
-
-  bool ApplySmearing = true;
-  
-  ////////////////////////////////////
-  // Collect actions
-  ////////////////////////////////////
-  ActionLevel<HMCWrapper::Field> Level1(1);
-  ActionLevel<HMCWrapper::Field> Level2(2);
-  ActionLevel<HMCWrapper::Field> Level3(4);
-
-  ////////////////////////////////////
-  // Strange action
-  ////////////////////////////////////
-
-  MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
-  MobiusEOFAFermionD Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass,      pv_mass, -1.0, 1, M5, b, c);
-  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> 
-    EOFA(Strange_Op_L, Strange_Op_R, 
-	 CG,
-	 CG, CG,
-	 CG, CG, 
-	 OFRp, false);
-
-  EOFA.is_smeared = ApplySmearing;
-  Level1.push_back(&EOFA);
-
-  ////////////////////////////////////
-  // up down action
-  ////////////////////////////////////
-  std::vector<Real> light_den;
-  std::vector<Real> light_num;
-
-  int n_hasenbusch = hasenbusch.size();
-  light_den.push_back(light_mass);
-  for(int h=0;h<n_hasenbusch;h++){
-    light_den.push_back(hasenbusch[h]);
-    light_num.push_back(hasenbusch[h]);
-  }
-  light_num.push_back(pv_mass);
-
-  std::vector<FermionAction *> Numerators;
-  std::vector<FermionAction *> Denominators;
-  std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
-
-  for(int h=0;h<n_hasenbusch+1;h++){
-    std::cout << GridLogMessage << " 2f quotient Action  "<< light_num[h] << " / " << light_den[h]<< std::endl;
-    Numerators.push_back  (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params));
-    Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params));
-    Quotients.push_back   (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],CG,CG));
-  }
-
-  for(int h=0;h<n_hasenbusch+1;h++){
-    Quotients[h]->is_smeared = ApplySmearing;
-    Level1.push_back(Quotients[h]);
-  }
-
-  /////////////////////////////////////////////////////////////
-  // lnDetJacobianAction
-  /////////////////////////////////////////////////////////////
-  double rho = 0.1;  // smearing parameter
-  int Nsmear = 1;    // number of smearing levels
-  int Nstep  = 8*Nsmear;    // step count passed to SmearedConfigurationMasked (8 per smearing level) - must be multiple of 2Nd
-  Smear_Stout<HMCWrapper::ImplPolicy> Stout(rho);
-  SmearedConfigurationMasked<HMCWrapper::ImplPolicy> SmearingPolicy(GridPtr, Nstep, Stout);
-  JacobianAction<HMCWrapper::ImplPolicy> Jacobian(&SmearingPolicy);
-  if( ApplySmearing ) Level2.push_back(&Jacobian);
-  std::cout << GridLogMessage << " Built the Jacobian "<< std::endl;
-
-
-  /////////////////////////////////////////////////////////////
-  // Gauge action
-  /////////////////////////////////////////////////////////////
-  //  GaugeAction.is_smeared = ApplySmearing;
-  GaugeAction.is_smeared = true;
-  Level3.push_back(&GaugeAction);
-
-  std::cout << GridLogMessage << " ************************************************"<< std::endl;
-  std::cout << GridLogMessage << " Action complete -- NO FERMIONS FOR NOW -- FIXME"<< std::endl;
-  std::cout << GridLogMessage << " ************************************************"<< std::endl;
-  std::cout << GridLogMessage <<  std::endl;
-  std::cout << GridLogMessage <<  std::endl;
-
-
-  std::cout << GridLogMessage << " Running the FT HMC "<< std::endl;
-
-  TheHMC.TheAction.push_back(Level1);
-  TheHMC.TheAction.push_back(Level2);
-  TheHMC.TheAction.push_back(Level3);
-
-  TheHMC.Run(SmearingPolicy); // for smearing
-
-  Grid_finalize();
-} // main
-
-
-
--- a/systems/Sunspot/benchmarks/bench.pbs
+++ b/systems/Sunspot/benchmarks/bench.pbs
@@ -1,46 +0,0 @@
-#!/bin/bash
-
-#PBS -l select=1:system=sunspot,place=scatter
-#PBS -A LatticeQCD_aesp_CNDA
-#PBS -l walltime=01:00:00
-#PBS -N dwf
-#PBS -k doe
-
-HDIR=/home/paboyle/
-module use /soft/testing/modulefiles/
-module load intel-UMD23.05.25593.11/23.05.25593.11
-module load tools/pti-gpu  
-export LD_LIBRARY_PATH=$HDIR/tools/lib64:$LD_LIBRARY_PATH
-export PATH=$HDIR/tools/bin:$PATH
-
-export TZ='/usr/share/zoneinfo/US/Central'
-export OMP_PROC_BIND=spread
-export OMP_NUM_THREADS=3
-unset OMP_PLACES
-
-cd $PBS_O_WORKDIR
-
-qsub jobscript.pbs
-
-echo Jobid: $PBS_JOBID
-echo Running on host `hostname`
-echo Running on nodes `cat $PBS_NODEFILE`
-
-echo NODES
-cat $PBS_NODEFILE
-NNODES=`wc -l < $PBS_NODEFILE`
-NRANKS=12         # Number of MPI ranks per node
-NDEPTH=4          # Number of hardware threads per rank, spacing between MPI ranks on a node
-NTHREADS=$OMP_NUM_THREADS # Number of OMP threads per rank, given to OMP_NUM_THREADS
-
-NTOTRANKS=$(( NNODES * NRANKS ))
-
-echo "NUM_NODES=${NNODES}  TOTAL_RANKS=${NTOTRANKS}  RANKS_PER_NODE=${NRANKS}  THREADS_PER_RANK=${OMP_NUM_THREADS}"
-echo "OMP_PROC_BIND=$OMP_PROC_BIND OMP_PLACES=$OMP_PLACES"
-
-    
-CMD="mpiexec -np ${NTOTRANKS} -ppn ${NRANKS} -d ${NDEPTH} --cpu-bind=depth -envall \
-	     ./gpu_tile_compact.sh \
-	./Benchmark_dwf_fp32 --mpi 1.1.2.6 --grid 16.32.64.192 --comms-overlap \
-	--shm-mpi 0 --shm 2048 --device-mem 32000 --accelerator-threads 32"
-
--- a/systems/Sunspot/benchmarks/gpu_tile_compact.sh
+++ b/systems/Sunspot/benchmarks/gpu_tile_compact.sh
@@ -1,52 +0,0 @@
-#!/bin/bash
-
-display_help() {
-  echo " Will map gpu tile to rank in compact and then round-robin fashion"
-  echo " Usage (only work for one node of ATS/PVC):"
-  echo "   mpiexec --np N gpu_tile_compact.sh ./a.out"
-  echo
-  echo " Example 3 GPU of 2 Tiles with 7 Ranks:"
-  echo "   0 Rank 0.0"
-  echo "   1 Rank 0.1"
-  echo "   2 Rank 1.0"
-  echo "   3 Rank 1.1"
-  echo "   4 Rank 2.0"
-  echo "   5 Rank 2.1"
-  echo "   6 Rank 0.0"
-  echo
-  echo " Hacked together by apl@anl.gov, please contact if bug found"
-  exit 1
-}
-
-#This gives the exact GPU count i915 knows about; udev is used to enumerate only the devices with physical presence.
-#works? num_gpu=$(/usr/bin/udevadm info /sys/module/i915/drivers/pci\:i915/* |& grep -v Unknown | grep -c "P: /devices")
-num_gpu=6
-num_tile=2
-
-if [ "$#" -eq 0 ] || [ "$1" == "--help" ] || [ "$1" == "-h" ] || [ "$num_gpu" = 0 ]; then
-  display_help
-fi
-
-gpu_id=$(( (PALS_LOCAL_RANKID / num_tile ) % num_gpu ))
-tile_id=$((PALS_LOCAL_RANKID % num_tile))
-
-unset EnableWalkerPartition
-export EnableImplicitScaling=0
-export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
-export ZE_AFFINITY_MASK=$gpu_id.$tile_id
-export ONEAPI_DEVICE_FILTER=gpu,level_zero
-export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=0
-export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
-export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:2
-export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY=1
-#export SYCL_PI_LEVEL_ZERO_USM_RESIDENT=1
-
-echo "rank $PALS_RANKID ; local rank $PALS_LOCAL_RANKID ; ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK"
-
-if [ $PALS_LOCAL_RANKID = 0 ]
-then
-    onetrace --chrome-device-timeline "$@"
-#    "$@"
-else
-"$@"
-fi
--- a/systems/Sunspot/config-command
+++ b/systems/Sunspot/config-command
@@ -1,16 +0,0 @@
-TOOLS=$HOME/tools
-../../configure \
-	--enable-simd=GPU \
-	--enable-gen-simd-width=64 \
-	--enable-comms=mpi-auto \
-	--enable-accelerator-cshift \
-	--disable-gparity \
-	--disable-fermion-reps \
-	--enable-shm=nvlink \
-	--enable-accelerator=sycl \
-	--enable-unified=no \
-	MPICXX=mpicxx \
-	CXX=icpx \
-	LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -lapmidg -L$TOOLS/lib64/" \
-	CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -I$INSTALL/include -Wno-tautological-compare -I$HOME/ -I$TOOLS/include"
-
Author	SHA1	Message	Date
Peter Boyle	09146cfc43	Profiling temporary code until optimised	2023-06-15 06:54:10 -04:00
Peter Boyle	a450e96827	Optional checkpoint smeared configs for FTHMC	2023-06-14 04:54:29 -04:00
Peter Boyle	0f3678b9be	Additional tests	2023-06-13 11:57:11 -04:00
Peter Boyle	8dd8338e14	Hot start should be properly Hot	2023-06-13 11:56:37 -04:00
Peter Boyle	11e0dc9851	Ta project	2023-06-13 11:56:11 -04:00
Peter Boyle	f4ef6dae43	Keep methods virtual	2023-06-13 11:55:05 -04:00
Peter Boyle	b6e147372b	Clean up	2023-06-13 11:54:11 -04:00
Peter Boyle	3a4a662dc6	Integrator over to smeared force structure	2023-06-13 11:53:38 -04:00
Peter Boyle	8d06bda6fb	Smeared action virtual class	2023-06-13 11:49:09 -04:00