1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-21 09:12:03 +01:00

Compare commits

...

7 Commits

11 changed files with 205 additions and 1 deletions

View File

@ -129,6 +129,22 @@ public:
virtual ~Action(){}
};
template <class GaugeField >
class EmptyAction : public Action <GaugeField>
{
virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) { assert(0);}; // refresh pseudofermions
virtual RealD S(const GaugeField& U) { return 0.0;}; // evaluate the action
virtual void deriv(const GaugeField& U, GaugeField& dSdU) { assert(0); }; // evaluate the action derivative
///////////////////////////////
// Logging
///////////////////////////////
virtual std::string action_name() { return std::string("Level Force Log"); };
virtual std::string LogParameters() { return std::string("No parameters");};
};
NAMESPACE_END(Grid);
#endif // ACTION_BASE_H

View File

@ -86,8 +86,13 @@ public:
assert(ForceE.Checkerboard()==Even);
assert(ForceO.Checkerboard()==Odd);
#if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL)
acceleratorSetCheckerboard(Force,ForceE);
acceleratorSetCheckerboard(Force,ForceO);
#else
setCheckerboard(Force,ForceE);
setCheckerboard(Force,ForceO);
#endif
Force=-Force;
delete forcecb;
@ -130,8 +135,13 @@ public:
assert(ForceE.Checkerboard()==Even);
assert(ForceO.Checkerboard()==Odd);
#if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL)
acceleratorSetCheckerboard(Force,ForceE);
acceleratorSetCheckerboard(Force,ForceO);
#else
setCheckerboard(Force,ForceE);
setCheckerboard(Force,ForceO);
#endif
Force=-Force;
delete forcecb;

View File

@ -87,6 +87,8 @@ public:
const ActionSet<Field, RepresentationPolicy> as;
ActionSet<Field,RepresentationPolicy> LevelForces;
//Get a pointer to a shared static instance of the "do-nothing" momentum filter to serve as a default
static MomentumFilterBase<MomentaField> const* getDefaultMomFilter(){
static MomentumFilterNone<MomentaField> filter;
@ -124,6 +126,9 @@ public:
// input U actually not used in the fundamental case
// Fundamental updates, include smearing
assert(as.size()==LevelForces.size());
Field level_force(U.Grid()); level_force =Zero();
for (int a = 0; a < as[level].actions.size(); ++a) {
double start_full = usecond();
@ -145,6 +150,9 @@ public:
std::cout << GridLogIntegrator << " update_P : Level [" << level <<"]["<<a <<"] "<<name<<" dt "<<ep<< std::endl;
// track the total
level_force = level_force+force;
Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x])
Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;
@ -167,6 +175,16 @@ public:
}
{
// total force
Real force_abs = std::sqrt(norm2(level_force)/U.Grid()->gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x])
Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;
Real force_max = std::sqrt(maxLocalNorm2(level_force));
Real impulse_max = force_max * ep * HMC_MOMENTUM_DENOMINATOR;
LevelForces[level].actions.at(0)->deriv_log(force_abs,force_max,impulse_abs,impulse_max);
}
// Force from the other representations
as[level].apply(update_P_hireps, Representations, Mom, U, ep);
@ -216,6 +234,16 @@ public:
//Default the momentum filter to "do-nothing"
MomFilter = getDefaultMomFilter();
for (int level = 0; level < as.size(); ++level) {
int multiplier = as.at(level).multiplier;
ActionLevel<Field> * Level = new ActionLevel<Field>(multiplier);
Level->push_back(new EmptyAction<Field>);
LevelForces.push_back(*Level);
// does it copy by value or reference??
// - answer it copies by value, BUT the action level contains a reference that is NOT updated.
// Unsafe code in Guido's area
}
};
virtual ~Integrator() {}
@ -233,10 +261,14 @@ public:
void reset_timer(void)
{
assert(as.size()==LevelForces.size());
for (int level = 0; level < as.size(); ++level) {
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
as[level].actions.at(actionID)->reset_timer();
}
int actionID=0;
assert(LevelForces.at(level).actions.size()==1);
LevelForces.at(level).actions.at(actionID)->reset_timer();
}
}
void print_timer(void)
@ -298,6 +330,16 @@ public:
<<" calls " << as[level].actions.at(actionID)->deriv_num
<< std::endl;
}
int actionID=0;
std::cout << GridLogMessage
<< LevelForces[level].actions.at(actionID)->action_name()
<<"["<<level<<"]["<< actionID<<"] :\n\t\t "
<<" force max " << LevelForces[level].actions.at(actionID)->deriv_max_average()
<<" norm " << LevelForces[level].actions.at(actionID)->deriv_norm_average()
<<" Fdt max " << LevelForces[level].actions.at(actionID)->Fdt_max_average()
<<" Fdt norm " << LevelForces[level].actions.at(actionID)->Fdt_norm_average()
<<" calls " << LevelForces[level].actions.at(actionID)->deriv_num
<< std::endl;
}
std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::"<< std::endl;
}
@ -319,6 +361,13 @@ public:
std::cout << as[level].actions.at(actionID)->LogParameters();
}
}
std::cout << " [Integrator] Total Force loggers: "<< LevelForces.size() <<std::endl;
for (int level = 0; level < LevelForces.size(); ++level) {
std::cout << GridLogMessage << "[Integrator] ---- Level: "<< level << std::endl;
for (int actionID = 0; actionID < LevelForces[level].actions.size(); ++actionID) {
std::cout << GridLogMessage << "["<< LevelForces[level].actions.at(actionID)->action_name() << "] ID: " << actionID << std::endl;
}
}
std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::"<< std::endl;
}
@ -400,6 +449,7 @@ public:
RealD S(Field& U)
{ // here also U not used
assert(as.size()==LevelForces.size());
std::cout << GridLogIntegrator << "Integrator action\n";
RealD H = - FieldImplementation::FieldSquareNorm(P)/HMC_MOMENTUM_DENOMINATOR; // - trace (P*P)/denom

View File

@ -185,6 +185,7 @@ void Benchmark(int Ls, Coordinate Dirichlet)
GaugeField Umu(UGrid);
GaugeField UmuCopy(UGrid);
SU<Nc>::HotConfiguration(RNG4,Umu);
// SU<Nc>::ColdConfiguration(Umu);
UmuCopy=Umu;
std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
@ -307,6 +308,14 @@ void Benchmark(int Ls, Coordinate Dirichlet)
if(( n2e>1.0e-4) ) {
std::cout<<GridLogMessage << "WRONG RESULT" << std::endl;
FGrid->Barrier();
std::cout<<GridLogMessage << "RESULT" << std::endl;
// std::cout << result<<std::endl;
std::cout << norm2(result)<<std::endl;
std::cout<<GridLogMessage << "REF" << std::endl;
std::cout << norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "ERR" << std::endl;
std::cout << norm2(err)<<std::endl;
FGrid->Barrier();
exit(-1);
}
assert (n2e< 1.0e-4 );

View File

@ -0,0 +1,42 @@
#!/bin/bash
#SBATCH --partition csi
#SBATCH --time=00:10:00
#SBATCH -A csigeneral
#SBATCH --exclusive
#SBATCH --nodes=1
#SBATCH --ntasks=4
#SBATCH --qos csi
#SBATCH --gres=gpu:4
source sourceme.sh
cat << EOF > select_gpu
#!/bin/bash
export GPU_MAP=(0 1 2 3)
export GPU=\${GPU_MAP[\$SLURM_LOCALID]}
export CUDA_VISIBLE_DEVICES=\$GPU
unset ROCR_VISIBLE_DEVICES
echo RANK \$SLURM_LOCALID using GPU \$GPU
exec \$*
EOF
chmod +x ./select_gpu
export OMP_NUM_THREADS=4
export OMPI_MCA_btl=^uct,openib
export UCX_TLS=cuda,gdr_copy,rc,rc_x,sm,cuda_copy,cuda_ipc
export UCX_RNDV_SCHEME=put_zcopy
export UCX_RNDV_THRESH=16384
export UCX_IB_GPU_DIRECT_RDMA=no
export UCX_MEMTYPE_CACHE=n
export OMP_NUM_THREAD=8
#srun -N1 -n1 nvidia-smi
#srun -N1 -n1 numactl -H > numa.txt
srun -N1 -n1 lstopo A100-topo.pdf
# 4.35 TF/s
#srun -N1 -n1 ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 16.32.32.32 --shm 2048 --shm-mpi 0 --accelerator-threads 16
srun -N1 -n4 ./select_gpu ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.2.2 --grid 32.32.64.64 --shm 2048 --shm-mpi 0 --accelerator-threads 16

View File

@ -0,0 +1,17 @@
../../configure \
--enable-comms=mpi-auto \
--enable-unified=no \
--enable-shm=nvlink \
--enable-accelerator=cuda \
--enable-gen-simd-width=64 \
--enable-simd=GPU \
--disable-accelerator-cshift \
--disable-fermion-reps \
--disable-gparity \
CXX=nvcc \
MPICXX=mpicxx \
LDFLAGS="-cudart shared " \
CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++17 -cudart shared"

View File

@ -0,0 +1,2 @@
module load cuda/12.2
module load openmpi

View File

@ -0,0 +1,6 @@
HDF=$HOME/paboyle/install
LDFLAGS=-L$HDF/lib CXX=clang++ ../../configure --enable-simd=NEONv8 --enable-comms=none --enable-unified=yes --disable-fermion-reps --disable-gparity --disable-debug --with-hdf5=$HDF
#LDFLAGS=-L$HDF/lib CXX=clang++ ../../configure --enable-simd=GEN --enable-comms=none --enable-unified=yes --disable-fermion-reps --disable-gparity --disable-debug --with-hdf5=$HDF

View File

@ -0,0 +1,31 @@
#!/bin/bash
#SBATCH --partition lqcd
#SBATCH --time=00:20:00
#SBATCH -A lqcdtest
#SBATCH --exclusive
#SBATCH --nodes=1
#SBATCH --ntasks=2
#SBATCH --qos lqcd
source sourceme.sh
export OMP_NUM_THREAD=24
#srun -N1 -n1 numactl -H > numa.txt
#srun -N1 -n1 lstopo ice-topo.pdf
cat << EOF > select_socket
#!/bin/bash
export NUM_MAP=(0 1)
export NUMA=\${NUMA_MAP[\$SLURM_LOCALID]}
exec \$*
EOF
chmod +x ./select_socket
#for vol in 8.8.8.16 8.8.8.32 8.8.8.64
#for vol in 8.8.16.16 8.8.16.32 8.8.16.64
for vol in 8.16.16.16 8.16.16.32 8.16.16.64 16.16.16.32 16.16.16.64 24.24.24.64 32.32.32.32
do
srun --cpu-bind=ldoms -N1 -n2 ./select_socket ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid $vol --dslash-asm > $vol.2socket.out
srun --cpu-bind=ldoms -N1 -n1 ./select_socket ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid $vol --dslash-asm > $vol.1socket.out
done

View File

@ -0,0 +1,19 @@
../../configure \
--enable-debug \
--enable-comms=mpi-auto \
--enable-unified=yes \
--enable-shm=shmopen \
--enable-shm-fast-path=shmopen \
--enable-accelerator=none \
--enable-simd=AVX512 \
--disable-accelerator-cshift \
--disable-fermion-reps \
--disable-gparity \
CXX=clang++ \
MPICXX=mpicxx \
LDFLAGS=-L/direct/sdcc+u/paboyle/spack/opt/spack/linux-almalinux8-icelake/gcc-8.5.0/hwloc-2.9.1-hgkscnt5pferhtde4ahctlupb6qf3vtl/lib/ \
LIBS=-lhwloc \
CXXFLAGS="-std=c++17"

View File

@ -0,0 +1,2 @@
export LD_LIBRARY_PATH=/direct/sdcc+u/paboyle/spack/opt/spack/linux-almalinux8-icelake/gcc-8.5.0/llvm-12.0.1-agey6vtuw3e375rewhhobvkznjh5ltz4/lib/:$LD_LIBRARY_PATH
module load openmpi