mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-21 17:22:03 +01:00
Compare commits
6 Commits
b391af4489
...
e5bc679fef
Author | SHA1 | Date | |
---|---|---|---|
e5bc679fef | |||
f48298ad4e | |||
645e47c1ba | |||
d1d9827263 | |||
14643c0aab | |||
b77a9b8947 |
@ -129,6 +129,22 @@ public:
|
||||
virtual ~Action(){}
|
||||
};
|
||||
|
||||
template <class GaugeField >
|
||||
class EmptyAction : public Action <GaugeField>
|
||||
{
|
||||
virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) { assert(0);}; // refresh pseudofermions
|
||||
virtual RealD S(const GaugeField& U) { return 0.0;}; // evaluate the action
|
||||
virtual void deriv(const GaugeField& U, GaugeField& dSdU) { assert(0); }; // evaluate the action derivative
|
||||
|
||||
///////////////////////////////
|
||||
// Logging
|
||||
///////////////////////////////
|
||||
virtual std::string action_name() { return std::string("Level Force Log"); };
|
||||
virtual std::string LogParameters() { return std::string("No parameters");};
|
||||
};
|
||||
|
||||
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif // ACTION_BASE_H
|
||||
|
@ -87,6 +87,8 @@ public:
|
||||
|
||||
const ActionSet<Field, RepresentationPolicy> as;
|
||||
|
||||
ActionSet<Field,RepresentationPolicy> LevelForces;
|
||||
|
||||
//Get a pointer to a shared static instance of the "do-nothing" momentum filter to serve as a default
|
||||
static MomentumFilterBase<MomentaField> const* getDefaultMomFilter(){
|
||||
static MomentumFilterNone<MomentaField> filter;
|
||||
@ -124,6 +126,9 @@ public:
|
||||
// input U actually not used in the fundamental case
|
||||
// Fundamental updates, include smearing
|
||||
|
||||
assert(as.size()==LevelForces.size());
|
||||
|
||||
Field level_force(U.Grid()); level_force =Zero();
|
||||
for (int a = 0; a < as[level].actions.size(); ++a) {
|
||||
|
||||
double start_full = usecond();
|
||||
@ -145,6 +150,9 @@ public:
|
||||
|
||||
std::cout << GridLogIntegrator << " update_P : Level [" << level <<"]["<<a <<"] "<<name<<" dt "<<ep<< std::endl;
|
||||
|
||||
// track the total
|
||||
level_force = level_force+force;
|
||||
|
||||
Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x])
|
||||
Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;
|
||||
|
||||
@ -167,6 +175,16 @@ public:
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
// total force
|
||||
Real force_abs = std::sqrt(norm2(level_force)/U.Grid()->gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x])
|
||||
Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;
|
||||
|
||||
Real force_max = std::sqrt(maxLocalNorm2(level_force));
|
||||
Real impulse_max = force_max * ep * HMC_MOMENTUM_DENOMINATOR;
|
||||
LevelForces[level].actions.at(0)->deriv_log(force_abs,force_max,impulse_abs,impulse_max);
|
||||
}
|
||||
|
||||
// Force from the other representations
|
||||
as[level].apply(update_P_hireps, Representations, Mom, U, ep);
|
||||
|
||||
@ -216,6 +234,16 @@ public:
|
||||
|
||||
//Default the momentum filter to "do-nothing"
|
||||
MomFilter = getDefaultMomFilter();
|
||||
|
||||
for (int level = 0; level < as.size(); ++level) {
|
||||
int multiplier = as.at(level).multiplier;
|
||||
ActionLevel<Field> * Level = new ActionLevel<Field>(multiplier);
|
||||
Level->push_back(new EmptyAction<Field>);
|
||||
LevelForces.push_back(*Level);
|
||||
// does it copy by value or reference??
|
||||
// - answer it copies by value, BUT the action level contains a reference that is NOT updated.
|
||||
// Unsafe code in Guido's area
|
||||
}
|
||||
};
|
||||
|
||||
virtual ~Integrator() {}
|
||||
@ -233,10 +261,14 @@ public:
|
||||
|
||||
void reset_timer(void)
|
||||
{
|
||||
assert(as.size()==LevelForces.size());
|
||||
for (int level = 0; level < as.size(); ++level) {
|
||||
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
|
||||
as[level].actions.at(actionID)->reset_timer();
|
||||
}
|
||||
int actionID=0;
|
||||
assert(LevelForces.at(level).actions.size()==1);
|
||||
LevelForces.at(level).actions.at(actionID)->reset_timer();
|
||||
}
|
||||
}
|
||||
void print_timer(void)
|
||||
@ -298,6 +330,16 @@ public:
|
||||
<<" calls " << as[level].actions.at(actionID)->deriv_num
|
||||
<< std::endl;
|
||||
}
|
||||
int actionID=0;
|
||||
std::cout << GridLogMessage
|
||||
<< LevelForces[level].actions.at(actionID)->action_name()
|
||||
<<"["<<level<<"]["<< actionID<<"] :\n\t\t "
|
||||
<<" force max " << LevelForces[level].actions.at(actionID)->deriv_max_average()
|
||||
<<" norm " << LevelForces[level].actions.at(actionID)->deriv_norm_average()
|
||||
<<" Fdt max " << LevelForces[level].actions.at(actionID)->Fdt_max_average()
|
||||
<<" Fdt norm " << LevelForces[level].actions.at(actionID)->Fdt_norm_average()
|
||||
<<" calls " << LevelForces[level].actions.at(actionID)->deriv_num
|
||||
<< std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::"<< std::endl;
|
||||
}
|
||||
@ -319,6 +361,13 @@ public:
|
||||
std::cout << as[level].actions.at(actionID)->LogParameters();
|
||||
}
|
||||
}
|
||||
std::cout << " [Integrator] Total Force loggers: "<< LevelForces.size() <<std::endl;
|
||||
for (int level = 0; level < LevelForces.size(); ++level) {
|
||||
std::cout << GridLogMessage << "[Integrator] ---- Level: "<< level << std::endl;
|
||||
for (int actionID = 0; actionID < LevelForces[level].actions.size(); ++actionID) {
|
||||
std::cout << GridLogMessage << "["<< LevelForces[level].actions.at(actionID)->action_name() << "] ID: " << actionID << std::endl;
|
||||
}
|
||||
}
|
||||
std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::"<< std::endl;
|
||||
}
|
||||
|
||||
@ -400,6 +449,7 @@ public:
|
||||
RealD S(Field& U)
|
||||
{ // here also U not used
|
||||
|
||||
assert(as.size()==LevelForces.size());
|
||||
std::cout << GridLogIntegrator << "Integrator action\n";
|
||||
|
||||
RealD H = - FieldImplementation::FieldSquareNorm(P)/HMC_MOMENTUM_DENOMINATOR; // - trace (P*P)/denom
|
||||
|
@ -185,6 +185,7 @@ void Benchmark(int Ls, Coordinate Dirichlet)
|
||||
GaugeField Umu(UGrid);
|
||||
GaugeField UmuCopy(UGrid);
|
||||
SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
// SU<Nc>::ColdConfiguration(Umu);
|
||||
UmuCopy=Umu;
|
||||
std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
|
||||
|
||||
@ -307,6 +308,14 @@ void Benchmark(int Ls, Coordinate Dirichlet)
|
||||
if(( n2e>1.0e-4) ) {
|
||||
std::cout<<GridLogMessage << "WRONG RESULT" << std::endl;
|
||||
FGrid->Barrier();
|
||||
std::cout<<GridLogMessage << "RESULT" << std::endl;
|
||||
// std::cout << result<<std::endl;
|
||||
std::cout << norm2(result)<<std::endl;
|
||||
std::cout<<GridLogMessage << "REF" << std::endl;
|
||||
std::cout << norm2(ref)<<std::endl;
|
||||
std::cout<<GridLogMessage << "ERR" << std::endl;
|
||||
std::cout << norm2(err)<<std::endl;
|
||||
FGrid->Barrier();
|
||||
exit(-1);
|
||||
}
|
||||
assert (n2e< 1.0e-4 );
|
||||
|
42
systems/SDCC-A100/bench.slurm
Normal file
42
systems/SDCC-A100/bench.slurm
Normal file
@ -0,0 +1,42 @@
|
||||
#!/bin/bash
#SBATCH --partition csi
#SBATCH --time=00:10:00
#SBATCH -A csigeneral
#SBATCH --exclusive
#SBATCH --nodes=1
#SBATCH --ntasks=4
#SBATCH --qos csi
#SBATCH --gres=gpu:4

# Single-node Benchmark_dwf_fp32 run: 4 ranks, each restricted to one GPU
# through the generated ./select_gpu wrapper.

source sourceme.sh

# Write the per-rank wrapper. The \$ escapes defer expansion until the wrapper
# itself runs, so SLURM_LOCALID is read inside each rank.
cat << EOF > select_gpu
#!/bin/bash
export GPU_MAP=(0 1 2 3)
export GPU=\${GPU_MAP[\$SLURM_LOCALID]}
export CUDA_VISIBLE_DEVICES=\$GPU
unset ROCR_VISIBLE_DEVICES
echo RANK \$SLURM_LOCALID using GPU \$GPU
# "\$@" keeps each argument intact; an unquoted \$* would re-split on whitespace.
exec "\$@"
EOF
chmod +x ./select_gpu


# OpenMP threads per rank. This script previously also exported a misspelled
# OMP_NUM_THREAD=8, which OpenMP does not read; the effective value was always
# 4 and is kept. Raise it here if 8 threads per rank was intended.
export OMP_NUM_THREADS=4
export OMPI_MCA_btl=^uct,openib
export UCX_TLS=cuda,gdr_copy,rc,rc_x,sm,cuda_copy,cuda_ipc
export UCX_RNDV_SCHEME=put_zcopy
export UCX_RNDV_THRESH=16384
export UCX_IB_GPU_DIRECT_RDMA=no
export UCX_MEMTYPE_CACHE=n

#srun -N1 -n1 nvidia-smi
#srun -N1 -n1 numactl -H > numa.txt
srun -N1 -n1 lstopo A100-topo.pdf

# 4.35 TF/s
#srun -N1 -n1 ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 16.32.32.32 --shm 2048 --shm-mpi 0 --accelerator-threads 16

srun -N1 -n4 ./select_gpu ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.2.2 --grid 32.32.64.64 --shm 2048 --shm-mpi 0 --accelerator-threads 16
|
||||
|
17
systems/SDCC-A100/config-command
Normal file
17
systems/SDCC-A100/config-command
Normal file
@ -0,0 +1,17 @@
|
||||
# Configure invocation listed as systems/SDCC-A100/config-command: CUDA
# accelerator build compiled with nvcc (host compiler mpicxx via -ccbin),
# generating code for compute_80 / sm_80, with MPI comms and nvlink shm.
../../configure \
|
||||
--enable-comms=mpi-auto \
|
||||
--enable-unified=no \
|
||||
--enable-shm=nvlink \
|
||||
--enable-accelerator=cuda \
|
||||
--enable-gen-simd-width=64 \
|
||||
--enable-simd=GPU \
|
||||
--disable-accelerator-cshift \
|
||||
--disable-fermion-reps \
|
||||
--disable-gparity \
|
||||
CXX=nvcc \
|
||||
MPICXX=mpicxx \
|
||||
LDFLAGS="-cudart shared " \
|
||||
CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++17 -cudart shared"
|
||||
|
||||
|
||||
|
2
systems/SDCC-A100/sourceme.sh
Normal file
2
systems/SDCC-A100/sourceme.sh
Normal file
@ -0,0 +1,2 @@
|
||||
# Environment setup sourced by the A100 bench.slurm job: loads the CUDA 12.2
# and OpenMPI environment modules.
module load cuda/12.2
|
||||
module load openmpi
|
6
systems/SDCC-ARM/config-command-mpi
Normal file
6
systems/SDCC-ARM/config-command-mpi
Normal file
@ -0,0 +1,6 @@
|
||||
# Configure invocation listed as systems/SDCC-ARM/config-command-mpi: clang++
# build with NEONv8 SIMD and HDF5 taken from $HDF.
# NOTE(review): the file name says "mpi" but the command passes
# --enable-comms=none — confirm which one is intended.
HDF=$HOME/paboyle/install
|
||||
|
||||
LDFLAGS=-L$HDF/lib CXX=clang++ ../../configure --enable-simd=NEONv8 --enable-comms=none --enable-unified=yes --disable-fermion-reps --disable-gparity --disable-debug --with-hdf5=$HDF
|
||||
# Alternative kept for reference: same command with the generic (GEN) SIMD target.
#LDFLAGS=-L$HDF/lib CXX=clang++ ../../configure --enable-simd=GEN --enable-comms=none --enable-unified=yes --disable-fermion-reps --disable-gparity --disable-debug --with-hdf5=$HDF
|
||||
|
||||
|
31
systems/SDCC-ICE/bench.slurm
Normal file
31
systems/SDCC-ICE/bench.slurm
Normal file
@ -0,0 +1,31 @@
|
||||
#!/bin/bash
#SBATCH --partition lqcd
#SBATCH --time=00:20:00
#SBATCH -A lqcdtest
#SBATCH --exclusive
#SBATCH --nodes=1
#SBATCH --ntasks=2
#SBATCH --qos lqcd

# Volume scan of Benchmark_dwf_fp32 on one node, run once with two ranks
# (--mpi 1.1.1.2) and once with a single rank for every lattice volume.

source sourceme.sh

# OpenMP reads OMP_NUM_THREADS; the previous spelling OMP_NUM_THREAD was
# ignored, so the thread count was never actually set.
export OMP_NUM_THREADS=24
#srun -N1 -n1 numactl -H > numa.txt
#srun -N1 -n1 lstopo ice-topo.pdf

# Write the per-rank wrapper. The \$ escapes defer expansion until the wrapper
# itself runs, so SLURM_LOCALID is read inside each rank.
cat << EOF > select_socket
#!/bin/bash
# The array name must match the lookup below; it used to be declared as
# NUM_MAP, which left NUMA empty.
export NUMA_MAP=(0 1)
export NUMA=\${NUMA_MAP[\$SLURM_LOCALID]}
# "\$@" keeps each argument intact; an unquoted \$* would re-split on whitespace.
exec "\$@"
EOF
chmod +x ./select_socket

#for vol in 8.8.8.16 8.8.8.32 8.8.8.64
#for vol in 8.8.16.16 8.8.16.32 8.8.16.64
for vol in 8.16.16.16 8.16.16.32 8.16.16.64 16.16.16.32 16.16.16.64 24.24.24.64 32.32.32.32
do
  srun --cpu-bind=ldoms -N1 -n2 ./select_socket ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid "$vol" --dslash-asm > "$vol.2socket.out"
  srun --cpu-bind=ldoms -N1 -n1 ./select_socket ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid "$vol" --dslash-asm > "$vol.1socket.out"
done
|
||||
|
19
systems/SDCC-ICE/config-command
Normal file
19
systems/SDCC-ICE/config-command
Normal file
@ -0,0 +1,19 @@
|
||||
# Configure invocation listed as systems/SDCC-ICE/config-command: CPU-only
# (accelerator=none) AVX512 build with clang++, MPI comms, shmopen shared
# memory, and hwloc linked from a spack install directory.
# NOTE(review): --enable-debug is set here although the directory also holds a
# benchmark job script — confirm this is intended for timing runs.
../../configure \
|
||||
--enable-debug \
|
||||
--enable-comms=mpi-auto \
|
||||
--enable-unified=yes \
|
||||
--enable-shm=shmopen \
|
||||
--enable-shm-fast-path=shmopen \
|
||||
--enable-accelerator=none \
|
||||
--enable-simd=AVX512 \
|
||||
--disable-accelerator-cshift \
|
||||
--disable-fermion-reps \
|
||||
--disable-gparity \
|
||||
CXX=clang++ \
|
||||
MPICXX=mpicxx \
|
||||
LDFLAGS=-L/direct/sdcc+u/paboyle/spack/opt/spack/linux-almalinux8-icelake/gcc-8.5.0/hwloc-2.9.1-hgkscnt5pferhtde4ahctlupb6qf3vtl/lib/ \
|
||||
LIBS=-lhwloc \
|
||||
CXXFLAGS="-std=c++17"
|
||||
|
||||
|
||||
|
2
systems/SDCC-ICE/sourceme.sh
Normal file
2
systems/SDCC-ICE/sourceme.sh
Normal file
@ -0,0 +1,2 @@
|
||||
# Environment setup sourced by the ICE bench.slurm job: prepends the lib
# directory of a spack-installed llvm-12.0.1 to LD_LIBRARY_PATH and loads the
# OpenMPI environment module.
export LD_LIBRARY_PATH=/direct/sdcc+u/paboyle/spack/opt/spack/linux-almalinux8-icelake/gcc-8.5.0/llvm-12.0.1-agey6vtuw3e375rewhhobvkznjh5ltz4/lib/:$LD_LIBRARY_PATH
|
||||
module load openmpi
|
Reference in New Issue
Block a user