mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Compare commits
11 Commits
bfeceae708
...
ee92e08edb
Author | SHA1 | Date | |
---|---|---|---|
|
ee92e08edb | ||
|
c1dcee9328 | ||
|
6b150961fe | ||
|
5bafcaedfa | ||
|
eacb66591f | ||
|
fadaa85626 | ||
|
02a5b0d786 | ||
|
0e2141442a | ||
|
769eb0eecb | ||
85e35c4da1 | |||
477b794bc5 |
@ -423,7 +423,6 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
|
|||||||
#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
|
#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
|
||||||
|
|
||||||
#define KERNEL_CALL_EXT(A) \
|
#define KERNEL_CALL_EXT(A) \
|
||||||
const uint64_t NN = Nsite*Ls; \
|
|
||||||
const uint64_t sz = st.surface_list.size(); \
|
const uint64_t sz = st.surface_list.size(); \
|
||||||
auto ptr = &st.surface_list[0]; \
|
auto ptr = &st.surface_list[0]; \
|
||||||
accelerator_forNB( ss, sz, Simd::Nsimd(), { \
|
accelerator_forNB( ss, sz, Simd::Nsimd(), { \
|
||||||
|
@ -40,18 +40,20 @@ Lattice<iScalar<iScalar<iScalar<Vec> > > > Determinant(const Lattice<iScalar<iSc
|
|||||||
GridBase *grid=Umu.Grid();
|
GridBase *grid=Umu.Grid();
|
||||||
auto lvol = grid->lSites();
|
auto lvol = grid->lSites();
|
||||||
Lattice<iScalar<iScalar<iScalar<Vec> > > > ret(grid);
|
Lattice<iScalar<iScalar<iScalar<Vec> > > > ret(grid);
|
||||||
|
typedef typename Vec::scalar_type scalar;
|
||||||
autoView(Umu_v,Umu,CpuRead);
|
autoView(Umu_v,Umu,CpuRead);
|
||||||
autoView(ret_v,ret,CpuWrite);
|
autoView(ret_v,ret,CpuWrite);
|
||||||
thread_for(site,lvol,{
|
thread_for(site,lvol,{
|
||||||
Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N);
|
Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N);
|
||||||
Coordinate lcoor;
|
Coordinate lcoor;
|
||||||
grid->LocalIndexToLocalCoor(site, lcoor);
|
grid->LocalIndexToLocalCoor(site, lcoor);
|
||||||
iScalar<iScalar<iMatrix<ComplexD, N> > > Us;
|
iScalar<iScalar<iMatrix<scalar, N> > > Us;
|
||||||
peekLocalSite(Us, Umu_v, lcoor);
|
peekLocalSite(Us, Umu_v, lcoor);
|
||||||
for(int i=0;i<N;i++){
|
for(int i=0;i<N;i++){
|
||||||
for(int j=0;j<N;j++){
|
for(int j=0;j<N;j++){
|
||||||
EigenU(i,j) = Us()()(i,j);
|
scalar tmp= Us()()(i,j);
|
||||||
|
ComplexD ztmp(real(tmp),imag(tmp));
|
||||||
|
EigenU(i,j)=ztmp;
|
||||||
}}
|
}}
|
||||||
ComplexD detD = EigenU.determinant();
|
ComplexD detD = EigenU.determinant();
|
||||||
typename Vec::scalar_type det(detD.real(),detD.imag());
|
typename Vec::scalar_type det(detD.real(),detD.imag());
|
||||||
|
@ -705,7 +705,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
|
std::cout << GridLogDebug << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
|
||||||
}
|
}
|
||||||
/// Introduce a block structure and switch off comms on boundaries
|
/// Introduce a block structure and switch off comms on boundaries
|
||||||
void DirichletBlock(const Coordinate &dirichlet_block)
|
void DirichletBlock(const Coordinate &dirichlet_block)
|
||||||
|
@ -55,7 +55,7 @@ template<class vtype, int N> accelerator_inline iVector<vtype, N> Exponentiate(c
|
|||||||
|
|
||||||
|
|
||||||
// Specialisation: Cayley-Hamilton exponential for SU(3)
|
// Specialisation: Cayley-Hamilton exponential for SU(3)
|
||||||
#ifndef GRID_ACCELERATED
|
#if 0
|
||||||
template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr>
|
template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr>
|
||||||
accelerator_inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP )
|
accelerator_inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP )
|
||||||
{
|
{
|
||||||
|
@ -146,6 +146,8 @@ NAMESPACE_END(Grid);
|
|||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
|
||||||
|
std::cout << " Grid Initialise "<<std::endl;
|
||||||
|
|
||||||
Grid_init(&argc, &argv);
|
Grid_init(&argc, &argv);
|
||||||
|
|
||||||
CartesianCommunicator::BarrierWorld();
|
CartesianCommunicator::BarrierWorld();
|
||||||
@ -170,24 +172,24 @@ int main(int argc, char **argv) {
|
|||||||
IntegratorParameters MD;
|
IntegratorParameters MD;
|
||||||
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
|
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
|
||||||
// MD.name = std::string("Leap Frog");
|
// MD.name = std::string("Leap Frog");
|
||||||
typedef GenericHMCRunner<ForceGradient> HMCWrapper;
|
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
|
||||||
MD.name = std::string("Force Gradient");
|
// MD.name = std::string("Force Gradient");
|
||||||
//typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
|
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
|
||||||
// MD.name = std::string("MinimumNorm2");
|
MD.name = std::string("MinimumNorm2");
|
||||||
// TrajL = 2
|
// TrajL = 2
|
||||||
// 4/2 => 0.6 dH
|
// 4/2 => 0.6 dH
|
||||||
// 3/3 => 0.8 dH .. depth 3, slower
|
// 3/3 => 0.8 dH .. depth 3, slower
|
||||||
//MD.MDsteps = 4;
|
//MD.MDsteps = 4;
|
||||||
MD.MDsteps = 12;
|
MD.MDsteps = 14;
|
||||||
MD.trajL = 0.5;
|
MD.trajL = 0.5;
|
||||||
|
|
||||||
HMCparameters HMCparams;
|
HMCparameters HMCparams;
|
||||||
HMCparams.StartTrajectory = 1077;
|
HMCparams.StartTrajectory = 1077;
|
||||||
HMCparams.Trajectories = 1;
|
HMCparams.Trajectories = 20;
|
||||||
HMCparams.NoMetropolisUntil= 0;
|
HMCparams.NoMetropolisUntil= 0;
|
||||||
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
|
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
|
||||||
// HMCparams.StartingType =std::string("ColdStart");
|
HMCparams.StartingType =std::string("ColdStart");
|
||||||
HMCparams.StartingType =std::string("CheckpointStart");
|
// HMCparams.StartingType =std::string("CheckpointStart");
|
||||||
HMCparams.MD = MD;
|
HMCparams.MD = MD;
|
||||||
HMCWrapper TheHMC(HMCparams);
|
HMCWrapper TheHMC(HMCparams);
|
||||||
|
|
||||||
@ -223,7 +225,7 @@ int main(int argc, char **argv) {
|
|||||||
Real pv_mass = 1.0;
|
Real pv_mass = 1.0;
|
||||||
// std::vector<Real> hasenbusch({ 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
// std::vector<Real> hasenbusch({ 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
||||||
// std::vector<Real> hasenbusch({ light_mass, 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
// std::vector<Real> hasenbusch({ light_mass, 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
||||||
std::vector<Real> hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
|
std::vector<Real> hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 }); // Updated
|
||||||
// std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
|
// std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
|
||||||
|
|
||||||
auto GridPtr = TheHMC.Resources.GetCartesian();
|
auto GridPtr = TheHMC.Resources.GetCartesian();
|
||||||
@ -275,10 +277,10 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
// double StoppingCondition = 1e-14;
|
// double StoppingCondition = 1e-14;
|
||||||
// double MDStoppingCondition = 1e-9;
|
// double MDStoppingCondition = 1e-9;
|
||||||
double StoppingCondition = 1e-8;
|
double StoppingCondition = 1e-9;
|
||||||
double MDStoppingCondition = 1e-7;
|
double MDStoppingCondition = 1e-8;
|
||||||
double MDStoppingConditionLoose = 1e-7;
|
double MDStoppingConditionLoose = 1e-8;
|
||||||
double MDStoppingConditionStrange = 1e-7;
|
double MDStoppingConditionStrange = 1e-8;
|
||||||
double MaxCGIterations = 300000;
|
double MaxCGIterations = 300000;
|
||||||
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
|
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
|
||||||
ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations);
|
ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations);
|
||||||
|
44
systems/Lumi/benchmarks/bench2.slurm
Executable file
44
systems/Lumi/benchmarks/bench2.slurm
Executable file
@ -0,0 +1,44 @@
|
|||||||
|
#!/bin/bash -l
|
||||||
|
#SBATCH --job-name=bench_lehner
|
||||||
|
#SBATCH --partition=small-g
|
||||||
|
#SBATCH --nodes=2
|
||||||
|
#SBATCH --ntasks-per-node=8
|
||||||
|
#SBATCH --cpus-per-task=7
|
||||||
|
#SBATCH --gpus-per-node=8
|
||||||
|
#SBATCH --time=00:10:00
|
||||||
|
#SBATCH --account=project_465000546
|
||||||
|
#SBATCH --gpu-bind=none
|
||||||
|
#SBATCH --exclusive
|
||||||
|
#SBATCH --mem=0
|
||||||
|
|
||||||
|
CPU_BIND="map_cpu:48,56,32,40,16,24,1,8"
|
||||||
|
echo $CPU_BIND
|
||||||
|
|
||||||
|
cat << EOF > select_gpu
|
||||||
|
#!/bin/bash
|
||||||
|
export GPU_MAP=(0 1 2 3 4 5 6 7)
|
||||||
|
export GPU=\${GPU_MAP[\$SLURM_LOCALID]}
|
||||||
|
export HIP_VISIBLE_DEVICES=\$GPU
|
||||||
|
unset ROCR_VISIBLE_DEVICES
|
||||||
|
echo RANK \$SLURM_LOCALID using GPU \$GPU
|
||||||
|
exec \$*
|
||||||
|
EOF
|
||||||
|
|
||||||
|
chmod +x ./select_gpu
|
||||||
|
|
||||||
|
root=/scratch/project_465000546/boylepet/Grid/systems/Lumi
|
||||||
|
source ${root}/sourceme.sh
|
||||||
|
|
||||||
|
export OMP_NUM_THREADS=7
|
||||||
|
export MPICH_GPU_SUPPORT_ENABLED=1
|
||||||
|
export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
|
||||||
|
|
||||||
|
for vol in 16.16.16.64 32.32.32.64 32.32.32.128
|
||||||
|
do
|
||||||
|
srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.ov.$vol
|
||||||
|
#srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.ov.$vol
|
||||||
|
|
||||||
|
srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.seq.$vol
|
||||||
|
#srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.seq.$vol
|
||||||
|
done
|
||||||
|
|
@ -3,30 +3,28 @@ spack load gmp
|
|||||||
spack load mpfr
|
spack load mpfr
|
||||||
CLIME=`spack find --paths c-lime | grep c-lime| cut -c 15-`
|
CLIME=`spack find --paths c-lime | grep c-lime| cut -c 15-`
|
||||||
GMP=`spack find --paths gmp | grep gmp | cut -c 12-`
|
GMP=`spack find --paths gmp | grep gmp | cut -c 12-`
|
||||||
MPFR=`spack find --paths mpfr | grep mpfr | cut -c 12-`
|
MPFR=`spack find --paths mpfr | grep mpfr | cut -c 13-`
|
||||||
echo clime $CLIME
|
echo clime X$CLIME
|
||||||
echo gmp $GMP
|
echo gmp X$GMP
|
||||||
echo mpfr $MPFR
|
echo mpfr X$MPFR
|
||||||
|
|
||||||
../../configure --enable-comms=mpi-auto \
|
../../configure \
|
||||||
|
--enable-comms=mpi-auto \
|
||||||
--with-lime=$CLIME \
|
--with-lime=$CLIME \
|
||||||
--enable-unified=no \
|
--enable-unified=no \
|
||||||
--enable-shm=nvlink \
|
--enable-shm=nvlink \
|
||||||
--enable-tracing=timer \
|
|
||||||
--enable-accelerator=hip \
|
--enable-accelerator=hip \
|
||||||
--enable-gen-simd-width=64 \
|
--enable-gen-simd-width=64 \
|
||||||
--enable-simd=GPU \
|
--enable-simd=GPU \
|
||||||
--disable-accelerator-cshift \
|
--enable-accelerator-cshift \
|
||||||
--with-gmp=$OLCF_GMP_ROOT \
|
--with-gmp=$GMP \
|
||||||
|
--with-mpfr=$MPFR \
|
||||||
--with-fftw=$FFTW_DIR/.. \
|
--with-fftw=$FFTW_DIR/.. \
|
||||||
--with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \
|
|
||||||
--disable-fermion-reps \
|
--disable-fermion-reps \
|
||||||
--disable-gparity \
|
--disable-gparity \
|
||||||
CXX=hipcc MPICXX=mpicxx \
|
CXX=hipcc MPICXX=mpicxx \
|
||||||
CXXFLAGS="-fPIC -I{$ROCM_PATH}/include/ -std=c++14 -I${MPICH_DIR}/include -L/lib64 --amdgpu-target=gfx90a" \
|
CXXFLAGS="-fPIC --offload-arch=gfx90a -I/opt/rocm/include/ -std=c++14 -I/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/include" \
|
||||||
LDFLAGS="-L/lib64 -L/opt/rocm-5.2.0/lib/ -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lamdhip64 --amdgpu-target=gfx90a "
|
LDFLAGS="-L/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/lib -lmpi -L/opt/cray/pe/mpich/8.1.23/gtl/lib -lmpi_gtl_hsa -lamdhip64 -fopenmp"
|
||||||
|
|
||||||
|
|
||||||
#--enable-simd=GPU-RRII \
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1 +1,5 @@
|
|||||||
module load CrayEnv LUMI/22.12 partition/G cray-fftw/3.3.10.1
|
source ~/spack/share/spack/setup-env.sh
|
||||||
|
module load CrayEnv LUMI/22.12 partition/G cray-fftw/3.3.10.1 rocm
|
||||||
|
spack load c-lime
|
||||||
|
spack load gmp
|
||||||
|
spack load mpfr
|
||||||
|
Loading…
Reference in New Issue
Block a user