1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Compare commits

..

11 Commits

Author SHA1 Message Date
Peter Boyle
ee92e08edb
Merge pull request #435 from fjosw/fix/warnings_in_WilsonKernelsImplementation
Unused variable in WilsonKernelsImplementation
2023-06-23 11:47:19 -04:00
Peter Boyle
c1dcee9328
Merge pull request #437 from fjosw/fix/stencil_debug
Added GridLogDebug to BuildSurfaceList debug message
2023-06-23 11:47:00 -04:00
Peter Boyle
6b150961fe Better script 2023-06-23 18:09:25 +03:00
Peter Boyle
5bafcaedfa Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-06-22 19:59:45 +03:00
Peter Boyle
eacb66591f Config command 2023-06-22 19:56:40 +03:00
Peter Boyle
fadaa85626 Update 2023-06-22 19:56:27 +03:00
Peter Boyle
02a5b0d786 Updating run during testing 2023-06-22 19:52:46 +03:00
Peter Boyle
0e2141442a Dennis says broken 2023-06-22 19:19:51 +03:00
Peter Boyle
769eb0eecb Precision coverage 2023-06-22 19:19:20 +03:00
85e35c4da1
fix: added GridLogDebug to BuildSurfaceList debug message. 2023-06-16 10:31:16 +01:00
477b794bc5
fix: unused variable removed. 2023-05-29 14:08:53 +01:00
8 changed files with 82 additions and 33 deletions

View File

@ -423,7 +423,6 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier(); #define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
#define KERNEL_CALL_EXT(A) \ #define KERNEL_CALL_EXT(A) \
const uint64_t NN = Nsite*Ls; \
const uint64_t sz = st.surface_list.size(); \ const uint64_t sz = st.surface_list.size(); \
auto ptr = &st.surface_list[0]; \ auto ptr = &st.surface_list[0]; \
accelerator_forNB( ss, sz, Simd::Nsimd(), { \ accelerator_forNB( ss, sz, Simd::Nsimd(), { \

View File

@ -40,18 +40,20 @@ Lattice<iScalar<iScalar<iScalar<Vec> > > > Determinant(const Lattice<iScalar<iSc
GridBase *grid=Umu.Grid(); GridBase *grid=Umu.Grid();
auto lvol = grid->lSites(); auto lvol = grid->lSites();
Lattice<iScalar<iScalar<iScalar<Vec> > > > ret(grid); Lattice<iScalar<iScalar<iScalar<Vec> > > > ret(grid);
typedef typename Vec::scalar_type scalar;
autoView(Umu_v,Umu,CpuRead); autoView(Umu_v,Umu,CpuRead);
autoView(ret_v,ret,CpuWrite); autoView(ret_v,ret,CpuWrite);
thread_for(site,lvol,{ thread_for(site,lvol,{
Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N); Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N);
Coordinate lcoor; Coordinate lcoor;
grid->LocalIndexToLocalCoor(site, lcoor); grid->LocalIndexToLocalCoor(site, lcoor);
iScalar<iScalar<iMatrix<ComplexD, N> > > Us; iScalar<iScalar<iMatrix<scalar, N> > > Us;
peekLocalSite(Us, Umu_v, lcoor); peekLocalSite(Us, Umu_v, lcoor);
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
for(int j=0;j<N;j++){ for(int j=0;j<N;j++){
EigenU(i,j) = Us()()(i,j); scalar tmp= Us()()(i,j);
ComplexD ztmp(real(tmp),imag(tmp));
EigenU(i,j)=ztmp;
}} }}
ComplexD detD = EigenU.determinant(); ComplexD detD = EigenU.determinant();
typename Vec::scalar_type det(detD.real(),detD.imag()); typename Vec::scalar_type det(detD.real(),detD.imag());

View File

@ -705,7 +705,7 @@ public:
} }
} }
} }
std::cout << "BuildSurfaceList size is "<<surface_list.size()<<std::endl; std::cout << GridLogDebug << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
} }
/// Introduce a block structure and switch off comms on boundaries /// Introduce a block structure and switch off comms on boundaries
void DirichletBlock(const Coordinate &dirichlet_block) void DirichletBlock(const Coordinate &dirichlet_block)

View File

@ -55,7 +55,7 @@ template<class vtype, int N> accelerator_inline iVector<vtype, N> Exponentiate(c
// Specialisation: Cayley-Hamilton exponential for SU(3) // Specialisation: Cayley-Hamilton exponential for SU(3)
#ifndef GRID_ACCELERATED #if 0
template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr> template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr>
accelerator_inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP ) accelerator_inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP )
{ {

View File

@ -146,6 +146,8 @@ NAMESPACE_END(Grid);
int main(int argc, char **argv) { int main(int argc, char **argv) {
using namespace Grid; using namespace Grid;
std::cout << " Grid Initialise "<<std::endl;
Grid_init(&argc, &argv); Grid_init(&argc, &argv);
CartesianCommunicator::BarrierWorld(); CartesianCommunicator::BarrierWorld();
@ -170,24 +172,24 @@ int main(int argc, char **argv) {
IntegratorParameters MD; IntegratorParameters MD;
// typedef GenericHMCRunner<LeapFrog> HMCWrapper; // typedef GenericHMCRunner<LeapFrog> HMCWrapper;
// MD.name = std::string("Leap Frog"); // MD.name = std::string("Leap Frog");
typedef GenericHMCRunner<ForceGradient> HMCWrapper; // typedef GenericHMCRunner<ForceGradient> HMCWrapper;
MD.name = std::string("Force Gradient"); // MD.name = std::string("Force Gradient");
//typedef GenericHMCRunner<MinimumNorm2> HMCWrapper; typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
// MD.name = std::string("MinimumNorm2"); MD.name = std::string("MinimumNorm2");
// TrajL = 2 // TrajL = 2
// 4/2 => 0.6 dH // 4/2 => 0.6 dH
// 3/3 => 0.8 dH .. depth 3, slower // 3/3 => 0.8 dH .. depth 3, slower
//MD.MDsteps = 4; //MD.MDsteps = 4;
MD.MDsteps = 12; MD.MDsteps = 14;
MD.trajL = 0.5; MD.trajL = 0.5;
HMCparameters HMCparams; HMCparameters HMCparams;
HMCparams.StartTrajectory = 1077; HMCparams.StartTrajectory = 1077;
HMCparams.Trajectories = 1; HMCparams.Trajectories = 20;
HMCparams.NoMetropolisUntil= 0; HMCparams.NoMetropolisUntil= 0;
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
// HMCparams.StartingType =std::string("ColdStart"); HMCparams.StartingType =std::string("ColdStart");
HMCparams.StartingType =std::string("CheckpointStart"); // HMCparams.StartingType =std::string("CheckpointStart");
HMCparams.MD = MD; HMCparams.MD = MD;
HMCWrapper TheHMC(HMCparams); HMCWrapper TheHMC(HMCparams);
@ -223,7 +225,7 @@ int main(int argc, char **argv) {
Real pv_mass = 1.0; Real pv_mass = 1.0;
// std::vector<Real> hasenbusch({ 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // std::vector<Real> hasenbusch({ 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
// std::vector<Real> hasenbusch({ light_mass, 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // std::vector<Real> hasenbusch({ light_mass, 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
std::vector<Real> hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated std::vector<Real> hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 }); // Updated
// std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass }); // std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
auto GridPtr = TheHMC.Resources.GetCartesian(); auto GridPtr = TheHMC.Resources.GetCartesian();
@ -275,10 +277,10 @@ int main(int argc, char **argv) {
// double StoppingCondition = 1e-14; // double StoppingCondition = 1e-14;
// double MDStoppingCondition = 1e-9; // double MDStoppingCondition = 1e-9;
double StoppingCondition = 1e-8; double StoppingCondition = 1e-9;
double MDStoppingCondition = 1e-7; double MDStoppingCondition = 1e-8;
double MDStoppingConditionLoose = 1e-7; double MDStoppingConditionLoose = 1e-8;
double MDStoppingConditionStrange = 1e-7; double MDStoppingConditionStrange = 1e-8;
double MaxCGIterations = 300000; double MaxCGIterations = 300000;
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations); ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations); ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations);

View File

@ -0,0 +1,44 @@
#!/bin/bash -l
# SLURM batch script: runs ./Benchmark_dwf_fp32 for several lattice volumes,
# once with --comms-overlap and once with --comms-sequential, writing each
# run's stdout to log.shm0.{ov,seq}.<volume> in the current directory.
#SBATCH --job-name=bench_lehner
#SBATCH --partition=small-g
#SBATCH --nodes=2
#SBATCH --ntasks-per-node=8
#SBATCH --cpus-per-task=7
#SBATCH --gpus-per-node=8
#SBATCH --time=00:10:00
#SBATCH --account=project_465000546
#SBATCH --gpu-bind=none
#SBATCH --exclusive
#SBATCH --mem=0

# CPU list handed to `srun --cpu-bind`; it has one entry per task on a node
# (8 entries, matching --ntasks-per-node=8).
CPU_BIND="map_cpu:48,56,32,40,16,24,1,8"
echo "$CPU_BIND"

# Generate a per-task wrapper: it selects one GPU from GPU_MAP using the
# task's SLURM_LOCALID, exposes it through HIP_VISIBLE_DEVICES, and then
# replaces itself with the command it was given.
# The heredoc delimiter is unquoted, so every `$` that must survive into the
# generated file is escaped as `\$`.
# The wrapper uses "$@" (not $*) so that the forwarded arguments are passed
# through exactly as received instead of being re-split and glob-expanded.
cat << EOF > select_gpu
#!/bin/bash
export GPU_MAP=(0 1 2 3 4 5 6 7)
export GPU=\${GPU_MAP[\$SLURM_LOCALID]}
export HIP_VISIBLE_DEVICES=\$GPU
unset ROCR_VISIBLE_DEVICES
echo RANK \$SLURM_LOCALID using GPU \$GPU
exec "\$@"
EOF
chmod +x ./select_gpu

# Load the build/runtime environment from the Grid systems directory.
root=/scratch/project_465000546/boylepet/Grid/systems/Lumi
source "${root}/sourceme.sh"

export OMP_NUM_THREADS=7
export MPICH_GPU_SUPPORT_ENABLED=1
export MPICH_SMP_SINGLE_COPY_MODE=XPMEM

# One overlapped-comms run and one sequential-comms run per volume.
# The --shm-mpi 1 variants are kept below, commented out, for reference.
for vol in 16.16.16.64 32.32.32.64 32.32.32.128
do
  srun --cpu-bind="${CPU_BIND}" ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 0 --grid "$vol" > "log.shm0.ov.$vol"
  #srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.ov.$vol
  srun --cpu-bind="${CPU_BIND}" ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 0 --grid "$vol" > "log.shm0.seq.$vol"
  #srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.seq.$vol
done

View File

@ -3,30 +3,28 @@ spack load gmp
spack load mpfr spack load mpfr
CLIME=`spack find --paths c-lime | grep c-lime| cut -c 15-` CLIME=`spack find --paths c-lime | grep c-lime| cut -c 15-`
GMP=`spack find --paths gmp | grep gmp | cut -c 12-` GMP=`spack find --paths gmp | grep gmp | cut -c 12-`
MPFR=`spack find --paths mpfr | grep mpfr | cut -c 12-` MPFR=`spack find --paths mpfr | grep mpfr | cut -c 13-`
echo clime $CLIME echo clime X$CLIME
echo gmp $GMP echo gmp X$GMP
echo mpfr $MPFR echo mpfr X$MPFR
../../configure --enable-comms=mpi-auto \ ../../configure \
--enable-comms=mpi-auto \
--with-lime=$CLIME \ --with-lime=$CLIME \
--enable-unified=no \ --enable-unified=no \
--enable-shm=nvlink \ --enable-shm=nvlink \
--enable-tracing=timer \
--enable-accelerator=hip \ --enable-accelerator=hip \
--enable-gen-simd-width=64 \ --enable-gen-simd-width=64 \
--enable-simd=GPU \ --enable-simd=GPU \
--disable-accelerator-cshift \ --enable-accelerator-cshift \
--with-gmp=$OLCF_GMP_ROOT \ --with-gmp=$GMP \
--with-mpfr=$MPFR \
--with-fftw=$FFTW_DIR/.. \ --with-fftw=$FFTW_DIR/.. \
--with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \
--disable-fermion-reps \ --disable-fermion-reps \
--disable-gparity \ --disable-gparity \
CXX=hipcc MPICXX=mpicxx \ CXX=hipcc MPICXX=mpicxx \
CXXFLAGS="-fPIC -I{$ROCM_PATH}/include/ -std=c++14 -I${MPICH_DIR}/include -L/lib64 --amdgpu-target=gfx90a" \ CXXFLAGS="-fPIC --offload-arch=gfx90a -I/opt/rocm/include/ -std=c++14 -I/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/include" \
LDFLAGS="-L/lib64 -L/opt/rocm-5.2.0/lib/ -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lamdhip64 --amdgpu-target=gfx90a " LDFLAGS="-L/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/lib -lmpi -L/opt/cray/pe/mpich/8.1.23/gtl/lib -lmpi_gtl_hsa -lamdhip64 -fopenmp"
#--enable-simd=GPU-RRII \

View File

@ -1 +1,5 @@
module load CrayEnv LUMI/22.12 partition/G cray-fftw/3.3.10.1 source ~/spack/share/spack/setup-env.sh
module load CrayEnv LUMI/22.12 partition/G cray-fftw/3.3.10.1 rocm
spack load c-lime
spack load gmp
spack load mpfr