mirror of
https://github.com/paboyle/Grid.git
synced 2026-06-04 19:24:36 +01:00
Some improvements that should have been there if in synch with develop,
and also some staggered hdcg type work
This commit is contained in:
@@ -1,76 +1,91 @@
|
||||
Per node summary table
|
||||
|
||||
L , Wilson, DWF4, Staggered, NaiveStag
|
||||
|
||||
8 , 90, 933, 38, 23
|
||||
12 , 403, 1688, 178, 113
|
||||
16 , 188, 1647, 449, 295
|
||||
24 , 947, 1574, 674, 553
|
||||
32 , 931, 1371, 718, 643
|
||||
|
||||
Memory Bandwidth
|
||||
|
||||
Bytes, GB/s per node
|
||||
6291456, 379.297050
|
||||
100663296, 3754.674992
|
||||
509607936, 6521.472413
|
||||
1610612736, 8513.456479
|
||||
3932160000, 9018.901766
|
||||
|
||||
|
||||
GEMM
|
||||
|
||||
M, N, K, BATCH, GF/s per rank
|
||||
16, 8, 16, 256, 0.564958
|
||||
16, 16, 16, 256, 243.148058
|
||||
16, 32, 16, 256, 440.346877
|
||||
32, 8, 32, 256, 439.194136
|
||||
32, 16, 32, 256, 847.334141
|
||||
32, 32, 32, 256, 1430.892623
|
||||
64, 8, 64, 256, 1242.756741
|
||||
64, 16, 64, 256, 2196.689493
|
||||
64, 32, 64, 256, 3697.458072
|
||||
16, 8, 256, 256, 899.582627
|
||||
16, 16, 256, 256, 1673.537756
|
||||
16, 32, 256, 256, 2959.597089
|
||||
32, 8, 256, 256, 1558.858630
|
||||
32, 16, 256, 256, 2864.839445
|
||||
32, 32, 256, 256, 4810.671254
|
||||
64, 8, 256, 256, 2386.092942
|
||||
64, 16, 256, 256, 4451.665937
|
||||
64, 32, 256, 256, 5942.124095
|
||||
8, 256, 16, 256, 799.867271
|
||||
16, 256, 16, 256, 1584.624888
|
||||
32, 256, 16, 256, 1949.422338
|
||||
8, 256, 32, 256, 1389.417474
|
||||
16, 256, 32, 256, 2668.344493
|
||||
32, 256, 32, 256, 3234.162120
|
||||
8, 256, 64, 256, 2150.925128
|
||||
16, 256, 64, 256, 4012.488132
|
||||
32, 256, 64, 256, 5154.785521
|
||||
|
||||
786432, 40.271620
|
||||
12582912, 433.611792
|
||||
63700992, 905.374321
|
||||
201326592, 1114.979152
|
||||
491520000, 1180.241898
|
||||
|
||||
|
||||
Communications
|
||||
|
||||
Packet bytes, direction, GB/s per node
|
||||
4718592, 1, 245.026198
|
||||
4718592, 2, 251.180996
|
||||
4718592, 3, 361.110977
|
||||
4718592, 5, 247.898447
|
||||
4718592, 6, 249.867523
|
||||
4718592, 7, 359.033061
|
||||
15925248, 1, 255.030946
|
||||
15925248, 2, 264.453890
|
||||
15925248, 3, 392.949183
|
||||
15925248, 5, 256.040644
|
||||
15925248, 6, 264.681896
|
||||
15925248, 7, 392.102622
|
||||
37748736, 1, 258.823333
|
||||
37748736, 2, 268.181577
|
||||
37748736, 3, 401.478191
|
||||
37748736, 5, 258.995363
|
||||
37748736, 6, 268.206586
|
||||
37748736, 7, 400.397611
|
||||
|
||||
|
||||
Per node summary table
|
||||
GEMM
|
||||
|
||||
M, N, K, BATCH, GF/s per rank fp64
|
||||
16, 8, 16, 4096, 693.316363
|
||||
16, 12, 16, 4096, 657.277058
|
||||
16, 16, 16, 4096, 711.992616
|
||||
32, 8, 32, 4096, 821.084324
|
||||
32, 12, 32, 4096, 1279.852719
|
||||
32, 16, 32, 4096, 2647.096674
|
||||
64, 8, 64, 4096, 2630.192325
|
||||
64, 12, 64, 4096, 3338.071321
|
||||
64, 16, 64, 4096, 3950.899281
|
||||
16, 8, 256, 4096, 1638.362501
|
||||
16, 12, 256, 4096, 2377.502234
|
||||
16, 16, 256, 4096, 3048.328833
|
||||
32, 8, 256, 4096, 2917.384276
|
||||
32, 12, 256, 4096, 4103.085151
|
||||
32, 16, 256, 4096, 5102.971860
|
||||
64, 8, 256, 4096, 3222.258206
|
||||
64, 12, 256, 4096, 4619.456391
|
||||
64, 16, 256, 4096, 5847.916650
|
||||
8, 256, 16, 4096, 1728.073337
|
||||
12, 256, 16, 4096, 2356.653970
|
||||
16, 256, 16, 4096, 2676.876038
|
||||
8, 256, 32, 4096, 2611.531990
|
||||
12, 256, 32, 4096, 3451.573106
|
||||
16, 256, 32, 4096, 3966.915301
|
||||
8, 256, 64, 4096, 3436.248737
|
||||
12, 256, 64, 4096, 4539.497945
|
||||
16, 256, 64, 4096, 5307.992323
|
||||
|
||||
|
||||
|
||||
GEMM
|
||||
|
||||
M, N, K, BATCH, GF/s per rank fp32
|
||||
16, 8, 16, 4096, 499.017445
|
||||
16, 12, 16, 4096, 731.543385
|
||||
16, 16, 16, 4096, 958.800786
|
||||
32, 8, 32, 4096, 1549.813550
|
||||
32, 12, 32, 4096, 2147.907502
|
||||
32, 16, 32, 4096, 2601.698596
|
||||
64, 8, 64, 4096, 3785.446233
|
||||
64, 12, 64, 4096, 5116.694843
|
||||
64, 16, 64, 4096, 6109.345016
|
||||
16, 8, 256, 4096, 1206.627737
|
||||
16, 12, 256, 4096, 1809.699599
|
||||
16, 16, 256, 4096, 2412.014053
|
||||
32, 8, 256, 4096, 2406.114488
|
||||
32, 12, 256, 4096, 3605.531907
|
||||
32, 16, 256, 4096, 4798.444037
|
||||
64, 8, 256, 4096, 4688.711196
|
||||
64, 12, 256, 4096, 6990.696301
|
||||
64, 16, 256, 4096, 9214.749925
|
||||
8, 256, 16, 4096, 2596.307289
|
||||
12, 256, 16, 4096, 3439.892562
|
||||
16, 256, 16, 4096, 3907.201036
|
||||
8, 256, 32, 4096, 3012.752067
|
||||
12, 256, 32, 4096, 3904.217583
|
||||
16, 256, 32, 4096, 4599.047092
|
||||
8, 256, 64, 4096, 3721.999042
|
||||
12, 256, 64, 4096, 5098.573927
|
||||
16, 256, 64, 4096, 6159.080872
|
||||
|
||||
L , Wilson, DWF4, Staggered, GF/s per node
|
||||
|
||||
8 , 155, 1386, 50
|
||||
12 , 694, 4208, 230
|
||||
16 , 1841, 6675, 609
|
||||
24 , 3934, 8573, 1641
|
||||
32 , 5083, 9771, 3086
|
||||
|
||||
|
||||
|
@@ -1,4 +1,3 @@
|
||||
CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-`
|
||||
../../configure --enable-comms=mpi-auto \
|
||||
--with-lime=$CLIME \
|
||||
--enable-unified=no \
|
||||
@@ -9,12 +8,13 @@ CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-`
|
||||
--disable-gparity \
|
||||
--disable-fermion-reps \
|
||||
--enable-simd=GPU \
|
||||
--with-gmp=$OLCF_GMP_ROOT \
|
||||
--with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \
|
||||
--with-gmp=$GMP \
|
||||
--with-mpfr=$MPFR \
|
||||
--with-openssl=$OPENSSL \
|
||||
--disable-fermion-reps \
|
||||
CXX=hipcc MPICXX=mpicxx \
|
||||
CXXFLAGS="-fPIC -I${ROCM_PATH}/include/ -I${MPICH_DIR}/include -L/lib64 " \
|
||||
LDFLAGS="-L/lib64 -L${ROCM_PATH}/lib -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lhipblas -lrocblas -lhipfft"
|
||||
CXXFLAGS="-fPIC -I${ROCM_PATH}/include/ -I${MPICH_DIR}/include " \
|
||||
LDFLAGS="-L${ROCM_PATH}/lib -L${MPICH_DIR}/lib -lmpi -lmpi_gtl_hsa -lhipblas -lrocblas -lhipfft -lamdhip64"
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,16 +1,14 @@
|
||||
|
||||
echo spack
|
||||
. /autofs/nccs-svm1_home1/paboyle/Crusher/Grid/spack/share/spack/setup-env.sh
|
||||
. /autofs/nccs-svm1_home1/paboyle/spack/share/spack/setup-env.sh
|
||||
|
||||
module load amd/7.0.2
|
||||
module load cray-fftw
|
||||
module load craype-accel-amd-gfx90a
|
||||
mkdir $HOME/LD_PATH
|
||||
ln -s /opt/rocm-6.4.2/lib/libamdhip* $HOME/LD_PATH
|
||||
export CLIME=`spack find --paths c-lime | grep ^c-lime | awk '{print $2}' `
|
||||
export MPFR=`spack find --paths mpfr | grep ^mpfr | awk '{print $2}' `
|
||||
export OPENSSL=`spack find --paths openssl | grep openssl | awk '{print $2}' `
|
||||
export GMP=`spack find --paths gmp | grep ^gmp | awk '{print $2}' `
|
||||
|
||||
#Ugly hacks to get down level software working on current system
|
||||
export LD_LIBRARY_PATH=/opt/cray/libfabric/1.20.1/lib64/:$LD_LIBRARY_PATH
|
||||
export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH
|
||||
#export LD_LIBRARY_PATH=`pwd`/:$LD_LIBRARY_PATH
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/LD_PATH/
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm-7.0.2/lib
|
||||
module load cce/21.0.0
|
||||
module load cpe/26.03
|
||||
module load rocm/7.0.2
|
||||
export LD_LIBRARY_PATH=$CRAY_LD_LIBRARY_PATH:$LD_LIBRARY_PATH
|
||||
export LD_LIBRARY_PATH=/opt/rocm-7.0.2/lib/llvm/lib/:$LD_LIBRARY_PATH
|
||||
|
||||
Reference in New Issue
Block a user