Compare commits

...

3 Commits

SHA1        Message                               Date
ce0d4d9457  Grid comms warmup sequence            2023-02-03 20:59:20 +00:00
cc4c0255bc  Grid Tursa 32 node MPI layout change  2023-02-03 20:59:02 +00:00
bdfb94bf11  Grid overflow fix                     2023-02-03 20:58:41 +00:00
2 changed files with 25 additions and 5 deletions

View File

@@ -168,6 +168,7 @@ class Benchmark
       }
       double dbytes;
+#define NWARMUP 50
       for (int dir = 0; dir < 8; dir++)
       {
@@ -176,6 +177,24 @@ class Benchmark
         {
           std::vector<double> times(Nloop);
+          for (int i = 0; i < NWARMUP; i++)
+          {
+            int xmit_to_rank;
+            int recv_from_rank;
+            if (dir == mu)
+            {
+              int comm_proc = 1;
+              Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
+            }
+            else
+            {
+              int comm_proc = mpi_layout[mu] - 1;
+              Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
+            }
+            Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
+                                (void *)&rbuf[dir][0], recv_from_rank, bytes);
+          }
           for (int i = 0; i < Nloop; i++)
           {
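
The hunks above pull NWARMUP up to the communications benchmark and run NWARMUP untimed rounds of ShiftedRanks/SendToRecvFrom before the timed Nloop, presumably so that one-off costs (connection setup, buffer registration, device warm-up) stay out of the measured times. Below is a minimal, self-contained sketch of the same warmup-then-measure pattern; the exchange() workload is a hypothetical stand-in for the MPI traffic, not Grid's API:

#include <chrono>
#include <cmath>
#include <iostream>
#include <vector>

// Stand-in workload (assumption): any repeatable operation whose first few
// executions are atypically slow; here it just touches a large buffer.
static void exchange(std::vector<double> &buf)
{
  for (auto &x : buf)
    x = std::sqrt(x + 1.0);
}

int main(void)
{
  constexpr int NWARMUP = 50, Nloop = 200;
  std::vector<double> buf(1 << 20, 1.0);
  std::vector<double> times(Nloop);

  for (int i = 0; i < NWARMUP; i++) // untimed: absorbs one-off setup costs
    exchange(buf);

  for (int i = 0; i < Nloop; i++) // timed: steady-state iterations only
  {
    auto start = std::chrono::steady_clock::now();
    exchange(buf);
    auto stop = std::chrono::steady_clock::now();
    times[i] = std::chrono::duration<double>(stop - start).count();
  }
  std::cout << "sample timing: " << times[Nloop / 2] << " s\n";
  return 0;
}
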
@@ -249,7 +268,6 @@ class Benchmark
     uint64_t NN;
     uint64_t lmax = 64;
 #define NLOOP (200 * lmax * lmax * lmax / lat / lat / lat)
-#define NWARMUP 50
     GridSerialRNG sRNG;
     sRNG.SeedFixedIntegers(std::vector<int>({45, 12, 81, 9}));
@@ -258,7 +276,8 @@ class Benchmark
       Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
                             lat * mpi_layout[3]});
-      uint64_t vol = latt_size[0] * latt_size[1] * latt_size[2] * latt_size[3];
+      double vol =
+          static_cast<double>(latt_size[0]) * latt_size[1] * latt_size[2] * latt_size[3];
       GridCartesian Grid(latt_size, simd_layout, mpi_layout);
@@ -328,7 +347,8 @@ class Benchmark
       Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
                             lat * mpi_layout[3]});
-      int64_t vol = latt_size[0] * latt_size[1] * latt_size[2] * latt_size[3];
+      double vol =
+          static_cast<double>(latt_size[0]) * latt_size[1] * latt_size[2] * latt_size[3];
       GridCartesian Grid(latt_size, simd_layout, mpi_layout);
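
Both hunks replace an integer volume with one computed in double. Assuming the Coordinate entries are plain 32-bit ints, the product latt_size[0] * latt_size[1] * latt_size[2] * latt_size[3] is evaluated in int arithmetic and can overflow before the widening assignment to uint64_t/int64_t ever happens; casting the first factor to double forces every multiplication into floating point, which matches the "Grid overflow fix" commit. A small self-contained illustration of the failure mode and the fix:

#include <cstdint>
#include <iostream>

int main(void)
{
  // Assumption: the lattice extents are 32-bit ints, e.g. a 256^4 global
  // lattice whose volume (4,294,967,296) exceeds INT32_MAX.
  int latt_size[4] = {256, 256, 256, 256};

  // Old form: the product is evaluated in 32-bit integer arithmetic and
  // wraps before being widened. Shown with uint32_t so the wrap-around is
  // well defined (with signed int it would be undefined behaviour).
  uint32_t wrapped = static_cast<uint32_t>(latt_size[0]) * latt_size[1] *
                     latt_size[2] * latt_size[3];

  // Fixed form, as in the hunks above: promoting the first factor to
  // double makes the whole product floating point.
  double vol =
      static_cast<double>(latt_size[0]) * latt_size[1] * latt_size[2] * latt_size[3];

  std::cout << "32-bit product: " << wrapped << ", double product: " << vol << "\n";
  return 0;
}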

View File

@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 # shellcheck disable=SC1091,SC2050,SC2170
-#SBATCH -J benchmark-grid-16
+#SBATCH -J benchmark-grid-32
 #SBATCH -t 1:00:00
 #SBATCH --nodes=32
 #SBATCH --ntasks=128
@@ -48,7 +48,7 @@ mpirun -np "${SLURM_NTASKS}" -x LD_LIBRARY_PATH --bind-to none \
     "${env_dir}/gpu-mpi-wrapper.sh" \
     "${app}" \
     --json-out "${job_info_dir}/result.json" \
-    --mpi 2.4.4.4 \
+    --mpi 1.4.4.8 \
     --accelerator-threads 8 \
     --threads 8 \
     --shm 2048 &> "${job_info_dir}/log"
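
The job is renamed for 32 nodes and the process grid passed to Grid via --mpi changes from 2.4.4.4 to 1.4.4.8. Both decompositions factorise to 128 ranks, matching --ntasks=128 (32 nodes at 4 tasks per node), so the change redistributes ranks across the lattice dimensions rather than altering the rank count; the product of the --mpi entries must equal the number of MPI ranks. A quick sanity check one could run, using a hypothetical layout_product() helper that is not part of the benchmark:

#include <iostream>
#include <sstream>
#include <string>

// Multiply the entries of a dot-separated --mpi layout string.
static long layout_product(const std::string &layout)
{
  std::istringstream in(layout);
  std::string tok;
  long prod = 1;
  while (std::getline(in, tok, '.'))
    prod *= std::stol(tok);
  return prod;
}

int main(void)
{
  const long ntasks = 128; // --ntasks=128, i.e. 32 nodes x 4 ranks per node
  for (const std::string layout : {"2.4.4.4", "1.4.4.8"})
    std::cout << "--mpi " << layout << " -> " << layout_product(layout) << " ranks ("
              << (layout_product(layout) == ntasks ? "ok" : "mismatch") << ")\n";
  return 0;
}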