/*
Copyright © 2015 Peter Boyle <paboyle@ph.ed.ac.uk>
Copyright © 2022 Antonin Portelli <antonin.portelli@me.com>
Copyright © 2022 Simon Buerger <simon.buerger@rwth-aachen.de>

This is a fork of Benchmark_ITT.cpp from Grid

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include "Common.hpp"
#include "json.hpp"
#include <Grid/Grid.h>

using namespace Grid;

int NN_global;

nlohmann::json json_results;

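// Helper accumulating simple statistics over a vector of timings:
// mean, standard error of the mean, minimum and maximum.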
struct time_statistics
{
  double mean;
  double err;
  double min;
  double max;

  void statistics(std::vector<double> v)
  {
    double sum = std::accumulate(v.begin(), v.end(), 0.0);
    mean = sum / v.size();

    std::vector<double> diff(v.size());
    std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; });
    double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
    err = std::sqrt(sq_sum / (v.size() * (v.size() - 1)));

    auto result = std::minmax_element(v.begin(), v.end());
    min = *result.first;
    max = *result.second;
  }
};
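
// Kernel/communication configuration for one benchmark case (see the
// Cases arrays in DWF() and Staggered() below).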
struct controls
{
  int Opt;
  int CommsOverlap;
  Grid::CartesianCommunicator::CommunicatorPolicy_t CommsAsynch;
};

class Benchmark
{
public:
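  // Print the default process/SIMD decomposition (threads, MPI and shared-memory
  // layout, vector widths) and record it under "geometry" in the JSON output.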
  static void Decomposition(void)
  {
    nlohmann::json tmp;
    int threads = GridThread::GetThreads();
    Grid::Coordinate mpi = GridDefaultMpi();
    assert(mpi.size() == 4);
    Coordinate local({8, 8, 8, 8});
    Coordinate latt4(
        {local[0] * mpi[0], local[1] * mpi[1], local[2] * mpi[2], local[3] * mpi[3]});
    GridCartesian *TmpGrid = SpaceTimeGrid::makeFourDimGrid(
        latt4, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
    Grid::Coordinate shm;
    GlobalSharedMemory::GetShmDims(mpi, shm);

    uint64_t NP = TmpGrid->RankCount();
    uint64_t NN = TmpGrid->NodeCount();
    NN_global = NN;
    uint64_t SHM = NP / NN;

    grid_big_sep();
    std::cout << GridLogMessage << "Grid Default Decomposition patterns\n";
    grid_small_sep();
    std::cout << GridLogMessage << "* OpenMP threads : " << GridThread::GetThreads()
              << std::endl;

    std::cout << GridLogMessage << "* MPI layout : " << GridCmdVectorIntToString(mpi)
              << std::endl;
    std::cout << GridLogMessage << "* Shm layout : " << GridCmdVectorIntToString(shm)
              << std::endl;

    std::cout << GridLogMessage << "* vReal : " << sizeof(vReal) * 8 << "bits ; "
              << GridCmdVectorIntToString(GridDefaultSimd(4, vReal::Nsimd()))
              << std::endl;
    std::cout << GridLogMessage << "* vRealF : " << sizeof(vRealF) * 8
              << "bits ; "
              << GridCmdVectorIntToString(GridDefaultSimd(4, vRealF::Nsimd()))
              << std::endl;
    std::cout << GridLogMessage << "* vRealD : " << sizeof(vRealD) * 8
              << "bits ; "
              << GridCmdVectorIntToString(GridDefaultSimd(4, vRealD::Nsimd()))
              << std::endl;
    std::cout << GridLogMessage << "* vComplex : " << sizeof(vComplex) * 8
              << "bits ; "
              << GridCmdVectorIntToString(GridDefaultSimd(4, vComplex::Nsimd()))
              << std::endl;
    std::cout << GridLogMessage << "* vComplexF : " << sizeof(vComplexF) * 8
              << "bits ; "
              << GridCmdVectorIntToString(GridDefaultSimd(4, vComplexF::Nsimd()))
              << std::endl;
    std::cout << GridLogMessage << "* vComplexD : " << sizeof(vComplexD) * 8
              << "bits ; "
              << GridCmdVectorIntToString(GridDefaultSimd(4, vComplexD::Nsimd()))
              << std::endl;
    std::cout << GridLogMessage << "* ranks : " << NP << std::endl;
    std::cout << GridLogMessage << "* nodes : " << NN << std::endl;
    std::cout << GridLogMessage << "* ranks/node : " << SHM << std::endl;

    for (unsigned int i = 0; i < mpi.size(); ++i)
    {
      tmp["mpi"].push_back(mpi[i]);
      tmp["shm"].push_back(shm[i]);
    }
    tmp["ranks"] = NP;
    tmp["nodes"] = NN;
    json_results["geometry"] = tmp;
  }

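  // Point-to-point halo-exchange benchmark: for each distributed direction,
  // time SendToRecvFrom on device buffers and report a per-node bidirectional
  // bandwidth, tagging whether the exchange stays inside a shared-memory domain.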
  static void Comms(void)
  {
    int Nloop = 200;
    int nmu = 0;
    int maxlat = 48;

    Coordinate simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd());
    Coordinate mpi_layout = GridDefaultMpi();
    Coordinate shm_layout;
    GlobalSharedMemory::GetShmDims(mpi_layout, shm_layout);

    for (int mu = 0; mu < Nd; mu++)
      if (mpi_layout[mu] > 1)
        nmu++;

    std::vector<double> t_time(Nloop);
    time_statistics timestat;

    std::cout << GridLogMessage << "Benchmarking threaded STENCIL halo exchange in "
              << nmu << " dimensions" << std::endl;
    grid_small_sep();
    grid_printf("%5s %5s %7s %15s %15s %15s %15s %15s\n", "L", "dir", "shm",
                "payload (B)", "time (usec)", "rate (GB/s/node)", "std dev", "max");

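    // One pass per lattice size: allocate send/receive device buffers of
    // lat^3 * Ls * sizeof(HalfSpinColourVectorD) bytes for each of the 8 directions.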
    for (int lat = 16; lat <= maxlat; lat += 8)
    {
      int Ls = 12;

      Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
                            lat * mpi_layout[3]});

      GridCartesian Grid(latt_size, simd_layout, mpi_layout);
      RealD Nrank = Grid._Nprocessors;
      RealD Nnode = Grid.NodeCount();
      RealD ppn = Nrank / Nnode;

      std::vector<HalfSpinColourVectorD *> xbuf(8);
      std::vector<HalfSpinColourVectorD *> rbuf(8);
      uint64_t bytes = lat * lat * lat * Ls * sizeof(HalfSpinColourVectorD);
      for (int d = 0; d < 8; d++)
      {
        xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
        rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
      }

      double dbytes;
#define NWARMUP 50

      for (int dir = 0; dir < 8; dir++)
      {
        int mu = dir % 4;
        if (mpi_layout[mu] == 1) // skip directions that are not distributed
          continue;
        bool is_shm = mpi_layout[mu] == shm_layout[mu];
        bool is_partial_shm = !is_shm && shm_layout[mu] != 1;

        std::vector<double> times(Nloop);
        for (int i = 0; i < NWARMUP; i++)
        {
          int xmit_to_rank;
          int recv_from_rank;

          if (dir == mu)
          {
            int comm_proc = 1;
            Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
          }
          else
          {
            int comm_proc = mpi_layout[mu] - 1;
            Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
          }
          Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, (void *)&rbuf[dir][0],
                              recv_from_rank, bytes);
        }
        for (int i = 0; i < Nloop; i++)
        {
          dbytes = 0;
          double start = usecond();
          int xmit_to_rank;
          int recv_from_rank;

          if (dir == mu)
          {
            int comm_proc = 1;
            Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
          }
          else
          {
            int comm_proc = mpi_layout[mu] - 1;
            Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
          }
          Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, (void *)&rbuf[dir][0],
                              recv_from_rank, bytes);
          dbytes += bytes;

          double stop = usecond();
          t_time[i] = stop - start; // microseconds
        }
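        // Convert the mean exchange time into a per-node bidirectional rate:
        // scale the payload by ranks-per-node, double it for send+receive, and
        // express it in GB/s; the error and max follow from the timing statistics.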
        timestat.statistics(t_time);

        dbytes = dbytes * ppn;
        double bidibytes = 2. * dbytes;
        double rate = bidibytes / (timestat.mean / 1.e6) / 1024. / 1024. / 1024.;
        double rate_err = rate * timestat.err / timestat.mean;
        double rate_max = rate * timestat.mean / timestat.min;
        grid_printf("%5d %5d %7s %15d %15.2f %15.2f %15.1f %15.2f\n", lat, dir,
                    is_shm           ? "yes"
                    : is_partial_shm ? "partial"
                                     : "no",
                    bytes, timestat.mean, rate, rate_err, rate_max);
        nlohmann::json tmp;
        nlohmann::json tmp_rate;
        tmp["L"] = lat;
        tmp["dir"] = dir;
        tmp["shared_mem"] = is_shm;
        tmp["partial_shared_mem"] = is_partial_shm;
        tmp["bytes"] = bytes;
        tmp["time_usec"] = timestat.mean;
        tmp_rate["mean"] = rate;
        tmp_rate["error"] = rate_err;
        tmp_rate["max"] = rate_max;
        tmp["rate_GBps"] = tmp_rate;
        json_results["comms"].push_back(tmp);
      }
      for (int d = 0; d < 8; d++)
      {
        acceleratorFreeDevice(xbuf[d]);
        acceleratorFreeDevice(rbuf[d]);
      }
    }
    return;
  }

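  // Streaming memory benchmark: repeatedly evaluate z = a*x - y on vector
  // lattices of increasing size and report bandwidth and Gflop/s per node.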
  static void Memory(void)
  {
    const int Nvec = 8;
    typedef Lattice<iVector<vReal, Nvec>> LatticeVec;
    typedef iVector<vReal, Nvec> Vec;

    Coordinate simd_layout = GridDefaultSimd(Nd, vReal::Nsimd());
    Coordinate mpi_layout = GridDefaultMpi();

    std::cout << GridLogMessage << "Benchmarking a*x - y bandwidth" << std::endl;
    grid_small_sep();
    grid_printf("%5s %15s %15s %15s %15s\n", "L", "size (MB/node)", "time (usec)",
                "GB/s/node", "Gflop/s/node");

    uint64_t NN;
    uint64_t lmax = 64;
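// NLOOP scales the iteration count inversely with lat^3 so that each lattice
// size performs a comparable amount of total work.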
#define NLOOP (200 * lmax * lmax * lmax / lat / lat / lat)

    GridSerialRNG sRNG;
    sRNG.SeedFixedIntegers(std::vector<int>({45, 12, 81, 9}));
    for (int lat = 8; lat <= lmax; lat += 8)
    {
      Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
                            lat * mpi_layout[3]});
      double vol =
          static_cast<double>(latt_size[0]) * latt_size[1] * latt_size[2] * latt_size[3];

      GridCartesian Grid(latt_size, simd_layout, mpi_layout);

      NN = Grid.NodeCount();

      Vec rn;
      random(sRNG, rn);

      LatticeVec z(&Grid);
      z = Zero();
      LatticeVec x(&Grid);
      x = Zero();
      LatticeVec y(&Grid);
      y = Zero();
      double a = 2.0;

      uint64_t Nloop = NLOOP;

      for (int i = 0; i < NWARMUP; i++)
      {
        z = a * x - y;
      }
      double start = usecond();
      for (int i = 0; i < Nloop; i++)
      {
        z = a * x - y;
      }
      double stop = usecond();
      double time = (stop - start) / Nloop / 1.e6;

      double flops = vol * Nvec * 2 / 1.e9; // mul,add
      double bytes = 3.0 * vol * Nvec * sizeof(Real) / 1024. / 1024.;

      grid_printf("%5d %15.2f %15.2f %15.2f %15.2f\n", lat, bytes / NN, time * 1.e6,
                  bytes / time / NN / 1024., flops / time / NN);

      nlohmann::json tmp;
      tmp["L"] = lat;
      tmp["size_MB"] = bytes / NN;
      tmp["GBps"] = bytes / time / NN / 1024.;
      tmp["GFlops"] = flops / time / NN;
      json_results["axpy"].push_back(tmp);
    }
  };

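  // SU(4) matrix-matrix multiply benchmark: same streaming pattern as Memory()
  // but with higher arithmetic intensity per site.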
  static void SU4(void)
  {
    const int Nc4 = 4;
    typedef Lattice<iMatrix<vComplexF, Nc4>> LatticeSU4;

    Coordinate simd_layout = GridDefaultSimd(Nd, vComplexF::Nsimd());
    Coordinate mpi_layout = GridDefaultMpi();

    std::cout << GridLogMessage << "Benchmarking z = x*y SU(4) bandwidth" << std::endl;
    grid_small_sep();
    grid_printf("%5s %15s %15s %15s %15s\n", "L", "size (MB/node)", "time (usec)",
                "GB/s/node", "Gflop/s/node");

    uint64_t NN;

    uint64_t lmax = 48;

    GridSerialRNG sRNG;
    sRNG.SeedFixedIntegers(std::vector<int>({45, 12, 81, 9}));
    for (int lat = 8; lat <= lmax; lat += 8)
    {
      Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
                            lat * mpi_layout[3]});
      double vol =
          static_cast<double>(latt_size[0]) * latt_size[1] * latt_size[2] * latt_size[3];

      GridCartesian Grid(latt_size, simd_layout, mpi_layout);

      NN = Grid.NodeCount();

      LatticeSU4 z(&Grid);
      z = Zero();
      LatticeSU4 x(&Grid);
      x = Zero();
      LatticeSU4 y(&Grid);
      y = Zero();

      uint64_t Nloop = NLOOP;

      for (int i = 0; i < NWARMUP; i++)
      {
        z = x * y;
      }
      double start = usecond();
      for (int i = 0; i < Nloop; i++)
      {
        z = x * y;
      }
      double stop = usecond();
      double time = (stop - start) / Nloop / 1.e6;

      double flops = vol * Nc4 * Nc4 * (6 + (Nc4 - 1) * 8) / 1.e9; // mul,add
      double bytes = 3.0 * vol * Nc4 * Nc4 * 2 * sizeof(RealF) / 1024. / 1024.;
      grid_printf("%5d %15.2f %15.2f %15.2f %15.2f\n", lat, bytes / NN, time * 1.e6,
                  bytes / time / NN / 1024., flops / time / NN);

      nlohmann::json tmp;
      tmp["L"] = lat;
      tmp["size_MB"] = bytes / NN;
      tmp["GBps"] = bytes / time / NN / 1024.;
      tmp["GFlops"] = flops / time / NN;
      json_results["SU4"].push_back(tmp);
    }
  };

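  // Domain-wall Dslash benchmark on an L^4 local volume: times the even-odd
  // hopping term DhopEO in single precision for four communication policies
  // and returns the best Gflop/s figure.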
  static double DWF(int Ls, int L)
  {
    RealD mass = 0.1;
    RealD M5 = 1.8;

    double gflops;
    double gflops_best = 0;
    double gflops_worst = 0;
    std::vector<double> gflops_all;

    ///////////////////////////////////////////////////////
    // Set/Get the layout & grid size
    ///////////////////////////////////////////////////////
    int threads = GridThread::GetThreads();
    Coordinate mpi = GridDefaultMpi();
    assert(mpi.size() == 4);
    Coordinate local({L, L, L, L});
    Coordinate latt4(
        {local[0] * mpi[0], local[1] * mpi[1], local[2] * mpi[2], local[3] * mpi[3]});

    GridCartesian *TmpGrid = SpaceTimeGrid::makeFourDimGrid(
        latt4, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
    uint64_t NP = TmpGrid->RankCount();
    uint64_t NN = TmpGrid->NodeCount();
    NN_global = NN;
    uint64_t SHM = NP / NN;

    ///////// Welcome message ////////////
    grid_big_sep();
    std::cout << GridLogMessage << "Benchmark DWF on " << L << "^4 local volume "
              << std::endl;
    std::cout << GridLogMessage << "* Nc : " << Nc << std::endl;
    std::cout << GridLogMessage
              << "* Global volume : " << GridCmdVectorIntToString(latt4) << std::endl;
    std::cout << GridLogMessage << "* Ls : " << Ls << std::endl;
    std::cout << GridLogMessage << "* ranks : " << NP << std::endl;
    std::cout << GridLogMessage << "* nodes : " << NN << std::endl;
    std::cout << GridLogMessage << "* ranks/node : " << SHM << std::endl;
    std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi)
              << std::endl;
    std::cout << GridLogMessage << "* Using " << threads << " threads" << std::endl;
    grid_big_sep();

    ///////// Lattice Init ////////////
    GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(
        latt4, GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
    GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
    GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
    GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);

    ///////// RNG Init ////////////
    std::vector<int> seeds4({1, 2, 3, 4});
    std::vector<int> seeds5({5, 6, 7, 8});
    GridParallelRNG RNG4(UGrid);
    RNG4.SeedFixedIntegers(seeds4);
    GridParallelRNG RNG5(FGrid);
    RNG5.SeedFixedIntegers(seeds5);
    std::cout << GridLogMessage << "Initialised RNGs" << std::endl;

    typedef DomainWallFermionF Action;
    typedef typename Action::FermionField Fermion;
    typedef LatticeGaugeFieldF Gauge;

    ///////// Source preparation ////////////
    Gauge Umu(UGrid);
    SU<Nc>::HotConfiguration(RNG4, Umu);
    Fermion src(FGrid);
    random(RNG5, src);
    Fermion src_e(FrbGrid);
    Fermion src_o(FrbGrid);
    Fermion r_e(FrbGrid);
    Fermion r_o(FrbGrid);
    Fermion r_eo(FGrid);
    Action Dw(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5);

    {
      pickCheckerboard(Even, src_e, src);
      pickCheckerboard(Odd, src_o, src);

      const int num_cases = 4;
      std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S ");

      controls Cases[] = {
          {WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsThenCompute,
           CartesianCommunicator::CommunicatorPolicyConcurrent},
          {WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsAndCompute,
           CartesianCommunicator::CommunicatorPolicyConcurrent},
          {WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsThenCompute,
           CartesianCommunicator::CommunicatorPolicySequential},
          {WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsAndCompute,
           CartesianCommunicator::CommunicatorPolicySequential}};

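      // The fmt legend appears to encode each case as kernel / comms-compute
      // overlap / communicator policy: G = generic kernel, S or O = sequential
      // or overlapped comms-compute, C or S = concurrent or sequential policy.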
      for (int c = 0; c < num_cases; c++)
      {
        WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
        WilsonKernelsStatic::Opt = Cases[c].Opt;
        CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);

        grid_small_sep();
        if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric)
          std::cout << GridLogMessage << "* Using GENERIC Nc WilsonKernels" << std::endl;
        if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute)
          std::cout << GridLogMessage << "* Using Overlapped Comms/Compute" << std::endl;
        if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute)
          std::cout << GridLogMessage << "* Using sequential Comms/Compute" << std::endl;
        std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
        grid_small_sep();

        int nwarm = 10;
        double t0 = usecond();
        FGrid->Barrier();
        for (int i = 0; i < nwarm; i++)
        {
          Dw.DhopEO(src_o, r_e, DaggerNo);
        }
        FGrid->Barrier();
        double t1 = usecond();
        uint64_t ncall = 500;

        FGrid->Broadcast(0, &ncall, sizeof(ncall));

        Dw.ZeroCounters();

        time_statistics timestat;
        std::vector<double> t_time(ncall);
        for (uint64_t i = 0; i < ncall; i++)
        {
          t0 = usecond();
          Dw.DhopEO(src_o, r_e, DaggerNo);
          t1 = usecond();
          t_time[i] = t1 - t0;
        }
        FGrid->Barrier();

        double volume = Ls;
        for (int mu = 0; mu < Nd; mu++)
          volume = volume * latt4[mu];

        // Nc=3 gives
        // 1344= 3*(2*8+6)*2*8 + 8*3*2*2 + 3*4*2*8
        // 1344 = Nc* (6+(Nc-1)*8)*2*Nd + Nd*Nc*2*2 + Nd*Nc*Ns*2
        // double flops=(1344.0*volume)/2;
#if 0
        double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + Nd*Nc*Ns + Nd*Nc*Ns*2;
#else
        double fps =
            Nc * (6 + (Nc - 1) * 8) * Ns * Nd + 2 * Nd * Nc * Ns + 2 * Nd * Nc * Ns * 2;
#endif
        double flops = (fps * volume) / 2.;
        double gf_hi, gf_lo, gf_err;

        timestat.statistics(t_time);
        gf_hi = flops / timestat.min / 1000.;
        gf_lo = flops / timestat.max / 1000.;
        gf_err = flops / timestat.min * timestat.err / timestat.mean / 1000.;

        gflops = flops / timestat.mean / 1000.;
        gflops_all.push_back(gflops);
        if (gflops_best == 0)
          gflops_best = gflops;
        if (gflops_worst == 0)
          gflops_worst = gflops;
        if (gflops > gflops_best)
          gflops_best = gflops;
        if (gflops < gflops_worst)
          gflops_worst = gflops;

        std::cout << GridLogMessage << "Deo FlopsPerSite is " << fps << std::endl;
        std::cout << GridLogMessage << std::fixed << std::setprecision(1)
                  << "Deo Gflop/s = " << gflops << " (" << gf_err << ") " << gf_lo
                  << "-" << gf_hi << std::endl;
        std::cout << GridLogMessage << std::fixed << std::setprecision(1)
                  << "Deo Gflop/s per rank " << gflops / NP << std::endl;
        std::cout << GridLogMessage << std::fixed << std::setprecision(1)
                  << "Deo Gflop/s per node " << gflops / NN << std::endl;
      }

      grid_small_sep();
      std::cout << GridLogMessage << L << "^4 x " << Ls
                << " Deo Best Gflop/s = " << gflops_best << " ; "
                << gflops_best / NN << " per node " << std::endl;
      std::cout << GridLogMessage << L << "^4 x " << Ls
                << " Deo Worst Gflop/s = " << gflops_worst << " ; "
                << gflops_worst / NN << " per node " << std::endl;
      std::cout << GridLogMessage << fmt << std::endl;
      std::cout << GridLogMessage;

      for (int i = 0; i < gflops_all.size(); i++)
      {
        std::cout << gflops_all[i] / NN << " ; ";
      }
      std::cout << std::endl;
    }
    return gflops_best;
  }

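  // Improved staggered Dslash benchmark: same structure as DWF() but for the
  // ImprovedStaggeredFermionF operator, using a nominal 1146 flops per site
  // (halved for the even-odd operator).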
  static double Staggered(int L)
  {
    double gflops;
    double gflops_best = 0;
    double gflops_worst = 0;
    std::vector<double> gflops_all;

    ///////////////////////////////////////////////////////
    // Set/Get the layout & grid size
    ///////////////////////////////////////////////////////
    int threads = GridThread::GetThreads();
    Coordinate mpi = GridDefaultMpi();
    assert(mpi.size() == 4);
    Coordinate local({L, L, L, L});
    Coordinate latt4(
        {local[0] * mpi[0], local[1] * mpi[1], local[2] * mpi[2], local[3] * mpi[3]});

    GridCartesian *TmpGrid = SpaceTimeGrid::makeFourDimGrid(
        latt4, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
    uint64_t NP = TmpGrid->RankCount();
    uint64_t NN = TmpGrid->NodeCount();
    NN_global = NN;
    uint64_t SHM = NP / NN;

    ///////// Welcome message ////////////
    grid_big_sep();
    std::cout << GridLogMessage << "Benchmark ImprovedStaggered on " << L
              << "^4 local volume " << std::endl;
    std::cout << GridLogMessage
              << "* Global volume : " << GridCmdVectorIntToString(latt4) << std::endl;
    std::cout << GridLogMessage << "* ranks : " << NP << std::endl;
    std::cout << GridLogMessage << "* nodes : " << NN << std::endl;
    std::cout << GridLogMessage << "* ranks/node : " << SHM << std::endl;
    std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi)
              << std::endl;
    std::cout << GridLogMessage << "* Using " << threads << " threads" << std::endl;
    grid_big_sep();

    ///////// Lattice Init ////////////
    GridCartesian *FGrid = SpaceTimeGrid::makeFourDimGrid(
        latt4, GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
    GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);

    ///////// RNG Init ////////////
    std::vector<int> seeds4({1, 2, 3, 4});
    GridParallelRNG RNG4(FGrid);
    RNG4.SeedFixedIntegers(seeds4);
    std::cout << GridLogMessage << "Initialised RNGs" << std::endl;

    RealD mass = 0.1;
    RealD c1 = 9.0 / 8.0;
    RealD c2 = -1.0 / 24.0;
    RealD u0 = 1.0;

    typedef ImprovedStaggeredFermionF Action;
    typedef typename Action::FermionField Fermion;
    typedef LatticeGaugeFieldF Gauge;

    Gauge Umu(FGrid);
    SU<Nc>::HotConfiguration(RNG4, Umu);

    typename Action::ImplParams params;
    Action Ds(Umu, Umu, *FGrid, *FrbGrid, mass, c1, c2, u0, params);

    ///////// Source preparation ////////////
    Fermion src(FGrid);
    random(RNG4, src);
    Fermion src_e(FrbGrid);
    Fermion src_o(FrbGrid);
    Fermion r_e(FrbGrid);
    Fermion r_o(FrbGrid);
    Fermion r_eo(FGrid);

    {
      pickCheckerboard(Even, src_e, src);
      pickCheckerboard(Odd, src_o, src);

      const int num_cases = 4;
      std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S ");

      controls Cases[] = {
          {StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsThenCompute,
           CartesianCommunicator::CommunicatorPolicyConcurrent},
          {StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsAndCompute,
           CartesianCommunicator::CommunicatorPolicyConcurrent},
          {StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsThenCompute,
           CartesianCommunicator::CommunicatorPolicySequential},
          {StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsAndCompute,
           CartesianCommunicator::CommunicatorPolicySequential}};

      for (int c = 0; c < num_cases; c++)
      {
        StaggeredKernelsStatic::Comms = Cases[c].CommsOverlap;
        StaggeredKernelsStatic::Opt = Cases[c].Opt;
        CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);

        grid_small_sep();
        if (StaggeredKernelsStatic::Opt == StaggeredKernelsStatic::OptGeneric)
          std::cout << GridLogMessage << "* Using GENERIC Nc StaggeredKernels"
                    << std::endl;
        if (StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute)
          std::cout << GridLogMessage << "* Using Overlapped Comms/Compute" << std::endl;
        if (StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsThenCompute)
          std::cout << GridLogMessage << "* Using sequential Comms/Compute" << std::endl;
        std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
        grid_small_sep();

        int nwarm = 10;
        double t0 = usecond();
        FGrid->Barrier();
        for (int i = 0; i < nwarm; i++)
        {
          Ds.DhopEO(src_o, r_e, DaggerNo);
        }
        FGrid->Barrier();
        double t1 = usecond();
        uint64_t ncall = 500;

        FGrid->Broadcast(0, &ncall, sizeof(ncall));
        Ds.ZeroCounters();

        time_statistics timestat;
        std::vector<double> t_time(ncall);
        for (uint64_t i = 0; i < ncall; i++)
        {
          t0 = usecond();
          Ds.DhopEO(src_o, r_e, DaggerNo);
          t1 = usecond();
          t_time[i] = t1 - t0;
        }
        FGrid->Barrier();

        double volume = 1;
        for (int mu = 0; mu < Nd; mu++)
          volume = volume * latt4[mu];
        double flops = (1146.0 * volume) / 2.;
        double gf_hi, gf_lo, gf_err;

        timestat.statistics(t_time);
        gf_hi = flops / timestat.min / 1000.;
        gf_lo = flops / timestat.max / 1000.;
        gf_err = flops / timestat.min * timestat.err / timestat.mean / 1000.;

        gflops = flops / timestat.mean / 1000.;
        gflops_all.push_back(gflops);
        if (gflops_best == 0)
          gflops_best = gflops;
        if (gflops_worst == 0)
          gflops_worst = gflops;
        if (gflops > gflops_best)
          gflops_best = gflops;
        if (gflops < gflops_worst)
          gflops_worst = gflops;

        std::cout << GridLogMessage << std::fixed << std::setprecision(1)
                  << "Deo Gflop/s = " << gflops << " (" << gf_err << ") " << gf_lo
                  << "-" << gf_hi << std::endl;
        std::cout << GridLogMessage << std::fixed << std::setprecision(1)
                  << "Deo Gflop/s per rank " << gflops / NP << std::endl;
        std::cout << GridLogMessage << std::fixed << std::setprecision(1)
                  << "Deo Gflop/s per node " << gflops / NN << std::endl;
      }

      grid_small_sep();
      std::cout << GridLogMessage << L
                << "^4 Deo Best Gflop/s = " << gflops_best << " ; "
                << gflops_best / NN << " per node " << std::endl;
      std::cout << GridLogMessage << L
                << "^4 Deo Worst Gflop/s = " << gflops_worst << " ; "
                << gflops_worst / NN << " per node " << std::endl;
      std::cout << GridLogMessage << fmt << std::endl;
      std::cout << GridLogMessage;

      for (int i = 0; i < gflops_all.size(); i++)
      {
        std::cout << gflops_all[i] / NN << " ; ";
      }
      std::cout << std::endl;
    }
    return gflops_best;
  }
};

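// Entry point: parses an optional --json-out <file> argument, runs the selected
// benchmarks, prints a Gflop/s/node summary table and, on rank 0, writes the
// accumulated JSON results to file.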
int main(int argc, char **argv)
{
  Grid_init(&argc, &argv);

  std::string json_filename = ""; // empty indicates no json output
  for (int i = 0; i < argc; i++)
  {
    if (std::string(argv[i]) == "--json-out")
      json_filename = argv[i + 1];
  }

  CartesianCommunicator::SetCommunicatorPolicy(
      CartesianCommunicator::CommunicatorPolicySequential);
#ifdef KNL
  LebesgueOrder::Block = std::vector<int>({8, 2, 2, 2});
#else
  LebesgueOrder::Block = std::vector<int>({2, 2, 2, 2});
#endif
  Benchmark::Decomposition();

  int do_su4 = 1;
  int do_memory = 1;
  int do_comms = 1;
  int do_flops = 1;
  int Ls = 1;

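  // sel/selm1 index the two largest volumes in L_list; the "comparison point"
  // reported at the end is the average of their DWF Gflop/s per node.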
  int sel = 4;
  std::vector<int> L_list({8, 12, 16, 24, 32});
  int selm1 = sel - 1;

  std::vector<double> wilson;
  std::vector<double> dwf4;
  std::vector<double> staggered;

  if (do_memory)
  {
    grid_big_sep();
    std::cout << GridLogMessage << " Memory benchmark " << std::endl;
    grid_big_sep();
    Benchmark::Memory();
  }

  if (do_su4)
  {
    grid_big_sep();
    std::cout << GridLogMessage << " SU(4) benchmark " << std::endl;
    grid_big_sep();
    Benchmark::SU4();
  }

  if (do_comms)
  {
    grid_big_sep();
    std::cout << GridLogMessage << " Communications benchmark " << std::endl;
    grid_big_sep();
    Benchmark::Comms();
  }

  if (do_flops)
  {
    Ls = 1;
    grid_big_sep();
    std::cout << GridLogMessage << " Wilson dslash 4D vectorised" << std::endl;
    for (int l = 0; l < L_list.size(); l++)
    {
      wilson.push_back(Benchmark::DWF(Ls, L_list[l]));
    }

    Ls = 12;
    grid_big_sep();
    std::cout << GridLogMessage << " Domain wall dslash 4D vectorised" << std::endl;
    for (int l = 0; l < L_list.size(); l++)
    {
      double result = Benchmark::DWF(Ls, L_list[l]);
      dwf4.push_back(result);
    }

    grid_big_sep();
    std::cout << GridLogMessage << " Improved Staggered dslash 4D vectorised"
              << std::endl;
    for (int l = 0; l < L_list.size(); l++)
    {
      double result = Benchmark::Staggered(L_list[l]);
      staggered.push_back(result);
    }

    int NN = NN_global;

    grid_big_sep();
    std::cout << GridLogMessage << "Gflop/s/node Summary table Ls=" << Ls << std::endl;
    grid_big_sep();
    grid_printf("%5s %12s %12s %12s\n", "L", "Wilson", "DWF", "Staggered");
    nlohmann::json tmp_flops;
    for (int l = 0; l < L_list.size(); l++)
    {
      grid_printf("%5d %12.2f %12.2f %12.2f\n", L_list[l], wilson[l] / NN, dwf4[l] / NN,
                  staggered[l] / NN);

      nlohmann::json tmp;
      tmp["L"] = L_list[l];
      tmp["Gflops_wilson"] = wilson[l] / NN;
      tmp["Gflops_dwf4"] = dwf4[l] / NN;
      tmp["Gflops_staggered"] = staggered[l] / NN;
      tmp_flops["results"].push_back(tmp);
    }
    grid_big_sep();
    std::cout << GridLogMessage
              << " Comparison point result: " << 0.5 * (dwf4[sel] + dwf4[selm1]) / NN
              << " Gflop/s per node" << std::endl;
    std::cout << GridLogMessage << " Comparison point is 0.5*(" << dwf4[sel] / NN << "+"
              << dwf4[selm1] / NN << ") " << std::endl;
    std::cout << std::setprecision(3);
    grid_big_sep();
    tmp_flops["comparison_point_Gflops"] = 0.5 * (dwf4[sel] + dwf4[selm1]) / NN;
    json_results["flops"] = tmp_flops;
  }

  if (!json_filename.empty())
  {
    std::cout << GridLogMessage << "writing benchmark results to " << json_filename
              << std::endl;

    int me = 0;
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    if (me == 0)
    {
      std::ofstream json_file(json_filename);
      json_file << std::setw(2) << json_results;
    }
  }

  Grid_finalize();
}