Memory benchmarks cleanup and fix
This commit is contained in:
parent
78c464d1d8
commit
77c75ea5b8
@ -2,7 +2,7 @@
|
|||||||
Copyright © 2015 Peter Boyle <paboyle@ph.ed.ac.uk>
|
Copyright © 2015 Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Copyright © 2022 Antonin Portelli <antonin.portelli@me.com>
|
Copyright © 2022 Antonin Portelli <antonin.portelli@me.com>
|
||||||
|
|
||||||
This is a refactoring of Benchmark_ITT.cpp from Grid
|
This is a fork of Benchmark_ITT.cpp from Grid
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or
|
This program is free software; you can redistribute it and/or
|
||||||
modify it under the terms of the GNU General Public License
|
modify it under the terms of the GNU General Public License
|
||||||
@ -85,10 +85,8 @@ class Benchmark
|
|||||||
|
|
||||||
int threads = GridThread::GetThreads();
|
int threads = GridThread::GetThreads();
|
||||||
grid_big_sep();
|
grid_big_sep();
|
||||||
std::cout << GridLogMessage << "= Grid is setup to use " << threads << " threads"
|
|
||||||
<< std::endl;
|
|
||||||
grid_big_sep();
|
|
||||||
std::cout << GridLogMessage << "Grid Default Decomposition patterns\n";
|
std::cout << GridLogMessage << "Grid Default Decomposition patterns\n";
|
||||||
|
grid_small_sep();
|
||||||
std::cout << GridLogMessage << "\tOpenMP threads : " << GridThread::GetThreads()
|
std::cout << GridLogMessage << "\tOpenMP threads : " << GridThread::GetThreads()
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
std::cout << GridLogMessage
|
std::cout << GridLogMessage
|
||||||
@ -233,24 +231,15 @@ class Benchmark
|
|||||||
Coordinate simd_layout = GridDefaultSimd(Nd, vReal::Nsimd());
|
Coordinate simd_layout = GridDefaultSimd(Nd, vReal::Nsimd());
|
||||||
Coordinate mpi_layout = GridDefaultMpi();
|
Coordinate mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
grid_big_sep();
|
std::cout << GridLogMessage << "Benchmarking a*x + y bandwidth" << std::endl;
|
||||||
std::cout << GridLogMessage << "= Benchmarking a*x + y bandwidth" << std::endl;
|
grid_small_sep();
|
||||||
grid_big_sep();
|
grid_printf("%5s %15s %15s %15s %15s\n", "L", "size (MB/node)", "time (usec)",
|
||||||
std::cout << GridLogMessage << " L "
|
"GB/s/node", "Gflop/s/node");
|
||||||
<< "\t\t"
|
|
||||||
<< "bytes"
|
|
||||||
<< "\t\t\t"
|
|
||||||
<< "GB/s"
|
|
||||||
<< "\t\t"
|
|
||||||
<< "Gflop/s"
|
|
||||||
<< "\t\t seconds"
|
|
||||||
<< "\t\tGB/s / node" << std::endl;
|
|
||||||
|
|
||||||
// uint64_t NP;
|
|
||||||
uint64_t NN;
|
uint64_t NN;
|
||||||
|
uint64_t lmax = 64;
|
||||||
uint64_t lmax = 32;
|
#define NLOOP (200 * lmax * lmax * lmax / lat / lat / lat)
|
||||||
#define NLOOP (1000 * lmax * lmax * lmax * lmax / lat / lat / lat / lat)
|
#define NWARMUP 50
|
||||||
|
|
||||||
GridSerialRNG sRNG;
|
GridSerialRNG sRNG;
|
||||||
sRNG.SeedFixedIntegers(std::vector<int>({45, 12, 81, 9}));
|
sRNG.SeedFixedIntegers(std::vector<int>({45, 12, 81, 9}));
|
||||||
@ -259,11 +248,10 @@ class Benchmark
|
|||||||
|
|
||||||
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
|
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
|
||||||
lat * mpi_layout[3]});
|
lat * mpi_layout[3]});
|
||||||
int64_t vol = latt_size[0] * latt_size[1] * latt_size[2] * latt_size[3];
|
uint64_t vol = latt_size[0] * latt_size[1] * latt_size[2] * latt_size[3];
|
||||||
|
|
||||||
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
||||||
|
|
||||||
// NP= Grid.RankCount();
|
|
||||||
NN = Grid.NodeCount();
|
NN = Grid.NodeCount();
|
||||||
|
|
||||||
Vec rn;
|
Vec rn;
|
||||||
@ -279,20 +267,23 @@ class Benchmark
|
|||||||
|
|
||||||
uint64_t Nloop = NLOOP;
|
uint64_t Nloop = NLOOP;
|
||||||
|
|
||||||
|
for (int i = 0; i < NWARMUP; i++)
|
||||||
|
{
|
||||||
|
z = a * x - y;
|
||||||
|
}
|
||||||
double start = usecond();
|
double start = usecond();
|
||||||
for (int i = 0; i < Nloop; i++)
|
for (int i = 0; i < Nloop; i++)
|
||||||
{
|
{
|
||||||
z = a * x - y;
|
z = a * x - y;
|
||||||
}
|
}
|
||||||
double stop = usecond();
|
double stop = usecond();
|
||||||
double time = (stop - start) / Nloop * 1000;
|
double time = (stop - start) / Nloop / 1.e6;
|
||||||
|
|
||||||
double flops = vol * Nvec * 2; // mul,add
|
double flops = vol * Nvec * 2 / 1.e9; // mul,add
|
||||||
double bytes = 3.0 * vol * Nvec * sizeof(Real);
|
double bytes = 3.0 * vol * Nvec * sizeof(Real) / 1024. / 1024.;
|
||||||
std::cout << GridLogMessage << std::setprecision(3) << lat << "\t\t" << bytes
|
|
||||||
<< " \t\t" << bytes / time << "\t\t" << flops / time << "\t\t"
|
grid_printf("%5d %15.2f %15.2f %15.2f %15.2f\n", lat, bytes / NN, time * 1.e6,
|
||||||
<< (stop - start) / 1000. / 1000. << "\t\t" << bytes / time / NN
|
bytes / time / NN / 1024., flops / time / NN);
|
||||||
<< std::endl;
|
|
||||||
|
|
||||||
nlohmann::json tmp;
|
nlohmann::json tmp;
|
||||||
tmp["L"] = lat;
|
tmp["L"] = lat;
|
||||||
@ -311,22 +302,14 @@ class Benchmark
|
|||||||
Coordinate simd_layout = GridDefaultSimd(Nd, vComplexF::Nsimd());
|
Coordinate simd_layout = GridDefaultSimd(Nd, vComplexF::Nsimd());
|
||||||
Coordinate mpi_layout = GridDefaultMpi();
|
Coordinate mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
grid_big_sep();
|
std::cout << GridLogMessage << "Benchmarking z = y*x SU(4) bandwidth" << std::endl;
|
||||||
std::cout << GridLogMessage << "= Benchmarking z = y*x SU(4) bandwidth" << std::endl;
|
grid_small_sep();
|
||||||
grid_big_sep();
|
grid_printf("%5s %15s %15s %15s %15s\n", "L", "size (MB/node)", "time (usec)",
|
||||||
std::cout << GridLogMessage << " L "
|
"GB/s/node", "Gflop/s/node");
|
||||||
<< "\t\t"
|
|
||||||
<< "bytes"
|
|
||||||
<< "\t\t\t"
|
|
||||||
<< "GB/s"
|
|
||||||
<< "\t\t"
|
|
||||||
<< "Gflop/s"
|
|
||||||
<< "\t\t seconds"
|
|
||||||
<< "\t\tGB/s / node" << std::endl;
|
|
||||||
|
|
||||||
uint64_t NN;
|
uint64_t NN;
|
||||||
|
|
||||||
uint64_t lmax = 32;
|
uint64_t lmax = 48;
|
||||||
|
|
||||||
GridSerialRNG sRNG;
|
GridSerialRNG sRNG;
|
||||||
sRNG.SeedFixedIntegers(std::vector<int>({45, 12, 81, 9}));
|
sRNG.SeedFixedIntegers(std::vector<int>({45, 12, 81, 9}));
|
||||||
@ -347,24 +330,25 @@ class Benchmark
|
|||||||
x = Zero();
|
x = Zero();
|
||||||
LatticeSU4 y(&Grid);
|
LatticeSU4 y(&Grid);
|
||||||
y = Zero();
|
y = Zero();
|
||||||
// double a=2.0;
|
|
||||||
|
|
||||||
uint64_t Nloop = NLOOP;
|
uint64_t Nloop = NLOOP;
|
||||||
|
|
||||||
|
for (int i = 0; i < NWARMUP; i++)
|
||||||
|
{
|
||||||
|
z = x * y;
|
||||||
|
}
|
||||||
double start = usecond();
|
double start = usecond();
|
||||||
for (int i = 0; i < Nloop; i++)
|
for (int i = 0; i < Nloop; i++)
|
||||||
{
|
{
|
||||||
z = x * y;
|
z = x * y;
|
||||||
}
|
}
|
||||||
double stop = usecond();
|
double stop = usecond();
|
||||||
double time = (stop - start) / Nloop * 1000;
|
double time = (stop - start) / Nloop / 1.e6;
|
||||||
|
|
||||||
double flops = vol * Nc4 * Nc4 * (6 + (Nc4 - 1) * 8); // mul,add
|
double flops = vol * Nc4 * Nc4 * (6 + (Nc4 - 1) * 8) / 1.e9; // mul,add
|
||||||
double bytes = 3.0 * vol * Nc4 * Nc4 * 2 * sizeof(RealF);
|
double bytes = 3.0 * vol * Nc4 * Nc4 * 2 * sizeof(RealF) / 1024. / 1024.;
|
||||||
std::cout << GridLogMessage << std::setprecision(3) << lat << "\t\t" << bytes
|
grid_printf("%5d %15.2f %15.2f %15.2f %15.2f\n", lat, bytes / NN, time * 1.e6,
|
||||||
<< " \t\t" << bytes / time << "\t\t" << flops / time << "\t\t"
|
bytes / time / NN / 1024., flops / time / NN);
|
||||||
<< (stop - start) / 1000. / 1000. << "\t\t" << bytes / time / NN
|
|
||||||
<< std::endl;
|
|
||||||
|
|
||||||
nlohmann::json tmp;
|
nlohmann::json tmp;
|
||||||
tmp["L"] = lat;
|
tmp["L"] = lat;
|
||||||
|
Loading…
Reference in New Issue
Block a user