// File: lattice-benchmarks/Grid/Benchmark_comms_host_device.cpp
// Repository web-view metadata: 266 lines, 8.6 KiB, C++ (Raw / Normal View / History).
// 2023-01-13 19:00:20 +00:00
/*
Copyright © 2015 Peter Boyle <paboyle@ph.ed.ac.uk>
Copyright © 2022 Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
// 2023-01-24 18:44:47 +00:00
/**
 * Summary statistics of a set of timing samples.
 *
 * All fields are overwritten by every call to statistics().
 */
struct time_statistics
{
  double mean = 0.0; ///< arithmetic mean of the samples
  double err = 0.0;  ///< standard error of the mean: sqrt(sum((x - mean)^2) / (n * (n - 1)))
  double min = 0.0;  ///< smallest sample
  double max = 0.0;  ///< largest sample

  /**
   * Compute mean, standard error of the mean, minimum and maximum of v.
   *
   * @param v  samples; taken by const reference so the caller's vector is not copied.
   *
   * Edge cases:
   *  - empty input: every field is set to 0 (there is nothing to dereference or divide by);
   *  - a single sample: err is set to 0, because the n * (n - 1) denominator would be zero.
   */
  void statistics(const std::vector<double> &v)
  {
    const std::size_t n = v.size();
    if (n == 0)
    {
      mean = 0.0;
      err = 0.0;
      min = 0.0;
      max = 0.0;
      return;
    }

    const double avg = std::accumulate(v.begin(), v.end(), 0.0) / static_cast<double>(n);

    // Sum of squared deviations, accumulated in sample order without a temporary vector.
    const double sq_sum = std::accumulate(v.begin(), v.end(), 0.0,
                                          [avg](double acc, double x)
                                          {
                                            const double d = x - avg;
                                            return acc + d * d;
                                          });

    mean = avg;
    err = (n > 1) ? std::sqrt(sq_sum / (static_cast<double>(n) * static_cast<double>(n - 1)))
                  : 0.0;

    const auto extrema = std::minmax_element(v.begin(), v.end());
    min = *extrema.first;
    max = *extrema.second;
  }
};
// 2023-01-24 18:44:47 +00:00
/**
 * Print the column titles of the benchmark table: L, Ls, bytes, MB/s uni, MB/s bidi.
 */
void header()
{
  // Everything is chained off the result of the logger insertion, so the titles go to
  // whatever stream that expression hands back.
  auto &&out = std::cout << GridLogMessage;

  out << " L "
      << "\t";
  out << " Ls "
      << "\t";
  // The field width applies to the "bytes" title string itself.
  out << std::setw(11) << "bytes\t\t";
  out << "MB/s uni"
      << "\t";
  out << "MB/s bidi";
  out << std::endl;
}
// 2023-01-24 18:44:47 +00:00
int main(int argc, char **argv)
2023-01-13 19:00:20 +00:00
{
2023-01-24 18:44:47 +00:00
Grid_init(&argc, &argv);
2023-01-13 19:00:20 +00:00
2023-01-24 18:44:47 +00:00
Coordinate simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd());
Coordinate mpi_layout = GridDefaultMpi();
2023-01-13 19:00:20 +00:00
int threads = GridThread::GetThreads();
2023-01-25 17:45:30 +00:00
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads"
<< std::endl;
2023-01-13 19:00:20 +00:00
2023-01-24 18:44:47 +00:00
int Nloop = 250;
int nmu = 0;
int maxlat = 32;
for (int mu = 0; mu < Nd; mu++)
if (mpi_layout[mu] > 1)
nmu++;
2023-01-13 19:00:20 +00:00
2023-01-25 17:45:30 +00:00
std::cout << GridLogMessage << "Number of iterations to average: " << Nloop
<< std::endl;
2023-01-13 19:00:20 +00:00
std::vector<double> t_time(Nloop);
// time_statistics timestat;
2023-01-25 17:45:30 +00:00
std::cout << GridLogMessage
<< "========================================================================="
"==========================="
<< std::endl;
std::cout << GridLogMessage
<< "= Benchmarking sequential halo exchange from host memory " << std::endl;
std::cout << GridLogMessage
<< "========================================================================="
"==========================="
<< std::endl;
2023-01-13 19:00:20 +00:00
header();
2023-01-24 18:44:47 +00:00
for (int lat = 8; lat <= maxlat; lat += 4)
{
for (int Ls = 8; Ls <= 8; Ls *= 2)
{
2023-01-13 19:00:20 +00:00
2023-01-25 17:45:30 +00:00
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
2023-01-24 18:44:47 +00:00
lat * mpi_layout[3]});
2023-01-13 19:00:20 +00:00
2023-01-24 18:44:47 +00:00
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
2023-01-13 19:00:20 +00:00
RealD Nrank = Grid._Nprocessors;
RealD Nnode = Grid.NodeCount();
2023-01-24 18:44:47 +00:00
RealD ppn = Nrank / Nnode;
2023-01-13 19:00:20 +00:00
2023-01-24 18:44:47 +00:00
std::vector<std::vector<HalfSpinColourVectorD>> xbuf(8);
std::vector<std::vector<HalfSpinColourVectorD>> rbuf(8);
2023-01-13 19:00:20 +00:00
2023-01-24 18:44:47 +00:00
for (int mu = 0; mu < 8; mu++)
{
xbuf[mu].resize(lat * lat * lat * Ls);
rbuf[mu].resize(lat * lat * lat * Ls);
2023-01-13 19:00:20 +00:00
}
2023-01-24 18:44:47 +00:00
uint64_t bytes = lat * lat * lat * Ls * sizeof(HalfSpinColourVectorD);
2023-01-13 19:00:20 +00:00
int ncomm;
2023-01-24 18:44:47 +00:00
for (int mu = 0; mu < 4; mu++)
{
if (mpi_layout[mu] > 1)
{
double start = usecond();
for (int i = 0; i < Nloop; i++)
{
ncomm = 0;
ncomm++;
int comm_proc = 1;
int xmit_to_rank;
int recv_from_rank;
{
std::vector<CommsRequest_t> requests;
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
2023-01-25 17:45:30 +00:00
Grid.SendToRecvFrom((void *)&xbuf[mu][0], xmit_to_rank,
(void *)&rbuf[mu][0], recv_from_rank, bytes);
2023-01-24 18:44:47 +00:00
}
comm_proc = mpi_layout[mu] - 1;
{
std::vector<CommsRequest_t> requests;
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
2023-01-25 17:45:30 +00:00
Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], xmit_to_rank,
(void *)&rbuf[mu + 4][0], recv_from_rank, bytes);
2023-01-24 18:44:47 +00:00
}
}
Grid.Barrier();
double stop = usecond();
double mean = (stop - start) / Nloop;
double dbytes = bytes * ppn;
double xbytes = dbytes * 2.0 * ncomm;
double rbytes = xbytes;
double bidibytes = xbytes + rbytes;
std::cout << GridLogMessage << std::setw(4) << lat << "\t" << Ls << "\t"
2023-01-25 17:45:30 +00:00
<< std::setw(11) << bytes << std::fixed << std::setprecision(1)
<< std::setw(7) << " " << std::right << xbytes / mean << " "
2023-01-24 18:44:47 +00:00
<< "\t\t" << std::setw(7) << bidibytes / mean << std::endl;
}
2023-01-13 19:00:20 +00:00
}
}
}
2023-01-25 17:45:30 +00:00
std::cout << GridLogMessage
<< "========================================================================="
"==========================="
<< std::endl;
std::cout << GridLogMessage
<< "= Benchmarking sequential halo exchange from GPU memory " << std::endl;
std::cout << GridLogMessage
<< "========================================================================="
"==========================="
<< std::endl;
2023-01-13 19:00:20 +00:00
header();
2023-01-24 18:44:47 +00:00
for (int lat = 8; lat <= maxlat; lat += 4)
{
for (int Ls = 8; Ls <= 8; Ls *= 2)
{
2023-01-13 19:00:20 +00:00
2023-01-25 17:45:30 +00:00
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
2023-01-24 18:44:47 +00:00
lat * mpi_layout[3]});
2023-01-13 19:00:20 +00:00
2023-01-24 18:44:47 +00:00
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
2023-01-13 19:00:20 +00:00
RealD Nrank = Grid._Nprocessors;
RealD Nnode = Grid.NodeCount();
2023-01-24 18:44:47 +00:00
RealD ppn = Nrank / Nnode;
2023-01-13 19:00:20 +00:00
std::vector<HalfSpinColourVectorD *> xbuf(8);
std::vector<HalfSpinColourVectorD *> rbuf(8);
2023-01-24 18:44:47 +00:00
uint64_t bytes = lat * lat * lat * Ls * sizeof(HalfSpinColourVectorD);
for (int d = 0; d < 8; d++)
{
xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
2023-01-13 19:00:20 +00:00
}
int ncomm;
2023-01-24 18:44:47 +00:00
for (int mu = 0; mu < 4; mu++)
{
if (mpi_layout[mu] > 1)
{
double start = usecond();
for (int i = 0; i < Nloop; i++)
{
ncomm = 0;
ncomm++;
int comm_proc = 1;
int xmit_to_rank;
int recv_from_rank;
{
std::vector<CommsRequest_t> requests;
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
2023-01-25 17:45:30 +00:00
Grid.SendToRecvFrom((void *)&xbuf[mu][0], xmit_to_rank,
(void *)&rbuf[mu][0], recv_from_rank, bytes);
2023-01-24 18:44:47 +00:00
}
comm_proc = mpi_layout[mu] - 1;
{
std::vector<CommsRequest_t> requests;
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
2023-01-25 17:45:30 +00:00
Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], xmit_to_rank,
(void *)&rbuf[mu + 4][0], recv_from_rank, bytes);
2023-01-24 18:44:47 +00:00
}
}
Grid.Barrier();
double stop = usecond();
double mean = (stop - start) / Nloop;
double dbytes = bytes * ppn;
double xbytes = dbytes * 2.0 * ncomm;
double rbytes = xbytes;
double bidibytes = xbytes + rbytes;
std::cout << GridLogMessage << std::setw(4) << lat << "\t" << Ls << "\t"
2023-01-25 17:45:30 +00:00
<< std::setw(11) << bytes << std::fixed << std::setprecision(1)
<< std::setw(7) << " " << std::right << xbytes / mean << " "
2023-01-24 18:44:47 +00:00
<< "\t\t" << std::setw(7) << bidibytes / mean << std::endl;
}
2023-01-13 19:00:20 +00:00
}
2023-01-24 18:44:47 +00:00
for (int d = 0; d < 8; d++)
{
acceleratorFreeDevice(xbuf[d]);
acceleratorFreeDevice(rbuf[d]);
2023-01-13 19:00:20 +00:00
}
}
}
2023-01-25 17:45:30 +00:00
std::cout << GridLogMessage
<< "========================================================================="
"==========================="
<< std::endl;
2023-01-24 18:44:47 +00:00
std::cout << GridLogMessage << "= All done; Bye Bye" << std::endl;
2023-01-25 17:45:30 +00:00
std::cout << GridLogMessage
<< "========================================================================="
"==========================="
<< std::endl;
2023-01-13 19:00:20 +00:00
Grid_finalize();
}