Grid comms benchmark fix
This commit is contained in:
parent
503a993232
commit
4e57d9a4e0
@ -18,6 +18,7 @@ You should have received a copy of the GNU General Public License
|
|||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "Common.hpp"
|
||||||
#include <Grid/Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
@ -119,17 +120,13 @@ class Benchmark
|
|||||||
<< "bits ; "
|
<< "bits ; "
|
||||||
<< GridCmdVectorIntToString(GridDefaultSimd(4, vComplexD::Nsimd()))
|
<< GridCmdVectorIntToString(GridDefaultSimd(4, vComplexD::Nsimd()))
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
std::cout << GridLogMessage
|
|
||||||
<< "======================================================================="
|
|
||||||
"==========="
|
|
||||||
<< std::endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void Comms(void)
|
static void Comms(void)
|
||||||
{
|
{
|
||||||
int Nloop = 200;
|
int Nloop = 200;
|
||||||
int nmu = 0;
|
int nmu = 0;
|
||||||
int maxlat = 32;
|
int maxlat = 48;
|
||||||
|
|
||||||
Coordinate simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd());
|
Coordinate simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd());
|
||||||
Coordinate mpi_layout = GridDefaultMpi();
|
Coordinate mpi_layout = GridDefaultMpi();
|
||||||
@ -151,7 +148,8 @@ class Benchmark
|
|||||||
<< "======================================================================="
|
<< "======================================================================="
|
||||||
"============================="
|
"============================="
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
comms_header();
|
grid_printf("%5s %5s %15s %15s %15s %15s %15s\n", "L", "dir", "payload (B)",
|
||||||
|
"time (usec)", "rate (GB/s)", "std dev", "max");
|
||||||
|
|
||||||
for (int lat = 16; lat <= maxlat; lat += 8)
|
for (int lat = 16; lat <= maxlat; lat += 8)
|
||||||
{
|
{
|
||||||
@ -172,8 +170,6 @@ class Benchmark
|
|||||||
{
|
{
|
||||||
xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
|
xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
|
||||||
rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
|
rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
|
||||||
// bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
|
||||||
// bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
double dbytes;
|
double dbytes;
|
||||||
@ -213,17 +209,12 @@ class Benchmark
|
|||||||
timestat.statistics(t_time);
|
timestat.statistics(t_time);
|
||||||
|
|
||||||
dbytes = dbytes * ppn;
|
dbytes = dbytes * ppn;
|
||||||
double xbytes = dbytes * 0.5;
|
double bidibytes = 2. * dbytes;
|
||||||
double bidibytes = dbytes;
|
double rate = bidibytes / (timestat.mean / 1.e6) / 1024. / 1024. / 1024.;
|
||||||
|
double rate_err = rate * timestat.err / timestat.mean;
|
||||||
std::cout << GridLogMessage << lat << "\t" << Ls << "\t " << bytes << " \t "
|
double rate_max = rate * timestat.mean / timestat.min;
|
||||||
<< xbytes / timestat.mean << " \t "
|
grid_printf("%5d %5d %15d %15.2f %15.2f %15.1f %15.2f\n", lat, dir, bytes,
|
||||||
<< xbytes * timestat.err / (timestat.mean * timestat.mean) << " \t "
|
timestat.mean, rate, rate_err, rate_max);
|
||||||
<< xbytes / timestat.max << " " << xbytes / timestat.min << "\t\t"
|
|
||||||
<< bidibytes / timestat.mean << " "
|
|
||||||
<< bidibytes * timestat.err / (timestat.mean * timestat.mean) << " "
|
|
||||||
<< bidibytes / timestat.max << " " << bidibytes / timestat.min
|
|
||||||
<< std::endl;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (int d = 0; d < 8; d++)
|
for (int d = 0; d < 8; d++)
|
||||||
@ -526,7 +517,7 @@ class Benchmark
|
|||||||
FGrid->Broadcast(0, &ncall, sizeof(ncall));
|
FGrid->Broadcast(0, &ncall, sizeof(ncall));
|
||||||
|
|
||||||
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per
|
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per
|
||||||
//second"<<std::endl;
|
// second"<<std::endl;
|
||||||
Dw.ZeroCounters();
|
Dw.ZeroCounters();
|
||||||
|
|
||||||
time_statistics timestat;
|
time_statistics timestat;
|
||||||
@ -744,7 +735,7 @@ class Benchmark
|
|||||||
FGrid->Broadcast(0, &ncall, sizeof(ncall));
|
FGrid->Broadcast(0, &ncall, sizeof(ncall));
|
||||||
|
|
||||||
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per
|
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per
|
||||||
//second"<<std::endl;
|
// second"<<std::endl;
|
||||||
Ds.ZeroCounters();
|
Ds.ZeroCounters();
|
||||||
|
|
||||||
time_statistics timestat;
|
time_statistics timestat;
|
||||||
@ -829,9 +820,11 @@ int main(int argc, char **argv)
|
|||||||
#endif
|
#endif
|
||||||
Benchmark::Decomposition();
|
Benchmark::Decomposition();
|
||||||
|
|
||||||
int do_su4 = 1;
|
int do_su4 = 0;
|
||||||
int do_memory = 1;
|
int do_memory = 0;
|
||||||
int do_comms = 1;
|
int do_comms = 1;
|
||||||
|
int do_flops = 0;
|
||||||
|
int Ls = 1;
|
||||||
|
|
||||||
int sel = 4;
|
int sel = 4;
|
||||||
std::vector<int> L_list({8, 12, 16, 24, 32});
|
std::vector<int> L_list({8, 12, 16, 24, 32});
|
||||||
@ -841,71 +834,84 @@ int main(int argc, char **argv)
|
|||||||
std::vector<double> dwf4;
|
std::vector<double> dwf4;
|
||||||
std::vector<double> staggered;
|
std::vector<double> staggered;
|
||||||
|
|
||||||
int Ls = 1;
|
if (do_flops)
|
||||||
std::cout << GridLogMessage
|
|
||||||
<< "========================================================================="
|
|
||||||
"========="
|
|
||||||
<< std::endl;
|
|
||||||
std::cout << GridLogMessage << " Wilson dslash 4D vectorised" << std::endl;
|
|
||||||
std::cout << GridLogMessage
|
|
||||||
<< "========================================================================="
|
|
||||||
"========="
|
|
||||||
<< std::endl;
|
|
||||||
for (int l = 0; l < L_list.size(); l++)
|
|
||||||
{
|
{
|
||||||
wilson.push_back(Benchmark::DWF(Ls, L_list[l]));
|
Ls = 1;
|
||||||
}
|
std::cout
|
||||||
|
<< GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"========="
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage << " Wilson dslash 4D vectorised" << std::endl;
|
||||||
|
std::cout
|
||||||
|
<< GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"========="
|
||||||
|
<< std::endl;
|
||||||
|
for (int l = 0; l < L_list.size(); l++)
|
||||||
|
{
|
||||||
|
wilson.push_back(Benchmark::DWF(Ls, L_list[l]));
|
||||||
|
}
|
||||||
|
|
||||||
Ls = 12;
|
Ls = 12;
|
||||||
std::cout << GridLogMessage
|
std::cout
|
||||||
<< "========================================================================="
|
<< GridLogMessage
|
||||||
"========="
|
<< "========================================================================="
|
||||||
<< std::endl;
|
"========="
|
||||||
std::cout << GridLogMessage << " Domain wall dslash 4D vectorised" << std::endl;
|
<< std::endl;
|
||||||
std::cout << GridLogMessage
|
std::cout << GridLogMessage << " Domain wall dslash 4D vectorised" << std::endl;
|
||||||
<< "========================================================================="
|
std::cout
|
||||||
"========="
|
<< GridLogMessage
|
||||||
<< std::endl;
|
<< "========================================================================="
|
||||||
for (int l = 0; l < L_list.size(); l++)
|
"========="
|
||||||
{
|
<< std::endl;
|
||||||
double result = Benchmark::DWF(Ls, L_list[l]);
|
for (int l = 0; l < L_list.size(); l++)
|
||||||
dwf4.push_back(result);
|
{
|
||||||
}
|
double result = Benchmark::DWF(Ls, L_list[l]);
|
||||||
|
dwf4.push_back(result);
|
||||||
|
}
|
||||||
|
|
||||||
std::cout << GridLogMessage
|
std::cout
|
||||||
<< "========================================================================="
|
<< GridLogMessage
|
||||||
"========="
|
<< "========================================================================="
|
||||||
<< std::endl;
|
"========="
|
||||||
std::cout << GridLogMessage << " Improved Staggered dslash 4D vectorised" << std::endl;
|
<< std::endl;
|
||||||
std::cout << GridLogMessage
|
std::cout << GridLogMessage << " Improved Staggered dslash 4D vectorised"
|
||||||
<< "========================================================================="
|
<< std::endl;
|
||||||
"========="
|
std::cout
|
||||||
<< std::endl;
|
<< GridLogMessage
|
||||||
for (int l = 0; l < L_list.size(); l++)
|
<< "========================================================================="
|
||||||
{
|
"========="
|
||||||
double result = Benchmark::Staggered(L_list[l]);
|
<< std::endl;
|
||||||
staggered.push_back(result);
|
for (int l = 0; l < L_list.size(); l++)
|
||||||
}
|
{
|
||||||
|
double result = Benchmark::Staggered(L_list[l]);
|
||||||
|
staggered.push_back(result);
|
||||||
|
}
|
||||||
|
|
||||||
std::cout << GridLogMessage
|
std::cout
|
||||||
<< "========================================================================="
|
<< GridLogMessage
|
||||||
"========="
|
<< "========================================================================="
|
||||||
<< std::endl;
|
"========="
|
||||||
std::cout << GridLogMessage << " Summary table Ls=" << Ls << std::endl;
|
<< std::endl;
|
||||||
std::cout << GridLogMessage
|
std::cout << GridLogMessage << " Summary table Ls=" << Ls << std::endl;
|
||||||
<< "========================================================================="
|
std::cout
|
||||||
"========="
|
<< GridLogMessage
|
||||||
<< std::endl;
|
<< "========================================================================="
|
||||||
std::cout << GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\t Staggered" << std::endl;
|
"========="
|
||||||
for (int l = 0; l < L_list.size(); l++)
|
<< std::endl;
|
||||||
{
|
std::cout << GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\t Staggered" << std::endl;
|
||||||
std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] << " \t\t "
|
for (int l = 0; l < L_list.size(); l++)
|
||||||
<< dwf4[l] << " \t\t " << staggered[l] << std::endl;
|
{
|
||||||
|
std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] << " \t\t "
|
||||||
|
<< dwf4[l] << " \t\t " << staggered[l] << std::endl;
|
||||||
|
}
|
||||||
|
std::cout
|
||||||
|
<< GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"========="
|
||||||
|
<< std::endl;
|
||||||
}
|
}
|
||||||
std::cout << GridLogMessage
|
|
||||||
<< "========================================================================="
|
|
||||||
"========="
|
|
||||||
<< std::endl;
|
|
||||||
|
|
||||||
int NN = NN_global;
|
int NN = NN_global;
|
||||||
if (do_memory)
|
if (do_memory)
|
||||||
@ -950,40 +956,48 @@ int main(int argc, char **argv)
|
|||||||
Benchmark::Comms();
|
Benchmark::Comms();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << GridLogMessage
|
if (do_flops)
|
||||||
<< "========================================================================="
|
|
||||||
"========="
|
|
||||||
<< std::endl;
|
|
||||||
std::cout << GridLogMessage << " Per Node Summary table Ls=" << Ls << std::endl;
|
|
||||||
std::cout << GridLogMessage
|
|
||||||
<< "========================================================================="
|
|
||||||
"========="
|
|
||||||
<< std::endl;
|
|
||||||
std::cout << GridLogMessage << " L \t\t Wilson\t\t DWF4\t\t Staggered " << std::endl;
|
|
||||||
for (int l = 0; l < L_list.size(); l++)
|
|
||||||
{
|
{
|
||||||
std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] / NN << " \t "
|
std::cout
|
||||||
<< dwf4[l] / NN << " \t " << staggered[l] / NN << std::endl;
|
<< GridLogMessage
|
||||||
}
|
<< "========================================================================="
|
||||||
std::cout << GridLogMessage
|
"========="
|
||||||
<< "========================================================================="
|
<< std::endl;
|
||||||
"========="
|
std::cout << GridLogMessage << " Per Node Summary table Ls=" << Ls << std::endl;
|
||||||
<< std::endl;
|
std::cout
|
||||||
|
<< GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"========="
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage << " L \t\t Wilson\t\t DWF4\t\t Staggered " << std::endl;
|
||||||
|
for (int l = 0; l < L_list.size(); l++)
|
||||||
|
{
|
||||||
|
std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] / NN << " \t "
|
||||||
|
<< dwf4[l] / NN << " \t " << staggered[l] / NN << std::endl;
|
||||||
|
}
|
||||||
|
std::cout
|
||||||
|
<< GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"========="
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
std::cout << GridLogMessage
|
std::cout
|
||||||
<< "========================================================================="
|
<< GridLogMessage
|
||||||
"========="
|
<< "========================================================================="
|
||||||
<< std::endl;
|
"========="
|
||||||
std::cout << GridLogMessage
|
<< std::endl;
|
||||||
<< " Comparison point result: " << 0.5 * (dwf4[sel] + dwf4[selm1]) / NN
|
std::cout << GridLogMessage
|
||||||
<< " Mflop/s per node" << std::endl;
|
<< " Comparison point result: " << 0.5 * (dwf4[sel] + dwf4[selm1]) / NN
|
||||||
std::cout << GridLogMessage << " Comparison point is 0.5*(" << dwf4[sel] / NN << "+"
|
<< " Mflop/s per node" << std::endl;
|
||||||
<< dwf4[selm1] / NN << ") " << std::endl;
|
std::cout << GridLogMessage << " Comparison point is 0.5*(" << dwf4[sel] / NN << "+"
|
||||||
std::cout << std::setprecision(3);
|
<< dwf4[selm1] / NN << ") " << std::endl;
|
||||||
std::cout << GridLogMessage
|
std::cout << std::setprecision(3);
|
||||||
<< "========================================================================="
|
std::cout
|
||||||
"========="
|
<< GridLogMessage
|
||||||
<< std::endl;
|
<< "========================================================================="
|
||||||
|
"========="
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
36
Grid/Common.hpp
Normal file
36
Grid/Common.hpp
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
/*
|
||||||
|
Copyright © 2022 Antonin Portelli <antonin.portelli@me.com>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef Grid_Benchmarks_Common_hpp_
|
||||||
|
#define Grid_Benchmarks_Common_hpp_
|
||||||
|
|
||||||
|
#ifndef GRID_MSG
|
||||||
|
#define GRID_MSG std::cout << GridLogMessage
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef GRID_MSG_MAXSIZE
|
||||||
|
#define GRID_MSG_MAXSIZE 1024
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// printf-style logging helper: formats the arguments with snprintf into a
// local buffer of GRID_MSG_MAXSIZE bytes and streams the result to GRID_MSG.
// Output that does not fit in the buffer is truncated by snprintf.
//
// The body is wrapped in do { ... } while (0) so that the macro expands to a
// single statement requiring a trailing ';'. With a bare { ... } block,
// `if (c) grid_printf(...); else ...` would not compile, because the ';'
// after the closing brace terminates the if statement and orphans the else.
#define grid_printf(...)                                                       \
  do                                                                           \
  {                                                                            \
    char _buf[GRID_MSG_MAXSIZE];                                               \
    snprintf(_buf, GRID_MSG_MAXSIZE, __VA_ARGS__);                             \
    GRID_MSG << _buf;                                                          \
  } while (0)
|
||||||
|
|
||||||
|
#endif // Grid_Benchmarks_Common_hpp_
|
Loading…
Reference in New Issue
Block a user