Grid comms benchmark fix

This commit is contained in:
Antonin Portelli 2023-01-26 18:57:33 +00:00
parent 503a993232
commit 4e57d9a4e0
2 changed files with 165 additions and 115 deletions

View File

@ -18,6 +18,7 @@ You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "Common.hpp"
#include <Grid/Grid.h> #include <Grid/Grid.h>
using namespace Grid; using namespace Grid;
@ -119,17 +120,13 @@ class Benchmark
<< "bits ; " << "bits ; "
<< GridCmdVectorIntToString(GridDefaultSimd(4, vComplexD::Nsimd())) << GridCmdVectorIntToString(GridDefaultSimd(4, vComplexD::Nsimd()))
<< std::endl; << std::endl;
std::cout << GridLogMessage
<< "======================================================================="
"==========="
<< std::endl;
} }
static void Comms(void) static void Comms(void)
{ {
int Nloop = 200; int Nloop = 200;
int nmu = 0; int nmu = 0;
int maxlat = 32; int maxlat = 48;
Coordinate simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd()); Coordinate simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd());
Coordinate mpi_layout = GridDefaultMpi(); Coordinate mpi_layout = GridDefaultMpi();
@ -151,7 +148,8 @@ class Benchmark
<< "=======================================================================" << "======================================================================="
"=============================" "============================="
<< std::endl; << std::endl;
comms_header(); grid_printf("%5s %5s %15s %15s %15s %15s %15s\n", "L", "dir", "payload (B)",
"time (usec)", "rate (GB/s)", "std dev", "max");
for (int lat = 16; lat <= maxlat; lat += 8) for (int lat = 16; lat <= maxlat; lat += 8)
{ {
@ -172,8 +170,6 @@ class Benchmark
{ {
xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
// bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
// bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
} }
double dbytes; double dbytes;
@ -213,17 +209,12 @@ class Benchmark
timestat.statistics(t_time); timestat.statistics(t_time);
dbytes = dbytes * ppn; dbytes = dbytes * ppn;
double xbytes = dbytes * 0.5; double bidibytes = 2. * dbytes;
double bidibytes = dbytes; double rate = bidibytes / (timestat.mean / 1.e6) / 1024. / 1024. / 1024.;
double rate_err = rate * timestat.err / timestat.mean;
std::cout << GridLogMessage << lat << "\t" << Ls << "\t " << bytes << " \t " double rate_max = rate * timestat.mean / timestat.min;
<< xbytes / timestat.mean << " \t " grid_printf("%5d %5d %15d %15.2f %15.2f %15.1f %15.2f\n", lat, dir, bytes,
<< xbytes * timestat.err / (timestat.mean * timestat.mean) << " \t " timestat.mean, rate, rate_err, rate_max);
<< xbytes / timestat.max << " " << xbytes / timestat.min << "\t\t"
<< bidibytes / timestat.mean << " "
<< bidibytes * timestat.err / (timestat.mean * timestat.mean) << " "
<< bidibytes / timestat.max << " " << bidibytes / timestat.min
<< std::endl;
} }
} }
for (int d = 0; d < 8; d++) for (int d = 0; d < 8; d++)
@ -526,7 +517,7 @@ class Benchmark
FGrid->Broadcast(0, &ncall, sizeof(ncall)); FGrid->Broadcast(0, &ncall, sizeof(ncall));
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per // std::cout << GridLogMessage << " Estimate " << ncall << " calls per
//second"<<std::endl; // second"<<std::endl;
Dw.ZeroCounters(); Dw.ZeroCounters();
time_statistics timestat; time_statistics timestat;
@ -744,7 +735,7 @@ class Benchmark
FGrid->Broadcast(0, &ncall, sizeof(ncall)); FGrid->Broadcast(0, &ncall, sizeof(ncall));
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per // std::cout << GridLogMessage << " Estimate " << ncall << " calls per
//second"<<std::endl; // second"<<std::endl;
Ds.ZeroCounters(); Ds.ZeroCounters();
time_statistics timestat; time_statistics timestat;
@ -829,9 +820,11 @@ int main(int argc, char **argv)
#endif #endif
Benchmark::Decomposition(); Benchmark::Decomposition();
int do_su4 = 1; int do_su4 = 0;
int do_memory = 1; int do_memory = 0;
int do_comms = 1; int do_comms = 1;
int do_flops = 0;
int Ls = 1;
int sel = 4; int sel = 4;
std::vector<int> L_list({8, 12, 16, 24, 32}); std::vector<int> L_list({8, 12, 16, 24, 32});
@ -841,13 +834,17 @@ int main(int argc, char **argv)
std::vector<double> dwf4; std::vector<double> dwf4;
std::vector<double> staggered; std::vector<double> staggered;
int Ls = 1; if (do_flops)
std::cout << GridLogMessage {
Ls = 1;
std::cout
<< GridLogMessage
<< "=========================================================================" << "========================================================================="
"=========" "========="
<< std::endl; << std::endl;
std::cout << GridLogMessage << " Wilson dslash 4D vectorised" << std::endl; std::cout << GridLogMessage << " Wilson dslash 4D vectorised" << std::endl;
std::cout << GridLogMessage std::cout
<< GridLogMessage
<< "=========================================================================" << "========================================================================="
"=========" "========="
<< std::endl; << std::endl;
@ -857,12 +854,14 @@ int main(int argc, char **argv)
} }
Ls = 12; Ls = 12;
std::cout << GridLogMessage std::cout
<< GridLogMessage
<< "=========================================================================" << "========================================================================="
"=========" "========="
<< std::endl; << std::endl;
std::cout << GridLogMessage << " Domain wall dslash 4D vectorised" << std::endl; std::cout << GridLogMessage << " Domain wall dslash 4D vectorised" << std::endl;
std::cout << GridLogMessage std::cout
<< GridLogMessage
<< "=========================================================================" << "========================================================================="
"=========" "========="
<< std::endl; << std::endl;
@ -872,12 +871,15 @@ int main(int argc, char **argv)
dwf4.push_back(result); dwf4.push_back(result);
} }
std::cout << GridLogMessage std::cout
<< GridLogMessage
<< "=========================================================================" << "========================================================================="
"=========" "========="
<< std::endl; << std::endl;
std::cout << GridLogMessage << " Improved Staggered dslash 4D vectorised" << std::endl; std::cout << GridLogMessage << " Improved Staggered dslash 4D vectorised"
std::cout << GridLogMessage << std::endl;
std::cout
<< GridLogMessage
<< "=========================================================================" << "========================================================================="
"=========" "========="
<< std::endl; << std::endl;
@ -887,12 +889,14 @@ int main(int argc, char **argv)
staggered.push_back(result); staggered.push_back(result);
} }
std::cout << GridLogMessage std::cout
<< GridLogMessage
<< "=========================================================================" << "========================================================================="
"=========" "========="
<< std::endl; << std::endl;
std::cout << GridLogMessage << " Summary table Ls=" << Ls << std::endl; std::cout << GridLogMessage << " Summary table Ls=" << Ls << std::endl;
std::cout << GridLogMessage std::cout
<< GridLogMessage
<< "=========================================================================" << "========================================================================="
"=========" "========="
<< std::endl; << std::endl;
@ -902,10 +906,12 @@ int main(int argc, char **argv)
std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] << " \t\t " std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] << " \t\t "
<< dwf4[l] << " \t\t " << staggered[l] << std::endl; << dwf4[l] << " \t\t " << staggered[l] << std::endl;
} }
std::cout << GridLogMessage std::cout
<< GridLogMessage
<< "=========================================================================" << "========================================================================="
"=========" "========="
<< std::endl; << std::endl;
}
int NN = NN_global; int NN = NN_global;
if (do_memory) if (do_memory)
@ -950,12 +956,16 @@ int main(int argc, char **argv)
Benchmark::Comms(); Benchmark::Comms();
} }
std::cout << GridLogMessage if (do_flops)
{
std::cout
<< GridLogMessage
<< "=========================================================================" << "========================================================================="
"=========" "========="
<< std::endl; << std::endl;
std::cout << GridLogMessage << " Per Node Summary table Ls=" << Ls << std::endl; std::cout << GridLogMessage << " Per Node Summary table Ls=" << Ls << std::endl;
std::cout << GridLogMessage std::cout
<< GridLogMessage
<< "=========================================================================" << "========================================================================="
"=========" "========="
<< std::endl; << std::endl;
@ -965,12 +975,14 @@ int main(int argc, char **argv)
std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] / NN << " \t " std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] / NN << " \t "
<< dwf4[l] / NN << " \t " << staggered[l] / NN << std::endl; << dwf4[l] / NN << " \t " << staggered[l] / NN << std::endl;
} }
std::cout << GridLogMessage std::cout
<< GridLogMessage
<< "=========================================================================" << "========================================================================="
"=========" "========="
<< std::endl; << std::endl;
std::cout << GridLogMessage std::cout
<< GridLogMessage
<< "=========================================================================" << "========================================================================="
"=========" "========="
<< std::endl; << std::endl;
@ -980,10 +992,12 @@ int main(int argc, char **argv)
std::cout << GridLogMessage << " Comparison point is 0.5*(" << dwf4[sel] / NN << "+" std::cout << GridLogMessage << " Comparison point is 0.5*(" << dwf4[sel] / NN << "+"
<< dwf4[selm1] / NN << ") " << std::endl; << dwf4[selm1] / NN << ") " << std::endl;
std::cout << std::setprecision(3); std::cout << std::setprecision(3);
std::cout << GridLogMessage std::cout
<< GridLogMessage
<< "=========================================================================" << "========================================================================="
"=========" "========="
<< std::endl; << std::endl;
}
Grid_finalize(); Grid_finalize();
} }

36
Grid/Common.hpp Normal file
View File

@ -0,0 +1,36 @@
/*
Copyright © 2022 Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef Grid_Benchmarks_Common_hpp_
#define Grid_Benchmarks_Common_hpp_

// std::snprintf is used by grid_printf below; include it here so the header
// does not depend on the include order of the translation unit using it.
#include <cstdio>

// Output sink used by grid_printf. It can be overridden by defining GRID_MSG
// before including this header. The default expands to
// `std::cout << GridLogMessage`, so both names must be visible at the point
// where grid_printf is *used* (they are not declared by this header).
#ifndef GRID_MSG
#define GRID_MSG std::cout << GridLogMessage
#endif

// Size in bytes (terminating NUL included) of the stack buffer used to format
// one message. Longer messages are silently truncated by std::snprintf.
#ifndef GRID_MSG_MAXSIZE
#define GRID_MSG_MAXSIZE 1024
#endif

// printf-style logging helper: formats the arguments into a local buffer of
// GRID_MSG_MAXSIZE bytes and streams the result into GRID_MSG.
//
// The body is wrapped in do { ... } while (0) so that the macro behaves like a
// single statement: `if (c) grid_printf(...); else ...` compiles and binds the
// `else` as expected. As a consequence the call must be terminated by `;`.
// No newline or flush is appended; put "\n" in the format string if needed.
#define grid_printf(...)                                    \
  do                                                        \
  {                                                         \
    char _buf[GRID_MSG_MAXSIZE];                            \
    std::snprintf(_buf, GRID_MSG_MAXSIZE, __VA_ARGS__);     \
    GRID_MSG << _buf;                                       \
  } while (0)

#endif // Grid_Benchmarks_Common_hpp_