Grid column formatting
commit 03badefec7 (parent 514d81d351)

@@ -6,7 +6,7 @@
 BreakBeforeBraces: Allman,
 AllowShortIfStatementsOnASingleLine: false,
 IndentCaseLabels: false,
-ColumnLimit: 0,
+ColumnLimit: 90,
 AccessModifierOffset: -4,
 NamespaceIndentation: All,
 FixNamespaceComments: false,
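For orientation (not part of the commit itself): the only setting that changes above is ColumnLimit, from 0 (never wrap) to 90, and that is what drives every reflow in the hunks below. A minimal illustrative sketch of the effect on a long stream statement follows; the variable names and values are made up for the demo and do not come from the benchmarks.

// Illustrative sketch only: under ColumnLimit: 0 clang-format would leave the whole
// stream statement on one line; under ColumnLimit: 90 it wraps at operator<< and
// aligns the continuation, as in the reformatted hunks below.
#include <iostream>

int main()
{
  double flops = 1.0e9, t0 = 0.0, t1 = 1.0e6, NP = 4; // made-up numbers for the demo
  std::cout << "mflop/s = " << flops / (t1 - t0)
            << "  mflop/s per rank = " << flops / (t1 - t0) / NP << std::endl;
  return 0;
}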
@@ -122,87 +122,82 @@ class Benchmark

   for (int lat = 16; lat <= maxlat; lat += 8)
   {
-    // for(int Ls=8;Ls<=8;Ls*=2){
-    {
     int Ls = 12;

     Coordinate latt_size({lat * mpi_layout[0],
                           lat * mpi_layout[1],
                           lat * mpi_layout[2],
                           lat * mpi_layout[3]});

     GridCartesian Grid(latt_size, simd_layout, mpi_layout);
     RealD Nrank = Grid._Nprocessors;
     RealD Nnode = Grid.NodeCount();
     RealD ppn = Nrank / Nnode;

     std::vector<HalfSpinColourVectorD *> xbuf(8);
     std::vector<HalfSpinColourVectorD *> rbuf(8);
-    // Grid.ShmBufferFreeAll();
     uint64_t bytes = lat * lat * lat * Ls * sizeof(HalfSpinColourVectorD);
     for (int d = 0; d < 8; d++)
     {
       xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
       rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
       // bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
       // bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
     }

-    // int ncomm;
     double dbytes;

     for (int dir = 0; dir < 8; dir++)
     {
       int mu = dir % 4;
       if (mpi_layout[mu] > 1)
       {

         std::vector<double> times(Nloop);
         for (int i = 0; i < Nloop; i++)
         {

           dbytes = 0;
           double start = usecond();
           int xmit_to_rank;
           int recv_from_rank;

           if (dir == mu)
           {
             int comm_proc = 1;
             Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
           }
           else
           {
             int comm_proc = mpi_layout[mu] - 1;
             Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
           }
           Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
                               (void *)&rbuf[dir][0], recv_from_rank,
                               bytes);
           dbytes += bytes;

           double stop = usecond();
           t_time[i] = stop - start; // microseconds
         }
         timestat.statistics(t_time);

         dbytes = dbytes * ppn;
         double xbytes = dbytes * 0.5;
         double bidibytes = dbytes;

         std::cout << GridLogMessage << lat << "\t" << Ls << "\t "
                   << bytes << " \t "
                   << xbytes / timestat.mean << " \t " << xbytes * timestat.err / (timestat.mean * timestat.mean) << " \t "
                   << xbytes / timestat.max << " " << xbytes / timestat.min
                   << "\t\t" << bidibytes / timestat.mean << " " << bidibytes * timestat.err / (timestat.mean * timestat.mean) << " "
                   << bidibytes / timestat.max << " " << bidibytes / timestat.min << std::endl;
       }
     }
     for (int d = 0; d < 8; d++)
     {
       acceleratorFreeDevice(xbuf[d]);
       acceleratorFreeDevice(rbuf[d]);
     }
-    }
   }
   return;
@@ -32,23 +32,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 #ifdef HAVE_LIME
 using namespace Grid;

-std::string filestem(const int l)
-{
-  return "iobench_l" + std::to_string(l);
-}
+std::string filestem(const int l) { return "iobench_l" + std::to_string(l); }

-int vol(const int i)
-{
-  return BENCH_IO_LMIN + 2 * i;
-}
+int vol(const int i) { return BENCH_IO_LMIN + 2 * i; }

-int volInd(const int l)
-{
-  return (l - BENCH_IO_LMIN) / 2;
-}
+int volInd(const int l) { return (l - BENCH_IO_LMIN) / 2; }

-template <typename Mat>
-void stats(Mat &mean, Mat &stdDev, const std::vector<Mat> &data)
+template <typename Mat> void stats(Mat &mean, Mat &stdDev, const std::vector<Mat> &data)
 {
   auto nr = data[0].rows(), nc = data[0].cols();
   Eigen::MatrixXd sqSum(nr, nc);
@@ -66,11 +56,11 @@ void stats(Mat &mean, Mat &stdDev, const std::vector<Mat> &data)
   mean /= n;
 }

 #define grid_printf(...) \
   { \
     char _buf[1024]; \
     sprintf(_buf, __VA_ARGS__); \
     MSG << _buf; \
   }

 enum
@@ -173,47 +163,49 @@ int main(int argc, char **argv)
   MSG << "SUMMARY" << std::endl;
   MSG << BIGSEP << std::endl;
   MSG << "Summary of individual results (all results in MB/s)." << std::endl;
-  MSG << "Every second colum gives the standard deviation of the previous column." << std::endl;
+  MSG << "Every second colum gives the standard deviation of the previous column."
+      << std::endl;
   MSG << std::endl;
-  grid_printf("%4s %12s %12s %12s %12s %12s %12s %12s %12s\n",
-              "L", "std read", "std dev", "std write", "std dev",
-              "Grid read", "std dev", "Grid write", "std dev");
+  grid_printf("%4s %12s %12s %12s %12s %12s %12s %12s %12s\n", "L", "std read", "std dev",
+              "std write", "std dev", "Grid read", "std dev", "Grid write", "std dev");
   for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
   {
-    grid_printf("%4d %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n",
-                l, mean(volInd(l), sRead), stdDev(volInd(l), sRead),
-                mean(volInd(l), sWrite), stdDev(volInd(l), sWrite),
-                mean(volInd(l), gRead), stdDev(volInd(l), gRead),
-                mean(volInd(l), gWrite), stdDev(volInd(l), gWrite));
+    grid_printf("%4d %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n", l,
+                mean(volInd(l), sRead), stdDev(volInd(l), sRead), mean(volInd(l), sWrite),
+                stdDev(volInd(l), sWrite), mean(volInd(l), gRead),
+                stdDev(volInd(l), gRead), mean(volInd(l), gWrite),
+                stdDev(volInd(l), gWrite));
   }
   MSG << std::endl;
-  MSG << "Robustness of individual results, in %. (rob = 100% - std dev / mean)" << std::endl;
+  MSG << "Robustness of individual results, in %. (rob = 100% - std dev / mean)"
+      << std::endl;
   MSG << std::endl;
-  grid_printf("%4s %12s %12s %12s %12s\n",
-              "L", "std read", "std write", "Grid read", "Grid write");
+  grid_printf("%4s %12s %12s %12s %12s\n", "L", "std read", "std write", "Grid read",
+              "Grid write");
   for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
   {
-    grid_printf("%4d %12.1f %12.1f %12.1f %12.1f\n",
-                l, rob(volInd(l), sRead), rob(volInd(l), sWrite),
-                rob(volInd(l), gRead), rob(volInd(l), gWrite));
+    grid_printf("%4d %12.1f %12.1f %12.1f %12.1f\n", l, rob(volInd(l), sRead),
+                rob(volInd(l), sWrite), rob(volInd(l), gRead), rob(volInd(l), gWrite));
   }
   MSG << std::endl;
-  MSG << "Summary of results averaged over local volumes 24^4-" << BENCH_IO_LMAX << "^4 (all results in MB/s)." << std::endl;
-  MSG << "Every second colum gives the standard deviation of the previous column." << std::endl;
+  MSG << "Summary of results averaged over local volumes 24^4-" << BENCH_IO_LMAX
+      << "^4 (all results in MB/s)." << std::endl;
+  MSG << "Every second colum gives the standard deviation of the previous column."
+      << std::endl;
   MSG << std::endl;
-  grid_printf("%12s %12s %12s %12s %12s %12s %12s %12s\n",
-              "std read", "std dev", "std write", "std dev",
-              "Grid read", "std dev", "Grid write", "std dev");
-  grid_printf("%12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n",
-              avMean(sRead), avStdDev(sRead), avMean(sWrite), avStdDev(sWrite),
-              avMean(gRead), avStdDev(gRead), avMean(gWrite), avStdDev(gWrite));
+  grid_printf("%12s %12s %12s %12s %12s %12s %12s %12s\n", "std read", "std dev",
+              "std write", "std dev", "Grid read", "std dev", "Grid write", "std dev");
+  grid_printf("%12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n", avMean(sRead),
+              avStdDev(sRead), avMean(sWrite), avStdDev(sWrite), avMean(gRead),
+              avStdDev(gRead), avMean(gWrite), avStdDev(gWrite));
   MSG << std::endl;
-  MSG << "Robustness of volume-averaged results, in %. (rob = 100% - std dev / mean)" << std::endl;
+  MSG << "Robustness of volume-averaged results, in %. (rob = 100% - std dev / mean)"
+      << std::endl;
   MSG << std::endl;
-  grid_printf("%12s %12s %12s %12s\n",
-              "std read", "std write", "Grid read", "Grid write");
-  grid_printf("%12.1f %12.1f %12.1f %12.1f\n",
-              avRob(sRead), avRob(sWrite), avRob(gRead), avRob(gWrite));
+  grid_printf("%12s %12s %12s %12s\n", "std read", "std write", "Grid read",
+              "Grid write");
+  grid_printf("%12.1f %12.1f %12.1f %12.1f\n", avRob(sRead), avRob(sWrite), avRob(gRead),
+              avRob(gWrite));

   Grid_finalize();

@@ -34,8 +34,7 @@ struct time_statistics
   mean = sum / v.size();

   std::vector<double> diff(v.size());
-  std::transform(v.begin(), v.end(), diff.begin(), [=](double x)
-                 { return x - mean; });
+  std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; });
   double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
   err = std::sqrt(sq_sum / (v.size() * (v.size() - 1)));

@@ -50,8 +49,7 @@ void header()
   std::cout << GridLogMessage << " L "
             << "\t"
             << " Ls "
-            << "\t"
-            << std::setw(11) << "bytes\t\t"
+            << "\t" << std::setw(11) << "bytes\t\t"
             << "MB/s uni"
             << "\t"
             << "MB/s bidi" << std::endl;
@@ -64,7 +62,8 @@ int main(int argc, char **argv)
   Coordinate simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd());
   Coordinate mpi_layout = GridDefaultMpi();
   int threads = GridThread::GetThreads();
-  std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
+  std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads"
+            << std::endl;

   int Nloop = 250;
   int nmu = 0;
@@ -73,13 +72,21 @@ int main(int argc, char **argv)
     if (mpi_layout[mu] > 1)
       nmu++;

-  std::cout << GridLogMessage << "Number of iterations to average: " << Nloop << std::endl;
+  std::cout << GridLogMessage << "Number of iterations to average: " << Nloop
+            << std::endl;
   std::vector<double> t_time(Nloop);
   // time_statistics timestat;

-  std::cout << GridLogMessage << "====================================================================================================" << std::endl;
-  std::cout << GridLogMessage << "= Benchmarking sequential halo exchange from host memory " << std::endl;
-  std::cout << GridLogMessage << "====================================================================================================" << std::endl;
+  std::cout << GridLogMessage
+            << "========================================================================="
+               "==========================="
+            << std::endl;
+  std::cout << GridLogMessage
+            << "= Benchmarking sequential halo exchange from host memory " << std::endl;
+  std::cout << GridLogMessage
+            << "========================================================================="
+               "==========================="
+            << std::endl;
   header();

   for (int lat = 8; lat <= maxlat; lat += 4)
@@ -87,9 +94,7 @@ int main(int argc, char **argv)
     for (int Ls = 8; Ls <= 8; Ls *= 2)
     {

-      Coordinate latt_size({lat * mpi_layout[0],
-                            lat * mpi_layout[1],
-                            lat * mpi_layout[2],
+      Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
                             lat * mpi_layout[3]});

       GridCartesian Grid(latt_size, simd_layout, mpi_layout);
@@ -127,22 +132,16 @@ int main(int argc, char **argv)
         {
           std::vector<CommsRequest_t> requests;
           Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
-          Grid.SendToRecvFrom((void *)&xbuf[mu][0],
-                              xmit_to_rank,
-                              (void *)&rbuf[mu][0],
-                              recv_from_rank,
-                              bytes);
+          Grid.SendToRecvFrom((void *)&xbuf[mu][0], xmit_to_rank,
+                              (void *)&rbuf[mu][0], recv_from_rank, bytes);
         }

         comm_proc = mpi_layout[mu] - 1;
         {
           std::vector<CommsRequest_t> requests;
           Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
-          Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0],
-                              xmit_to_rank,
-                              (void *)&rbuf[mu + 4][0],
-                              recv_from_rank,
-                              bytes);
+          Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], xmit_to_rank,
+                              (void *)&rbuf[mu + 4][0], recv_from_rank, bytes);
         }
       }
       Grid.Barrier();
@@ -154,17 +153,24 @@ int main(int argc, char **argv)
       double bidibytes = xbytes + rbytes;

       std::cout << GridLogMessage << std::setw(4) << lat << "\t" << Ls << "\t"
-                << std::setw(11) << bytes << std::fixed << std::setprecision(1) << std::setw(7) << " "
-                << std::right << xbytes / mean << " "
+                << std::setw(11) << bytes << std::fixed << std::setprecision(1)
+                << std::setw(7) << " " << std::right << xbytes / mean << " "
                 << "\t\t" << std::setw(7) << bidibytes / mean << std::endl;
         }
       }
     }
   }

-  std::cout << GridLogMessage << "====================================================================================================" << std::endl;
-  std::cout << GridLogMessage << "= Benchmarking sequential halo exchange from GPU memory " << std::endl;
-  std::cout << GridLogMessage << "====================================================================================================" << std::endl;
+  std::cout << GridLogMessage
+            << "========================================================================="
+               "==========================="
+            << std::endl;
+  std::cout << GridLogMessage
+            << "= Benchmarking sequential halo exchange from GPU memory " << std::endl;
+  std::cout << GridLogMessage
+            << "========================================================================="
+               "==========================="
+            << std::endl;
   header();

   for (int lat = 8; lat <= maxlat; lat += 4)
@@ -172,9 +178,7 @@ int main(int argc, char **argv)
     for (int Ls = 8; Ls <= 8; Ls *= 2)
     {

-      Coordinate latt_size({lat * mpi_layout[0],
-                            lat * mpi_layout[1],
-                            lat * mpi_layout[2],
+      Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
                             lat * mpi_layout[3]});

       GridCartesian Grid(latt_size, simd_layout, mpi_layout);
@@ -212,22 +216,16 @@ int main(int argc, char **argv)
         {
           std::vector<CommsRequest_t> requests;
           Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
-          Grid.SendToRecvFrom((void *)&xbuf[mu][0],
-                              xmit_to_rank,
-                              (void *)&rbuf[mu][0],
-                              recv_from_rank,
-                              bytes);
+          Grid.SendToRecvFrom((void *)&xbuf[mu][0], xmit_to_rank,
+                              (void *)&rbuf[mu][0], recv_from_rank, bytes);
         }

         comm_proc = mpi_layout[mu] - 1;
         {
           std::vector<CommsRequest_t> requests;
           Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
-          Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0],
-                              xmit_to_rank,
-                              (void *)&rbuf[mu + 4][0],
-                              recv_from_rank,
-                              bytes);
+          Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], xmit_to_rank,
+                              (void *)&rbuf[mu + 4][0], recv_from_rank, bytes);
         }
       }
       Grid.Barrier();
@@ -239,8 +237,8 @@ int main(int argc, char **argv)
       double bidibytes = xbytes + rbytes;

       std::cout << GridLogMessage << std::setw(4) << lat << "\t" << Ls << "\t"
-                << std::setw(11) << bytes << std::fixed << std::setprecision(1) << std::setw(7) << " "
-                << std::right << xbytes / mean << " "
+                << std::setw(11) << bytes << std::fixed << std::setprecision(1)
+                << std::setw(7) << " " << std::right << xbytes / mean << " "
                 << "\t\t" << std::setw(7) << bidibytes / mean << std::endl;
     }
   }
@@ -253,9 +251,15 @@ int main(int argc, char **argv)
     }
   }

-  std::cout << GridLogMessage << "====================================================================================================" << std::endl;
+  std::cout << GridLogMessage
+            << "========================================================================="
+               "==========================="
+            << std::endl;
   std::cout << GridLogMessage << "= All done; Bye Bye" << std::endl;
-  std::cout << GridLogMessage << "====================================================================================================" << std::endl;
+  std::cout << GridLogMessage
+            << "========================================================================="
+               "==========================="
+            << std::endl;

   Grid_finalize();
 }
@@ -28,17 +28,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 using namespace std;
 using namespace Grid;

-template <class d>
-struct scal
+template <class d> struct scal
 {
   d internal;
 };

-Gamma::Algebra Gmu[] = {
-    Gamma::Algebra::GammaX,
-    Gamma::Algebra::GammaY,
-    Gamma::Algebra::GammaZ,
-    Gamma::Algebra::GammaT};
+Gamma::Algebra Gmu[] = {Gamma::Algebra::GammaX, Gamma::Algebra::GammaY,
+                        Gamma::Algebra::GammaZ, Gamma::Algebra::GammaT};

 int main(int argc, char **argv)
 {
@@ -59,13 +55,15 @@ int main(int argc, char **argv)

   long unsigned int single_site_flops = 8 * Nc * (7 + 16 * Nc);

-  GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
+  GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(
+      GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
   GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
   GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
   GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);

   std::cout << GridLogMessage << "Making s innermost grids" << std::endl;
-  GridCartesian *sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(), GridDefaultMpi());
+  GridCartesian *sUGrid =
+      SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(), GridDefaultMpi());
   GridRedBlackCartesian *sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
   GridCartesian *sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls, UGrid);
   GridRedBlackCartesian *sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls, UGrid);
@@ -177,13 +175,24 @@ int main(int argc, char **argv)
   RealD NP = UGrid->_Nprocessors;
   RealD NN = UGrid->NodeCount();

-  std::cout << GridLogMessage << "*****************************************************************" << std::endl;
-  std::cout << GridLogMessage << "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" << std::endl;
-  std::cout << GridLogMessage << "*****************************************************************" << std::endl;
-  std::cout << GridLogMessage << "*****************************************************************" << std::endl;
-  std::cout << GridLogMessage << "* Benchmarking DomainWallFermionR::Dhop " << std::endl;
-  std::cout << GridLogMessage << "* Vectorising space-time by " << vComplexF::Nsimd() << std::endl;
-  std::cout << GridLogMessage << "* VComplexF size is " << sizeof(vComplexF) << " B" << std::endl;
+  std::cout << GridLogMessage
+            << "*****************************************************************"
+            << std::endl;
+  std::cout << GridLogMessage
+            << "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm"
+            << std::endl;
+  std::cout << GridLogMessage
+            << "*****************************************************************"
+            << std::endl;
+  std::cout << GridLogMessage
+            << "*****************************************************************"
+            << std::endl;
+  std::cout << GridLogMessage
+            << "* Benchmarking DomainWallFermionR::Dhop " << std::endl;
+  std::cout << GridLogMessage << "* Vectorising space-time by " << vComplexF::Nsimd()
+            << std::endl;
+  std::cout << GridLogMessage << "* VComplexF size is " << sizeof(vComplexF) << " B"
+            << std::endl;
   if (sizeof(RealF) == 4)
     std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
   if (sizeof(RealF) == 8)
@@ -200,7 +209,9 @@ int main(int argc, char **argv)
     std::cout << GridLogMessage << "* Using Nc=3 WilsonKernels" << std::endl;
   if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm)
     std::cout << GridLogMessage << "* Using Asm Nc=3 WilsonKernels" << std::endl;
-  std::cout << GridLogMessage << "*****************************************************************" << std::endl;
+  std::cout << GridLogMessage
+            << "*****************************************************************"
+            << std::endl;

   DomainWallFermionF Dw(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5);
   int ncall = 300;
@@ -230,19 +241,29 @@ int main(int argc, char **argv)
     auto simdwidth = sizeof(vComplex);

     // RF: Nd Wilson * Ls, Nd gauge * Ls, Nc colors
-    double data_rf = volume * ((2 * Nd + 1) * Nd * Nc + 2 * Nd * Nc * Nc) * simdwidth / nsimd * ncall / (1024. * 1024. * 1024.);
+    double data_rf = volume * ((2 * Nd + 1) * Nd * Nc + 2 * Nd * Nc * Nc) * simdwidth /
+                     nsimd * ncall / (1024. * 1024. * 1024.);

     // mem: Nd Wilson * Ls, Nd gauge, Nc colors
-    double data_mem = (volume * (2 * Nd + 1) * Nd * Nc + (volume / Ls) * 2 * Nd * Nc * Nc) * simdwidth / nsimd * ncall / (1024. * 1024. * 1024.);
+    double data_mem =
+        (volume * (2 * Nd + 1) * Nd * Nc + (volume / Ls) * 2 * Nd * Nc * Nc) * simdwidth /
+        nsimd * ncall / (1024. * 1024. * 1024.);

-    std::cout << GridLogMessage << "Called Dw " << ncall << " times in " << t1 - t0 << " us" << std::endl;
+    std::cout << GridLogMessage << "Called Dw " << ncall << " times in " << t1 - t0
+              << " us" << std::endl;
     // std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
     // std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
     std::cout << GridLogMessage << "mflop/s = " << flops / (t1 - t0) << std::endl;
-    std::cout << GridLogMessage << "mflop/s per rank = " << flops / (t1 - t0) / NP << std::endl;
-    std::cout << GridLogMessage << "mflop/s per node = " << flops / (t1 - t0) / NN << std::endl;
-    std::cout << GridLogMessage << "RF GiB/s (base 2) = " << 1000000. * data_rf / ((t1 - t0)) << std::endl;
-    std::cout << GridLogMessage << "mem GiB/s (base 2) = " << 1000000. * data_mem / ((t1 - t0)) << std::endl;
+    std::cout << GridLogMessage << "mflop/s per rank = " << flops / (t1 - t0) / NP
+              << std::endl;
+    std::cout << GridLogMessage << "mflop/s per node = " << flops / (t1 - t0) / NN
+              << std::endl;
+    std::cout << GridLogMessage
+              << "RF GiB/s (base 2) = " << 1000000. * data_rf / ((t1 - t0))
+              << std::endl;
+    std::cout << GridLogMessage
+              << "mem GiB/s (base 2) = " << 1000000. * data_mem / ((t1 - t0))
+              << std::endl;
     err = ref - result;
     std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
     // exit(0);
@@ -313,7 +334,9 @@ int main(int argc, char **argv)
   }
   // dump=1;
   Dw.Dhop(src, result, 1);
-  std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl;
+  std::cout << GridLogMessage
+            << "Compare to naive wilson implementation Dag to verify correctness"
+            << std::endl;
   std::cout << GridLogMessage << "Called DwDag" << std::endl;
   std::cout << GridLogMessage << "norm dag result " << norm2(result) << std::endl;
   std::cout << GridLogMessage << "norm dag ref " << norm2(ref) << std::endl;
@@ -333,7 +356,8 @@ int main(int argc, char **argv)
   LatticeFermionF r_o(FrbGrid);
   LatticeFermionF r_eo(FGrid);

-  std::cout << GridLogMessage << "Calling Deo and Doe and //assert Deo+Doe == Dunprec" << std::endl;
+  std::cout << GridLogMessage << "Calling Deo and Doe and //assert Deo+Doe == Dunprec"
+            << std::endl;
   pickCheckerboard(Even, src_e, src);
   pickCheckerboard(Odd, src_o, src);

@@ -341,9 +365,12 @@ int main(int argc, char **argv)
   std::cout << GridLogMessage << "src_o" << norm2(src_o) << std::endl;

   // S-direction is INNERMOST and takes no part in the parity.
-  std::cout << GridLogMessage << "*********************************************************" << std::endl;
-  std::cout << GridLogMessage << "* Benchmarking DomainWallFermionF::DhopEO " << std::endl;
-  std::cout << GridLogMessage << "* Vectorising space-time by " << vComplexF::Nsimd() << std::endl;
+  std::cout << GridLogMessage
+            << "*********************************************************" << std::endl;
+  std::cout << GridLogMessage
+            << "* Benchmarking DomainWallFermionF::DhopEO " << std::endl;
+  std::cout << GridLogMessage << "* Vectorising space-time by " << vComplexF::Nsimd()
+            << std::endl;
   if (sizeof(RealF) == 4)
     std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
   if (sizeof(RealF) == 8)
@@ -360,7 +387,8 @@ int main(int argc, char **argv)
     std::cout << GridLogMessage << "* Using Nc=3 WilsonKernels" << std::endl;
   if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm)
     std::cout << GridLogMessage << "* Using Asm Nc=3 WilsonKernels" << std::endl;
-  std::cout << GridLogMessage << "*********************************************************" << std::endl;
+  std::cout << GridLogMessage
+            << "*********************************************************" << std::endl;
   {
     Dw.ZeroCounters();
     FGrid->Barrier();
@@ -387,8 +415,10 @@ int main(int argc, char **argv)
     double flops = (single_site_flops * volume * ncall) / 2.0;

     std::cout << GridLogMessage << "Deo mflop/s = " << flops / (t1 - t0) << std::endl;
-    std::cout << GridLogMessage << "Deo mflop/s per rank " << flops / (t1 - t0) / NP << std::endl;
-    std::cout << GridLogMessage << "Deo mflop/s per node " << flops / (t1 - t0) / NN << std::endl;
+    std::cout << GridLogMessage << "Deo mflop/s per rank " << flops / (t1 - t0) / NP
+              << std::endl;
+    std::cout << GridLogMessage << "Deo mflop/s per node " << flops / (t1 - t0) / NN
+              << std::endl;
     Dw.Report();
   }
   Dw.DhopEO(src_o, r_e, DaggerNo);