more formatting
This commit is contained in:
parent
03badefec7
commit
503a993232
@ -44,8 +44,7 @@ struct time_statistics
|
|||||||
mean = sum / v.size();
|
mean = sum / v.size();
|
||||||
|
|
||||||
std::vector<double> diff(v.size());
|
std::vector<double> diff(v.size());
|
||||||
std::transform(v.begin(), v.end(), diff.begin(), [=](double x)
|
std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; });
|
||||||
{ return x - mean; });
|
|
||||||
double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
|
double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
|
||||||
err = std::sqrt(sq_sum / (v.size() * (v.size() - 1)));
|
err = std::sqrt(sq_sum / (v.size() * (v.size() - 1)));
|
||||||
|
|
||||||
@ -64,11 +63,8 @@ void comms_header()
|
|||||||
<< "bytes\t MB/s uni (err/min/max) \t\t MB/s bidi (err/min/max)" << std::endl;
|
<< "bytes\t MB/s uni (err/min/max) \t\t MB/s bidi (err/min/max)" << std::endl;
|
||||||
};
|
};
|
||||||
|
|
||||||
Gamma::Algebra Gmu[] = {
|
Gamma::Algebra Gmu[] = {Gamma::Algebra::GammaX, Gamma::Algebra::GammaY,
|
||||||
Gamma::Algebra::GammaX,
|
Gamma::Algebra::GammaZ, Gamma::Algebra::GammaT};
|
||||||
Gamma::Algebra::GammaY,
|
|
||||||
Gamma::Algebra::GammaZ,
|
|
||||||
Gamma::Algebra::GammaT};
|
|
||||||
|
|
||||||
struct controls
|
struct controls
|
||||||
{
|
{
|
||||||
@ -84,19 +80,49 @@ class Benchmark
|
|||||||
{
|
{
|
||||||
|
|
||||||
int threads = GridThread::GetThreads();
|
int threads = GridThread::GetThreads();
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
std::cout << GridLogMessage << "= Grid is setup to use " << threads << " threads" << std::endl;
|
<< "======================================================================="
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
"==========="
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage << "= Grid is setup to use " << threads << " threads"
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "======================================================================="
|
||||||
|
"==========="
|
||||||
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << "Grid Default Decomposition patterns\n";
|
std::cout << GridLogMessage << "Grid Default Decomposition patterns\n";
|
||||||
std::cout << GridLogMessage << "\tOpenMP threads : " << GridThread::GetThreads() << std::endl;
|
std::cout << GridLogMessage << "\tOpenMP threads : " << GridThread::GetThreads()
|
||||||
std::cout << GridLogMessage << "\tMPI tasks : " << GridCmdVectorIntToString(GridDefaultMpi()) << std::endl;
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << "\tvReal : " << sizeof(vReal) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vReal::Nsimd())) << std::endl;
|
std::cout << GridLogMessage
|
||||||
std::cout << GridLogMessage << "\tvRealF : " << sizeof(vRealF) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vRealF::Nsimd())) << std::endl;
|
<< "\tMPI tasks : " << GridCmdVectorIntToString(GridDefaultMpi())
|
||||||
std::cout << GridLogMessage << "\tvRealD : " << sizeof(vRealD) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vRealD::Nsimd())) << std::endl;
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << "\tvComplex : " << sizeof(vComplex) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vComplex::Nsimd())) << std::endl;
|
std::cout << GridLogMessage << "\tvReal : " << sizeof(vReal) * 8 << "bits ; "
|
||||||
std::cout << GridLogMessage << "\tvComplexF : " << sizeof(vComplexF) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vComplexF::Nsimd())) << std::endl;
|
<< GridCmdVectorIntToString(GridDefaultSimd(4, vReal::Nsimd()))
|
||||||
std::cout << GridLogMessage << "\tvComplexD : " << sizeof(vComplexD) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vComplexD::Nsimd())) << std::endl;
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage << "\tvRealF : " << sizeof(vRealF) * 8
|
||||||
|
<< "bits ; "
|
||||||
|
<< GridCmdVectorIntToString(GridDefaultSimd(4, vRealF::Nsimd()))
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tvRealD : " << sizeof(vRealD) * 8
|
||||||
|
<< "bits ; "
|
||||||
|
<< GridCmdVectorIntToString(GridDefaultSimd(4, vRealD::Nsimd()))
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tvComplex : " << sizeof(vComplex) * 8
|
||||||
|
<< "bits ; "
|
||||||
|
<< GridCmdVectorIntToString(GridDefaultSimd(4, vComplex::Nsimd()))
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tvComplexF : " << sizeof(vComplexF) * 8
|
||||||
|
<< "bits ; "
|
||||||
|
<< GridCmdVectorIntToString(GridDefaultSimd(4, vComplexF::Nsimd()))
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tvComplexD : " << sizeof(vComplexD) * 8
|
||||||
|
<< "bits ; "
|
||||||
|
<< GridCmdVectorIntToString(GridDefaultSimd(4, vComplexD::Nsimd()))
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "======================================================================="
|
||||||
|
"==========="
|
||||||
|
<< std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void Comms(void)
|
static void Comms(void)
|
||||||
@ -115,18 +141,23 @@ class Benchmark
|
|||||||
std::vector<double> t_time(Nloop);
|
std::vector<double> t_time(Nloop);
|
||||||
time_statistics timestat;
|
time_statistics timestat;
|
||||||
|
|
||||||
std::cout << GridLogMessage << "====================================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
std::cout << GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in " << nmu << " dimensions" << std::endl;
|
<< "======================================================================="
|
||||||
std::cout << GridLogMessage << "====================================================================================================" << std::endl;
|
"============================="
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "
|
||||||
|
<< nmu << " dimensions" << std::endl;
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "======================================================================="
|
||||||
|
"============================="
|
||||||
|
<< std::endl;
|
||||||
comms_header();
|
comms_header();
|
||||||
|
|
||||||
for (int lat = 16; lat <= maxlat; lat += 8)
|
for (int lat = 16; lat <= maxlat; lat += 8)
|
||||||
{
|
{
|
||||||
int Ls = 12;
|
int Ls = 12;
|
||||||
|
|
||||||
Coordinate latt_size({lat * mpi_layout[0],
|
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
|
||||||
lat * mpi_layout[1],
|
|
||||||
lat * mpi_layout[2],
|
|
||||||
lat * mpi_layout[3]});
|
lat * mpi_layout[3]});
|
||||||
|
|
||||||
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
||||||
@ -173,8 +204,7 @@ class Benchmark
|
|||||||
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
|
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
|
||||||
}
|
}
|
||||||
Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
|
Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
|
||||||
(void *)&rbuf[dir][0], recv_from_rank,
|
(void *)&rbuf[dir][0], recv_from_rank, bytes);
|
||||||
bytes);
|
|
||||||
dbytes += bytes;
|
dbytes += bytes;
|
||||||
|
|
||||||
double stop = usecond();
|
double stop = usecond();
|
||||||
@ -186,12 +216,14 @@ class Benchmark
|
|||||||
double xbytes = dbytes * 0.5;
|
double xbytes = dbytes * 0.5;
|
||||||
double bidibytes = dbytes;
|
double bidibytes = dbytes;
|
||||||
|
|
||||||
std::cout << GridLogMessage << lat << "\t" << Ls << "\t "
|
std::cout << GridLogMessage << lat << "\t" << Ls << "\t " << bytes << " \t "
|
||||||
<< bytes << " \t "
|
<< xbytes / timestat.mean << " \t "
|
||||||
<< xbytes / timestat.mean << " \t " << xbytes * timestat.err / (timestat.mean * timestat.mean) << " \t "
|
<< xbytes * timestat.err / (timestat.mean * timestat.mean) << " \t "
|
||||||
<< xbytes / timestat.max << " " << xbytes / timestat.min
|
<< xbytes / timestat.max << " " << xbytes / timestat.min << "\t\t"
|
||||||
<< "\t\t" << bidibytes / timestat.mean << " " << bidibytes * timestat.err / (timestat.mean * timestat.mean) << " "
|
<< bidibytes / timestat.mean << " "
|
||||||
<< bidibytes / timestat.max << " " << bidibytes / timestat.min << std::endl;
|
<< bidibytes * timestat.err / (timestat.mean * timestat.mean) << " "
|
||||||
|
<< bidibytes / timestat.max << " " << bidibytes / timestat.min
|
||||||
|
<< std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (int d = 0; d < 8; d++)
|
for (int d = 0; d < 8; d++)
|
||||||
@ -212,9 +244,15 @@ class Benchmark
|
|||||||
Coordinate simd_layout = GridDefaultSimd(Nd, vReal::Nsimd());
|
Coordinate simd_layout = GridDefaultSimd(Nd, vReal::Nsimd());
|
||||||
Coordinate mpi_layout = GridDefaultMpi();
|
Coordinate mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "======================================================================="
|
||||||
|
"==========="
|
||||||
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << "= Benchmarking a*x + y bandwidth" << std::endl;
|
std::cout << GridLogMessage << "= Benchmarking a*x + y bandwidth" << std::endl;
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "======================================================================="
|
||||||
|
"==========="
|
||||||
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << " L "
|
std::cout << GridLogMessage << " L "
|
||||||
<< "\t\t"
|
<< "\t\t"
|
||||||
<< "bytes"
|
<< "bytes"
|
||||||
@ -224,7 +262,9 @@ class Benchmark
|
|||||||
<< "Gflop/s"
|
<< "Gflop/s"
|
||||||
<< "\t\t seconds"
|
<< "\t\t seconds"
|
||||||
<< "\t\tGB/s / node" << std::endl;
|
<< "\t\tGB/s / node" << std::endl;
|
||||||
std::cout << GridLogMessage << "----------------------------------------------------------" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "----------------------------------------------------------"
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
// uint64_t NP;
|
// uint64_t NP;
|
||||||
uint64_t NN;
|
uint64_t NN;
|
||||||
@ -237,7 +277,8 @@ class Benchmark
|
|||||||
for (int lat = 8; lat <= lmax; lat += 8)
|
for (int lat = 8; lat <= lmax; lat += 8)
|
||||||
{
|
{
|
||||||
|
|
||||||
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2], lat * mpi_layout[3]});
|
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
|
||||||
|
lat * mpi_layout[3]});
|
||||||
int64_t vol = latt_size[0] * latt_size[1] * latt_size[2] * latt_size[3];
|
int64_t vol = latt_size[0] * latt_size[1] * latt_size[2] * latt_size[3];
|
||||||
|
|
||||||
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
||||||
@ -268,9 +309,10 @@ class Benchmark
|
|||||||
|
|
||||||
double flops = vol * Nvec * 2; // mul,add
|
double flops = vol * Nvec * 2; // mul,add
|
||||||
double bytes = 3.0 * vol * Nvec * sizeof(Real);
|
double bytes = 3.0 * vol * Nvec * sizeof(Real);
|
||||||
std::cout << GridLogMessage << std::setprecision(3)
|
std::cout << GridLogMessage << std::setprecision(3) << lat << "\t\t" << bytes
|
||||||
<< lat << "\t\t" << bytes << " \t\t" << bytes / time << "\t\t" << flops / time << "\t\t" << (stop - start) / 1000. / 1000.
|
<< " \t\t" << bytes / time << "\t\t" << flops / time << "\t\t"
|
||||||
<< "\t\t" << bytes / time / NN << std::endl;
|
<< (stop - start) / 1000. / 1000. << "\t\t" << bytes / time / NN
|
||||||
|
<< std::endl;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -282,9 +324,15 @@ class Benchmark
|
|||||||
Coordinate simd_layout = GridDefaultSimd(Nd, vComplexF::Nsimd());
|
Coordinate simd_layout = GridDefaultSimd(Nd, vComplexF::Nsimd());
|
||||||
Coordinate mpi_layout = GridDefaultMpi();
|
Coordinate mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "======================================================================="
|
||||||
|
"==========="
|
||||||
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << "= Benchmarking z = y*x SU(4) bandwidth" << std::endl;
|
std::cout << GridLogMessage << "= Benchmarking z = y*x SU(4) bandwidth" << std::endl;
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "======================================================================="
|
||||||
|
"==========="
|
||||||
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << " L "
|
std::cout << GridLogMessage << " L "
|
||||||
<< "\t\t"
|
<< "\t\t"
|
||||||
<< "bytes"
|
<< "bytes"
|
||||||
@ -294,7 +342,9 @@ class Benchmark
|
|||||||
<< "Gflop/s"
|
<< "Gflop/s"
|
||||||
<< "\t\t seconds"
|
<< "\t\t seconds"
|
||||||
<< "\t\tGB/s / node" << std::endl;
|
<< "\t\tGB/s / node" << std::endl;
|
||||||
std::cout << GridLogMessage << "----------------------------------------------------------" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "----------------------------------------------------------"
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
uint64_t NN;
|
uint64_t NN;
|
||||||
|
|
||||||
@ -305,7 +355,8 @@ class Benchmark
|
|||||||
for (int lat = 8; lat <= lmax; lat += 8)
|
for (int lat = 8; lat <= lmax; lat += 8)
|
||||||
{
|
{
|
||||||
|
|
||||||
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2], lat * mpi_layout[3]});
|
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
|
||||||
|
lat * mpi_layout[3]});
|
||||||
int64_t vol = latt_size[0] * latt_size[1] * latt_size[2] * latt_size[3];
|
int64_t vol = latt_size[0] * latt_size[1] * latt_size[2] * latt_size[3];
|
||||||
|
|
||||||
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
||||||
@ -332,9 +383,10 @@ class Benchmark
|
|||||||
|
|
||||||
double flops = vol * Nc4 * Nc4 * (6 + (Nc4 - 1) * 8); // mul,add
|
double flops = vol * Nc4 * Nc4 * (6 + (Nc4 - 1) * 8); // mul,add
|
||||||
double bytes = 3.0 * vol * Nc4 * Nc4 * 2 * sizeof(RealF);
|
double bytes = 3.0 * vol * Nc4 * Nc4 * 2 * sizeof(RealF);
|
||||||
std::cout << GridLogMessage << std::setprecision(3)
|
std::cout << GridLogMessage << std::setprecision(3) << lat << "\t\t" << bytes
|
||||||
<< lat << "\t\t" << bytes << " \t\t" << bytes / time << "\t\t" << flops / time << "\t\t" << (stop - start) / 1000. / 1000.
|
<< " \t\t" << bytes / time << "\t\t" << flops / time << "\t\t"
|
||||||
<< "\t\t" << bytes / time / NN << std::endl;
|
<< (stop - start) / 1000. / 1000. << "\t\t" << bytes / time / NN
|
||||||
|
<< std::endl;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -355,31 +407,41 @@ class Benchmark
|
|||||||
Coordinate mpi = GridDefaultMpi();
|
Coordinate mpi = GridDefaultMpi();
|
||||||
assert(mpi.size() == 4);
|
assert(mpi.size() == 4);
|
||||||
Coordinate local({L, L, L, L});
|
Coordinate local({L, L, L, L});
|
||||||
Coordinate latt4({local[0] * mpi[0], local[1] * mpi[1], local[2] * mpi[2], local[3] * mpi[3]});
|
Coordinate latt4(
|
||||||
|
{local[0] * mpi[0], local[1] * mpi[1], local[2] * mpi[2], local[3] * mpi[3]});
|
||||||
|
|
||||||
GridCartesian *TmpGrid = SpaceTimeGrid::makeFourDimGrid(latt4,
|
GridCartesian *TmpGrid = SpaceTimeGrid::makeFourDimGrid(
|
||||||
GridDefaultSimd(Nd, vComplex::Nsimd()),
|
latt4, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
|
||||||
GridDefaultMpi());
|
|
||||||
uint64_t NP = TmpGrid->RankCount();
|
uint64_t NP = TmpGrid->RankCount();
|
||||||
uint64_t NN = TmpGrid->NodeCount();
|
uint64_t NN = TmpGrid->NodeCount();
|
||||||
NN_global = NN;
|
NN_global = NN;
|
||||||
uint64_t SHM = NP / NN;
|
uint64_t SHM = NP / NN;
|
||||||
|
|
||||||
///////// Welcome message ////////////
|
///////// Welcome message ////////////
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
std::cout << GridLogMessage << "Benchmark DWF on " << L << "^4 local volume " << std::endl;
|
<< "======================================================================="
|
||||||
|
"==========="
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage << "Benchmark DWF on " << L << "^4 local volume "
|
||||||
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << "* Nc : " << Nc << std::endl;
|
std::cout << GridLogMessage << "* Nc : " << Nc << std::endl;
|
||||||
std::cout << GridLogMessage << "* Global volume : " << GridCmdVectorIntToString(latt4) << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "* Global volume : " << GridCmdVectorIntToString(latt4) << std::endl;
|
||||||
std::cout << GridLogMessage << "* Ls : " << Ls << std::endl;
|
std::cout << GridLogMessage << "* Ls : " << Ls << std::endl;
|
||||||
std::cout << GridLogMessage << "* ranks : " << NP << std::endl;
|
std::cout << GridLogMessage << "* ranks : " << NP << std::endl;
|
||||||
std::cout << GridLogMessage << "* nodes : " << NN << std::endl;
|
std::cout << GridLogMessage << "* nodes : " << NN << std::endl;
|
||||||
std::cout << GridLogMessage << "* ranks/node : " << SHM << std::endl;
|
std::cout << GridLogMessage << "* ranks/node : " << SHM << std::endl;
|
||||||
std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi) << std::endl;
|
std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi)
|
||||||
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << "* Using " << threads << " threads" << std::endl;
|
std::cout << GridLogMessage << "* Using " << threads << " threads" << std::endl;
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "======================================================================="
|
||||||
|
"==========="
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
///////// Lattice Init ////////////
|
///////// Lattice Init ////////////
|
||||||
GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
|
GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(
|
||||||
|
latt4, GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
|
||||||
GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
|
GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
|
||||||
GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
|
GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
|
||||||
@ -418,10 +480,14 @@ class Benchmark
|
|||||||
std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S ");
|
std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S ");
|
||||||
|
|
||||||
controls Cases[] = {
|
controls Cases[] = {
|
||||||
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicyConcurrent},
|
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsThenCompute,
|
||||||
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicyConcurrent},
|
CartesianCommunicator::CommunicatorPolicyConcurrent},
|
||||||
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicySequential},
|
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsAndCompute,
|
||||||
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicySequential}};
|
CartesianCommunicator::CommunicatorPolicyConcurrent},
|
||||||
|
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsThenCompute,
|
||||||
|
CartesianCommunicator::CommunicatorPolicySequential},
|
||||||
|
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsAndCompute,
|
||||||
|
CartesianCommunicator::CommunicatorPolicySequential}};
|
||||||
|
|
||||||
for (int c = 0; c < num_cases; c++)
|
for (int c = 0; c < num_cases; c++)
|
||||||
{
|
{
|
||||||
@ -430,7 +496,10 @@ class Benchmark
|
|||||||
WilsonKernelsStatic::Opt = Cases[c].Opt;
|
WilsonKernelsStatic::Opt = Cases[c].Opt;
|
||||||
CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
|
CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
|
||||||
|
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "==================================================================="
|
||||||
|
"==============="
|
||||||
|
<< std::endl;
|
||||||
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric)
|
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric)
|
||||||
std::cout << GridLogMessage << "* Using GENERIC Nc WilsonKernels" << std::endl;
|
std::cout << GridLogMessage << "* Using GENERIC Nc WilsonKernels" << std::endl;
|
||||||
if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute)
|
if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute)
|
||||||
@ -438,7 +507,10 @@ class Benchmark
|
|||||||
if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute)
|
if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute)
|
||||||
std::cout << GridLogMessage << "* Using sequential Comms/Compute" << std::endl;
|
std::cout << GridLogMessage << "* Using sequential Comms/Compute" << std::endl;
|
||||||
std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
|
std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "==================================================================="
|
||||||
|
"==============="
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
int nwarm = 10;
|
int nwarm = 10;
|
||||||
double t0 = usecond();
|
double t0 = usecond();
|
||||||
@ -453,7 +525,8 @@ class Benchmark
|
|||||||
|
|
||||||
FGrid->Broadcast(0, &ncall, sizeof(ncall));
|
FGrid->Broadcast(0, &ncall, sizeof(ncall));
|
||||||
|
|
||||||
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
|
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per
|
||||||
|
//second"<<std::endl;
|
||||||
Dw.ZeroCounters();
|
Dw.ZeroCounters();
|
||||||
|
|
||||||
time_statistics timestat;
|
time_statistics timestat;
|
||||||
@ -478,7 +551,8 @@ class Benchmark
|
|||||||
#if 0
|
#if 0
|
||||||
double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + Nd*Nc*Ns + Nd*Nc*Ns*2;
|
double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + Nd*Nc*Ns + Nd*Nc*Ns*2;
|
||||||
#else
|
#else
|
||||||
double fps = Nc * (6 + (Nc - 1) * 8) * Ns * Nd + 2 * Nd * Nc * Ns + 2 * Nd * Nc * Ns * 2;
|
double fps =
|
||||||
|
Nc * (6 + (Nc - 1) * 8) * Ns * Nd + 2 * Nd * Nc * Ns + 2 * Nd * Nc * Ns * 2;
|
||||||
#endif
|
#endif
|
||||||
double flops = (fps * volume) / 2;
|
double flops = (fps * volume) / 2;
|
||||||
double mf_hi, mf_lo, mf_err;
|
double mf_hi, mf_lo, mf_err;
|
||||||
@ -500,14 +574,25 @@ class Benchmark
|
|||||||
mflops_worst = mflops;
|
mflops_worst = mflops;
|
||||||
|
|
||||||
std::cout << GridLogMessage << "Deo FlopsPerSite is " << fps << std::endl;
|
std::cout << GridLogMessage << "Deo FlopsPerSite is " << fps << std::endl;
|
||||||
std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s = " << mflops << " (" << mf_err << ") " << mf_lo << "-" << mf_hi << std::endl;
|
std::cout << GridLogMessage << std::fixed << std::setprecision(1)
|
||||||
std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s per rank " << mflops / NP << std::endl;
|
<< "Deo mflop/s = " << mflops << " (" << mf_err << ") " << mf_lo
|
||||||
std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s per node " << mflops / NN << std::endl;
|
<< "-" << mf_hi << std::endl;
|
||||||
|
std::cout << GridLogMessage << std::fixed << std::setprecision(1)
|
||||||
|
<< "Deo mflop/s per rank " << mflops / NP << std::endl;
|
||||||
|
std::cout << GridLogMessage << std::fixed << std::setprecision(1)
|
||||||
|
<< "Deo mflop/s per node " << mflops / NN << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
std::cout << GridLogMessage << L << "^4 x " << Ls << " Deo Best mflop/s = " << mflops_best << " ; " << mflops_best / NN << " per node " << std::endl;
|
<< "====================================================================="
|
||||||
std::cout << GridLogMessage << L << "^4 x " << Ls << " Deo Worst mflop/s = " << mflops_worst << " ; " << mflops_worst / NN << " per node " << std::endl;
|
"============="
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage << L << "^4 x " << Ls
|
||||||
|
<< " Deo Best mflop/s = " << mflops_best << " ; "
|
||||||
|
<< mflops_best / NN << " per node " << std::endl;
|
||||||
|
std::cout << GridLogMessage << L << "^4 x " << Ls
|
||||||
|
<< " Deo Worst mflop/s = " << mflops_worst << " ; "
|
||||||
|
<< mflops_worst / NN << " per node " << std::endl;
|
||||||
std::cout << GridLogMessage << fmt << std::endl;
|
std::cout << GridLogMessage << fmt << std::endl;
|
||||||
std::cout << GridLogMessage;
|
std::cout << GridLogMessage;
|
||||||
|
|
||||||
@ -516,7 +601,10 @@ class Benchmark
|
|||||||
std::cout << mflops_all[i] / NN << " ; ";
|
std::cout << mflops_all[i] / NN << " ; ";
|
||||||
}
|
}
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "====================================================================="
|
||||||
|
"============="
|
||||||
|
<< std::endl;
|
||||||
}
|
}
|
||||||
return mflops_best;
|
return mflops_best;
|
||||||
}
|
}
|
||||||
@ -535,29 +623,39 @@ class Benchmark
|
|||||||
Coordinate mpi = GridDefaultMpi();
|
Coordinate mpi = GridDefaultMpi();
|
||||||
assert(mpi.size() == 4);
|
assert(mpi.size() == 4);
|
||||||
Coordinate local({L, L, L, L});
|
Coordinate local({L, L, L, L});
|
||||||
Coordinate latt4({local[0] * mpi[0], local[1] * mpi[1], local[2] * mpi[2], local[3] * mpi[3]});
|
Coordinate latt4(
|
||||||
|
{local[0] * mpi[0], local[1] * mpi[1], local[2] * mpi[2], local[3] * mpi[3]});
|
||||||
|
|
||||||
GridCartesian *TmpGrid = SpaceTimeGrid::makeFourDimGrid(latt4,
|
GridCartesian *TmpGrid = SpaceTimeGrid::makeFourDimGrid(
|
||||||
GridDefaultSimd(Nd, vComplex::Nsimd()),
|
latt4, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
|
||||||
GridDefaultMpi());
|
|
||||||
uint64_t NP = TmpGrid->RankCount();
|
uint64_t NP = TmpGrid->RankCount();
|
||||||
uint64_t NN = TmpGrid->NodeCount();
|
uint64_t NN = TmpGrid->NodeCount();
|
||||||
NN_global = NN;
|
NN_global = NN;
|
||||||
uint64_t SHM = NP / NN;
|
uint64_t SHM = NP / NN;
|
||||||
|
|
||||||
///////// Welcome message ////////////
|
///////// Welcome message ////////////
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
std::cout << GridLogMessage << "Benchmark ImprovedStaggered on " << L << "^4 local volume " << std::endl;
|
<< "======================================================================="
|
||||||
std::cout << GridLogMessage << "* Global volume : " << GridCmdVectorIntToString(latt4) << std::endl;
|
"==========="
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage << "Benchmark ImprovedStaggered on " << L
|
||||||
|
<< "^4 local volume " << std::endl;
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "* Global volume : " << GridCmdVectorIntToString(latt4) << std::endl;
|
||||||
std::cout << GridLogMessage << "* ranks : " << NP << std::endl;
|
std::cout << GridLogMessage << "* ranks : " << NP << std::endl;
|
||||||
std::cout << GridLogMessage << "* nodes : " << NN << std::endl;
|
std::cout << GridLogMessage << "* nodes : " << NN << std::endl;
|
||||||
std::cout << GridLogMessage << "* ranks/node : " << SHM << std::endl;
|
std::cout << GridLogMessage << "* ranks/node : " << SHM << std::endl;
|
||||||
std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi) << std::endl;
|
std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi)
|
||||||
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << "* Using " << threads << " threads" << std::endl;
|
std::cout << GridLogMessage << "* Using " << threads << " threads" << std::endl;
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "======================================================================="
|
||||||
|
"==========="
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
///////// Lattice Init ////////////
|
///////// Lattice Init ////////////
|
||||||
GridCartesian *FGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
|
GridCartesian *FGrid = SpaceTimeGrid::makeFourDimGrid(
|
||||||
|
latt4, GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
|
||||||
GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
|
GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
|
||||||
|
|
||||||
///////// RNG Init ////////////
|
///////// RNG Init ////////////
|
||||||
@ -599,10 +697,14 @@ class Benchmark
|
|||||||
std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S ");
|
std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S ");
|
||||||
|
|
||||||
controls Cases[] = {
|
controls Cases[] = {
|
||||||
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicyConcurrent},
|
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsThenCompute,
|
||||||
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicyConcurrent},
|
CartesianCommunicator::CommunicatorPolicyConcurrent},
|
||||||
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicySequential},
|
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsAndCompute,
|
||||||
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicySequential}};
|
CartesianCommunicator::CommunicatorPolicyConcurrent},
|
||||||
|
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsThenCompute,
|
||||||
|
CartesianCommunicator::CommunicatorPolicySequential},
|
||||||
|
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsAndCompute,
|
||||||
|
CartesianCommunicator::CommunicatorPolicySequential}};
|
||||||
|
|
||||||
for (int c = 0; c < num_cases; c++)
|
for (int c = 0; c < num_cases; c++)
|
||||||
{
|
{
|
||||||
@ -611,15 +713,22 @@ class Benchmark
|
|||||||
StaggeredKernelsStatic::Opt = Cases[c].Opt;
|
StaggeredKernelsStatic::Opt = Cases[c].Opt;
|
||||||
CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
|
CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
|
||||||
|
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "==================================================================="
|
||||||
|
"==============="
|
||||||
|
<< std::endl;
|
||||||
if (StaggeredKernelsStatic::Opt == StaggeredKernelsStatic::OptGeneric)
|
if (StaggeredKernelsStatic::Opt == StaggeredKernelsStatic::OptGeneric)
|
||||||
std::cout << GridLogMessage << "* Using GENERIC Nc StaggeredKernels" << std::endl;
|
std::cout << GridLogMessage << "* Using GENERIC Nc StaggeredKernels"
|
||||||
|
<< std::endl;
|
||||||
if (StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute)
|
if (StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute)
|
||||||
std::cout << GridLogMessage << "* Using Overlapped Comms/Compute" << std::endl;
|
std::cout << GridLogMessage << "* Using Overlapped Comms/Compute" << std::endl;
|
||||||
if (StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsThenCompute)
|
if (StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsThenCompute)
|
||||||
std::cout << GridLogMessage << "* Using sequential Comms/Compute" << std::endl;
|
std::cout << GridLogMessage << "* Using sequential Comms/Compute" << std::endl;
|
||||||
std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
|
std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "==================================================================="
|
||||||
|
"==============="
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
int nwarm = 10;
|
int nwarm = 10;
|
||||||
double t0 = usecond();
|
double t0 = usecond();
|
||||||
@ -634,7 +743,8 @@ class Benchmark
|
|||||||
|
|
||||||
FGrid->Broadcast(0, &ncall, sizeof(ncall));
|
FGrid->Broadcast(0, &ncall, sizeof(ncall));
|
||||||
|
|
||||||
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
|
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per
|
||||||
|
//second"<<std::endl;
|
||||||
Ds.ZeroCounters();
|
Ds.ZeroCounters();
|
||||||
|
|
||||||
time_statistics timestat;
|
time_statistics timestat;
|
||||||
@ -670,14 +780,25 @@ class Benchmark
|
|||||||
if (mflops < mflops_worst)
|
if (mflops < mflops_worst)
|
||||||
mflops_worst = mflops;
|
mflops_worst = mflops;
|
||||||
|
|
||||||
std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s = " << mflops << " (" << mf_err << ") " << mf_lo << "-" << mf_hi << std::endl;
|
std::cout << GridLogMessage << std::fixed << std::setprecision(1)
|
||||||
std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s per rank " << mflops / NP << std::endl;
|
<< "Deo mflop/s = " << mflops << " (" << mf_err << ") " << mf_lo
|
||||||
std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s per node " << mflops / NN << std::endl;
|
<< "-" << mf_hi << std::endl;
|
||||||
|
std::cout << GridLogMessage << std::fixed << std::setprecision(1)
|
||||||
|
<< "Deo mflop/s per rank " << mflops / NP << std::endl;
|
||||||
|
std::cout << GridLogMessage << std::fixed << std::setprecision(1)
|
||||||
|
<< "Deo mflop/s per node " << mflops / NN << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
std::cout << GridLogMessage << L << "^4 Deo Best mflop/s = " << mflops_best << " ; " << mflops_best / NN << " per node " << std::endl;
|
<< "====================================================================="
|
||||||
std::cout << GridLogMessage << L << "^4 Deo Worst mflop/s = " << mflops_worst << " ; " << mflops_worst / NN << " per node " << std::endl;
|
"============="
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage << L
|
||||||
|
<< "^4 Deo Best mflop/s = " << mflops_best << " ; "
|
||||||
|
<< mflops_best / NN << " per node " << std::endl;
|
||||||
|
std::cout << GridLogMessage << L
|
||||||
|
<< "^4 Deo Worst mflop/s = " << mflops_worst << " ; "
|
||||||
|
<< mflops_worst / NN << " per node " << std::endl;
|
||||||
std::cout << GridLogMessage << fmt << std::endl;
|
std::cout << GridLogMessage << fmt << std::endl;
|
||||||
std::cout << GridLogMessage;
|
std::cout << GridLogMessage;
|
||||||
|
|
||||||
@ -687,7 +808,10 @@ class Benchmark
|
|||||||
}
|
}
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "======================================================================="
|
||||||
|
"==========="
|
||||||
|
<< std::endl;
|
||||||
return mflops_best;
|
return mflops_best;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -696,7 +820,8 @@ int main(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
Grid_init(&argc, &argv);
|
Grid_init(&argc, &argv);
|
||||||
|
|
||||||
CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential);
|
CartesianCommunicator::SetCommunicatorPolicy(
|
||||||
|
CartesianCommunicator::CommunicatorPolicySequential);
|
||||||
#ifdef KNL
|
#ifdef KNL
|
||||||
LebesgueOrder::Block = std::vector<int>({8, 2, 2, 2});
|
LebesgueOrder::Block = std::vector<int>({8, 2, 2, 2});
|
||||||
#else
|
#else
|
||||||
@ -717,83 +842,148 @@ int main(int argc, char **argv)
|
|||||||
std::vector<double> staggered;
|
std::vector<double> staggered;
|
||||||
|
|
||||||
int Ls = 1;
|
int Ls = 1;
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"========="
|
||||||
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << " Wilson dslash 4D vectorised" << std::endl;
|
std::cout << GridLogMessage << " Wilson dslash 4D vectorised" << std::endl;
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"========="
|
||||||
|
<< std::endl;
|
||||||
for (int l = 0; l < L_list.size(); l++)
|
for (int l = 0; l < L_list.size(); l++)
|
||||||
{
|
{
|
||||||
wilson.push_back(Benchmark::DWF(Ls, L_list[l]));
|
wilson.push_back(Benchmark::DWF(Ls, L_list[l]));
|
||||||
}
|
}
|
||||||
|
|
||||||
Ls = 12;
|
Ls = 12;
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"========="
|
||||||
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << " Domain wall dslash 4D vectorised" << std::endl;
|
std::cout << GridLogMessage << " Domain wall dslash 4D vectorised" << std::endl;
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"========="
|
||||||
|
<< std::endl;
|
||||||
for (int l = 0; l < L_list.size(); l++)
|
for (int l = 0; l < L_list.size(); l++)
|
||||||
{
|
{
|
||||||
double result = Benchmark::DWF(Ls, L_list[l]);
|
double result = Benchmark::DWF(Ls, L_list[l]);
|
||||||
dwf4.push_back(result);
|
dwf4.push_back(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"========="
|
||||||
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << " Improved Staggered dslash 4D vectorised" << std::endl;
|
std::cout << GridLogMessage << " Improved Staggered dslash 4D vectorised" << std::endl;
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"========="
|
||||||
|
<< std::endl;
|
||||||
for (int l = 0; l < L_list.size(); l++)
|
for (int l = 0; l < L_list.size(); l++)
|
||||||
{
|
{
|
||||||
double result = Benchmark::Staggered(L_list[l]);
|
double result = Benchmark::Staggered(L_list[l]);
|
||||||
staggered.push_back(result);
|
staggered.push_back(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"========="
|
||||||
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << " Summary table Ls=" << Ls << std::endl;
|
std::cout << GridLogMessage << " Summary table Ls=" << Ls << std::endl;
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"========="
|
||||||
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\t Staggered" << std::endl;
|
std::cout << GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\t Staggered" << std::endl;
|
||||||
for (int l = 0; l < L_list.size(); l++)
|
for (int l = 0; l < L_list.size(); l++)
|
||||||
{
|
{
|
||||||
std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] << " \t\t " << dwf4[l] << " \t\t " << staggered[l] << std::endl;
|
std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] << " \t\t "
|
||||||
|
<< dwf4[l] << " \t\t " << staggered[l] << std::endl;
|
||||||
}
|
}
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"========="
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
int NN = NN_global;
|
int NN = NN_global;
|
||||||
if (do_memory)
|
if (do_memory)
|
||||||
{
|
{
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "======================================================================="
|
||||||
|
"==========="
|
||||||
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << " Memory benchmark " << std::endl;
|
std::cout << GridLogMessage << " Memory benchmark " << std::endl;
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "======================================================================="
|
||||||
|
"==========="
|
||||||
|
<< std::endl;
|
||||||
Benchmark::Memory();
|
Benchmark::Memory();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (do_su4)
|
if (do_su4)
|
||||||
{
|
{
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "======================================================================="
|
||||||
|
"==========="
|
||||||
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << " SU(4) benchmark " << std::endl;
|
std::cout << GridLogMessage << " SU(4) benchmark " << std::endl;
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "======================================================================="
|
||||||
|
"==========="
|
||||||
|
<< std::endl;
|
||||||
Benchmark::SU4();
|
Benchmark::SU4();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (do_comms)
|
if (do_comms)
|
||||||
{
|
{
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "======================================================================="
|
||||||
|
"==========="
|
||||||
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << " Communications benchmark " << std::endl;
|
std::cout << GridLogMessage << " Communications benchmark " << std::endl;
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "======================================================================="
|
||||||
|
"==========="
|
||||||
|
<< std::endl;
|
||||||
Benchmark::Comms();
|
Benchmark::Comms();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"========="
|
||||||
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << " Per Node Summary table Ls=" << Ls << std::endl;
|
std::cout << GridLogMessage << " Per Node Summary table Ls=" << Ls << std::endl;
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"========="
|
||||||
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << " L \t\t Wilson\t\t DWF4\t\t Staggered " << std::endl;
|
std::cout << GridLogMessage << " L \t\t Wilson\t\t DWF4\t\t Staggered " << std::endl;
|
||||||
for (int l = 0; l < L_list.size(); l++)
|
for (int l = 0; l < L_list.size(); l++)
|
||||||
{
|
{
|
||||||
std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] / NN << " \t " << dwf4[l] / NN << " \t " << staggered[l] / NN << std::endl;
|
std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] / NN << " \t "
|
||||||
|
<< dwf4[l] / NN << " \t " << staggered[l] / NN << std::endl;
|
||||||
}
|
}
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"========="
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
std::cout << GridLogMessage << " Comparison point result: " << 0.5 * (dwf4[sel] + dwf4[selm1]) / NN << " Mflop/s per node" << std::endl;
|
<< "========================================================================="
|
||||||
std::cout << GridLogMessage << " Comparison point is 0.5*(" << dwf4[sel] / NN << "+" << dwf4[selm1] / NN << ") " << std::endl;
|
"========="
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< " Comparison point result: " << 0.5 * (dwf4[sel] + dwf4[selm1]) / NN
|
||||||
|
<< " Mflop/s per node" << std::endl;
|
||||||
|
std::cout << GridLogMessage << " Comparison point is 0.5*(" << dwf4[sel] / NN << "+"
|
||||||
|
<< dwf4[selm1] / NN << ") " << std::endl;
|
||||||
std::cout << std::setprecision(3);
|
std::cout << std::setprecision(3);
|
||||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"========="
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
@ -36,8 +36,7 @@ namespace Grid
|
|||||||
|
|
||||||
// AP 06/10/2020: Standard C version in case one is suspicious of the C++ API
|
// AP 06/10/2020: Standard C version in case one is suspicious of the C++ API
|
||||||
//
|
//
|
||||||
// template <typename Field>
|
// template <typename Field> void stdWrite(const std::string filestem, Field &vec)
|
||||||
// void stdWrite(const std::string filestem, Field &vec)
|
|
||||||
// {
|
// {
|
||||||
// std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
// std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||||
// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "wb");
|
// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "wb");
|
||||||
@ -45,7 +44,7 @@ namespace Grid
|
|||||||
// uint32_t crc;
|
// uint32_t crc;
|
||||||
// GridStopWatch ioWatch, crcWatch;
|
// GridStopWatch ioWatch, crcWatch;
|
||||||
|
|
||||||
// size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
|
// size = vec.Grid()->lSites() * sizeof(typename Field::scalar_object);
|
||||||
// autoView(vec_v, vec, CpuRead);
|
// autoView(vec_v, vec, CpuRead);
|
||||||
// crcWatch.Start();
|
// crcWatch.Start();
|
||||||
// crc = GridChecksum::crc32(vec_v.cpu_ptr, size);
|
// crc = GridChecksum::crc32(vec_v.cpu_ptr, size);
|
||||||
@ -53,21 +52,23 @@ namespace Grid
|
|||||||
// crcWatch.Stop();
|
// crcWatch.Stop();
|
||||||
// MSG << "Std I/O write: Data CRC32 " << std::hex << crc << std::dec << std::endl;
|
// MSG << "Std I/O write: Data CRC32 " << std::hex << crc << std::dec << std::endl;
|
||||||
// ioWatch.Start();
|
// ioWatch.Start();
|
||||||
// std::fwrite(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file);
|
// std::fwrite(vec_v.cpu_ptr, sizeof(typename Field::scalar_object),
|
||||||
|
// vec.Grid()->lSites(), file);
|
||||||
// ioWatch.Stop();
|
// ioWatch.Stop();
|
||||||
// std::fclose(file);
|
// std::fclose(file);
|
||||||
// size *= vec.Grid()->ProcessorCount();
|
// size *= vec.Grid()->ProcessorCount();
|
||||||
// auto &p = BinaryIO::lastPerf;
|
// auto &p = BinaryIO::lastPerf;
|
||||||
// p.size = size;
|
// p.size = size;
|
||||||
// p.time = ioWatch.useconds();
|
// p.time = ioWatch.useconds();
|
||||||
// p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
|
// p.mbytesPerSecond = size / 1024. / 1024. / (ioWatch.useconds() / 1.e6);
|
||||||
// MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed()
|
// MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed()
|
||||||
// << ", " << p.mbytesPerSecond << " MB/s" << std::endl;
|
// << ",
|
||||||
|
// "
|
||||||
|
// << p.mbytesPerSecond << " MB/s" << std::endl;
|
||||||
// MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
// MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||||
// }
|
// }
|
||||||
//
|
|
||||||
// template <typename Field>
|
// template <typename Field> void stdRead(Field &vec, const std::string filestem)
|
||||||
// void stdRead(Field &vec, const std::string filestem)
|
|
||||||
// {
|
// {
|
||||||
// std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
// std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||||
// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "rb");
|
// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "rb");
|
||||||
@ -75,14 +76,15 @@ namespace Grid
|
|||||||
// uint32_t crcRead, crcData;
|
// uint32_t crcRead, crcData;
|
||||||
// GridStopWatch ioWatch, crcWatch;
|
// GridStopWatch ioWatch, crcWatch;
|
||||||
|
|
||||||
// size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
|
// size = vec.Grid()->lSites() * sizeof(typename Field::scalar_object);
|
||||||
// crcWatch.Start();
|
// crcWatch.Start();
|
||||||
// std::fread(&crcRead, sizeof(uint32_t), 1, file);
|
// std::fread(&crcRead, sizeof(uint32_t), 1, file);
|
||||||
// crcWatch.Stop();
|
// crcWatch.Stop();
|
||||||
// {
|
// {
|
||||||
// autoView(vec_v, vec, CpuWrite);
|
// autoView(vec_v, vec, CpuWrite);
|
||||||
// ioWatch.Start();
|
// ioWatch.Start();
|
||||||
// std::fread(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file);
|
// std::fread(vec_v.cpu_ptr, sizeof(typename Field::scalar_object),
|
||||||
|
// vec.Grid()->lSites(), file);
|
||||||
// ioWatch.Stop();
|
// ioWatch.Stop();
|
||||||
// std::fclose(file);
|
// std::fclose(file);
|
||||||
// }
|
// }
|
||||||
@ -98,17 +100,17 @@ namespace Grid
|
|||||||
// auto &p = BinaryIO::lastPerf;
|
// auto &p = BinaryIO::lastPerf;
|
||||||
// p.size = size;
|
// p.size = size;
|
||||||
// p.time = ioWatch.useconds();
|
// p.time = ioWatch.useconds();
|
||||||
// p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
|
// p.mbytesPerSecond = size / 1024. / 1024. / (ioWatch.useconds() / 1.e6);
|
||||||
// MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed()
|
// MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed() << ", "
|
||||||
// << ", " << p.mbytesPerSecond << " MB/s" << std::endl;
|
// << p.mbytesPerSecond << " MB/s" << std::endl;
|
||||||
// MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
// MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||||
// }
|
// }
|
||||||
|
|
||||||
template <typename Field>
|
template <typename Field> void stdWrite(const std::string filestem, Field &vec)
|
||||||
void stdWrite(const std::string filestem, Field &vec)
|
|
||||||
{
|
{
|
||||||
std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||||
std::ofstream file(filestem + "." + rankStr + ".bin", std::ios::out | std::ios::binary);
|
std::ofstream file(filestem + "." + rankStr + ".bin",
|
||||||
|
std::ios::out | std::ios::binary);
|
||||||
size_t size, sizec;
|
size_t size, sizec;
|
||||||
uint32_t crc;
|
uint32_t crc;
|
||||||
GridStopWatch ioWatch, crcWatch;
|
GridStopWatch ioWatch, crcWatch;
|
||||||
@ -130,16 +132,16 @@ namespace Grid
|
|||||||
p.size = size;
|
p.size = size;
|
||||||
p.time = ioWatch.useconds();
|
p.time = ioWatch.useconds();
|
||||||
p.mbytesPerSecond = size / 1024. / 1024. / (ioWatch.useconds() / 1.e6);
|
p.mbytesPerSecond = size / 1024. / 1024. / (ioWatch.useconds() / 1.e6);
|
||||||
MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed()
|
MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed() << ", "
|
||||||
<< ", " << p.mbytesPerSecond << " MB/s" << std::endl;
|
<< p.mbytesPerSecond << " MB/s" << std::endl;
|
||||||
MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Field>
|
template <typename Field> void stdRead(Field &vec, const std::string filestem)
|
||||||
void stdRead(Field &vec, const std::string filestem)
|
|
||||||
{
|
{
|
||||||
std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||||
std::ifstream file(filestem + "." + rankStr + ".bin", std::ios::in | std::ios::binary);
|
std::ifstream file(filestem + "." + rankStr + ".bin",
|
||||||
|
std::ios::in | std::ios::binary);
|
||||||
size_t size, sizec;
|
size_t size, sizec;
|
||||||
uint32_t crcRead, crcData;
|
uint32_t crcRead, crcData;
|
||||||
GridStopWatch ioWatch, crcWatch;
|
GridStopWatch ioWatch, crcWatch;
|
||||||
@ -168,13 +170,12 @@ namespace Grid
|
|||||||
p.size = size;
|
p.size = size;
|
||||||
p.time = ioWatch.useconds();
|
p.time = ioWatch.useconds();
|
||||||
p.mbytesPerSecond = size / 1024. / 1024. / (ioWatch.useconds() / 1.e6);
|
p.mbytesPerSecond = size / 1024. / 1024. / (ioWatch.useconds() / 1.e6);
|
||||||
MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed()
|
MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed() << ", "
|
||||||
<< ", " << p.mbytesPerSecond << " MB/s" << std::endl;
|
<< p.mbytesPerSecond << " MB/s" << std::endl;
|
||||||
MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Field>
|
template <typename Field> void limeWrite(const std::string filestem, Field &vec)
|
||||||
void limeWrite(const std::string filestem, Field &vec)
|
|
||||||
{
|
{
|
||||||
emptyUserRecord record;
|
emptyUserRecord record;
|
||||||
ScidacWriter binWriter(vec.Grid()->IsBoss());
|
ScidacWriter binWriter(vec.Grid()->IsBoss());
|
||||||
@ -184,8 +185,7 @@ namespace Grid
|
|||||||
binWriter.close();
|
binWriter.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Field>
|
template <typename Field> void limeRead(Field &vec, const std::string filestem)
|
||||||
void limeRead(Field &vec, const std::string filestem)
|
|
||||||
{
|
{
|
||||||
emptyUserRecord record;
|
emptyUserRecord record;
|
||||||
ScidacReader binReader;
|
ScidacReader binReader;
|
||||||
@ -225,12 +225,13 @@ namespace Grid
|
|||||||
|
|
||||||
template <typename Field>
|
template <typename Field>
|
||||||
void writeBenchmark(const Coordinate &latt, const std::string filename,
|
void writeBenchmark(const Coordinate &latt, const std::string filename,
|
||||||
const WriterFn<Field> &write,
|
const WriterFn<Field> &write, const unsigned int Ls = 1,
|
||||||
const unsigned int Ls = 1, const bool rb = false)
|
const bool rb = false)
|
||||||
{
|
{
|
||||||
auto mpi = GridDefaultMpi();
|
auto mpi = GridDefaultMpi();
|
||||||
auto simd = GridDefaultSimd(latt.size(), Field::vector_type::Nsimd());
|
auto simd = GridDefaultSimd(latt.size(), Field::vector_type::Nsimd());
|
||||||
std::shared_ptr<GridCartesian> gBasePt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
|
std::shared_ptr<GridCartesian> gBasePt(
|
||||||
|
SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
|
||||||
std::shared_ptr<GridBase> gPt;
|
std::shared_ptr<GridBase> gPt;
|
||||||
std::random_device rd;
|
std::random_device rd;
|
||||||
|
|
||||||
@ -251,12 +252,13 @@ namespace Grid
|
|||||||
|
|
||||||
template <typename Field>
|
template <typename Field>
|
||||||
void readBenchmark(const Coordinate &latt, const std::string filename,
|
void readBenchmark(const Coordinate &latt, const std::string filename,
|
||||||
const ReaderFn<Field> &read,
|
const ReaderFn<Field> &read, const unsigned int Ls = 1,
|
||||||
const unsigned int Ls = 1, const bool rb = false)
|
const bool rb = false)
|
||||||
{
|
{
|
||||||
auto mpi = GridDefaultMpi();
|
auto mpi = GridDefaultMpi();
|
||||||
auto simd = GridDefaultSimd(latt.size(), Field::vector_type::Nsimd());
|
auto simd = GridDefaultSimd(latt.size(), Field::vector_type::Nsimd());
|
||||||
std::shared_ptr<GridCartesian> gBasePt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
|
std::shared_ptr<GridCartesian> gBasePt(
|
||||||
|
SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
|
||||||
std::shared_ptr<GridBase> gPt;
|
std::shared_ptr<GridBase> gPt;
|
||||||
|
|
||||||
makeGrid(gPt, gBasePt, Ls, rb);
|
makeGrid(gPt, gBasePt, Ls, rb);
|
||||||
|
Loading…
Reference in New Issue
Block a user