Merge remote-tracking branch 'upstream/main' into main
This commit is contained in:
commit
5f976b361c
@ -6,7 +6,7 @@
|
||||
BreakBeforeBraces: Allman,
|
||||
AllowShortIfStatementsOnASingleLine: false,
|
||||
IndentCaseLabels: false,
|
||||
ColumnLimit: 0,
|
||||
ColumnLimit: 90,
|
||||
AccessModifierOffset: -4,
|
||||
NamespaceIndentation: All,
|
||||
FixNamespaceComments: false,
|
||||
|
@ -18,6 +18,7 @@ You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "Common.hpp"
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
using namespace Grid;
|
||||
@ -44,8 +45,7 @@ struct time_statistics
|
||||
mean = sum / v.size();
|
||||
|
||||
std::vector<double> diff(v.size());
|
||||
std::transform(v.begin(), v.end(), diff.begin(), [=](double x)
|
||||
{ return x - mean; });
|
||||
std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; });
|
||||
double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
|
||||
err = std::sqrt(sq_sum / (v.size() * (v.size() - 1)));
|
||||
|
||||
@ -64,11 +64,8 @@ void comms_header()
|
||||
<< "bytes\t MB/s uni (err/min/max) \t\t MB/s bidi (err/min/max)" << std::endl;
|
||||
};
|
||||
|
||||
Gamma::Algebra Gmu[] = {
|
||||
Gamma::Algebra::GammaX,
|
||||
Gamma::Algebra::GammaY,
|
||||
Gamma::Algebra::GammaZ,
|
||||
Gamma::Algebra::GammaT};
|
||||
Gamma::Algebra Gmu[] = {Gamma::Algebra::GammaX, Gamma::Algebra::GammaY,
|
||||
Gamma::Algebra::GammaZ, Gamma::Algebra::GammaT};
|
||||
|
||||
struct controls
|
||||
{
|
||||
@ -84,26 +81,52 @@ class Benchmark
|
||||
{
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage << "= Grid is setup to use " << threads << " threads" << std::endl;
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "======================================================================="
|
||||
"==========="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "= Grid is setup to use " << threads << " threads"
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "======================================================================="
|
||||
"==========="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "Grid Default Decomposition patterns\n";
|
||||
std::cout << GridLogMessage << "\tOpenMP threads : " << GridThread::GetThreads() << std::endl;
|
||||
std::cout << GridLogMessage << "\tMPI tasks : " << GridCmdVectorIntToString(GridDefaultMpi()) << std::endl;
|
||||
std::cout << GridLogMessage << "\tvReal : " << sizeof(vReal) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vReal::Nsimd())) << std::endl;
|
||||
std::cout << GridLogMessage << "\tvRealF : " << sizeof(vRealF) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vRealF::Nsimd())) << std::endl;
|
||||
std::cout << GridLogMessage << "\tvRealD : " << sizeof(vRealD) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vRealD::Nsimd())) << std::endl;
|
||||
std::cout << GridLogMessage << "\tvComplex : " << sizeof(vComplex) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vComplex::Nsimd())) << std::endl;
|
||||
std::cout << GridLogMessage << "\tvComplexF : " << sizeof(vComplexF) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vComplexF::Nsimd())) << std::endl;
|
||||
std::cout << GridLogMessage << "\tvComplexD : " << sizeof(vComplexD) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vComplexD::Nsimd())) << std::endl;
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage << "\tOpenMP threads : " << GridThread::GetThreads()
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "\tMPI tasks : " << GridCmdVectorIntToString(GridDefaultMpi())
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "\tvReal : " << sizeof(vReal) * 8 << "bits ; "
|
||||
<< GridCmdVectorIntToString(GridDefaultSimd(4, vReal::Nsimd()))
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "\tvRealF : " << sizeof(vRealF) * 8
|
||||
<< "bits ; "
|
||||
<< GridCmdVectorIntToString(GridDefaultSimd(4, vRealF::Nsimd()))
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "\tvRealD : " << sizeof(vRealD) * 8
|
||||
<< "bits ; "
|
||||
<< GridCmdVectorIntToString(GridDefaultSimd(4, vRealD::Nsimd()))
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "\tvComplex : " << sizeof(vComplex) * 8
|
||||
<< "bits ; "
|
||||
<< GridCmdVectorIntToString(GridDefaultSimd(4, vComplex::Nsimd()))
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "\tvComplexF : " << sizeof(vComplexF) * 8
|
||||
<< "bits ; "
|
||||
<< GridCmdVectorIntToString(GridDefaultSimd(4, vComplexF::Nsimd()))
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "\tvComplexD : " << sizeof(vComplexD) * 8
|
||||
<< "bits ; "
|
||||
<< GridCmdVectorIntToString(GridDefaultSimd(4, vComplexD::Nsimd()))
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
static void Comms(void)
|
||||
{
|
||||
int Nloop = 200;
|
||||
int nmu = 0;
|
||||
int maxlat = 32;
|
||||
int maxlat = 48;
|
||||
|
||||
Coordinate simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd());
|
||||
Coordinate mpi_layout = GridDefaultMpi();
|
||||
@ -115,94 +138,89 @@ class Benchmark
|
||||
std::vector<double> t_time(Nloop);
|
||||
time_statistics timestat;
|
||||
|
||||
std::cout << GridLogMessage << "====================================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in " << nmu << " dimensions" << std::endl;
|
||||
std::cout << GridLogMessage << "====================================================================================================" << std::endl;
|
||||
comms_header();
|
||||
std::cout << GridLogMessage
|
||||
<< "======================================================================="
|
||||
"============================="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "
|
||||
<< nmu << " dimensions" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "======================================================================="
|
||||
"============================="
|
||||
<< std::endl;
|
||||
grid_printf("%5s %5s %15s %15s %15s %15s %15s\n", "L", "dir", "payload (B)",
|
||||
"time (usec)", "rate (GB/s)", "std dev", "max");
|
||||
|
||||
for (int lat = 16; lat <= maxlat; lat += 8)
|
||||
{
|
||||
// for(int Ls=8;Ls<=8;Ls*=2){
|
||||
int Ls = 12;
|
||||
|
||||
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
|
||||
lat * mpi_layout[3]});
|
||||
|
||||
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
||||
RealD Nrank = Grid._Nprocessors;
|
||||
RealD Nnode = Grid.NodeCount();
|
||||
RealD ppn = Nrank / Nnode;
|
||||
|
||||
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||
uint64_t bytes = lat * lat * lat * Ls * sizeof(HalfSpinColourVectorD);
|
||||
for (int d = 0; d < 8; d++)
|
||||
{
|
||||
int Ls = 12;
|
||||
xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
|
||||
rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
|
||||
}
|
||||
|
||||
Coordinate latt_size({lat * mpi_layout[0],
|
||||
lat * mpi_layout[1],
|
||||
lat * mpi_layout[2],
|
||||
lat * mpi_layout[3]});
|
||||
double dbytes;
|
||||
|
||||
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
||||
RealD Nrank = Grid._Nprocessors;
|
||||
RealD Nnode = Grid.NodeCount();
|
||||
RealD ppn = Nrank / Nnode;
|
||||
|
||||
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||
// Grid.ShmBufferFreeAll();
|
||||
uint64_t bytes = lat * lat * lat * Ls * sizeof(HalfSpinColourVectorD);
|
||||
for (int d = 0; d < 8; d++)
|
||||
for (int dir = 0; dir < 8; dir++)
|
||||
{
|
||||
int mu = dir % 4;
|
||||
if (mpi_layout[mu] > 1)
|
||||
{
|
||||
xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
|
||||
rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
|
||||
// bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
// bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
}
|
||||
|
||||
// int ncomm;
|
||||
double dbytes;
|
||||
|
||||
for (int dir = 0; dir < 8; dir++)
|
||||
{
|
||||
int mu = dir % 4;
|
||||
if (mpi_layout[mu] > 1)
|
||||
std::vector<double> times(Nloop);
|
||||
for (int i = 0; i < Nloop; i++)
|
||||
{
|
||||
|
||||
std::vector<double> times(Nloop);
|
||||
for (int i = 0; i < Nloop; i++)
|
||||
dbytes = 0;
|
||||
double start = usecond();
|
||||
int xmit_to_rank;
|
||||
int recv_from_rank;
|
||||
|
||||
if (dir == mu)
|
||||
{
|
||||
|
||||
dbytes = 0;
|
||||
double start = usecond();
|
||||
int xmit_to_rank;
|
||||
int recv_from_rank;
|
||||
|
||||
if (dir == mu)
|
||||
{
|
||||
int comm_proc = 1;
|
||||
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
|
||||
}
|
||||
else
|
||||
{
|
||||
int comm_proc = mpi_layout[mu] - 1;
|
||||
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
|
||||
}
|
||||
Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
|
||||
(void *)&rbuf[dir][0], recv_from_rank,
|
||||
bytes);
|
||||
dbytes += bytes;
|
||||
|
||||
double stop = usecond();
|
||||
t_time[i] = stop - start; // microseconds
|
||||
int comm_proc = 1;
|
||||
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
|
||||
}
|
||||
timestat.statistics(t_time);
|
||||
else
|
||||
{
|
||||
int comm_proc = mpi_layout[mu] - 1;
|
||||
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
|
||||
}
|
||||
Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
|
||||
(void *)&rbuf[dir][0], recv_from_rank, bytes);
|
||||
dbytes += bytes;
|
||||
|
||||
dbytes = dbytes * ppn;
|
||||
double xbytes = dbytes * 0.5;
|
||||
double bidibytes = dbytes;
|
||||
|
||||
std::cout << GridLogMessage << lat << "\t" << Ls << "\t "
|
||||
<< bytes << " \t "
|
||||
<< xbytes / timestat.mean << " \t " << xbytes * timestat.err / (timestat.mean * timestat.mean) << " \t "
|
||||
<< xbytes / timestat.max << " " << xbytes / timestat.min
|
||||
<< "\t\t" << bidibytes / timestat.mean << " " << bidibytes * timestat.err / (timestat.mean * timestat.mean) << " "
|
||||
<< bidibytes / timestat.max << " " << bidibytes / timestat.min << std::endl;
|
||||
double stop = usecond();
|
||||
t_time[i] = stop - start; // microseconds
|
||||
}
|
||||
timestat.statistics(t_time);
|
||||
|
||||
dbytes = dbytes * ppn;
|
||||
double bidibytes = 2. * dbytes;
|
||||
double rate = bidibytes / (timestat.mean / 1.e6) / 1024. / 1024. / 1024.;
|
||||
double rate_err = rate * timestat.err / timestat.mean;
|
||||
double rate_max = rate * timestat.mean / timestat.min;
|
||||
grid_printf("%5d %5d %15d %15.2f %15.2f %15.1f %15.2f\n", lat, dir, bytes,
|
||||
timestat.mean, rate, rate_err, rate_max);
|
||||
}
|
||||
for (int d = 0; d < 8; d++)
|
||||
{
|
||||
acceleratorFreeDevice(xbuf[d]);
|
||||
acceleratorFreeDevice(rbuf[d]);
|
||||
}
|
||||
}
|
||||
for (int d = 0; d < 8; d++)
|
||||
{
|
||||
acceleratorFreeDevice(xbuf[d]);
|
||||
acceleratorFreeDevice(rbuf[d]);
|
||||
}
|
||||
}
|
||||
return;
|
||||
@ -217,9 +235,15 @@ class Benchmark
|
||||
Coordinate simd_layout = GridDefaultSimd(Nd, vReal::Nsimd());
|
||||
Coordinate mpi_layout = GridDefaultMpi();
|
||||
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "======================================================================="
|
||||
"==========="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "= Benchmarking a*x + y bandwidth" << std::endl;
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "======================================================================="
|
||||
"==========="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << " L "
|
||||
<< "\t\t"
|
||||
<< "bytes"
|
||||
@ -229,7 +253,9 @@ class Benchmark
|
||||
<< "Gflop/s"
|
||||
<< "\t\t seconds"
|
||||
<< "\t\tGB/s / node" << std::endl;
|
||||
std::cout << GridLogMessage << "----------------------------------------------------------" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "----------------------------------------------------------"
|
||||
<< std::endl;
|
||||
|
||||
// uint64_t NP;
|
||||
uint64_t NN;
|
||||
@ -242,7 +268,8 @@ class Benchmark
|
||||
for (int lat = 8; lat <= lmax; lat += 8)
|
||||
{
|
||||
|
||||
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2], lat * mpi_layout[3]});
|
||||
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
|
||||
lat * mpi_layout[3]});
|
||||
int64_t vol = latt_size[0] * latt_size[1] * latt_size[2] * latt_size[3];
|
||||
|
||||
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
||||
@ -273,9 +300,10 @@ class Benchmark
|
||||
|
||||
double flops = vol * Nvec * 2; // mul,add
|
||||
double bytes = 3.0 * vol * Nvec * sizeof(Real);
|
||||
std::cout << GridLogMessage << std::setprecision(3)
|
||||
<< lat << "\t\t" << bytes << " \t\t" << bytes / time << "\t\t" << flops / time << "\t\t" << (stop - start) / 1000. / 1000.
|
||||
<< "\t\t" << bytes / time / NN << std::endl;
|
||||
std::cout << GridLogMessage << std::setprecision(3) << lat << "\t\t" << bytes
|
||||
<< " \t\t" << bytes / time << "\t\t" << flops / time << "\t\t"
|
||||
<< (stop - start) / 1000. / 1000. << "\t\t" << bytes / time / NN
|
||||
<< std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
@ -287,9 +315,15 @@ class Benchmark
|
||||
Coordinate simd_layout = GridDefaultSimd(Nd, vComplexF::Nsimd());
|
||||
Coordinate mpi_layout = GridDefaultMpi();
|
||||
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "======================================================================="
|
||||
"==========="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "= Benchmarking z = y*x SU(4) bandwidth" << std::endl;
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "======================================================================="
|
||||
"==========="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << " L "
|
||||
<< "\t\t"
|
||||
<< "bytes"
|
||||
@ -299,7 +333,9 @@ class Benchmark
|
||||
<< "Gflop/s"
|
||||
<< "\t\t seconds"
|
||||
<< "\t\tGB/s / node" << std::endl;
|
||||
std::cout << GridLogMessage << "----------------------------------------------------------" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "----------------------------------------------------------"
|
||||
<< std::endl;
|
||||
|
||||
uint64_t NN;
|
||||
|
||||
@ -310,7 +346,8 @@ class Benchmark
|
||||
for (int lat = 8; lat <= lmax; lat += 8)
|
||||
{
|
||||
|
||||
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2], lat * mpi_layout[3]});
|
||||
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
|
||||
lat * mpi_layout[3]});
|
||||
int64_t vol = latt_size[0] * latt_size[1] * latt_size[2] * latt_size[3];
|
||||
|
||||
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
||||
@ -337,9 +374,10 @@ class Benchmark
|
||||
|
||||
double flops = vol * Nc4 * Nc4 * (6 + (Nc4 - 1) * 8); // mul,add
|
||||
double bytes = 3.0 * vol * Nc4 * Nc4 * 2 * sizeof(RealF);
|
||||
std::cout << GridLogMessage << std::setprecision(3)
|
||||
<< lat << "\t\t" << bytes << " \t\t" << bytes / time << "\t\t" << flops / time << "\t\t" << (stop - start) / 1000. / 1000.
|
||||
<< "\t\t" << bytes / time / NN << std::endl;
|
||||
std::cout << GridLogMessage << std::setprecision(3) << lat << "\t\t" << bytes
|
||||
<< " \t\t" << bytes / time << "\t\t" << flops / time << "\t\t"
|
||||
<< (stop - start) / 1000. / 1000. << "\t\t" << bytes / time / NN
|
||||
<< std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
@ -360,31 +398,41 @@ class Benchmark
|
||||
Coordinate mpi = GridDefaultMpi();
|
||||
assert(mpi.size() == 4);
|
||||
Coordinate local({L, L, L, L});
|
||||
Coordinate latt4({local[0] * mpi[0], local[1] * mpi[1], local[2] * mpi[2], local[3] * mpi[3]});
|
||||
Coordinate latt4(
|
||||
{local[0] * mpi[0], local[1] * mpi[1], local[2] * mpi[2], local[3] * mpi[3]});
|
||||
|
||||
GridCartesian *TmpGrid = SpaceTimeGrid::makeFourDimGrid(latt4,
|
||||
GridDefaultSimd(Nd, vComplex::Nsimd()),
|
||||
GridDefaultMpi());
|
||||
GridCartesian *TmpGrid = SpaceTimeGrid::makeFourDimGrid(
|
||||
latt4, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
|
||||
uint64_t NP = TmpGrid->RankCount();
|
||||
uint64_t NN = TmpGrid->NodeCount();
|
||||
NN_global = NN;
|
||||
uint64_t SHM = NP / NN;
|
||||
|
||||
///////// Welcome message ////////////
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage << "Benchmark DWF on " << L << "^4 local volume " << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "======================================================================="
|
||||
"==========="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "Benchmark DWF on " << L << "^4 local volume "
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "* Nc : " << Nc << std::endl;
|
||||
std::cout << GridLogMessage << "* Global volume : " << GridCmdVectorIntToString(latt4) << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "* Global volume : " << GridCmdVectorIntToString(latt4) << std::endl;
|
||||
std::cout << GridLogMessage << "* Ls : " << Ls << std::endl;
|
||||
std::cout << GridLogMessage << "* ranks : " << NP << std::endl;
|
||||
std::cout << GridLogMessage << "* nodes : " << NN << std::endl;
|
||||
std::cout << GridLogMessage << "* ranks/node : " << SHM << std::endl;
|
||||
std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi) << std::endl;
|
||||
std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi)
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "* Using " << threads << " threads" << std::endl;
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "======================================================================="
|
||||
"==========="
|
||||
<< std::endl;
|
||||
|
||||
///////// Lattice Init ////////////
|
||||
GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
|
||||
GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(
|
||||
latt4, GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
|
||||
GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
|
||||
GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
|
||||
@ -423,10 +471,14 @@ class Benchmark
|
||||
std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S ");
|
||||
|
||||
controls Cases[] = {
|
||||
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicyConcurrent},
|
||||
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicyConcurrent},
|
||||
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicySequential},
|
||||
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicySequential}};
|
||||
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsThenCompute,
|
||||
CartesianCommunicator::CommunicatorPolicyConcurrent},
|
||||
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsAndCompute,
|
||||
CartesianCommunicator::CommunicatorPolicyConcurrent},
|
||||
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsThenCompute,
|
||||
CartesianCommunicator::CommunicatorPolicySequential},
|
||||
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsAndCompute,
|
||||
CartesianCommunicator::CommunicatorPolicySequential}};
|
||||
|
||||
for (int c = 0; c < num_cases; c++)
|
||||
{
|
||||
@ -435,7 +487,10 @@ class Benchmark
|
||||
WilsonKernelsStatic::Opt = Cases[c].Opt;
|
||||
CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
|
||||
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "==================================================================="
|
||||
"==============="
|
||||
<< std::endl;
|
||||
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric)
|
||||
std::cout << GridLogMessage << "* Using GENERIC Nc WilsonKernels" << std::endl;
|
||||
if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute)
|
||||
@ -443,7 +498,10 @@ class Benchmark
|
||||
if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute)
|
||||
std::cout << GridLogMessage << "* Using sequential Comms/Compute" << std::endl;
|
||||
std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "==================================================================="
|
||||
"==============="
|
||||
<< std::endl;
|
||||
|
||||
int nwarm = 10;
|
||||
double t0 = usecond();
|
||||
@ -458,7 +516,8 @@ class Benchmark
|
||||
|
||||
FGrid->Broadcast(0, &ncall, sizeof(ncall));
|
||||
|
||||
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
|
||||
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per
|
||||
// second"<<std::endl;
|
||||
Dw.ZeroCounters();
|
||||
|
||||
time_statistics timestat;
|
||||
@ -483,7 +542,8 @@ class Benchmark
|
||||
#if 0
|
||||
double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + Nd*Nc*Ns + Nd*Nc*Ns*2;
|
||||
#else
|
||||
double fps = Nc * (6 + (Nc - 1) * 8) * Ns * Nd + 2 * Nd * Nc * Ns + 2 * Nd * Nc * Ns * 2;
|
||||
double fps =
|
||||
Nc * (6 + (Nc - 1) * 8) * Ns * Nd + 2 * Nd * Nc * Ns + 2 * Nd * Nc * Ns * 2;
|
||||
#endif
|
||||
double flops = (fps * volume) / 2;
|
||||
double mf_hi, mf_lo, mf_err;
|
||||
@ -505,14 +565,25 @@ class Benchmark
|
||||
mflops_worst = mflops;
|
||||
|
||||
std::cout << GridLogMessage << "Deo FlopsPerSite is " << fps << std::endl;
|
||||
std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s = " << mflops << " (" << mf_err << ") " << mf_lo << "-" << mf_hi << std::endl;
|
||||
std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s per rank " << mflops / NP << std::endl;
|
||||
std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s per node " << mflops / NN << std::endl;
|
||||
std::cout << GridLogMessage << std::fixed << std::setprecision(1)
|
||||
<< "Deo mflop/s = " << mflops << " (" << mf_err << ") " << mf_lo
|
||||
<< "-" << mf_hi << std::endl;
|
||||
std::cout << GridLogMessage << std::fixed << std::setprecision(1)
|
||||
<< "Deo mflop/s per rank " << mflops / NP << std::endl;
|
||||
std::cout << GridLogMessage << std::fixed << std::setprecision(1)
|
||||
<< "Deo mflop/s per node " << mflops / NN << std::endl;
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage << L << "^4 x " << Ls << " Deo Best mflop/s = " << mflops_best << " ; " << mflops_best / NN << " per node " << std::endl;
|
||||
std::cout << GridLogMessage << L << "^4 x " << Ls << " Deo Worst mflop/s = " << mflops_worst << " ; " << mflops_worst / NN << " per node " << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "====================================================================="
|
||||
"============="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << L << "^4 x " << Ls
|
||||
<< " Deo Best mflop/s = " << mflops_best << " ; "
|
||||
<< mflops_best / NN << " per node " << std::endl;
|
||||
std::cout << GridLogMessage << L << "^4 x " << Ls
|
||||
<< " Deo Worst mflop/s = " << mflops_worst << " ; "
|
||||
<< mflops_worst / NN << " per node " << std::endl;
|
||||
std::cout << GridLogMessage << fmt << std::endl;
|
||||
std::cout << GridLogMessage;
|
||||
|
||||
@ -521,7 +592,10 @@ class Benchmark
|
||||
std::cout << mflops_all[i] / NN << " ; ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "====================================================================="
|
||||
"============="
|
||||
<< std::endl;
|
||||
}
|
||||
return mflops_best;
|
||||
}
|
||||
@ -540,29 +614,39 @@ class Benchmark
|
||||
Coordinate mpi = GridDefaultMpi();
|
||||
assert(mpi.size() == 4);
|
||||
Coordinate local({L, L, L, L});
|
||||
Coordinate latt4({local[0] * mpi[0], local[1] * mpi[1], local[2] * mpi[2], local[3] * mpi[3]});
|
||||
Coordinate latt4(
|
||||
{local[0] * mpi[0], local[1] * mpi[1], local[2] * mpi[2], local[3] * mpi[3]});
|
||||
|
||||
GridCartesian *TmpGrid = SpaceTimeGrid::makeFourDimGrid(latt4,
|
||||
GridDefaultSimd(Nd, vComplex::Nsimd()),
|
||||
GridDefaultMpi());
|
||||
GridCartesian *TmpGrid = SpaceTimeGrid::makeFourDimGrid(
|
||||
latt4, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
|
||||
uint64_t NP = TmpGrid->RankCount();
|
||||
uint64_t NN = TmpGrid->NodeCount();
|
||||
NN_global = NN;
|
||||
uint64_t SHM = NP / NN;
|
||||
|
||||
///////// Welcome message ////////////
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage << "Benchmark ImprovedStaggered on " << L << "^4 local volume " << std::endl;
|
||||
std::cout << GridLogMessage << "* Global volume : " << GridCmdVectorIntToString(latt4) << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "======================================================================="
|
||||
"==========="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "Benchmark ImprovedStaggered on " << L
|
||||
<< "^4 local volume " << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "* Global volume : " << GridCmdVectorIntToString(latt4) << std::endl;
|
||||
std::cout << GridLogMessage << "* ranks : " << NP << std::endl;
|
||||
std::cout << GridLogMessage << "* nodes : " << NN << std::endl;
|
||||
std::cout << GridLogMessage << "* ranks/node : " << SHM << std::endl;
|
||||
std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi) << std::endl;
|
||||
std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi)
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "* Using " << threads << " threads" << std::endl;
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "======================================================================="
|
||||
"==========="
|
||||
<< std::endl;
|
||||
|
||||
///////// Lattice Init ////////////
|
||||
GridCartesian *FGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
|
||||
GridCartesian *FGrid = SpaceTimeGrid::makeFourDimGrid(
|
||||
latt4, GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
|
||||
GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
|
||||
|
||||
///////// RNG Init ////////////
|
||||
@ -604,10 +688,14 @@ class Benchmark
|
||||
std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S ");
|
||||
|
||||
controls Cases[] = {
|
||||
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicyConcurrent},
|
||||
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicyConcurrent},
|
||||
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicySequential},
|
||||
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicySequential}};
|
||||
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsThenCompute,
|
||||
CartesianCommunicator::CommunicatorPolicyConcurrent},
|
||||
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsAndCompute,
|
||||
CartesianCommunicator::CommunicatorPolicyConcurrent},
|
||||
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsThenCompute,
|
||||
CartesianCommunicator::CommunicatorPolicySequential},
|
||||
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsAndCompute,
|
||||
CartesianCommunicator::CommunicatorPolicySequential}};
|
||||
|
||||
for (int c = 0; c < num_cases; c++)
|
||||
{
|
||||
@ -616,15 +704,22 @@ class Benchmark
|
||||
StaggeredKernelsStatic::Opt = Cases[c].Opt;
|
||||
CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
|
||||
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "==================================================================="
|
||||
"==============="
|
||||
<< std::endl;
|
||||
if (StaggeredKernelsStatic::Opt == StaggeredKernelsStatic::OptGeneric)
|
||||
std::cout << GridLogMessage << "* Using GENERIC Nc StaggeredKernels" << std::endl;
|
||||
std::cout << GridLogMessage << "* Using GENERIC Nc StaggeredKernels"
|
||||
<< std::endl;
|
||||
if (StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute)
|
||||
std::cout << GridLogMessage << "* Using Overlapped Comms/Compute" << std::endl;
|
||||
if (StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsThenCompute)
|
||||
std::cout << GridLogMessage << "* Using sequential Comms/Compute" << std::endl;
|
||||
std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "==================================================================="
|
||||
"==============="
|
||||
<< std::endl;
|
||||
|
||||
int nwarm = 10;
|
||||
double t0 = usecond();
|
||||
@ -639,7 +734,8 @@ class Benchmark
|
||||
|
||||
FGrid->Broadcast(0, &ncall, sizeof(ncall));
|
||||
|
||||
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
|
||||
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per
|
||||
// second"<<std::endl;
|
||||
Ds.ZeroCounters();
|
||||
|
||||
time_statistics timestat;
|
||||
@ -675,14 +771,25 @@ class Benchmark
|
||||
if (mflops < mflops_worst)
|
||||
mflops_worst = mflops;
|
||||
|
||||
std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s = " << mflops << " (" << mf_err << ") " << mf_lo << "-" << mf_hi << std::endl;
|
||||
std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s per rank " << mflops / NP << std::endl;
|
||||
std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s per node " << mflops / NN << std::endl;
|
||||
std::cout << GridLogMessage << std::fixed << std::setprecision(1)
|
||||
<< "Deo mflop/s = " << mflops << " (" << mf_err << ") " << mf_lo
|
||||
<< "-" << mf_hi << std::endl;
|
||||
std::cout << GridLogMessage << std::fixed << std::setprecision(1)
|
||||
<< "Deo mflop/s per rank " << mflops / NP << std::endl;
|
||||
std::cout << GridLogMessage << std::fixed << std::setprecision(1)
|
||||
<< "Deo mflop/s per node " << mflops / NN << std::endl;
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage << L << "^4 Deo Best mflop/s = " << mflops_best << " ; " << mflops_best / NN << " per node " << std::endl;
|
||||
std::cout << GridLogMessage << L << "^4 Deo Worst mflop/s = " << mflops_worst << " ; " << mflops_worst / NN << " per node " << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "====================================================================="
|
||||
"============="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << L
|
||||
<< "^4 Deo Best mflop/s = " << mflops_best << " ; "
|
||||
<< mflops_best / NN << " per node " << std::endl;
|
||||
std::cout << GridLogMessage << L
|
||||
<< "^4 Deo Worst mflop/s = " << mflops_worst << " ; "
|
||||
<< mflops_worst / NN << " per node " << std::endl;
|
||||
std::cout << GridLogMessage << fmt << std::endl;
|
||||
std::cout << GridLogMessage;
|
||||
|
||||
@ -692,7 +799,10 @@ class Benchmark
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "======================================================================="
|
||||
"==========="
|
||||
<< std::endl;
|
||||
return mflops_best;
|
||||
}
|
||||
};
|
||||
@ -701,7 +811,8 @@ int main(int argc, char **argv)
|
||||
{
|
||||
Grid_init(&argc, &argv);
|
||||
|
||||
CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential);
|
||||
CartesianCommunicator::SetCommunicatorPolicy(
|
||||
CartesianCommunicator::CommunicatorPolicySequential);
|
||||
#ifdef KNL
|
||||
LebesgueOrder::Block = std::vector<int>({8, 2, 2, 2});
|
||||
#else
|
||||
@ -709,9 +820,11 @@ int main(int argc, char **argv)
|
||||
#endif
|
||||
Benchmark::Decomposition();
|
||||
|
||||
int do_su4 = 1;
|
||||
int do_memory = 1;
|
||||
int do_su4 = 0;
|
||||
int do_memory = 0;
|
||||
int do_comms = 1;
|
||||
int do_flops = 0;
|
||||
int Ls = 1;
|
||||
|
||||
int sel = 4;
|
||||
std::vector<int> L_list({8, 12, 16, 24, 32});
|
||||
@ -721,84 +834,170 @@ int main(int argc, char **argv)
|
||||
std::vector<double> dwf4;
|
||||
std::vector<double> staggered;
|
||||
|
||||
int Ls = 1;
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage << " Wilson dslash 4D vectorised" << std::endl;
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
for (int l = 0; l < L_list.size(); l++)
|
||||
if (do_flops)
|
||||
{
|
||||
wilson.push_back(Benchmark::DWF(Ls, L_list[l]));
|
||||
}
|
||||
Ls = 1;
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "========================================================================="
|
||||
"========="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << " Wilson dslash 4D vectorised" << std::endl;
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "========================================================================="
|
||||
"========="
|
||||
<< std::endl;
|
||||
for (int l = 0; l < L_list.size(); l++)
|
||||
{
|
||||
wilson.push_back(Benchmark::DWF(Ls, L_list[l]));
|
||||
}
|
||||
|
||||
Ls = 12;
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage << " Domain wall dslash 4D vectorised" << std::endl;
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
for (int l = 0; l < L_list.size(); l++)
|
||||
{
|
||||
double result = Benchmark::DWF(Ls, L_list[l]);
|
||||
dwf4.push_back(result);
|
||||
}
|
||||
Ls = 12;
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "========================================================================="
|
||||
"========="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << " Domain wall dslash 4D vectorised" << std::endl;
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "========================================================================="
|
||||
"========="
|
||||
<< std::endl;
|
||||
for (int l = 0; l < L_list.size(); l++)
|
||||
{
|
||||
double result = Benchmark::DWF(Ls, L_list[l]);
|
||||
dwf4.push_back(result);
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage << " Improved Staggered dslash 4D vectorised" << std::endl;
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
for (int l = 0; l < L_list.size(); l++)
|
||||
{
|
||||
double result = Benchmark::Staggered(L_list[l]);
|
||||
staggered.push_back(result);
|
||||
}
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "========================================================================="
|
||||
"========="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << " Improved Staggered dslash 4D vectorised"
|
||||
<< std::endl;
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "========================================================================="
|
||||
"========="
|
||||
<< std::endl;
|
||||
for (int l = 0; l < L_list.size(); l++)
|
||||
{
|
||||
double result = Benchmark::Staggered(L_list[l]);
|
||||
staggered.push_back(result);
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage << " Summary table Ls=" << Ls << std::endl;
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\t Staggered" << std::endl;
|
||||
for (int l = 0; l < L_list.size(); l++)
|
||||
{
|
||||
std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] << " \t\t " << dwf4[l] << " \t\t " << staggered[l] << std::endl;
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "========================================================================="
|
||||
"========="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << " Summary table Ls=" << Ls << std::endl;
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "========================================================================="
|
||||
"========="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\t Staggered" << std::endl;
|
||||
for (int l = 0; l < L_list.size(); l++)
|
||||
{
|
||||
std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] << " \t\t "
|
||||
<< dwf4[l] << " \t\t " << staggered[l] << std::endl;
|
||||
}
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "========================================================================="
|
||||
"========="
|
||||
<< std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
|
||||
int NN = NN_global;
|
||||
if (do_memory)
|
||||
{
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "======================================================================="
|
||||
"==========="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << " Memory benchmark " << std::endl;
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "======================================================================="
|
||||
"==========="
|
||||
<< std::endl;
|
||||
Benchmark::Memory();
|
||||
}
|
||||
|
||||
if (do_su4)
|
||||
{
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "======================================================================="
|
||||
"==========="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << " SU(4) benchmark " << std::endl;
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "======================================================================="
|
||||
"==========="
|
||||
<< std::endl;
|
||||
Benchmark::SU4();
|
||||
}
|
||||
|
||||
if (do_comms)
|
||||
{
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "======================================================================="
|
||||
"==========="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << " Communications benchmark " << std::endl;
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "======================================================================="
|
||||
"==========="
|
||||
<< std::endl;
|
||||
Benchmark::Comms();
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage << " Per Node Summary table Ls=" << Ls << std::endl;
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage << " L \t\t Wilson\t\t DWF4\t\t Staggered " << std::endl;
|
||||
for (int l = 0; l < L_list.size(); l++)
|
||||
if (do_flops)
|
||||
{
|
||||
std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] / NN << " \t " << dwf4[l] / NN << " \t " << staggered[l] / NN << std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "========================================================================="
|
||||
"========="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << " Per Node Summary table Ls=" << Ls << std::endl;
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "========================================================================="
|
||||
"========="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << " L \t\t Wilson\t\t DWF4\t\t Staggered " << std::endl;
|
||||
for (int l = 0; l < L_list.size(); l++)
|
||||
{
|
||||
std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] / NN << " \t "
|
||||
<< dwf4[l] / NN << " \t " << staggered[l] / NN << std::endl;
|
||||
}
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "========================================================================="
|
||||
"========="
|
||||
<< std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage << " Comparison point result: " << 0.5 * (dwf4[sel] + dwf4[selm1]) / NN << " Mflop/s per node" << std::endl;
|
||||
std::cout << GridLogMessage << " Comparison point is 0.5*(" << dwf4[sel] / NN << "+" << dwf4[selm1] / NN << ") " << std::endl;
|
||||
std::cout << std::setprecision(3);
|
||||
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "========================================================================="
|
||||
"========="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< " Comparison point result: " << 0.5 * (dwf4[sel] + dwf4[selm1]) / NN
|
||||
<< " Mflop/s per node" << std::endl;
|
||||
std::cout << GridLogMessage << " Comparison point is 0.5*(" << dwf4[sel] / NN << "+"
|
||||
<< dwf4[selm1] / NN << ") " << std::endl;
|
||||
std::cout << std::setprecision(3);
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "========================================================================="
|
||||
"========="
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
Grid_finalize();
|
||||
}
|
||||
|
@ -32,23 +32,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#ifdef HAVE_LIME
|
||||
using namespace Grid;
|
||||
|
||||
std::string filestem(const int l)
|
||||
{
|
||||
return "iobench_l" + std::to_string(l);
|
||||
}
|
||||
std::string filestem(const int l) { return "iobench_l" + std::to_string(l); }
|
||||
|
||||
int vol(const int i)
|
||||
{
|
||||
return BENCH_IO_LMIN + 2 * i;
|
||||
}
|
||||
int vol(const int i) { return BENCH_IO_LMIN + 2 * i; }
|
||||
|
||||
int volInd(const int l)
|
||||
{
|
||||
return (l - BENCH_IO_LMIN) / 2;
|
||||
}
|
||||
int volInd(const int l) { return (l - BENCH_IO_LMIN) / 2; }
|
||||
|
||||
template <typename Mat>
|
||||
void stats(Mat &mean, Mat &stdDev, const std::vector<Mat> &data)
|
||||
template <typename Mat> void stats(Mat &mean, Mat &stdDev, const std::vector<Mat> &data)
|
||||
{
|
||||
auto nr = data[0].rows(), nc = data[0].cols();
|
||||
Eigen::MatrixXd sqSum(nr, nc);
|
||||
@ -66,11 +56,11 @@ void stats(Mat &mean, Mat &stdDev, const std::vector<Mat> &data)
|
||||
mean /= n;
|
||||
}
|
||||
|
||||
#define grid_printf(...) \
|
||||
{ \
|
||||
char _buf[1024]; \
|
||||
sprintf(_buf, __VA_ARGS__); \
|
||||
MSG << _buf; \
|
||||
#define grid_printf(...) \
|
||||
{ \
|
||||
char _buf[1024]; \
|
||||
sprintf(_buf, __VA_ARGS__); \
|
||||
MSG << _buf; \
|
||||
}
|
||||
|
||||
enum
|
||||
@ -173,47 +163,49 @@ int main(int argc, char **argv)
|
||||
MSG << "SUMMARY" << std::endl;
|
||||
MSG << BIGSEP << std::endl;
|
||||
MSG << "Summary of individual results (all results in MB/s)." << std::endl;
|
||||
MSG << "Every second colum gives the standard deviation of the previous column." << std::endl;
|
||||
MSG << "Every second colum gives the standard deviation of the previous column."
|
||||
<< std::endl;
|
||||
MSG << std::endl;
|
||||
grid_printf("%4s %12s %12s %12s %12s %12s %12s %12s %12s\n",
|
||||
"L", "std read", "std dev", "std write", "std dev",
|
||||
"Grid read", "std dev", "Grid write", "std dev");
|
||||
grid_printf("%4s %12s %12s %12s %12s %12s %12s %12s %12s\n", "L", "std read", "std dev",
|
||||
"std write", "std dev", "Grid read", "std dev", "Grid write", "std dev");
|
||||
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
|
||||
{
|
||||
grid_printf("%4d %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n",
|
||||
l, mean(volInd(l), sRead), stdDev(volInd(l), sRead),
|
||||
mean(volInd(l), sWrite), stdDev(volInd(l), sWrite),
|
||||
mean(volInd(l), gRead), stdDev(volInd(l), gRead),
|
||||
mean(volInd(l), gWrite), stdDev(volInd(l), gWrite));
|
||||
grid_printf("%4d %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n", l,
|
||||
mean(volInd(l), sRead), stdDev(volInd(l), sRead), mean(volInd(l), sWrite),
|
||||
stdDev(volInd(l), sWrite), mean(volInd(l), gRead),
|
||||
stdDev(volInd(l), gRead), mean(volInd(l), gWrite),
|
||||
stdDev(volInd(l), gWrite));
|
||||
}
|
||||
MSG << std::endl;
|
||||
MSG << "Robustness of individual results, in %. (rob = 100% - std dev / mean)" << std::endl;
|
||||
MSG << "Robustness of individual results, in %. (rob = 100% - std dev / mean)"
|
||||
<< std::endl;
|
||||
MSG << std::endl;
|
||||
grid_printf("%4s %12s %12s %12s %12s\n",
|
||||
"L", "std read", "std write", "Grid read", "Grid write");
|
||||
grid_printf("%4s %12s %12s %12s %12s\n", "L", "std read", "std write", "Grid read",
|
||||
"Grid write");
|
||||
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
|
||||
{
|
||||
grid_printf("%4d %12.1f %12.1f %12.1f %12.1f\n",
|
||||
l, rob(volInd(l), sRead), rob(volInd(l), sWrite),
|
||||
rob(volInd(l), gRead), rob(volInd(l), gWrite));
|
||||
grid_printf("%4d %12.1f %12.1f %12.1f %12.1f\n", l, rob(volInd(l), sRead),
|
||||
rob(volInd(l), sWrite), rob(volInd(l), gRead), rob(volInd(l), gWrite));
|
||||
}
|
||||
MSG << std::endl;
|
||||
MSG << "Summary of results averaged over local volumes 24^4-" << BENCH_IO_LMAX << "^4 (all results in MB/s)." << std::endl;
|
||||
MSG << "Every second colum gives the standard deviation of the previous column." << std::endl;
|
||||
MSG << "Summary of results averaged over local volumes 24^4-" << BENCH_IO_LMAX
|
||||
<< "^4 (all results in MB/s)." << std::endl;
|
||||
MSG << "Every second colum gives the standard deviation of the previous column."
|
||||
<< std::endl;
|
||||
MSG << std::endl;
|
||||
grid_printf("%12s %12s %12s %12s %12s %12s %12s %12s\n",
|
||||
"std read", "std dev", "std write", "std dev",
|
||||
"Grid read", "std dev", "Grid write", "std dev");
|
||||
grid_printf("%12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n",
|
||||
avMean(sRead), avStdDev(sRead), avMean(sWrite), avStdDev(sWrite),
|
||||
avMean(gRead), avStdDev(gRead), avMean(gWrite), avStdDev(gWrite));
|
||||
grid_printf("%12s %12s %12s %12s %12s %12s %12s %12s\n", "std read", "std dev",
|
||||
"std write", "std dev", "Grid read", "std dev", "Grid write", "std dev");
|
||||
grid_printf("%12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n", avMean(sRead),
|
||||
avStdDev(sRead), avMean(sWrite), avStdDev(sWrite), avMean(gRead),
|
||||
avStdDev(gRead), avMean(gWrite), avStdDev(gWrite));
|
||||
MSG << std::endl;
|
||||
MSG << "Robustness of volume-averaged results, in %. (rob = 100% - std dev / mean)" << std::endl;
|
||||
MSG << "Robustness of volume-averaged results, in %. (rob = 100% - std dev / mean)"
|
||||
<< std::endl;
|
||||
MSG << std::endl;
|
||||
grid_printf("%12s %12s %12s %12s\n",
|
||||
"std read", "std write", "Grid read", "Grid write");
|
||||
grid_printf("%12.1f %12.1f %12.1f %12.1f\n",
|
||||
avRob(sRead), avRob(sWrite), avRob(gRead), avRob(gWrite));
|
||||
grid_printf("%12s %12s %12s %12s\n", "std read", "std write", "Grid read",
|
||||
"Grid write");
|
||||
grid_printf("%12.1f %12.1f %12.1f %12.1f\n", avRob(sRead), avRob(sWrite), avRob(gRead),
|
||||
avRob(gWrite));
|
||||
|
||||
Grid_finalize();
|
||||
|
||||
|
@ -20,9 +20,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#include <Grid/Grid.h>
|
||||
#define MSG std::cout << GridLogMessage
|
||||
#define SEP \
|
||||
#define SEP \
|
||||
"-----------------------------------------------------------------------------"
|
||||
#define BIGSEP \
|
||||
#define BIGSEP \
|
||||
"============================================================================="
|
||||
#ifdef HAVE_LIME
|
||||
|
||||
@ -36,16 +36,15 @@ namespace Grid
|
||||
|
||||
// AP 06/10/2020: Standard C version in case one is suspicious of the C++ API
|
||||
//
|
||||
// template <typename Field>
|
||||
// void stdWrite(const std::string filestem, Field &vec)
|
||||
// template <typename Field> void stdWrite(const std::string filestem, Field &vec)
|
||||
// {
|
||||
// std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||
// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "wb");
|
||||
// size_t size;
|
||||
// uint32_t crc;
|
||||
// std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||
// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "wb");
|
||||
// size_t size;
|
||||
// uint32_t crc;
|
||||
// GridStopWatch ioWatch, crcWatch;
|
||||
|
||||
// size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
|
||||
// size = vec.Grid()->lSites() * sizeof(typename Field::scalar_object);
|
||||
// autoView(vec_v, vec, CpuRead);
|
||||
// crcWatch.Start();
|
||||
// crc = GridChecksum::crc32(vec_v.cpu_ptr, size);
|
||||
@ -53,36 +52,39 @@ namespace Grid
|
||||
// crcWatch.Stop();
|
||||
// MSG << "Std I/O write: Data CRC32 " << std::hex << crc << std::dec << std::endl;
|
||||
// ioWatch.Start();
|
||||
// std::fwrite(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file);
|
||||
// std::fwrite(vec_v.cpu_ptr, sizeof(typename Field::scalar_object),
|
||||
// vec.Grid()->lSites(), file);
|
||||
// ioWatch.Stop();
|
||||
// std::fclose(file);
|
||||
// size *= vec.Grid()->ProcessorCount();
|
||||
// auto &p = BinaryIO::lastPerf;
|
||||
// p.size = size;
|
||||
// p.time = ioWatch.useconds();
|
||||
// p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
|
||||
// p.size = size;
|
||||
// p.time = ioWatch.useconds();
|
||||
// p.mbytesPerSecond = size / 1024. / 1024. / (ioWatch.useconds() / 1.e6);
|
||||
// MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed()
|
||||
// << ", " << p.mbytesPerSecond << " MB/s" << std::endl;
|
||||
// << ",
|
||||
// "
|
||||
// << p.mbytesPerSecond << " MB/s" << std::endl;
|
||||
// MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||
// }
|
||||
//
|
||||
// template <typename Field>
|
||||
// void stdRead(Field &vec, const std::string filestem)
|
||||
|
||||
// template <typename Field> void stdRead(Field &vec, const std::string filestem)
|
||||
// {
|
||||
// std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||
// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "rb");
|
||||
// size_t size;
|
||||
// uint32_t crcRead, crcData;
|
||||
// std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||
// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "rb");
|
||||
// size_t size;
|
||||
// uint32_t crcRead, crcData;
|
||||
// GridStopWatch ioWatch, crcWatch;
|
||||
|
||||
// size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
|
||||
// size = vec.Grid()->lSites() * sizeof(typename Field::scalar_object);
|
||||
// crcWatch.Start();
|
||||
// std::fread(&crcRead, sizeof(uint32_t), 1, file);
|
||||
// crcWatch.Stop();
|
||||
// {
|
||||
// autoView(vec_v, vec, CpuWrite);
|
||||
// ioWatch.Start();
|
||||
// std::fread(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file);
|
||||
// std::fread(vec_v.cpu_ptr, sizeof(typename Field::scalar_object),
|
||||
// vec.Grid()->lSites(), file);
|
||||
// ioWatch.Stop();
|
||||
// std::fclose(file);
|
||||
// }
|
||||
@ -96,19 +98,19 @@ namespace Grid
|
||||
// assert(crcData == crcRead);
|
||||
// size *= vec.Grid()->ProcessorCount();
|
||||
// auto &p = BinaryIO::lastPerf;
|
||||
// p.size = size;
|
||||
// p.time = ioWatch.useconds();
|
||||
// p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
|
||||
// MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed()
|
||||
// << ", " << p.mbytesPerSecond << " MB/s" << std::endl;
|
||||
// p.size = size;
|
||||
// p.time = ioWatch.useconds();
|
||||
// p.mbytesPerSecond = size / 1024. / 1024. / (ioWatch.useconds() / 1.e6);
|
||||
// MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed() << ", "
|
||||
// << p.mbytesPerSecond << " MB/s" << std::endl;
|
||||
// MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||
// }
|
||||
|
||||
template <typename Field>
|
||||
void stdWrite(const std::string filestem, Field &vec)
|
||||
template <typename Field> void stdWrite(const std::string filestem, Field &vec)
|
||||
{
|
||||
std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||
std::ofstream file(filestem + "." + rankStr + ".bin", std::ios::out | std::ios::binary);
|
||||
std::ofstream file(filestem + "." + rankStr + ".bin",
|
||||
std::ios::out | std::ios::binary);
|
||||
size_t size, sizec;
|
||||
uint32_t crc;
|
||||
GridStopWatch ioWatch, crcWatch;
|
||||
@ -130,16 +132,16 @@ namespace Grid
|
||||
p.size = size;
|
||||
p.time = ioWatch.useconds();
|
||||
p.mbytesPerSecond = size / 1024. / 1024. / (ioWatch.useconds() / 1.e6);
|
||||
MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed()
|
||||
<< ", " << p.mbytesPerSecond << " MB/s" << std::endl;
|
||||
MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed() << ", "
|
||||
<< p.mbytesPerSecond << " MB/s" << std::endl;
|
||||
MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||
}
|
||||
|
||||
template <typename Field>
|
||||
void stdRead(Field &vec, const std::string filestem)
|
||||
template <typename Field> void stdRead(Field &vec, const std::string filestem)
|
||||
{
|
||||
std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||
std::ifstream file(filestem + "." + rankStr + ".bin", std::ios::in | std::ios::binary);
|
||||
std::ifstream file(filestem + "." + rankStr + ".bin",
|
||||
std::ios::in | std::ios::binary);
|
||||
size_t size, sizec;
|
||||
uint32_t crcRead, crcData;
|
||||
GridStopWatch ioWatch, crcWatch;
|
||||
@ -168,13 +170,12 @@ namespace Grid
|
||||
p.size = size;
|
||||
p.time = ioWatch.useconds();
|
||||
p.mbytesPerSecond = size / 1024. / 1024. / (ioWatch.useconds() / 1.e6);
|
||||
MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed()
|
||||
<< ", " << p.mbytesPerSecond << " MB/s" << std::endl;
|
||||
MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed() << ", "
|
||||
<< p.mbytesPerSecond << " MB/s" << std::endl;
|
||||
MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||
}
|
||||
|
||||
template <typename Field>
|
||||
void limeWrite(const std::string filestem, Field &vec)
|
||||
template <typename Field> void limeWrite(const std::string filestem, Field &vec)
|
||||
{
|
||||
emptyUserRecord record;
|
||||
ScidacWriter binWriter(vec.Grid()->IsBoss());
|
||||
@ -184,8 +185,7 @@ namespace Grid
|
||||
binWriter.close();
|
||||
}
|
||||
|
||||
template <typename Field>
|
||||
void limeRead(Field &vec, const std::string filestem)
|
||||
template <typename Field> void limeRead(Field &vec, const std::string filestem)
|
||||
{
|
||||
emptyUserRecord record;
|
||||
ScidacReader binReader;
|
||||
@ -225,12 +225,13 @@ namespace Grid
|
||||
|
||||
template <typename Field>
|
||||
void writeBenchmark(const Coordinate &latt, const std::string filename,
|
||||
const WriterFn<Field> &write,
|
||||
const unsigned int Ls = 1, const bool rb = false)
|
||||
const WriterFn<Field> &write, const unsigned int Ls = 1,
|
||||
const bool rb = false)
|
||||
{
|
||||
auto mpi = GridDefaultMpi();
|
||||
auto simd = GridDefaultSimd(latt.size(), Field::vector_type::Nsimd());
|
||||
std::shared_ptr<GridCartesian> gBasePt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
|
||||
std::shared_ptr<GridCartesian> gBasePt(
|
||||
SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
|
||||
std::shared_ptr<GridBase> gPt;
|
||||
std::random_device rd;
|
||||
|
||||
@ -251,12 +252,13 @@ namespace Grid
|
||||
|
||||
template <typename Field>
|
||||
void readBenchmark(const Coordinate &latt, const std::string filename,
|
||||
const ReaderFn<Field> &read,
|
||||
const unsigned int Ls = 1, const bool rb = false)
|
||||
const ReaderFn<Field> &read, const unsigned int Ls = 1,
|
||||
const bool rb = false)
|
||||
{
|
||||
auto mpi = GridDefaultMpi();
|
||||
auto simd = GridDefaultSimd(latt.size(), Field::vector_type::Nsimd());
|
||||
std::shared_ptr<GridCartesian> gBasePt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
|
||||
std::shared_ptr<GridCartesian> gBasePt(
|
||||
SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
|
||||
std::shared_ptr<GridBase> gPt;
|
||||
|
||||
makeGrid(gPt, gBasePt, Ls, rb);
|
||||
|
@ -34,8 +34,7 @@ struct time_statistics
|
||||
mean = sum / v.size();
|
||||
|
||||
std::vector<double> diff(v.size());
|
||||
std::transform(v.begin(), v.end(), diff.begin(), [=](double x)
|
||||
{ return x - mean; });
|
||||
std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; });
|
||||
double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
|
||||
err = std::sqrt(sq_sum / (v.size() * (v.size() - 1)));
|
||||
|
||||
@ -50,8 +49,7 @@ void header()
|
||||
std::cout << GridLogMessage << " L "
|
||||
<< "\t"
|
||||
<< " Ls "
|
||||
<< "\t"
|
||||
<< std::setw(11) << "bytes\t\t"
|
||||
<< "\t" << std::setw(11) << "bytes\t\t"
|
||||
<< "MB/s uni"
|
||||
<< "\t"
|
||||
<< "MB/s bidi" << std::endl;
|
||||
@ -64,7 +62,8 @@ int main(int argc, char **argv)
|
||||
Coordinate simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd());
|
||||
Coordinate mpi_layout = GridDefaultMpi();
|
||||
int threads = GridThread::GetThreads();
|
||||
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
|
||||
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads"
|
||||
<< std::endl;
|
||||
|
||||
int Nloop = 250;
|
||||
int nmu = 0;
|
||||
@ -73,13 +72,21 @@ int main(int argc, char **argv)
|
||||
if (mpi_layout[mu] > 1)
|
||||
nmu++;
|
||||
|
||||
std::cout << GridLogMessage << "Number of iterations to average: " << Nloop << std::endl;
|
||||
std::cout << GridLogMessage << "Number of iterations to average: " << Nloop
|
||||
<< std::endl;
|
||||
std::vector<double> t_time(Nloop);
|
||||
// time_statistics timestat;
|
||||
|
||||
std::cout << GridLogMessage << "====================================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage << "= Benchmarking sequential halo exchange from host memory " << std::endl;
|
||||
std::cout << GridLogMessage << "====================================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "========================================================================="
|
||||
"==========================="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "= Benchmarking sequential halo exchange from host memory " << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "========================================================================="
|
||||
"==========================="
|
||||
<< std::endl;
|
||||
header();
|
||||
|
||||
for (int lat = 8; lat <= maxlat; lat += 4)
|
||||
@ -87,9 +94,7 @@ int main(int argc, char **argv)
|
||||
for (int Ls = 8; Ls <= 8; Ls *= 2)
|
||||
{
|
||||
|
||||
Coordinate latt_size({lat * mpi_layout[0],
|
||||
lat * mpi_layout[1],
|
||||
lat * mpi_layout[2],
|
||||
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
|
||||
lat * mpi_layout[3]});
|
||||
|
||||
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
||||
@ -127,22 +132,16 @@ int main(int argc, char **argv)
|
||||
{
|
||||
std::vector<CommsRequest_t> requests;
|
||||
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
|
||||
Grid.SendToRecvFrom((void *)&xbuf[mu][0],
|
||||
xmit_to_rank,
|
||||
(void *)&rbuf[mu][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
Grid.SendToRecvFrom((void *)&xbuf[mu][0], xmit_to_rank,
|
||||
(void *)&rbuf[mu][0], recv_from_rank, bytes);
|
||||
}
|
||||
|
||||
comm_proc = mpi_layout[mu] - 1;
|
||||
{
|
||||
std::vector<CommsRequest_t> requests;
|
||||
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
|
||||
Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0],
|
||||
xmit_to_rank,
|
||||
(void *)&rbuf[mu + 4][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], xmit_to_rank,
|
||||
(void *)&rbuf[mu + 4][0], recv_from_rank, bytes);
|
||||
}
|
||||
}
|
||||
Grid.Barrier();
|
||||
@ -154,17 +153,24 @@ int main(int argc, char **argv)
|
||||
double bidibytes = xbytes + rbytes;
|
||||
|
||||
std::cout << GridLogMessage << std::setw(4) << lat << "\t" << Ls << "\t"
|
||||
<< std::setw(11) << bytes << std::fixed << std::setprecision(1) << std::setw(7) << " "
|
||||
<< std::right << xbytes / mean << " "
|
||||
<< std::setw(11) << bytes << std::fixed << std::setprecision(1)
|
||||
<< std::setw(7) << " " << std::right << xbytes / mean << " "
|
||||
<< "\t\t" << std::setw(7) << bidibytes / mean << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "====================================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage << "= Benchmarking sequential halo exchange from GPU memory " << std::endl;
|
||||
std::cout << GridLogMessage << "====================================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "========================================================================="
|
||||
"==========================="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "= Benchmarking sequential halo exchange from GPU memory " << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "========================================================================="
|
||||
"==========================="
|
||||
<< std::endl;
|
||||
header();
|
||||
|
||||
for (int lat = 8; lat <= maxlat; lat += 4)
|
||||
@ -172,9 +178,7 @@ int main(int argc, char **argv)
|
||||
for (int Ls = 8; Ls <= 8; Ls *= 2)
|
||||
{
|
||||
|
||||
Coordinate latt_size({lat * mpi_layout[0],
|
||||
lat * mpi_layout[1],
|
||||
lat * mpi_layout[2],
|
||||
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
|
||||
lat * mpi_layout[3]});
|
||||
|
||||
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
||||
@ -212,22 +216,16 @@ int main(int argc, char **argv)
|
||||
{
|
||||
std::vector<CommsRequest_t> requests;
|
||||
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
|
||||
Grid.SendToRecvFrom((void *)&xbuf[mu][0],
|
||||
xmit_to_rank,
|
||||
(void *)&rbuf[mu][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
Grid.SendToRecvFrom((void *)&xbuf[mu][0], xmit_to_rank,
|
||||
(void *)&rbuf[mu][0], recv_from_rank, bytes);
|
||||
}
|
||||
|
||||
comm_proc = mpi_layout[mu] - 1;
|
||||
{
|
||||
std::vector<CommsRequest_t> requests;
|
||||
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
|
||||
Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0],
|
||||
xmit_to_rank,
|
||||
(void *)&rbuf[mu + 4][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], xmit_to_rank,
|
||||
(void *)&rbuf[mu + 4][0], recv_from_rank, bytes);
|
||||
}
|
||||
}
|
||||
Grid.Barrier();
|
||||
@ -239,8 +237,8 @@ int main(int argc, char **argv)
|
||||
double bidibytes = xbytes + rbytes;
|
||||
|
||||
std::cout << GridLogMessage << std::setw(4) << lat << "\t" << Ls << "\t"
|
||||
<< std::setw(11) << bytes << std::fixed << std::setprecision(1) << std::setw(7) << " "
|
||||
<< std::right << xbytes / mean << " "
|
||||
<< std::setw(11) << bytes << std::fixed << std::setprecision(1)
|
||||
<< std::setw(7) << " " << std::right << xbytes / mean << " "
|
||||
<< "\t\t" << std::setw(7) << bidibytes / mean << std::endl;
|
||||
}
|
||||
}
|
||||
@ -253,9 +251,15 @@ int main(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "====================================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "========================================================================="
|
||||
"==========================="
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "= All done; Bye Bye" << std::endl;
|
||||
std::cout << GridLogMessage << "====================================================================================================" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "========================================================================="
|
||||
"==========================="
|
||||
<< std::endl;
|
||||
|
||||
Grid_finalize();
|
||||
}
|
||||
|
@ -30,8 +30,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
|
||||
template <class d>
|
||||
struct scal
|
||||
template <class d> struct scal
|
||||
{
|
||||
d internal;
|
||||
};
|
||||
@ -69,13 +68,11 @@ int main(int argc, char **argv)
|
||||
json["single_site_flops"] = single_site_flops;
|
||||
|
||||
GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(
|
||||
GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()),
|
||||
GridDefaultMpi());
|
||||
GridRedBlackCartesian *UrbGrid =
|
||||
SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
|
||||
GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
|
||||
GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
|
||||
GridRedBlackCartesian *FrbGrid =
|
||||
SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
|
||||
GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
|
||||
|
||||
json["grid"] = FGrid->FullDimensions().toVector();
|
||||
json["local_grid"] = FGrid->LocalDimensions().toVector();
|
||||
@ -83,11 +80,10 @@ int main(int argc, char **argv)
|
||||
std::cout << GridLogMessage << "Making s innermost grids" << std::endl;
|
||||
GridCartesian *sUGrid =
|
||||
SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(), GridDefaultMpi());
|
||||
GridRedBlackCartesian *sUrbGrid =
|
||||
SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
||||
|
||||
GridRedBlackCartesian *sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
||||
GridCartesian *sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls, UGrid);
|
||||
GridRedBlackCartesian *sFrbGrid =
|
||||
SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls, UGrid);
|
||||
GridRedBlackCartesian *sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls, UGrid);
|
||||
|
||||
std::vector<int> seeds4({1, 2, 3, 4});
|
||||
std::vector<int> seeds5({5, 6, 7, 8});
|
||||
@ -150,8 +146,7 @@ int main(int argc, char **argv)
|
||||
{
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
}
|
||||
std::cout << GridLogMessage << "Setting up Cshift based reference "
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl;
|
||||
|
||||
if (1)
|
||||
{
|
||||
@ -200,54 +195,45 @@ int main(int argc, char **argv)
|
||||
json["ranks"] = NP;
|
||||
json["nodes"] = NN;
|
||||
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "*****************************************************************"
|
||||
<< std::endl;
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm"
|
||||
<< std::endl;
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "*****************************************************************"
|
||||
<< std::endl;
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "*****************************************************************"
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "* Benchmarking DomainWallFermionR::Dhop "
|
||||
<< "*****************************************************************"
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "* Vectorising space-time by "
|
||||
<< vComplexF::Nsimd() << std::endl;
|
||||
std::cout << GridLogMessage << "* VComplexF size is " << sizeof(vComplexF)
|
||||
<< " B" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm"
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "*****************************************************************"
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "*****************************************************************"
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "* Benchmarking DomainWallFermionR::Dhop " << std::endl;
|
||||
std::cout << GridLogMessage << "* Vectorising space-time by " << vComplexF::Nsimd()
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "* VComplexF size is " << sizeof(vComplexF) << " B"
|
||||
<< std::endl;
|
||||
|
||||
if (sizeof(RealF) == 4)
|
||||
std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
|
||||
if (sizeof(RealF) == 8)
|
||||
std::cout << GridLogMessage << "* DOUBLE precision " << std::endl;
|
||||
#ifdef GRID_OMP
|
||||
if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute)
|
||||
std::cout << GridLogMessage << "* Using Overlapped Comms/Compute"
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "* Using Overlapped Comms/Compute" << std::endl;
|
||||
if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute)
|
||||
std::cout << GridLogMessage << "* Using sequential comms compute"
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "* Using sequential comms compute" << std::endl;
|
||||
#endif
|
||||
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric)
|
||||
std::cout << GridLogMessage << "* Using GENERIC Nc WilsonKernels"
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "* Using GENERIC Nc WilsonKernels" << std::endl;
|
||||
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)
|
||||
std::cout << GridLogMessage << "* Using Nc=3 WilsonKernels"
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "* Using Nc=3 WilsonKernels" << std::endl;
|
||||
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm)
|
||||
std::cout << GridLogMessage << "* Using Asm Nc=3 WilsonKernels"
|
||||
<< std::endl;
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "*****************************************************************"
|
||||
<< std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "* Using Asm Nc=3 WilsonKernels" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "*****************************************************************"
|
||||
<< std::endl;
|
||||
|
||||
DomainWallFermionF Dw(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5);
|
||||
int ncall = 300;
|
||||
@ -277,16 +263,13 @@ int main(int argc, char **argv)
|
||||
auto simdwidth = sizeof(vComplex);
|
||||
|
||||
// RF: Nd Wilson * Ls, Nd gauge * Ls, Nc colors
|
||||
double data_rf = volume * ((2 * Nd + 1) * Nd * Nc + 2 * Nd * Nc * Nc) *
|
||||
simdwidth / nsimd * ncall / (1024. * 1024. * 1024.);
|
||||
double data_rf = volume * ((2 * Nd + 1) * Nd * Nc + 2 * Nd * Nc * Nc) * simdwidth /
|
||||
nsimd * ncall / (1024. * 1024. * 1024.);
|
||||
|
||||
// mem: Nd Wilson * Ls, Nd gauge, Nc colors
|
||||
double data_mem =
|
||||
(volume * (2 * Nd + 1) * Nd * Nc + (volume / Ls) * 2 * Nd * Nc * Nc) *
|
||||
simdwidth / nsimd * ncall / (1024. * 1024. * 1024.);
|
||||
|
||||
std::cout << GridLogMessage << "Called Dw " << ncall << " times in "
|
||||
<< t1 - t0 << " us" << std::endl;
|
||||
(volume * (2 * Nd + 1) * Nd * Nc + (volume / Ls) * 2 * Nd * Nc * Nc) * simdwidth /
|
||||
nsimd * ncall / (1024. * 1024. * 1024.);
|
||||
|
||||
json["Dw"]["calls"] = ncall;
|
||||
json["Dw"]["time"] = t1 - t0;
|
||||
@ -296,15 +279,16 @@ int main(int argc, char **argv)
|
||||
json["Dw"]["RF"] = 1000000. * data_rf / ((t1 - t0));
|
||||
json["Dw"]["mem"] = 1000000. * data_mem / ((t1 - t0));
|
||||
|
||||
// std::cout<<GridLogMessage << "norm result "<<
|
||||
// norm2(result)<<std::endl; std::cout<<GridLogMessage << "norm ref "<<
|
||||
// norm2(ref)<<std::endl;
|
||||
std::cout << GridLogMessage << "mflop/s = " << flops / (t1 - t0)
|
||||
std::cout << GridLogMessage << "Called Dw " << ncall << " times in " << t1 - t0
|
||||
<< " us" << std::endl;
|
||||
// std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||
// std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||
std::cout << GridLogMessage << "mflop/s = " << flops / (t1 - t0) << std::endl;
|
||||
std::cout << GridLogMessage << "mflop/s per rank = " << flops / (t1 - t0) / NP
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "mflop/s per rank = " << flops / (t1 - t0) / NP << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "mflop/s per node = " << flops / (t1 - t0) / NN << std::endl;
|
||||
std::cout << GridLogMessage << "mflop/s per node = " << flops / (t1 - t0) / NN
|
||||
<< std::endl;
|
||||
|
||||
std::cout << GridLogMessage
|
||||
<< "RF GiB/s (base 2) = " << 1000000. * data_rf / ((t1 - t0))
|
||||
<< std::endl;
|
||||
@ -381,13 +365,12 @@ int main(int argc, char **argv)
|
||||
}
|
||||
// dump=1;
|
||||
Dw.Dhop(src, result, 1);
|
||||
std::cout
|
||||
<< GridLogMessage
|
||||
<< "Compare to naive wilson implementation Dag to verify correctness"
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "Called DwDag" << std::endl;
|
||||
std::cout << GridLogMessage << "norm dag result " << norm2(result)
|
||||
|
||||
std::cout << GridLogMessage
|
||||
<< "Compare to naive wilson implementation Dag to verify correctness"
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "Called DwDag" << std::endl;
|
||||
std::cout << GridLogMessage << "norm dag result " << norm2(result) << std::endl;
|
||||
std::cout << GridLogMessage << "norm dag ref " << norm2(ref) << std::endl;
|
||||
err = ref - result;
|
||||
std::cout << GridLogMessage << "norm dag diff " << norm2(err) << std::endl;
|
||||
@ -405,8 +388,7 @@ int main(int argc, char **argv)
|
||||
LatticeFermionF r_o(FrbGrid);
|
||||
LatticeFermionF r_eo(FGrid);
|
||||
|
||||
std::cout << GridLogMessage
|
||||
<< "Calling Deo and Doe and //assert Deo+Doe == Dunprec"
|
||||
std::cout << GridLogMessage << "Calling Deo and Doe and //assert Deo+Doe == Dunprec"
|
||||
<< std::endl;
|
||||
pickCheckerboard(Even, src_e, src);
|
||||
pickCheckerboard(Odd, src_o, src);
|
||||
@ -416,37 +398,32 @@ int main(int argc, char **argv)
|
||||
|
||||
// S-direction is INNERMOST and takes no part in the parity.
|
||||
std::cout << GridLogMessage
|
||||
<< "*********************************************************"
|
||||
<< std::endl;
|
||||
|
||||
<< "*********************************************************" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "* Benchmarking DomainWallFermionF::DhopEO "
|
||||
<< "* Benchmarking DomainWallFermionF::DhopEO " << std::endl;
|
||||
std::cout << GridLogMessage << "* Vectorising space-time by " << vComplexF::Nsimd()
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "* Vectorising space-time by "
|
||||
<< vComplexF::Nsimd() << std::endl;
|
||||
|
||||
if (sizeof(RealF) == 4)
|
||||
std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
|
||||
if (sizeof(RealF) == 8)
|
||||
std::cout << GridLogMessage << "* DOUBLE precision " << std::endl;
|
||||
#ifdef GRID_OMP
|
||||
if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute)
|
||||
std::cout << GridLogMessage << "* Using Overlapped Comms/Compute"
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "* Using Overlapped Comms/Compute" << std::endl;
|
||||
if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute)
|
||||
std::cout << GridLogMessage << "* Using sequential comms compute"
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "* Using sequential comms compute" << std::endl;
|
||||
#endif
|
||||
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric)
|
||||
std::cout << GridLogMessage << "* Using GENERIC Nc WilsonKernels"
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "* Using GENERIC Nc WilsonKernels" << std::endl;
|
||||
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)
|
||||
std::cout << GridLogMessage << "* Using Nc=3 WilsonKernels"
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "* Using Nc=3 WilsonKernels" << std::endl;
|
||||
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm)
|
||||
std::cout << GridLogMessage << "* Using Asm Nc=3 WilsonKernels"
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "* Using Asm Nc=3 WilsonKernels" << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "*********************************************************"
|
||||
<< std::endl;
|
||||
<< "*********************************************************" << std::endl;
|
||||
|
||||
{
|
||||
Dw.ZeroCounters();
|
||||
FGrid->Barrier();
|
||||
@ -478,12 +455,12 @@ int main(int argc, char **argv)
|
||||
json["Deo"]["mflops_per_rank"] = flops / (t1 - t0) / NP;
|
||||
json["Deo"]["mflops_per_node"] = flops / (t1 - t0) / NN;
|
||||
|
||||
std::cout << GridLogMessage << "Deo mflop/s = " << flops / (t1 - t0)
|
||||
std::cout << GridLogMessage << "Deo mflop/s = " << flops / (t1 - t0) << std::endl;
|
||||
std::cout << GridLogMessage << "Deo mflop/s per rank " << flops / (t1 - t0) / NP
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "Deo mflop/s per rank "
|
||||
<< flops / (t1 - t0) / NP << std::endl;
|
||||
std::cout << GridLogMessage << "Deo mflop/s per node "
|
||||
<< flops / (t1 - t0) / NN << std::endl;
|
||||
std::cout << GridLogMessage << "Deo mflop/s per node " << flops / (t1 - t0) / NN
|
||||
<< std::endl;
|
||||
|
||||
Dw.Report();
|
||||
}
|
||||
Dw.DhopEO(src_o, r_e, DaggerNo);
|
||||
@ -510,18 +487,16 @@ int main(int argc, char **argv)
|
||||
|
||||
pickCheckerboard(Even, src_e, err);
|
||||
pickCheckerboard(Odd, src_o, err);
|
||||
std::cout << GridLogMessage << "norm diff even " << norm2(src_e)
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "norm diff odd " << norm2(src_o)
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "norm diff even " << norm2(src_e) << std::endl;
|
||||
std::cout << GridLogMessage << "norm diff odd " << norm2(src_o) << std::endl;
|
||||
|
||||
assert(norm2(src_e) < 1.0e-4);
|
||||
assert(norm2(src_o) < 1.0e-4);
|
||||
|
||||
if (!json_filename.empty())
|
||||
{
|
||||
std::cout << GridLogMessage << "writing benchmark results to "
|
||||
<< json_filename << std::endl;
|
||||
std::cout << GridLogMessage << "writing benchmark results to " << json_filename
|
||||
<< std::endl;
|
||||
|
||||
int me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
|
36
Grid/Common.hpp
Normal file
36
Grid/Common.hpp
Normal file
@ -0,0 +1,36 @@
|
||||
/*
|
||||
Copyright © 2022 Antonin Portelli <antonin.portelli@me.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef Grid_Benchmarks_Common_hpp_
|
||||
#define Grid_Benchmarks_Common_hpp_
|
||||
|
||||
#ifndef GRID_MSG
|
||||
#define GRID_MSG std::cout << GridLogMessage
|
||||
#endif
|
||||
|
||||
#ifndef GRID_MSG_MAXSIZE
|
||||
#define GRID_MSG_MAXSIZE 1024
|
||||
#endif
|
||||
|
||||
#define grid_printf(...) \
|
||||
{ \
|
||||
char _buf[GRID_MSG_MAXSIZE]; \
|
||||
snprintf(_buf, GRID_MSG_MAXSIZE, __VA_ARGS__); \
|
||||
GRID_MSG << _buf; \
|
||||
}
|
||||
|
||||
#endif // Grid_Benchmarks_Common_hpp_
|
69
Grid/Readme.md
Normal file
69
Grid/Readme.md
Normal file
@ -0,0 +1,69 @@
|
||||
# Grid benchmarks
|
||||
|
||||
This folder contains benchmarks for the [Grid](https://github.com/aportelli/) library.
|
||||
The benchmarks can be summarised as follows
|
||||
|
||||
- `Benchmark_Grid`: This benchmark measures floating-point performance for various fermion
|
||||
matrices, as well as bandwidth measurement for different operations. Measurements are
|
||||
performed for a fixed range of problem sizes.
|
||||
|
||||
## TL;DR
|
||||
Build and install Grid, all dependencies, and the benchmark with
|
||||
```bash
|
||||
systems/<system>/bootstrap-env.sh <env_dir> # build dependencies, takes a long time
|
||||
./build-grid.sh <env_dir> <config> # build Grid
|
||||
./build-benchmark.sh <env_dir> <config> # build benchmarks
|
||||
```
|
||||
where `<env_dir>` is an arbitrary directory where every product will be stored, `<system>`
|
||||
is a sub-directory of `systems` containing system-specific scripts
|
||||
(an existing preset or your own), and finally `<config>` is the name of a build config
|
||||
in `systems/<system>/grid-config.json`. After a successful execution the benchmark binaries
|
||||
will be in `<env_dir>/prefix/gridbench_<config>`.
|
||||
|
||||
## Environment setup
|
||||
A complete runtime environment can be deployed using scripts from this repository. System-specific scripts are in the `systems` directory.
|
||||
|
||||
You should first deploy the environment for the specific system you are using, for example
|
||||
```bash
|
||||
systems/tursa/bootstrap-env.sh ./env
|
||||
```
|
||||
will deploy the relevant environment for the [Tursa](https://www.epcc.ed.ac.uk/hpc-services/dirac-tursa-gpu) supercomputer in `./env`. This step might compile from source a large set
|
||||
of packages, and might take some time to complete.
|
||||
|
||||
After that, the environment directory (`./env` in the example above) will contain an `env.sh` file that needs to be sourced to activate the environment
|
||||
```bash
|
||||
source ./env/env.sh
|
||||
```
|
||||
Additional scripts `env-*.sh` can be sourced after to activate more specific environments,
|
||||
this should be done after sourcing `env.sh` as above.
|
||||
|
||||
## Building the benchmarks
|
||||
The environment directory contains a `grid-config.json` file specifying compilation flag
|
||||
configurations for Grid (please see Grid's repository for documentation). All entries have
|
||||
the form
|
||||
```json
|
||||
{
|
||||
"name": "foo", // name of the configuration
|
||||
"env-script": "bar.sh", // script to source before building
|
||||
// (path relative to the environment directory)
|
||||
"commit": "...", // Grid commit to use
|
||||
// (anything that can be an argument of git checkout)
|
||||
"config-options": "..." // options to pass to the configure script,
|
||||
"env" : { // environment variables
|
||||
"VAR": "value" // export VAR="value" before building
|
||||
}
|
||||
}
|
||||
```
|
||||
Grid can then be built with
|
||||
```
|
||||
./build-grid.sh <env_dir> <config>
|
||||
```
|
||||
where `<env_dir>` is the environment directory and `<config>` is the build config name in
|
||||
`grid-config.json`. Similarly, the benchmarks can then be built with
|
||||
```
|
||||
./build-benchmark.sh <env_dir> <config>
|
||||
```
|
||||
|
||||
## Running the benchmarks
|
||||
After building the benchmarks as above you can find the binaries in
|
||||
`<env_dir>/prefix/gridbench_<config>`.
|
@ -16,18 +16,16 @@ cd "${env_dir}"
|
||||
env_dir=$(pwd -P)
|
||||
cd "${call_dir}"
|
||||
build_dir="${env_dir}/build/Grid-benchmarks/${cfg}"
|
||||
if [ -d "${build_dir}" ]; then
|
||||
echo "error: directory '${build_dir}' exists"
|
||||
exit 1
|
||||
fi
|
||||
mkdir -p "${build_dir}"
|
||||
source "${env_dir}/env.sh"
|
||||
entry=$(jq ".configs[]|select(.name==\"${cfg}\")" "${env_dir}"/grid-config.json)
|
||||
env_script=$(echo "${entry}" | jq -r ".\"env-script\"")
|
||||
cd "${build_dir}" || return
|
||||
source "${env_dir}/${env_script}"
|
||||
"${script_dir}/configure" --with-grid="${env_dir}/prefix/grid_${cfg}" \
|
||||
--prefix="${env_dir}/prefix/gridbench_${cfg}"
|
||||
if [ ! -f Makefile ]; then
|
||||
"${script_dir}/configure" --with-grid="${env_dir}/prefix/grid_${cfg}" \
|
||||
--prefix="${env_dir}/prefix/gridbench_${cfg}"
|
||||
fi
|
||||
make -j 128
|
||||
make install
|
||||
cd "${call_dir}"
|
||||
|
8
Readme.md
Normal file
8
Readme.md
Normal file
@ -0,0 +1,8 @@
|
||||
# Lattice benchmarks
|
||||
|
||||
This repository is an attempt at packaging benchmarks for various libraries used for
|
||||
lattice field theory simulations. It is currently only featuring the Grid library but
|
||||
more will be added later.
|
||||
|
||||
Libraries:
|
||||
- [Grid](https://github.com/aportelli/) - [Documentation](Grid/Readme.md)
|
Loading…
Reference in New Issue
Block a user