diff --git a/Grid/.clang-format b/Grid/.clang-format index 9156d50..9d54a25 100644 --- a/Grid/.clang-format +++ b/Grid/.clang-format @@ -6,7 +6,7 @@ BreakBeforeBraces: Allman, AllowShortIfStatementsOnASingleLine: false, IndentCaseLabels: false, - ColumnLimit: 0, + ColumnLimit: 90, AccessModifierOffset: -4, NamespaceIndentation: All, FixNamespaceComments: false, diff --git a/Grid/Benchmark_Grid.cpp b/Grid/Benchmark_Grid.cpp index fd2056e..841f26e 100644 --- a/Grid/Benchmark_Grid.cpp +++ b/Grid/Benchmark_Grid.cpp @@ -18,6 +18,7 @@ You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include "Common.hpp" #include <Grid/Grid.h> using namespace Grid; @@ -44,8 +45,7 @@ struct time_statistics mean = sum / v.size(); std::vector<double> diff(v.size()); - std::transform(v.begin(), v.end(), diff.begin(), [=](double x) - { return x - mean; }); + std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; }); double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); err = std::sqrt(sq_sum / (v.size() * (v.size() - 1))); @@ -64,11 +64,8 @@ void comms_header() << "bytes\t MB/s uni (err/min/max) \t\t MB/s bidi (err/min/max)" << std::endl; }; -Gamma::Algebra Gmu[] = { - Gamma::Algebra::GammaX, - Gamma::Algebra::GammaY, - Gamma::Algebra::GammaZ, - Gamma::Algebra::GammaT}; +Gamma::Algebra Gmu[] = {Gamma::Algebra::GammaX, Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, Gamma::Algebra::GammaT}; struct controls { @@ -84,26 +81,52 @@ class Benchmark { int threads = GridThread::GetThreads(); - std::cout << GridLogMessage << "==================================================================================" << std::endl; - std::cout << GridLogMessage << "= Grid is setup to use " << threads << " threads" << std::endl; - std::cout << GridLogMessage << "==================================================================================" << std::endl; + std::cout << GridLogMessage + << "=======================================================================" "===========" + << std::endl; + std::cout << GridLogMessage << "= Grid is setup to use " << threads << " threads" + << std::endl; + std::cout << GridLogMessage + << "=======================================================================" "===========" + << std::endl; std::cout << GridLogMessage << "Grid Default Decomposition patterns\n"; - std::cout << GridLogMessage << "\tOpenMP threads : " << GridThread::GetThreads() << std::endl; - std::cout << GridLogMessage << "\tMPI tasks : " << GridCmdVectorIntToString(GridDefaultMpi()) << std::endl; - std::cout << GridLogMessage << "\tvReal : " << sizeof(vReal) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vReal::Nsimd())) << std::endl; - std::cout << GridLogMessage << "\tvRealF : " << sizeof(vRealF) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vRealF::Nsimd())) << std::endl; - std::cout << GridLogMessage << "\tvRealD : " << sizeof(vRealD) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vRealD::Nsimd())) << std::endl; - std::cout << GridLogMessage << "\tvComplex : " << sizeof(vComplex) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vComplex::Nsimd())) << std::endl; - std::cout << GridLogMessage << "\tvComplexF : " << sizeof(vComplexF) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vComplexF::Nsimd())) << std::endl; - std::cout << GridLogMessage << "\tvComplexD : " << sizeof(vComplexD) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4,
vComplexD::Nsimd())) << std::endl; - std::cout << GridLogMessage << "==================================================================================" << std::endl; + std::cout << GridLogMessage << "\tOpenMP threads : " << GridThread::GetThreads() + << std::endl; + std::cout << GridLogMessage + << "\tMPI tasks : " << GridCmdVectorIntToString(GridDefaultMpi()) + << std::endl; + std::cout << GridLogMessage << "\tvReal : " << sizeof(vReal) * 8 << "bits ; " + << GridCmdVectorIntToString(GridDefaultSimd(4, vReal::Nsimd())) + << std::endl; + std::cout << GridLogMessage << "\tvRealF : " << sizeof(vRealF) * 8 + << "bits ; " + << GridCmdVectorIntToString(GridDefaultSimd(4, vRealF::Nsimd())) + << std::endl; + std::cout << GridLogMessage << "\tvRealD : " << sizeof(vRealD) * 8 + << "bits ; " + << GridCmdVectorIntToString(GridDefaultSimd(4, vRealD::Nsimd())) + << std::endl; + std::cout << GridLogMessage << "\tvComplex : " << sizeof(vComplex) * 8 + << "bits ; " + << GridCmdVectorIntToString(GridDefaultSimd(4, vComplex::Nsimd())) + << std::endl; + std::cout << GridLogMessage << "\tvComplexF : " << sizeof(vComplexF) * 8 + << "bits ; " + << GridCmdVectorIntToString(GridDefaultSimd(4, vComplexF::Nsimd())) + << std::endl; + std::cout << GridLogMessage << "\tvComplexD : " << sizeof(vComplexD) * 8 + << "bits ; " + << GridCmdVectorIntToString(GridDefaultSimd(4, vComplexD::Nsimd())) + << std::endl; } static void Comms(void) { int Nloop = 200; int nmu = 0; - int maxlat = 32; + int maxlat = 48; Coordinate simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd()); Coordinate mpi_layout = GridDefaultMpi(); @@ -115,94 +138,89 @@ class Benchmark std::vector<double> t_time(Nloop); time_statistics timestat; - std::cout << GridLogMessage << "====================================================================================================" << std::endl; - std::cout << GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in " << nmu << " dimensions" << std::endl; - std::cout << GridLogMessage << "====================================================================================================" << std::endl; - comms_header(); + std::cout << GridLogMessage + << "=======================================================================" "=============================" + << std::endl; + std::cout << GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in " + << nmu << " dimensions" << std::endl; + std::cout << GridLogMessage + << "=======================================================================" "=============================" + << std::endl; + grid_printf("%5s %5s %15s %15s %15s %15s %15s\n", "L", "dir", "payload (B)", + "time (usec)", "rate (GB/s)", "std dev", "max"); for (int lat = 16; lat <= maxlat; lat += 8) { - // for(int Ls=8;Ls<=8;Ls*=2){ + int Ls = 12; + + Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2], + lat * mpi_layout[3]}); + + GridCartesian Grid(latt_size, simd_layout, mpi_layout); + RealD Nrank = Grid._Nprocessors; + RealD Nnode = Grid.NodeCount(); + RealD ppn = Nrank / Nnode; + + std::vector<HalfSpinColourVectorD *> xbuf(8); + std::vector<HalfSpinColourVectorD *> rbuf(8); + uint64_t bytes = lat * lat * lat * Ls * sizeof(HalfSpinColourVectorD); + for (int d = 0; d < 8; d++) { - int Ls = 12; + xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); + rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); + } - Coordinate latt_size({lat * mpi_layout[0], - lat * mpi_layout[1], - lat * mpi_layout[2], - lat * mpi_layout[3]}); + double dbytes; - GridCartesian Grid(latt_size,
simd_layout, mpi_layout); - RealD Nrank = Grid._Nprocessors; - RealD Nnode = Grid.NodeCount(); - RealD ppn = Nrank / Nnode; - - std::vector<HalfSpinColourVectorD *> xbuf(8); - std::vector<HalfSpinColourVectorD *> rbuf(8); - // Grid.ShmBufferFreeAll(); - uint64_t bytes = lat * lat * lat * Ls * sizeof(HalfSpinColourVectorD); - for (int d = 0; d < 8; d++) + for (int dir = 0; dir < 8; dir++) + { + int mu = dir % 4; + if (mpi_layout[mu] > 1) { - xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); - rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); - // bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); - // bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); - } - // int ncomm; - double dbytes; - - for (int dir = 0; dir < 8; dir++) - { - int mu = dir % 4; - if (mpi_layout[mu] > 1) + std::vector<double> times(Nloop); + for (int i = 0; i < Nloop; i++) { - std::vector<double> times(Nloop); - for (int i = 0; i < Nloop; i++) + dbytes = 0; + double start = usecond(); + int xmit_to_rank; + int recv_from_rank; + + if (dir == mu) { - - dbytes = 0; - double start = usecond(); - int xmit_to_rank; - int recv_from_rank; - - if (dir == mu) - { - int comm_proc = 1; - Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); - } - else - { - int comm_proc = mpi_layout[mu] - 1; - Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); - } - Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, - (void *)&rbuf[dir][0], recv_from_rank, - bytes); - dbytes += bytes; - - double stop = usecond(); - t_time[i] = stop - start; // microseconds + int comm_proc = 1; + Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); } - timestat.statistics(t_time); + else + { + int comm_proc = mpi_layout[mu] - 1; + Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); + } + Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, + (void *)&rbuf[dir][0], recv_from_rank, bytes); + dbytes += bytes; - dbytes = dbytes * ppn; - double xbytes = dbytes * 0.5; - double bidibytes = dbytes; - - std::cout << GridLogMessage << lat << "\t" << Ls << "\t " - << bytes << " \t " - << xbytes / timestat.mean << " \t " << xbytes * timestat.err / (timestat.mean * timestat.mean) << " \t " - << xbytes / timestat.max << " " << xbytes / timestat.min - << "\t\t" << bidibytes / timestat.mean << " " << bidibytes * timestat.err / (timestat.mean * timestat.mean) << " " - << bidibytes / timestat.max << " " << bidibytes / timestat.min << std::endl; + double stop = usecond(); + t_time[i] = stop - start; // microseconds } + timestat.statistics(t_time); + + dbytes = dbytes * ppn; + double bidibytes = 2. * dbytes; + double rate = bidibytes / (timestat.mean / 1.e6) / 1024. / 1024.
/ 1024.; + double rate_err = rate * timestat.err / timestat.mean; + double rate_max = rate * timestat.mean / timestat.min; + grid_printf("%5d %5d %15d %15.2f %15.2f %15.1f %15.2f\n", lat, dir, bytes, + timestat.mean, rate, rate_err, rate_max); } - for (int d = 0; d < 8; d++) - { - acceleratorFreeDevice(xbuf[d]); - acceleratorFreeDevice(rbuf[d]); - } + } + for (int d = 0; d < 8; d++) + { + acceleratorFreeDevice(xbuf[d]); + acceleratorFreeDevice(rbuf[d]); } } return; @@ -217,9 +235,15 @@ class Benchmark Coordinate simd_layout = GridDefaultSimd(Nd, vReal::Nsimd()); Coordinate mpi_layout = GridDefaultMpi(); - std::cout << GridLogMessage << "==================================================================================" << std::endl; + std::cout << GridLogMessage + << "=======================================================================" + "===========" + << std::endl; std::cout << GridLogMessage << "= Benchmarking a*x + y bandwidth" << std::endl; - std::cout << GridLogMessage << "==================================================================================" << std::endl; + std::cout << GridLogMessage + << "=======================================================================" + "===========" + << std::endl; std::cout << GridLogMessage << " L " << "\t\t" << "bytes" @@ -229,7 +253,9 @@ class Benchmark << "Gflop/s" << "\t\t seconds" << "\t\tGB/s / node" << std::endl; - std::cout << GridLogMessage << "----------------------------------------------------------" << std::endl; + std::cout << GridLogMessage + << "----------------------------------------------------------" + << std::endl; // uint64_t NP; uint64_t NN; @@ -242,7 +268,8 @@ class Benchmark for (int lat = 8; lat <= lmax; lat += 8) { - Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2], lat * mpi_layout[3]}); + Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2], + lat * mpi_layout[3]}); int64_t vol = latt_size[0] * latt_size[1] * latt_size[2] * latt_size[3]; GridCartesian Grid(latt_size, simd_layout, mpi_layout); @@ -273,9 +300,10 @@ class Benchmark double flops = vol * Nvec * 2; // mul,add double bytes = 3.0 * vol * Nvec * sizeof(Real); - std::cout << GridLogMessage << std::setprecision(3) - << lat << "\t\t" << bytes << " \t\t" << bytes / time << "\t\t" << flops / time << "\t\t" << (stop - start) / 1000. / 1000. - << "\t\t" << bytes / time / NN << std::endl; + std::cout << GridLogMessage << std::setprecision(3) << lat << "\t\t" << bytes + << " \t\t" << bytes / time << "\t\t" << flops / time << "\t\t" + << (stop - start) / 1000. / 1000. 
<< "\t\t" << bytes / time / NN + << std::endl; } }; @@ -287,9 +315,15 @@ class Benchmark Coordinate simd_layout = GridDefaultSimd(Nd, vComplexF::Nsimd()); Coordinate mpi_layout = GridDefaultMpi(); - std::cout << GridLogMessage << "==================================================================================" << std::endl; + std::cout << GridLogMessage + << "=======================================================================" + "===========" + << std::endl; std::cout << GridLogMessage << "= Benchmarking z = y*x SU(4) bandwidth" << std::endl; - std::cout << GridLogMessage << "==================================================================================" << std::endl; + std::cout << GridLogMessage + << "=======================================================================" + "===========" + << std::endl; std::cout << GridLogMessage << " L " << "\t\t" << "bytes" @@ -299,7 +333,9 @@ class Benchmark << "Gflop/s" << "\t\t seconds" << "\t\tGB/s / node" << std::endl; - std::cout << GridLogMessage << "----------------------------------------------------------" << std::endl; + std::cout << GridLogMessage + << "----------------------------------------------------------" + << std::endl; uint64_t NN; @@ -310,7 +346,8 @@ class Benchmark for (int lat = 8; lat <= lmax; lat += 8) { - Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2], lat * mpi_layout[3]}); + Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2], + lat * mpi_layout[3]}); int64_t vol = latt_size[0] * latt_size[1] * latt_size[2] * latt_size[3]; GridCartesian Grid(latt_size, simd_layout, mpi_layout); @@ -337,9 +374,10 @@ class Benchmark double flops = vol * Nc4 * Nc4 * (6 + (Nc4 - 1) * 8); // mul,add double bytes = 3.0 * vol * Nc4 * Nc4 * 2 * sizeof(RealF); - std::cout << GridLogMessage << std::setprecision(3) - << lat << "\t\t" << bytes << " \t\t" << bytes / time << "\t\t" << flops / time << "\t\t" << (stop - start) / 1000. / 1000. - << "\t\t" << bytes / time / NN << std::endl; + std::cout << GridLogMessage << std::setprecision(3) << lat << "\t\t" << bytes + << " \t\t" << bytes / time << "\t\t" << flops / time << "\t\t" + << (stop - start) / 1000. / 1000. 
<< "\t\t" << bytes / time / NN + << std::endl; } }; @@ -360,31 +398,41 @@ class Benchmark Coordinate mpi = GridDefaultMpi(); assert(mpi.size() == 4); Coordinate local({L, L, L, L}); - Coordinate latt4({local[0] * mpi[0], local[1] * mpi[1], local[2] * mpi[2], local[3] * mpi[3]}); + Coordinate latt4( + {local[0] * mpi[0], local[1] * mpi[1], local[2] * mpi[2], local[3] * mpi[3]}); - GridCartesian *TmpGrid = SpaceTimeGrid::makeFourDimGrid(latt4, - GridDefaultSimd(Nd, vComplex::Nsimd()), - GridDefaultMpi()); + GridCartesian *TmpGrid = SpaceTimeGrid::makeFourDimGrid( + latt4, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi()); uint64_t NP = TmpGrid->RankCount(); uint64_t NN = TmpGrid->NodeCount(); NN_global = NN; uint64_t SHM = NP / NN; ///////// Welcome message //////////// - std::cout << GridLogMessage << "==================================================================================" << std::endl; - std::cout << GridLogMessage << "Benchmark DWF on " << L << "^4 local volume " << std::endl; + std::cout << GridLogMessage + << "=======================================================================" + "===========" + << std::endl; + std::cout << GridLogMessage << "Benchmark DWF on " << L << "^4 local volume " + << std::endl; std::cout << GridLogMessage << "* Nc : " << Nc << std::endl; - std::cout << GridLogMessage << "* Global volume : " << GridCmdVectorIntToString(latt4) << std::endl; + std::cout << GridLogMessage + << "* Global volume : " << GridCmdVectorIntToString(latt4) << std::endl; std::cout << GridLogMessage << "* Ls : " << Ls << std::endl; std::cout << GridLogMessage << "* ranks : " << NP << std::endl; std::cout << GridLogMessage << "* nodes : " << NN << std::endl; std::cout << GridLogMessage << "* ranks/node : " << SHM << std::endl; - std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi) << std::endl; + std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi) + << std::endl; std::cout << GridLogMessage << "* Using " << threads << " threads" << std::endl; - std::cout << GridLogMessage << "==================================================================================" << std::endl; + std::cout << GridLogMessage + << "=======================================================================" + "===========" + << std::endl; ///////// Lattice Init //////////// - GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi()); + GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid( + latt4, GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi()); GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid); GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid); @@ -423,10 +471,14 @@ class Benchmark std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S "); controls Cases[] = { - {WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicyConcurrent}, - {WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicyConcurrent}, - {WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicySequential}, - {WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicySequential}}; + {WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsThenCompute, + 
CartesianCommunicator::CommunicatorPolicyConcurrent}, + {WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsAndCompute, + CartesianCommunicator::CommunicatorPolicyConcurrent}, + {WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsThenCompute, + CartesianCommunicator::CommunicatorPolicySequential}, + {WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsAndCompute, + CartesianCommunicator::CommunicatorPolicySequential}}; for (int c = 0; c < num_cases; c++) { @@ -435,7 +487,10 @@ class Benchmark WilsonKernelsStatic::Opt = Cases[c].Opt; CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch); - std::cout << GridLogMessage << "==================================================================================" << std::endl; + std::cout << GridLogMessage + << "===================================================================" + "===============" + << std::endl; if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric) std::cout << GridLogMessage << "* Using GENERIC Nc WilsonKernels" << std::endl; if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute) @@ -443,7 +498,10 @@ class Benchmark if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage << "* Using sequential Comms/Compute" << std::endl; std::cout << GridLogMessage << "* SINGLE precision " << std::endl; - std::cout << GridLogMessage << "==================================================================================" << std::endl; + std::cout << GridLogMessage + << "===================================================================" + "===============" + << std::endl; int nwarm = 10; double t0 = usecond(); @@ -458,7 +516,8 @@ class Benchmark FGrid->Broadcast(0, &ncall, sizeof(ncall)); - // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<RankCount(); uint64_t NN = TmpGrid->NodeCount(); NN_global = NN; uint64_t SHM = NP / NN; ///////// Welcome message //////////// - std::cout << GridLogMessage << "==================================================================================" << std::endl; - std::cout << GridLogMessage << "Benchmark ImprovedStaggered on " << L << "^4 local volume " << std::endl; - std::cout << GridLogMessage << "* Global volume : " << GridCmdVectorIntToString(latt4) << std::endl; + std::cout << GridLogMessage + << "=======================================================================" + "===========" + << std::endl; + std::cout << GridLogMessage << "Benchmark ImprovedStaggered on " << L + << "^4 local volume " << std::endl; + std::cout << GridLogMessage + << "* Global volume : " << GridCmdVectorIntToString(latt4) << std::endl; std::cout << GridLogMessage << "* ranks : " << NP << std::endl; std::cout << GridLogMessage << "* nodes : " << NN << std::endl; std::cout << GridLogMessage << "* ranks/node : " << SHM << std::endl; - std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi) << std::endl; + std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi) + << std::endl; std::cout << GridLogMessage << "* Using " << threads << " threads" << std::endl; - std::cout << GridLogMessage << "==================================================================================" << std::endl; + std::cout << GridLogMessage + << "=======================================================================" + "===========" + << std::endl; ///////// Lattice Init //////////// - GridCartesian *FGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd, 
vComplexF::Nsimd()), GridDefaultMpi()); + GridCartesian *FGrid = SpaceTimeGrid::makeFourDimGrid( + latt4, GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi()); GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid); ///////// RNG Init //////////// @@ -604,10 +688,14 @@ class Benchmark std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S "); controls Cases[] = { - {StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicyConcurrent}, - {StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicyConcurrent}, - {StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicySequential}, - {StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicySequential}}; + {StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsThenCompute, + CartesianCommunicator::CommunicatorPolicyConcurrent}, + {StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsAndCompute, + CartesianCommunicator::CommunicatorPolicyConcurrent}, + {StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsThenCompute, + CartesianCommunicator::CommunicatorPolicySequential}, + {StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsAndCompute, + CartesianCommunicator::CommunicatorPolicySequential}}; for (int c = 0; c < num_cases; c++) { @@ -616,15 +704,22 @@ class Benchmark StaggeredKernelsStatic::Opt = Cases[c].Opt; CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch); - std::cout << GridLogMessage << "==================================================================================" << std::endl; + std::cout << GridLogMessage + << "===================================================================" + "===============" + << std::endl; if (StaggeredKernelsStatic::Opt == StaggeredKernelsStatic::OptGeneric) - std::cout << GridLogMessage << "* Using GENERIC Nc StaggeredKernels" << std::endl; + std::cout << GridLogMessage << "* Using GENERIC Nc StaggeredKernels" + << std::endl; if (StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute) std::cout << GridLogMessage << "* Using Overlapped Comms/Compute" << std::endl; if (StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsThenCompute) std::cout << GridLogMessage << "* Using sequential Comms/Compute" << std::endl; std::cout << GridLogMessage << "* SINGLE precision " << std::endl; - std::cout << GridLogMessage << "==================================================================================" << std::endl; + std::cout << GridLogMessage + << "===================================================================" + "===============" + << std::endl; int nwarm = 10; double t0 = usecond(); @@ -639,7 +734,8 @@ class Benchmark FGrid->Broadcast(0, &ncall, sizeof(ncall)); - // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<({8, 2, 2, 2}); #else @@ -709,9 +820,11 @@ int main(int argc, char **argv) #endif Benchmark::Decomposition(); - int do_su4 = 1; - int do_memory = 1; + int do_su4 = 0; + int do_memory = 0; int do_comms = 1; + int do_flops = 0; + int Ls = 1; int sel = 4; std::vector L_list({8, 12, 16, 24, 32}); @@ -721,84 +834,170 @@ int main(int argc, char **argv) std::vector dwf4; std::vector staggered; - int Ls = 1; - std::cout << GridLogMessage << 
"==================================================================================" << std::endl; - std::cout << GridLogMessage << " Wilson dslash 4D vectorised" << std::endl; - std::cout << GridLogMessage << "==================================================================================" << std::endl; - for (int l = 0; l < L_list.size(); l++) + if (do_flops) { - wilson.push_back(Benchmark::DWF(Ls, L_list[l])); - } + Ls = 1; + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + std::cout << GridLogMessage << " Wilson dslash 4D vectorised" << std::endl; + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + for (int l = 0; l < L_list.size(); l++) + { + wilson.push_back(Benchmark::DWF(Ls, L_list[l])); + } - Ls = 12; - std::cout << GridLogMessage << "==================================================================================" << std::endl; - std::cout << GridLogMessage << " Domain wall dslash 4D vectorised" << std::endl; - std::cout << GridLogMessage << "==================================================================================" << std::endl; - for (int l = 0; l < L_list.size(); l++) - { - double result = Benchmark::DWF(Ls, L_list[l]); - dwf4.push_back(result); - } + Ls = 12; + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + std::cout << GridLogMessage << " Domain wall dslash 4D vectorised" << std::endl; + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + for (int l = 0; l < L_list.size(); l++) + { + double result = Benchmark::DWF(Ls, L_list[l]); + dwf4.push_back(result); + } - std::cout << GridLogMessage << "==================================================================================" << std::endl; - std::cout << GridLogMessage << " Improved Staggered dslash 4D vectorised" << std::endl; - std::cout << GridLogMessage << "==================================================================================" << std::endl; - for (int l = 0; l < L_list.size(); l++) - { - double result = Benchmark::Staggered(L_list[l]); - staggered.push_back(result); - } + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + std::cout << GridLogMessage << " Improved Staggered dslash 4D vectorised" + << std::endl; + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + for (int l = 0; l < L_list.size(); l++) + { + double result = Benchmark::Staggered(L_list[l]); + staggered.push_back(result); + } - std::cout << GridLogMessage << "==================================================================================" << std::endl; - std::cout << GridLogMessage << " Summary table Ls=" << Ls << std::endl; - std::cout << GridLogMessage << "==================================================================================" << std::endl; - std::cout << GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\t Staggered" << std::endl; - for (int l = 0; l < L_list.size(); l++) - { - std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] << " \t\t " << dwf4[l] << " \t\t " << staggered[l] << std::endl; + std::cout + << GridLogMessage + << 
"=========================================================================" + "=========" + << std::endl; + std::cout << GridLogMessage << " Summary table Ls=" << Ls << std::endl; + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + std::cout << GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\t Staggered" << std::endl; + for (int l = 0; l < L_list.size(); l++) + { + std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] << " \t\t " + << dwf4[l] << " \t\t " << staggered[l] << std::endl; + } + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; } - std::cout << GridLogMessage << "==================================================================================" << std::endl; int NN = NN_global; if (do_memory) { - std::cout << GridLogMessage << "==================================================================================" << std::endl; + std::cout << GridLogMessage + << "=======================================================================" + "===========" + << std::endl; std::cout << GridLogMessage << " Memory benchmark " << std::endl; - std::cout << GridLogMessage << "==================================================================================" << std::endl; + std::cout << GridLogMessage + << "=======================================================================" + "===========" + << std::endl; Benchmark::Memory(); } if (do_su4) { - std::cout << GridLogMessage << "==================================================================================" << std::endl; + std::cout << GridLogMessage + << "=======================================================================" + "===========" + << std::endl; std::cout << GridLogMessage << " SU(4) benchmark " << std::endl; - std::cout << GridLogMessage << "==================================================================================" << std::endl; + std::cout << GridLogMessage + << "=======================================================================" + "===========" + << std::endl; Benchmark::SU4(); } if (do_comms) { - std::cout << GridLogMessage << "==================================================================================" << std::endl; + std::cout << GridLogMessage + << "=======================================================================" + "===========" + << std::endl; std::cout << GridLogMessage << " Communications benchmark " << std::endl; - std::cout << GridLogMessage << "==================================================================================" << std::endl; + std::cout << GridLogMessage + << "=======================================================================" + "===========" + << std::endl; Benchmark::Comms(); } - std::cout << GridLogMessage << "==================================================================================" << std::endl; - std::cout << GridLogMessage << " Per Node Summary table Ls=" << Ls << std::endl; - std::cout << GridLogMessage << "==================================================================================" << std::endl; - std::cout << GridLogMessage << " L \t\t Wilson\t\t DWF4\t\t Staggered " << std::endl; - for (int l = 0; l < L_list.size(); l++) + if (do_flops) { - std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] / NN << " \t " << dwf4[l] / NN << " \t " << staggered[l] / NN << std::endl; - } - std::cout << GridLogMessage << 
"==================================================================================" << std::endl; + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + std::cout << GridLogMessage << " Per Node Summary table Ls=" << Ls << std::endl; + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + std::cout << GridLogMessage << " L \t\t Wilson\t\t DWF4\t\t Staggered " << std::endl; + for (int l = 0; l < L_list.size(); l++) + { + std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] / NN << " \t " + << dwf4[l] / NN << " \t " << staggered[l] / NN << std::endl; + } + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; - std::cout << GridLogMessage << "==================================================================================" << std::endl; - std::cout << GridLogMessage << " Comparison point result: " << 0.5 * (dwf4[sel] + dwf4[selm1]) / NN << " Mflop/s per node" << std::endl; - std::cout << GridLogMessage << " Comparison point is 0.5*(" << dwf4[sel] / NN << "+" << dwf4[selm1] / NN << ") " << std::endl; - std::cout << std::setprecision(3); - std::cout << GridLogMessage << "==================================================================================" << std::endl; + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + std::cout << GridLogMessage + << " Comparison point result: " << 0.5 * (dwf4[sel] + dwf4[selm1]) / NN + << " Mflop/s per node" << std::endl; + std::cout << GridLogMessage << " Comparison point is 0.5*(" << dwf4[sel] / NN << "+" + << dwf4[selm1] / NN << ") " << std::endl; + std::cout << std::setprecision(3); + std::cout + << GridLogMessage + << "=========================================================================" + "=========" + << std::endl; + } Grid_finalize(); } diff --git a/Grid/Benchmark_IO.cpp b/Grid/Benchmark_IO.cpp index da6b78f..96ef3e3 100644 --- a/Grid/Benchmark_IO.cpp +++ b/Grid/Benchmark_IO.cpp @@ -32,23 +32,13 @@ along with this program. If not, see . #ifdef HAVE_LIME using namespace Grid; -std::string filestem(const int l) -{ - return "iobench_l" + std::to_string(l); -} +std::string filestem(const int l) { return "iobench_l" + std::to_string(l); } -int vol(const int i) -{ - return BENCH_IO_LMIN + 2 * i; -} +int vol(const int i) { return BENCH_IO_LMIN + 2 * i; } -int volInd(const int l) -{ - return (l - BENCH_IO_LMIN) / 2; -} +int volInd(const int l) { return (l - BENCH_IO_LMIN) / 2; } -template -void stats(Mat &mean, Mat &stdDev, const std::vector &data) +template void stats(Mat &mean, Mat &stdDev, const std::vector &data) { auto nr = data[0].rows(), nc = data[0].cols(); Eigen::MatrixXd sqSum(nr, nc); @@ -66,11 +56,11 @@ void stats(Mat &mean, Mat &stdDev, const std::vector &data) mean /= n; } -#define grid_printf(...) \ - { \ - char _buf[1024]; \ - sprintf(_buf, __VA_ARGS__); \ - MSG << _buf; \ +#define grid_printf(...) \ + { \ + char _buf[1024]; \ + sprintf(_buf, __VA_ARGS__); \ + MSG << _buf; \ } enum @@ -173,47 +163,49 @@ int main(int argc, char **argv) MSG << "SUMMARY" << std::endl; MSG << BIGSEP << std::endl; MSG << "Summary of individual results (all results in MB/s)." << std::endl; - MSG << "Every second colum gives the standard deviation of the previous column." 
<< std::endl; + MSG << "Every second colum gives the standard deviation of the previous column." + << std::endl; MSG << std::endl; - grid_printf("%4s %12s %12s %12s %12s %12s %12s %12s %12s\n", - "L", "std read", "std dev", "std write", "std dev", - "Grid read", "std dev", "Grid write", "std dev"); + grid_printf("%4s %12s %12s %12s %12s %12s %12s %12s %12s\n", "L", "std read", "std dev", + "std write", "std dev", "Grid read", "std dev", "Grid write", "std dev"); for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2) { - grid_printf("%4d %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n", - l, mean(volInd(l), sRead), stdDev(volInd(l), sRead), - mean(volInd(l), sWrite), stdDev(volInd(l), sWrite), - mean(volInd(l), gRead), stdDev(volInd(l), gRead), - mean(volInd(l), gWrite), stdDev(volInd(l), gWrite)); + grid_printf("%4d %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n", l, + mean(volInd(l), sRead), stdDev(volInd(l), sRead), mean(volInd(l), sWrite), + stdDev(volInd(l), sWrite), mean(volInd(l), gRead), + stdDev(volInd(l), gRead), mean(volInd(l), gWrite), + stdDev(volInd(l), gWrite)); } MSG << std::endl; - MSG << "Robustness of individual results, in %. (rob = 100% - std dev / mean)" << std::endl; + MSG << "Robustness of individual results, in %. (rob = 100% - std dev / mean)" + << std::endl; MSG << std::endl; - grid_printf("%4s %12s %12s %12s %12s\n", - "L", "std read", "std write", "Grid read", "Grid write"); + grid_printf("%4s %12s %12s %12s %12s\n", "L", "std read", "std write", "Grid read", + "Grid write"); for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2) { - grid_printf("%4d %12.1f %12.1f %12.1f %12.1f\n", - l, rob(volInd(l), sRead), rob(volInd(l), sWrite), - rob(volInd(l), gRead), rob(volInd(l), gWrite)); + grid_printf("%4d %12.1f %12.1f %12.1f %12.1f\n", l, rob(volInd(l), sRead), + rob(volInd(l), sWrite), rob(volInd(l), gRead), rob(volInd(l), gWrite)); } MSG << std::endl; - MSG << "Summary of results averaged over local volumes 24^4-" << BENCH_IO_LMAX << "^4 (all results in MB/s)." << std::endl; - MSG << "Every second colum gives the standard deviation of the previous column." << std::endl; + MSG << "Summary of results averaged over local volumes 24^4-" << BENCH_IO_LMAX + << "^4 (all results in MB/s)." << std::endl; + MSG << "Every second colum gives the standard deviation of the previous column." + << std::endl; MSG << std::endl; - grid_printf("%12s %12s %12s %12s %12s %12s %12s %12s\n", - "std read", "std dev", "std write", "std dev", - "Grid read", "std dev", "Grid write", "std dev"); - grid_printf("%12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n", - avMean(sRead), avStdDev(sRead), avMean(sWrite), avStdDev(sWrite), - avMean(gRead), avStdDev(gRead), avMean(gWrite), avStdDev(gWrite)); + grid_printf("%12s %12s %12s %12s %12s %12s %12s %12s\n", "std read", "std dev", + "std write", "std dev", "Grid read", "std dev", "Grid write", "std dev"); + grid_printf("%12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n", avMean(sRead), + avStdDev(sRead), avMean(sWrite), avStdDev(sWrite), avMean(gRead), + avStdDev(gRead), avMean(gWrite), avStdDev(gWrite)); MSG << std::endl; - MSG << "Robustness of volume-averaged results, in %. (rob = 100% - std dev / mean)" << std::endl; + MSG << "Robustness of volume-averaged results, in %. 
(rob = 100% - std dev / mean)" + << std::endl; MSG << std::endl; - grid_printf("%12s %12s %12s %12s\n", - "std read", "std write", "Grid read", "Grid write"); - grid_printf("%12.1f %12.1f %12.1f %12.1f\n", - avRob(sRead), avRob(sWrite), avRob(gRead), avRob(gWrite)); + grid_printf("%12s %12s %12s %12s\n", "std read", "std write", "Grid read", + "Grid write"); + grid_printf("%12.1f %12.1f %12.1f %12.1f\n", avRob(sRead), avRob(sWrite), avRob(gRead), + avRob(gWrite)); Grid_finalize(); diff --git a/Grid/Benchmark_IO.hpp b/Grid/Benchmark_IO.hpp index 24e2214..d71e943 100644 --- a/Grid/Benchmark_IO.hpp +++ b/Grid/Benchmark_IO.hpp @@ -20,9 +20,9 @@ along with this program. If not, see . #include #define MSG std::cout << GridLogMessage -#define SEP \ +#define SEP \ "-----------------------------------------------------------------------------" -#define BIGSEP \ +#define BIGSEP \ "=============================================================================" #ifdef HAVE_LIME @@ -36,16 +36,15 @@ namespace Grid // AP 06/10/2020: Standard C version in case one is suspicious of the C++ API // - // template - // void stdWrite(const std::string filestem, Field &vec) + // template void stdWrite(const std::string filestem, Field &vec) // { - // std::string rankStr = std::to_string(vec.Grid()->ThisRank()); - // std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "wb"); - // size_t size; - // uint32_t crc; + // std::string rankStr = std::to_string(vec.Grid()->ThisRank()); + // std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "wb"); + // size_t size; + // uint32_t crc; // GridStopWatch ioWatch, crcWatch; - // size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object); + // size = vec.Grid()->lSites() * sizeof(typename Field::scalar_object); // autoView(vec_v, vec, CpuRead); // crcWatch.Start(); // crc = GridChecksum::crc32(vec_v.cpu_ptr, size); @@ -53,36 +52,39 @@ namespace Grid // crcWatch.Stop(); // MSG << "Std I/O write: Data CRC32 " << std::hex << crc << std::dec << std::endl; // ioWatch.Start(); - // std::fwrite(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file); + // std::fwrite(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), + // vec.Grid()->lSites(), file); // ioWatch.Stop(); // std::fclose(file); // size *= vec.Grid()->ProcessorCount(); // auto &p = BinaryIO::lastPerf; - // p.size = size; - // p.time = ioWatch.useconds(); - // p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6); + // p.size = size; + // p.time = ioWatch.useconds(); + // p.mbytesPerSecond = size / 1024. / 1024. / (ioWatch.useconds() / 1.e6); // MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed() - // << ", " << p.mbytesPerSecond << " MB/s" << std::endl; + // << ", + // " + // << p.mbytesPerSecond << " MB/s" << std::endl; // MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl; // } - // - // template - // void stdRead(Field &vec, const std::string filestem) + + // template void stdRead(Field &vec, const std::string filestem) // { - // std::string rankStr = std::to_string(vec.Grid()->ThisRank()); - // std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "rb"); - // size_t size; - // uint32_t crcRead, crcData; + // std::string rankStr = std::to_string(vec.Grid()->ThisRank()); + // std::FILE *file = std::fopen((filestem + "." 
+ rankStr + ".bin").c_str(), "rb"); + // size_t size; + // uint32_t crcRead, crcData; // GridStopWatch ioWatch, crcWatch; - // size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object); + // size = vec.Grid()->lSites() * sizeof(typename Field::scalar_object); // crcWatch.Start(); // std::fread(&crcRead, sizeof(uint32_t), 1, file); // crcWatch.Stop(); // { // autoView(vec_v, vec, CpuWrite); // ioWatch.Start(); - // std::fread(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file); + // std::fread(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), + // vec.Grid()->lSites(), file); // ioWatch.Stop(); // std::fclose(file); // } @@ -96,19 +98,19 @@ namespace Grid // assert(crcData == crcRead); // size *= vec.Grid()->ProcessorCount(); // auto &p = BinaryIO::lastPerf; - // p.size = size; - // p.time = ioWatch.useconds(); - // p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6); - // MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed() - // << ", " << p.mbytesPerSecond << " MB/s" << std::endl; + // p.size = size; + // p.time = ioWatch.useconds(); + // p.mbytesPerSecond = size / 1024. / 1024. / (ioWatch.useconds() / 1.e6); + // MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed() << ", " + // << p.mbytesPerSecond << " MB/s" << std::endl; // MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl; // } - template - void stdWrite(const std::string filestem, Field &vec) + template void stdWrite(const std::string filestem, Field &vec) { std::string rankStr = std::to_string(vec.Grid()->ThisRank()); - std::ofstream file(filestem + "." + rankStr + ".bin", std::ios::out | std::ios::binary); + std::ofstream file(filestem + "." + rankStr + ".bin", + std::ios::out | std::ios::binary); size_t size, sizec; uint32_t crc; GridStopWatch ioWatch, crcWatch; @@ -130,16 +132,16 @@ namespace Grid p.size = size; p.time = ioWatch.useconds(); p.mbytesPerSecond = size / 1024. / 1024. / (ioWatch.useconds() / 1.e6); - MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed() - << ", " << p.mbytesPerSecond << " MB/s" << std::endl; + MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed() << ", " + << p.mbytesPerSecond << " MB/s" << std::endl; MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl; } - template - void stdRead(Field &vec, const std::string filestem) + template void stdRead(Field &vec, const std::string filestem) { std::string rankStr = std::to_string(vec.Grid()->ThisRank()); - std::ifstream file(filestem + "." + rankStr + ".bin", std::ios::in | std::ios::binary); + std::ifstream file(filestem + "." + rankStr + ".bin", + std::ios::in | std::ios::binary); size_t size, sizec; uint32_t crcRead, crcData; GridStopWatch ioWatch, crcWatch; @@ -168,13 +170,12 @@ namespace Grid p.size = size; p.time = ioWatch.useconds(); p.mbytesPerSecond = size / 1024. / 1024. 
/ (ioWatch.useconds() / 1.e6); - MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed() - << ", " << p.mbytesPerSecond << " MB/s" << std::endl; + MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed() << ", " + << p.mbytesPerSecond << " MB/s" << std::endl; MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl; } - template - void limeWrite(const std::string filestem, Field &vec) + template void limeWrite(const std::string filestem, Field &vec) { emptyUserRecord record; ScidacWriter binWriter(vec.Grid()->IsBoss()); @@ -184,8 +185,7 @@ namespace Grid binWriter.close(); } - template - void limeRead(Field &vec, const std::string filestem) + template void limeRead(Field &vec, const std::string filestem) { emptyUserRecord record; ScidacReader binReader; @@ -225,12 +225,13 @@ namespace Grid template void writeBenchmark(const Coordinate &latt, const std::string filename, - const WriterFn &write, - const unsigned int Ls = 1, const bool rb = false) + const WriterFn &write, const unsigned int Ls = 1, + const bool rb = false) { auto mpi = GridDefaultMpi(); auto simd = GridDefaultSimd(latt.size(), Field::vector_type::Nsimd()); - std::shared_ptr gBasePt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi)); + std::shared_ptr gBasePt( + SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi)); std::shared_ptr gPt; std::random_device rd; @@ -251,12 +252,13 @@ namespace Grid template void readBenchmark(const Coordinate &latt, const std::string filename, - const ReaderFn &read, - const unsigned int Ls = 1, const bool rb = false) + const ReaderFn &read, const unsigned int Ls = 1, + const bool rb = false) { auto mpi = GridDefaultMpi(); auto simd = GridDefaultSimd(latt.size(), Field::vector_type::Nsimd()); - std::shared_ptr gBasePt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi)); + std::shared_ptr gBasePt( + SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi)); std::shared_ptr gPt; makeGrid(gPt, gBasePt, Ls, rb); diff --git a/Grid/Benchmark_comms_host_device.cpp b/Grid/Benchmark_comms_host_device.cpp index 5a4aae4..e213859 100644 --- a/Grid/Benchmark_comms_host_device.cpp +++ b/Grid/Benchmark_comms_host_device.cpp @@ -34,8 +34,7 @@ struct time_statistics mean = sum / v.size(); std::vector diff(v.size()); - std::transform(v.begin(), v.end(), diff.begin(), [=](double x) - { return x - mean; }); + std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; }); double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); err = std::sqrt(sq_sum / (v.size() * (v.size() - 1))); @@ -50,8 +49,7 @@ void header() std::cout << GridLogMessage << " L " << "\t" << " Ls " - << "\t" - << std::setw(11) << "bytes\t\t" + << "\t" << std::setw(11) << "bytes\t\t" << "MB/s uni" << "\t" << "MB/s bidi" << std::endl; @@ -64,7 +62,8 @@ int main(int argc, char **argv) Coordinate simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd()); Coordinate mpi_layout = GridDefaultMpi(); int threads = GridThread::GetThreads(); - std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" + << std::endl; int Nloop = 250; int nmu = 0; @@ -73,13 +72,21 @@ int main(int argc, char **argv) if (mpi_layout[mu] > 1) nmu++; - std::cout << GridLogMessage << "Number of iterations to average: " << Nloop << std::endl; + std::cout << GridLogMessage << "Number of iterations to average: " << Nloop + << std::endl; std::vector t_time(Nloop); // time_statistics 
timestat; - std::cout << GridLogMessage << "====================================================================================================" << std::endl; - std::cout << GridLogMessage << "= Benchmarking sequential halo exchange from host memory " << std::endl; - std::cout << GridLogMessage << "====================================================================================================" << std::endl; + std::cout << GridLogMessage + << "=========================================================================" + "===========================" + << std::endl; + std::cout << GridLogMessage + << "= Benchmarking sequential halo exchange from host memory " << std::endl; + std::cout << GridLogMessage + << "=========================================================================" + "===========================" + << std::endl; header(); for (int lat = 8; lat <= maxlat; lat += 4) @@ -87,9 +94,7 @@ int main(int argc, char **argv) for (int Ls = 8; Ls <= 8; Ls *= 2) { - Coordinate latt_size({lat * mpi_layout[0], - lat * mpi_layout[1], - lat * mpi_layout[2], + Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2], lat * mpi_layout[3]}); GridCartesian Grid(latt_size, simd_layout, mpi_layout); @@ -127,22 +132,16 @@ int main(int argc, char **argv) { std::vector requests; Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); - Grid.SendToRecvFrom((void *)&xbuf[mu][0], - xmit_to_rank, - (void *)&rbuf[mu][0], - recv_from_rank, - bytes); + Grid.SendToRecvFrom((void *)&xbuf[mu][0], xmit_to_rank, + (void *)&rbuf[mu][0], recv_from_rank, bytes); } comm_proc = mpi_layout[mu] - 1; { std::vector requests; Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); - Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], - xmit_to_rank, - (void *)&rbuf[mu + 4][0], - recv_from_rank, - bytes); + Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], xmit_to_rank, + (void *)&rbuf[mu + 4][0], recv_from_rank, bytes); } } Grid.Barrier(); @@ -154,17 +153,24 @@ int main(int argc, char **argv) double bidibytes = xbytes + rbytes; std::cout << GridLogMessage << std::setw(4) << lat << "\t" << Ls << "\t" - << std::setw(11) << bytes << std::fixed << std::setprecision(1) << std::setw(7) << " " - << std::right << xbytes / mean << " " + << std::setw(11) << bytes << std::fixed << std::setprecision(1) + << std::setw(7) << " " << std::right << xbytes / mean << " " << "\t\t" << std::setw(7) << bidibytes / mean << std::endl; } } } } - std::cout << GridLogMessage << "====================================================================================================" << std::endl; - std::cout << GridLogMessage << "= Benchmarking sequential halo exchange from GPU memory " << std::endl; - std::cout << GridLogMessage << "====================================================================================================" << std::endl; + std::cout << GridLogMessage + << "=========================================================================" + "===========================" + << std::endl; + std::cout << GridLogMessage + << "= Benchmarking sequential halo exchange from GPU memory " << std::endl; + std::cout << GridLogMessage + << "=========================================================================" + "===========================" + << std::endl; header(); for (int lat = 8; lat <= maxlat; lat += 4) @@ -172,9 +178,7 @@ int main(int argc, char **argv) for (int Ls = 8; Ls <= 8; Ls *= 2) { - Coordinate latt_size({lat * mpi_layout[0], - lat * mpi_layout[1], - lat * mpi_layout[2], + Coordinate 
latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2], lat * mpi_layout[3]}); GridCartesian Grid(latt_size, simd_layout, mpi_layout); @@ -212,22 +216,16 @@ int main(int argc, char **argv) { std::vector requests; Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); - Grid.SendToRecvFrom((void *)&xbuf[mu][0], - xmit_to_rank, - (void *)&rbuf[mu][0], - recv_from_rank, - bytes); + Grid.SendToRecvFrom((void *)&xbuf[mu][0], xmit_to_rank, + (void *)&rbuf[mu][0], recv_from_rank, bytes); } comm_proc = mpi_layout[mu] - 1; { std::vector requests; Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); - Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], - xmit_to_rank, - (void *)&rbuf[mu + 4][0], - recv_from_rank, - bytes); + Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], xmit_to_rank, + (void *)&rbuf[mu + 4][0], recv_from_rank, bytes); } } Grid.Barrier(); @@ -239,8 +237,8 @@ int main(int argc, char **argv) double bidibytes = xbytes + rbytes; std::cout << GridLogMessage << std::setw(4) << lat << "\t" << Ls << "\t" - << std::setw(11) << bytes << std::fixed << std::setprecision(1) << std::setw(7) << " " - << std::right << xbytes / mean << " " + << std::setw(11) << bytes << std::fixed << std::setprecision(1) + << std::setw(7) << " " << std::right << xbytes / mean << " " << "\t\t" << std::setw(7) << bidibytes / mean << std::endl; } } @@ -253,9 +251,15 @@ int main(int argc, char **argv) } } - std::cout << GridLogMessage << "====================================================================================================" << std::endl; + std::cout << GridLogMessage + << "=========================================================================" + "===========================" + << std::endl; std::cout << GridLogMessage << "= All done; Bye Bye" << std::endl; - std::cout << GridLogMessage << "====================================================================================================" << std::endl; + std::cout << GridLogMessage + << "=========================================================================" + "===========================" + << std::endl; Grid_finalize(); } diff --git a/Grid/Benchmark_dwf_fp32.cpp b/Grid/Benchmark_dwf_fp32.cpp index ee2dd2d..c0fcf7c 100644 --- a/Grid/Benchmark_dwf_fp32.cpp +++ b/Grid/Benchmark_dwf_fp32.cpp @@ -30,8 +30,7 @@ along with this program. If not, see . 
using namespace std; using namespace Grid; -template <class d> -struct scal +template <class d> struct scal { d internal; }; @@ -69,13 +68,11 @@ int main(int argc, char **argv) json["single_site_flops"] = single_site_flops; GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid( - GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), - GridDefaultMpi()); - GridRedBlackCartesian *UrbGrid = - SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi()); + GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid); - GridRedBlackCartesian *FrbGrid = - SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid); + GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid); json["grid"] = FGrid->FullDimensions().toVector(); json["local_grid"] = FGrid->LocalDimensions().toVector(); @@ -83,11 +80,10 @@ std::cout << GridLogMessage << "Making s innermost grids" << std::endl; GridCartesian *sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(), GridDefaultMpi()); - GridRedBlackCartesian *sUrbGrid = - SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid); + + GridRedBlackCartesian *sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid); GridCartesian *sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls, UGrid); - GridRedBlackCartesian *sFrbGrid = - SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls, UGrid); + GridRedBlackCartesian *sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls, UGrid); std::vector<int> seeds4({1, 2, 3, 4}); std::vector<int> seeds5({5, 6, 7, 8}); @@ -150,8 +146,7 @@ int main(int argc, char **argv) { U[mu] = PeekIndex<LorentzIndex>(Umu, mu); } - std::cout << GridLogMessage << "Setting up Cshift based reference " - << std::endl; + std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl; if (1) { @@ -200,54 +195,45 @@ int main(int argc, char **argv) json["ranks"] = NP; json["nodes"] = NN; - std::cout - << GridLogMessage - << "*****************************************************************" - << std::endl; - std::cout - << GridLogMessage - << "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" - << std::endl; - std::cout - << GridLogMessage - << "*****************************************************************" - << std::endl; - std::cout - << GridLogMessage - << "*****************************************************************" - << std::endl; std::cout << GridLogMessage - << "* Benchmarking DomainWallFermionR::Dhop " + << "*****************************************************************" << std::endl; - std::cout << GridLogMessage << "* Vectorising space-time by " - << vComplexF::Nsimd() << std::endl; - std::cout << GridLogMessage << "* VComplexF size is " << sizeof(vComplexF) - << " B" << std::endl; + std::cout << GridLogMessage + << "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" + << std::endl; + std::cout << GridLogMessage + << "*****************************************************************" + << std::endl; + std::cout << GridLogMessage + << "*****************************************************************" + << std::endl; + std::cout << GridLogMessage + << "* Benchmarking DomainWallFermionR::Dhop " << std::endl; + std::cout << GridLogMessage << "* Vectorising space-time by " << vComplexF::Nsimd() + << std::endl; + std::cout << GridLogMessage << "* VComplexF size is " << sizeof(vComplexF) << " B" + << std::endl; + if (sizeof(RealF) == 4) std::cout <<
GridLogMessage << "* SINGLE precision " << std::endl; if (sizeof(RealF) == 8) std::cout << GridLogMessage << "* DOUBLE precision " << std::endl; #ifdef GRID_OMP if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute) - std::cout << GridLogMessage << "* Using Overlapped Comms/Compute" - << std::endl; + std::cout << GridLogMessage << "* Using Overlapped Comms/Compute" << std::endl; if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) - std::cout << GridLogMessage << "* Using sequential comms compute" - << std::endl; + std::cout << GridLogMessage << "* Using sequential comms compute" << std::endl; #endif if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric) - std::cout << GridLogMessage << "* Using GENERIC Nc WilsonKernels" - << std::endl; + std::cout << GridLogMessage << "* Using GENERIC Nc WilsonKernels" << std::endl; if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) - std::cout << GridLogMessage << "* Using Nc=3 WilsonKernels" - << std::endl; + std::cout << GridLogMessage << "* Using Nc=3 WilsonKernels" << std::endl; if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm) - std::cout << GridLogMessage << "* Using Asm Nc=3 WilsonKernels" - << std::endl; - std::cout - << GridLogMessage - << "*****************************************************************" - << std::endl; + + std::cout << GridLogMessage << "* Using Asm Nc=3 WilsonKernels" << std::endl; + std::cout << GridLogMessage + << "*****************************************************************" + << std::endl; DomainWallFermionF Dw(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5); int ncall = 300; @@ -277,16 +263,13 @@ int main(int argc, char **argv) auto simdwidth = sizeof(vComplex); // RF: Nd Wilson * Ls, Nd gauge * Ls, Nc colors - double data_rf = volume * ((2 * Nd + 1) * Nd * Nc + 2 * Nd * Nc * Nc) * - simdwidth / nsimd * ncall / (1024. * 1024. * 1024.); + double data_rf = volume * ((2 * Nd + 1) * Nd * Nc + 2 * Nd * Nc * Nc) * simdwidth / + nsimd * ncall / (1024. * 1024. * 1024.); // mem: Nd Wilson * Ls, Nd gauge, Nc colors double data_mem = - (volume * (2 * Nd + 1) * Nd * Nc + (volume / Ls) * 2 * Nd * Nc * Nc) * - simdwidth / nsimd * ncall / (1024. * 1024. * 1024.); - - std::cout << GridLogMessage << "Called Dw " << ncall << " times in " - << t1 - t0 << " us" << std::endl; + (volume * (2 * Nd + 1) * Nd * Nc + (volume / Ls) * 2 * Nd * Nc * Nc) * simdwidth / + nsimd * ncall / (1024. * 1024. * 1024.); json["Dw"]["calls"] = ncall; json["Dw"]["time"] = t1 - t0; @@ -296,15 +279,16 @@ int main(int argc, char **argv) json["Dw"]["RF"] = 1000000. * data_rf / ((t1 - t0)); json["Dw"]["mem"] = 1000000. 
* data_mem / ((t1 - t0)); - // std::cout<Barrier(); @@ -478,12 +455,12 @@ int main(int argc, char **argv) json["Deo"]["mflops_per_rank"] = flops / (t1 - t0) / NP; json["Deo"]["mflops_per_node"] = flops / (t1 - t0) / NN; - std::cout << GridLogMessage << "Deo mflop/s = " << flops / (t1 - t0) + std::cout << GridLogMessage << "Deo mflop/s = " << flops / (t1 - t0) << std::endl; + std::cout << GridLogMessage << "Deo mflop/s per rank " << flops / (t1 - t0) / NP << std::endl; - std::cout << GridLogMessage << "Deo mflop/s per rank " - << flops / (t1 - t0) / NP << std::endl; - std::cout << GridLogMessage << "Deo mflop/s per node " - << flops / (t1 - t0) / NN << std::endl; + std::cout << GridLogMessage << "Deo mflop/s per node " << flops / (t1 - t0) / NN + << std::endl; + Dw.Report(); } Dw.DhopEO(src_o, r_e, DaggerNo); @@ -510,18 +487,16 @@ int main(int argc, char **argv) pickCheckerboard(Even, src_e, err); pickCheckerboard(Odd, src_o, err); - std::cout << GridLogMessage << "norm diff even " << norm2(src_e) - << std::endl; - std::cout << GridLogMessage << "norm diff odd " << norm2(src_o) - << std::endl; + std::cout << GridLogMessage << "norm diff even " << norm2(src_e) << std::endl; + std::cout << GridLogMessage << "norm diff odd " << norm2(src_o) << std::endl; assert(norm2(src_e) < 1.0e-4); assert(norm2(src_o) < 1.0e-4); if (!json_filename.empty()) { - std::cout << GridLogMessage << "writing benchmark results to " - << json_filename << std::endl; + std::cout << GridLogMessage << "writing benchmark results to " << json_filename + << std::endl; int me = 0; MPI_Comm_rank(MPI_COMM_WORLD, &me); diff --git a/Grid/Common.hpp b/Grid/Common.hpp new file mode 100644 index 0000000..ccae02c --- /dev/null +++ b/Grid/Common.hpp @@ -0,0 +1,36 @@ +/* +Copyright © 2022 Antonin Portelli + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +#ifndef Grid_Benchmarks_Common_hpp_ +#define Grid_Benchmarks_Common_hpp_ + +#ifndef GRID_MSG +#define GRID_MSG std::cout << GridLogMessage +#endif + +#ifndef GRID_MSG_MAXSIZE +#define GRID_MSG_MAXSIZE 1024 +#endif + +#define grid_printf(...) \ + { \ + char _buf[GRID_MSG_MAXSIZE]; \ + snprintf(_buf, GRID_MSG_MAXSIZE, __VA_ARGS__); \ + GRID_MSG << _buf; \ + } + +#endif // Grid_Benchmarks_Common_hpp_ diff --git a/Grid/Readme.md b/Grid/Readme.md new file mode 100644 index 0000000..10d6bbd --- /dev/null +++ b/Grid/Readme.md @@ -0,0 +1,69 @@ +# Grid benchmarks + +This folder contains benchmarks for the [Grid](https://github.com/aportelli/) library. +The benchmarks can be summarised as follows: + +- `Benchmark_Grid`: This benchmark measures floating-point performance for various fermion +matrices, as well as bandwidth measurements for different operations. Measurements are +performed for a fixed range of problem sizes.
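+ +For illustration, an invocation might look as follows (`--grid`, `--mpi`, and `--threads` +are standard Grid command-line options, not specific to this benchmark; the volume and +MPI decomposition below are placeholders to adapt to your machine): +```bash +mpirun -np 4 ./Benchmark_Grid --grid 32.32.32.32 --mpi 2.1.1.2 --threads 8 +```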
+ +## TL;DR +Build and install Grid, all dependencies, and the benchmark with +```bash +systems/<system>/bootstrap-env.sh <env_dir> # build dependencies, takes a long time +./build-grid.sh <env_dir> <config> # build Grid +./build-benchmark.sh <env_dir> <config> # build benchmarks +``` +where `<env_dir>` is an arbitrary directory where every product will be stored, `<system>` +is a sub-directory of `systems` containing system-specific scripts +(an existing preset or your own), and finally `<config>` is the name of a build config +in `systems/<system>/grid-config.json`. After a successful execution the benchmark binaries +will be in `<env_dir>/prefix/gridbench_<config>`. + +## Environment setup +A complete runtime environment can be deployed using scripts from this repository. System-specific scripts are in the `systems` directory. + +You should first deploy the environment for the specific system you are using, for example +```bash +systems/tursa/bootstrap-env.sh ./env +``` +will deploy the relevant environment for the [Tursa](https://www.epcc.ed.ac.uk/hpc-services/dirac-tursa-gpu) supercomputer in `./env`. This step might compile a large set +of packages from source, and might take some time to complete. + +After that, the environment directory (`./env` in the example above) will contain an `env.sh` file that needs to be sourced to activate the environment +```bash +source ./env/env.sh +``` +Additional `env-*.sh` scripts can be sourced afterwards to activate more specific environments; +this should always be done after sourcing `env.sh` as above. + +## Building the benchmarks +The environment directory contains a `grid-config.json` file specifying compilation flag +configurations for Grid (please see Grid's repository for documentation). All entries have +the form +```json +{ + "name": "foo", // name of the configuration + "env-script": "bar.sh", // script to source before building + // (path relative to the environment directory) + "commit": "...", // Grid commit to use + // (anything that can be an argument of git checkout) + "config-options": "...", // options to pass to the configure script + "env": { // environment variables + "VAR": "value" // export VAR="value" before building + } +} +``` +Grid can then be built with +``` +./build-grid.sh <env_dir> <config> +``` +where `<env_dir>` is the environment directory and `<config>` is the build config name in +`grid-config.json`. Similarly, the benchmarks can then be built with +``` +./build-benchmark.sh <env_dir> <config> +``` + +## Running the benchmarks +After building the benchmarks as above, you can find the binaries in +`<env_dir>/prefix/gridbench_<config>`. \ No newline at end of file diff --git a/Grid/build-benchmark.sh b/Grid/build-benchmark.sh index 4b973d5..92ce696 100755 --- a/Grid/build-benchmark.sh +++ b/Grid/build-benchmark.sh @@ -16,18 +16,16 @@ cd "${env_dir}" env_dir=$(pwd -P) cd "${call_dir}" build_dir="${env_dir}/build/Grid-benchmarks/${cfg}" -if [ -d "${build_dir}" ]; then - echo "error: directory '${build_dir}' exists" - exit 1 -fi mkdir -p "${build_dir}" source "${env_dir}/env.sh" entry=$(jq ".configs[]|select(.name==\"${cfg}\")" "${env_dir}"/grid-config.json) env_script=$(echo "${entry}" | jq -r ".\"env-script\"") cd "${build_dir}" || return source "${env_dir}/${env_script}" -"${script_dir}/configure" --with-grid="${env_dir}/prefix/grid_${cfg}" \ - --prefix="${env_dir}/prefix/gridbench_${cfg}" +if [ ! 
-f Makefile ]; then + "${script_dir}/configure" --with-grid="${env_dir}/prefix/grid_${cfg}" \ + --prefix="${env_dir}/prefix/gridbench_${cfg}" +fi make -j 128 make install cd "${call_dir}" diff --git a/Readme.md b/Readme.md new file mode 100644 index 0000000..b3aefae --- /dev/null +++ b/Readme.md @@ -0,0 +1,8 @@ +# Lattice benchmarks + +This repository is an attempt at packaging benchmarks for various libraries used for +lattice field theory simulations. It currently features only the Grid library, but +more will be added later. + +Libraries: +- [Grid](https://github.com/aportelli/) - [Documentation](Grid/Readme.md) \ No newline at end of file
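To make the TL;DR in `Grid/Readme.md` concrete, here is a hypothetical end-to-end session. It uses the `tursa` system preset mentioned above and the illustrative config name `foo` from the `grid-config.json` example; both values must be adapted to your setup:
```bash
# deploy the runtime environment into ./env (compiles dependencies, takes a long time)
systems/tursa/bootstrap-env.sh ./env

# build Grid, then the benchmarks, for the build config named "foo"
./build-grid.sh ./env foo
./build-benchmark.sh ./env foo

# the benchmark binaries are then installed under ./env/prefix/gridbench_foo
```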