diff --git a/Grid/.clang-format b/Grid/.clang-format index 9156d50..9d54a25 100644 --- a/Grid/.clang-format +++ b/Grid/.clang-format @@ -6,7 +6,7 @@ BreakBeforeBraces: Allman, AllowShortIfStatementsOnASingleLine: false, IndentCaseLabels: false, - ColumnLimit: 0, + ColumnLimit: 90, AccessModifierOffset: -4, NamespaceIndentation: All, FixNamespaceComments: false, diff --git a/Grid/Benchmark_Grid.cpp b/Grid/Benchmark_Grid.cpp index fd2056e..1495d13 100644 --- a/Grid/Benchmark_Grid.cpp +++ b/Grid/Benchmark_Grid.cpp @@ -122,87 +122,82 @@ class Benchmark for (int lat = 16; lat <= maxlat; lat += 8) { - // for(int Ls=8;Ls<=8;Ls*=2){ + int Ls = 12; + + Coordinate latt_size({lat * mpi_layout[0], + lat * mpi_layout[1], + lat * mpi_layout[2], + lat * mpi_layout[3]}); + + GridCartesian Grid(latt_size, simd_layout, mpi_layout); + RealD Nrank = Grid._Nprocessors; + RealD Nnode = Grid.NodeCount(); + RealD ppn = Nrank / Nnode; + + std::vector xbuf(8); + std::vector rbuf(8); + uint64_t bytes = lat * lat * lat * Ls * sizeof(HalfSpinColourVectorD); + for (int d = 0; d < 8; d++) { - int Ls = 12; + xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); + rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); + // bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + // bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + } - Coordinate latt_size({lat * mpi_layout[0], - lat * mpi_layout[1], - lat * mpi_layout[2], - lat * mpi_layout[3]}); + double dbytes; - GridCartesian Grid(latt_size, simd_layout, mpi_layout); - RealD Nrank = Grid._Nprocessors; - RealD Nnode = Grid.NodeCount(); - RealD ppn = Nrank / Nnode; - - std::vector xbuf(8); - std::vector rbuf(8); - // Grid.ShmBufferFreeAll(); - uint64_t bytes = lat * lat * lat * Ls * sizeof(HalfSpinColourVectorD); - for (int d = 0; d < 8; d++) + for (int dir = 0; dir < 8; dir++) + { + int mu = dir % 4; + if (mpi_layout[mu] > 1) { - xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); - rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); - // bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); - // bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); - } - // int ncomm; - double dbytes; - - for (int dir = 0; dir < 8; dir++) - { - int mu = dir % 4; - if (mpi_layout[mu] > 1) + std::vector times(Nloop); + for (int i = 0; i < Nloop; i++) { - std::vector times(Nloop); - for (int i = 0; i < Nloop; i++) + dbytes = 0; + double start = usecond(); + int xmit_to_rank; + int recv_from_rank; + + if (dir == mu) { - - dbytes = 0; - double start = usecond(); - int xmit_to_rank; - int recv_from_rank; - - if (dir == mu) - { - int comm_proc = 1; - Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); - } - else - { - int comm_proc = mpi_layout[mu] - 1; - Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); - } - Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, - (void *)&rbuf[dir][0], recv_from_rank, - bytes); - dbytes += bytes; - - double stop = usecond(); - t_time[i] = stop - start; // microseconds + int comm_proc = 1; + Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); } - timestat.statistics(t_time); + else + { + int comm_proc = mpi_layout[mu] - 1; + Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); + } + Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, + (void *)&rbuf[dir][0], recv_from_rank, + bytes); + dbytes += bytes; - dbytes = dbytes * ppn; - double xbytes = dbytes * 0.5; - double bidibytes = dbytes; - - std::cout << GridLogMessage << lat << "\t" << Ls << "\t " - << bytes << " \t " - << xbytes / timestat.mean << " \t " << xbytes * timestat.err / (timestat.mean * timestat.mean) << " \t " - << xbytes / timestat.max << " " << xbytes / timestat.min - << "\t\t" << bidibytes / timestat.mean << " " << bidibytes * timestat.err / (timestat.mean * timestat.mean) << " " - << bidibytes / timestat.max << " " << bidibytes / timestat.min << std::endl; + double stop = usecond(); + t_time[i] = stop - start; // microseconds } + timestat.statistics(t_time); + + dbytes = dbytes * ppn; + double xbytes = dbytes * 0.5; + double bidibytes = dbytes; + + std::cout << GridLogMessage << lat << "\t" << Ls << "\t " + << bytes << " \t " + << xbytes / timestat.mean << " \t " << xbytes * timestat.err / (timestat.mean * timestat.mean) << " \t " + << xbytes / timestat.max << " " << xbytes / timestat.min + << "\t\t" << bidibytes / timestat.mean << " " << bidibytes * timestat.err / (timestat.mean * timestat.mean) << " " + << bidibytes / timestat.max << " " << bidibytes / timestat.min << std::endl; } - for (int d = 0; d < 8; d++) - { - acceleratorFreeDevice(xbuf[d]); - acceleratorFreeDevice(rbuf[d]); - } + } + for (int d = 0; d < 8; d++) + { + acceleratorFreeDevice(xbuf[d]); + acceleratorFreeDevice(rbuf[d]); } } return; diff --git a/Grid/Benchmark_IO.cpp b/Grid/Benchmark_IO.cpp index da6b78f..96ef3e3 100644 --- a/Grid/Benchmark_IO.cpp +++ b/Grid/Benchmark_IO.cpp @@ -32,23 +32,13 @@ along with this program. If not, see . #ifdef HAVE_LIME using namespace Grid; -std::string filestem(const int l) -{ - return "iobench_l" + std::to_string(l); -} +std::string filestem(const int l) { return "iobench_l" + std::to_string(l); } -int vol(const int i) -{ - return BENCH_IO_LMIN + 2 * i; -} +int vol(const int i) { return BENCH_IO_LMIN + 2 * i; } -int volInd(const int l) -{ - return (l - BENCH_IO_LMIN) / 2; -} +int volInd(const int l) { return (l - BENCH_IO_LMIN) / 2; } -template -void stats(Mat &mean, Mat &stdDev, const std::vector &data) +template void stats(Mat &mean, Mat &stdDev, const std::vector &data) { auto nr = data[0].rows(), nc = data[0].cols(); Eigen::MatrixXd sqSum(nr, nc); @@ -66,11 +56,11 @@ void stats(Mat &mean, Mat &stdDev, const std::vector &data) mean /= n; } -#define grid_printf(...) \ - { \ - char _buf[1024]; \ - sprintf(_buf, __VA_ARGS__); \ - MSG << _buf; \ +#define grid_printf(...) \ + { \ + char _buf[1024]; \ + sprintf(_buf, __VA_ARGS__); \ + MSG << _buf; \ } enum @@ -173,47 +163,49 @@ int main(int argc, char **argv) MSG << "SUMMARY" << std::endl; MSG << BIGSEP << std::endl; MSG << "Summary of individual results (all results in MB/s)." << std::endl; - MSG << "Every second colum gives the standard deviation of the previous column." << std::endl; + MSG << "Every second colum gives the standard deviation of the previous column." + << std::endl; MSG << std::endl; - grid_printf("%4s %12s %12s %12s %12s %12s %12s %12s %12s\n", - "L", "std read", "std dev", "std write", "std dev", - "Grid read", "std dev", "Grid write", "std dev"); + grid_printf("%4s %12s %12s %12s %12s %12s %12s %12s %12s\n", "L", "std read", "std dev", + "std write", "std dev", "Grid read", "std dev", "Grid write", "std dev"); for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2) { - grid_printf("%4d %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n", - l, mean(volInd(l), sRead), stdDev(volInd(l), sRead), - mean(volInd(l), sWrite), stdDev(volInd(l), sWrite), - mean(volInd(l), gRead), stdDev(volInd(l), gRead), - mean(volInd(l), gWrite), stdDev(volInd(l), gWrite)); + grid_printf("%4d %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n", l, + mean(volInd(l), sRead), stdDev(volInd(l), sRead), mean(volInd(l), sWrite), + stdDev(volInd(l), sWrite), mean(volInd(l), gRead), + stdDev(volInd(l), gRead), mean(volInd(l), gWrite), + stdDev(volInd(l), gWrite)); } MSG << std::endl; - MSG << "Robustness of individual results, in %. (rob = 100% - std dev / mean)" << std::endl; + MSG << "Robustness of individual results, in %. (rob = 100% - std dev / mean)" + << std::endl; MSG << std::endl; - grid_printf("%4s %12s %12s %12s %12s\n", - "L", "std read", "std write", "Grid read", "Grid write"); + grid_printf("%4s %12s %12s %12s %12s\n", "L", "std read", "std write", "Grid read", + "Grid write"); for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2) { - grid_printf("%4d %12.1f %12.1f %12.1f %12.1f\n", - l, rob(volInd(l), sRead), rob(volInd(l), sWrite), - rob(volInd(l), gRead), rob(volInd(l), gWrite)); + grid_printf("%4d %12.1f %12.1f %12.1f %12.1f\n", l, rob(volInd(l), sRead), + rob(volInd(l), sWrite), rob(volInd(l), gRead), rob(volInd(l), gWrite)); } MSG << std::endl; - MSG << "Summary of results averaged over local volumes 24^4-" << BENCH_IO_LMAX << "^4 (all results in MB/s)." << std::endl; - MSG << "Every second colum gives the standard deviation of the previous column." << std::endl; + MSG << "Summary of results averaged over local volumes 24^4-" << BENCH_IO_LMAX + << "^4 (all results in MB/s)." << std::endl; + MSG << "Every second colum gives the standard deviation of the previous column." + << std::endl; MSG << std::endl; - grid_printf("%12s %12s %12s %12s %12s %12s %12s %12s\n", - "std read", "std dev", "std write", "std dev", - "Grid read", "std dev", "Grid write", "std dev"); - grid_printf("%12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n", - avMean(sRead), avStdDev(sRead), avMean(sWrite), avStdDev(sWrite), - avMean(gRead), avStdDev(gRead), avMean(gWrite), avStdDev(gWrite)); + grid_printf("%12s %12s %12s %12s %12s %12s %12s %12s\n", "std read", "std dev", + "std write", "std dev", "Grid read", "std dev", "Grid write", "std dev"); + grid_printf("%12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n", avMean(sRead), + avStdDev(sRead), avMean(sWrite), avStdDev(sWrite), avMean(gRead), + avStdDev(gRead), avMean(gWrite), avStdDev(gWrite)); MSG << std::endl; - MSG << "Robustness of volume-averaged results, in %. (rob = 100% - std dev / mean)" << std::endl; + MSG << "Robustness of volume-averaged results, in %. (rob = 100% - std dev / mean)" + << std::endl; MSG << std::endl; - grid_printf("%12s %12s %12s %12s\n", - "std read", "std write", "Grid read", "Grid write"); - grid_printf("%12.1f %12.1f %12.1f %12.1f\n", - avRob(sRead), avRob(sWrite), avRob(gRead), avRob(gWrite)); + grid_printf("%12s %12s %12s %12s\n", "std read", "std write", "Grid read", + "Grid write"); + grid_printf("%12.1f %12.1f %12.1f %12.1f\n", avRob(sRead), avRob(sWrite), avRob(gRead), + avRob(gWrite)); Grid_finalize(); diff --git a/Grid/Benchmark_comms_host_device.cpp b/Grid/Benchmark_comms_host_device.cpp index 5a4aae4..e213859 100644 --- a/Grid/Benchmark_comms_host_device.cpp +++ b/Grid/Benchmark_comms_host_device.cpp @@ -34,8 +34,7 @@ struct time_statistics mean = sum / v.size(); std::vector diff(v.size()); - std::transform(v.begin(), v.end(), diff.begin(), [=](double x) - { return x - mean; }); + std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; }); double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); err = std::sqrt(sq_sum / (v.size() * (v.size() - 1))); @@ -50,8 +49,7 @@ void header() std::cout << GridLogMessage << " L " << "\t" << " Ls " - << "\t" - << std::setw(11) << "bytes\t\t" + << "\t" << std::setw(11) << "bytes\t\t" << "MB/s uni" << "\t" << "MB/s bidi" << std::endl; @@ -64,7 +62,8 @@ int main(int argc, char **argv) Coordinate simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd()); Coordinate mpi_layout = GridDefaultMpi(); int threads = GridThread::GetThreads(); - std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" + << std::endl; int Nloop = 250; int nmu = 0; @@ -73,13 +72,21 @@ int main(int argc, char **argv) if (mpi_layout[mu] > 1) nmu++; - std::cout << GridLogMessage << "Number of iterations to average: " << Nloop << std::endl; + std::cout << GridLogMessage << "Number of iterations to average: " << Nloop + << std::endl; std::vector t_time(Nloop); // time_statistics timestat; - std::cout << GridLogMessage << "====================================================================================================" << std::endl; - std::cout << GridLogMessage << "= Benchmarking sequential halo exchange from host memory " << std::endl; - std::cout << GridLogMessage << "====================================================================================================" << std::endl; + std::cout << GridLogMessage + << "=========================================================================" + "===========================" + << std::endl; + std::cout << GridLogMessage + << "= Benchmarking sequential halo exchange from host memory " << std::endl; + std::cout << GridLogMessage + << "=========================================================================" + "===========================" + << std::endl; header(); for (int lat = 8; lat <= maxlat; lat += 4) @@ -87,9 +94,7 @@ int main(int argc, char **argv) for (int Ls = 8; Ls <= 8; Ls *= 2) { - Coordinate latt_size({lat * mpi_layout[0], - lat * mpi_layout[1], - lat * mpi_layout[2], + Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2], lat * mpi_layout[3]}); GridCartesian Grid(latt_size, simd_layout, mpi_layout); @@ -127,22 +132,16 @@ int main(int argc, char **argv) { std::vector requests; Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); - Grid.SendToRecvFrom((void *)&xbuf[mu][0], - xmit_to_rank, - (void *)&rbuf[mu][0], - recv_from_rank, - bytes); + Grid.SendToRecvFrom((void *)&xbuf[mu][0], xmit_to_rank, + (void *)&rbuf[mu][0], recv_from_rank, bytes); } comm_proc = mpi_layout[mu] - 1; { std::vector requests; Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); - Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], - xmit_to_rank, - (void *)&rbuf[mu + 4][0], - recv_from_rank, - bytes); + Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], xmit_to_rank, + (void *)&rbuf[mu + 4][0], recv_from_rank, bytes); } } Grid.Barrier(); @@ -154,17 +153,24 @@ int main(int argc, char **argv) double bidibytes = xbytes + rbytes; std::cout << GridLogMessage << std::setw(4) << lat << "\t" << Ls << "\t" - << std::setw(11) << bytes << std::fixed << std::setprecision(1) << std::setw(7) << " " - << std::right << xbytes / mean << " " + << std::setw(11) << bytes << std::fixed << std::setprecision(1) + << std::setw(7) << " " << std::right << xbytes / mean << " " << "\t\t" << std::setw(7) << bidibytes / mean << std::endl; } } } } - std::cout << GridLogMessage << "====================================================================================================" << std::endl; - std::cout << GridLogMessage << "= Benchmarking sequential halo exchange from GPU memory " << std::endl; - std::cout << GridLogMessage << "====================================================================================================" << std::endl; + std::cout << GridLogMessage + << "=========================================================================" + "===========================" + << std::endl; + std::cout << GridLogMessage + << "= Benchmarking sequential halo exchange from GPU memory " << std::endl; + std::cout << GridLogMessage + << "=========================================================================" + "===========================" + << std::endl; header(); for (int lat = 8; lat <= maxlat; lat += 4) @@ -172,9 +178,7 @@ int main(int argc, char **argv) for (int Ls = 8; Ls <= 8; Ls *= 2) { - Coordinate latt_size({lat * mpi_layout[0], - lat * mpi_layout[1], - lat * mpi_layout[2], + Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2], lat * mpi_layout[3]}); GridCartesian Grid(latt_size, simd_layout, mpi_layout); @@ -212,22 +216,16 @@ int main(int argc, char **argv) { std::vector requests; Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); - Grid.SendToRecvFrom((void *)&xbuf[mu][0], - xmit_to_rank, - (void *)&rbuf[mu][0], - recv_from_rank, - bytes); + Grid.SendToRecvFrom((void *)&xbuf[mu][0], xmit_to_rank, + (void *)&rbuf[mu][0], recv_from_rank, bytes); } comm_proc = mpi_layout[mu] - 1; { std::vector requests; Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); - Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], - xmit_to_rank, - (void *)&rbuf[mu + 4][0], - recv_from_rank, - bytes); + Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], xmit_to_rank, + (void *)&rbuf[mu + 4][0], recv_from_rank, bytes); } } Grid.Barrier(); @@ -239,8 +237,8 @@ int main(int argc, char **argv) double bidibytes = xbytes + rbytes; std::cout << GridLogMessage << std::setw(4) << lat << "\t" << Ls << "\t" - << std::setw(11) << bytes << std::fixed << std::setprecision(1) << std::setw(7) << " " - << std::right << xbytes / mean << " " + << std::setw(11) << bytes << std::fixed << std::setprecision(1) + << std::setw(7) << " " << std::right << xbytes / mean << " " << "\t\t" << std::setw(7) << bidibytes / mean << std::endl; } } @@ -253,9 +251,15 @@ int main(int argc, char **argv) } } - std::cout << GridLogMessage << "====================================================================================================" << std::endl; + std::cout << GridLogMessage + << "=========================================================================" + "===========================" + << std::endl; std::cout << GridLogMessage << "= All done; Bye Bye" << std::endl; - std::cout << GridLogMessage << "====================================================================================================" << std::endl; + std::cout << GridLogMessage + << "=========================================================================" + "===========================" + << std::endl; Grid_finalize(); } diff --git a/Grid/Benchmark_dwf_fp32.cpp b/Grid/Benchmark_dwf_fp32.cpp index 7f6d0e6..658997f 100644 --- a/Grid/Benchmark_dwf_fp32.cpp +++ b/Grid/Benchmark_dwf_fp32.cpp @@ -28,17 +28,13 @@ along with this program. If not, see . using namespace std; using namespace Grid; -template -struct scal +template struct scal { d internal; }; -Gamma::Algebra Gmu[] = { - Gamma::Algebra::GammaX, - Gamma::Algebra::GammaY, - Gamma::Algebra::GammaZ, - Gamma::Algebra::GammaT}; +Gamma::Algebra Gmu[] = {Gamma::Algebra::GammaX, Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, Gamma::Algebra::GammaT}; int main(int argc, char **argv) { @@ -59,13 +55,15 @@ int main(int argc, char **argv) long unsigned int single_site_flops = 8 * Nc * (7 + 16 * Nc); - GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi()); + GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid( + GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi()); GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid); GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid); std::cout << GridLogMessage << "Making s innermost grids" << std::endl; - GridCartesian *sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(), GridDefaultMpi()); + GridCartesian *sUGrid = + SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(), GridDefaultMpi()); GridRedBlackCartesian *sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid); GridCartesian *sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls, UGrid); GridRedBlackCartesian *sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls, UGrid); @@ -177,13 +175,24 @@ int main(int argc, char **argv) RealD NP = UGrid->_Nprocessors; RealD NN = UGrid->NodeCount(); - std::cout << GridLogMessage << "*****************************************************************" << std::endl; - std::cout << GridLogMessage << "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" << std::endl; - std::cout << GridLogMessage << "*****************************************************************" << std::endl; - std::cout << GridLogMessage << "*****************************************************************" << std::endl; - std::cout << GridLogMessage << "* Benchmarking DomainWallFermionR::Dhop " << std::endl; - std::cout << GridLogMessage << "* Vectorising space-time by " << vComplexF::Nsimd() << std::endl; - std::cout << GridLogMessage << "* VComplexF size is " << sizeof(vComplexF) << " B" << std::endl; + std::cout << GridLogMessage + << "*****************************************************************" + << std::endl; + std::cout << GridLogMessage + << "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" + << std::endl; + std::cout << GridLogMessage + << "*****************************************************************" + << std::endl; + std::cout << GridLogMessage + << "*****************************************************************" + << std::endl; + std::cout << GridLogMessage + << "* Benchmarking DomainWallFermionR::Dhop " << std::endl; + std::cout << GridLogMessage << "* Vectorising space-time by " << vComplexF::Nsimd() + << std::endl; + std::cout << GridLogMessage << "* VComplexF size is " << sizeof(vComplexF) << " B" + << std::endl; if (sizeof(RealF) == 4) std::cout << GridLogMessage << "* SINGLE precision " << std::endl; if (sizeof(RealF) == 8) @@ -200,7 +209,9 @@ int main(int argc, char **argv) std::cout << GridLogMessage << "* Using Nc=3 WilsonKernels" << std::endl; if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm) std::cout << GridLogMessage << "* Using Asm Nc=3 WilsonKernels" << std::endl; - std::cout << GridLogMessage << "*****************************************************************" << std::endl; + std::cout << GridLogMessage + << "*****************************************************************" + << std::endl; DomainWallFermionF Dw(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5); int ncall = 300; @@ -230,19 +241,29 @@ int main(int argc, char **argv) auto simdwidth = sizeof(vComplex); // RF: Nd Wilson * Ls, Nd gauge * Ls, Nc colors - double data_rf = volume * ((2 * Nd + 1) * Nd * Nc + 2 * Nd * Nc * Nc) * simdwidth / nsimd * ncall / (1024. * 1024. * 1024.); + double data_rf = volume * ((2 * Nd + 1) * Nd * Nc + 2 * Nd * Nc * Nc) * simdwidth / + nsimd * ncall / (1024. * 1024. * 1024.); // mem: Nd Wilson * Ls, Nd gauge, Nc colors - double data_mem = (volume * (2 * Nd + 1) * Nd * Nc + (volume / Ls) * 2 * Nd * Nc * Nc) * simdwidth / nsimd * ncall / (1024. * 1024. * 1024.); + double data_mem = + (volume * (2 * Nd + 1) * Nd * Nc + (volume / Ls) * 2 * Nd * Nc * Nc) * simdwidth / + nsimd * ncall / (1024. * 1024. * 1024.); - std::cout << GridLogMessage << "Called Dw " << ncall << " times in " << t1 - t0 << " us" << std::endl; + std::cout << GridLogMessage << "Called Dw " << ncall << " times in " << t1 - t0 + << " us" << std::endl; // std::cout<Barrier(); @@ -387,8 +415,10 @@ int main(int argc, char **argv) double flops = (single_site_flops * volume * ncall) / 2.0; std::cout << GridLogMessage << "Deo mflop/s = " << flops / (t1 - t0) << std::endl; - std::cout << GridLogMessage << "Deo mflop/s per rank " << flops / (t1 - t0) / NP << std::endl; - std::cout << GridLogMessage << "Deo mflop/s per node " << flops / (t1 - t0) / NN << std::endl; + std::cout << GridLogMessage << "Deo mflop/s per rank " << flops / (t1 - t0) / NP + << std::endl; + std::cout << GridLogMessage << "Deo mflop/s per node " << flops / (t1 - t0) / NN + << std::endl; Dw.Report(); } Dw.DhopEO(src_o, r_e, DaggerNo);