diff --git a/Grid/.clang-format b/Grid/.clang-format new file mode 100644 index 0000000..9156d50 --- /dev/null +++ b/Grid/.clang-format @@ -0,0 +1,14 @@ +{ + BasedOnStyle: LLVM, + UseTab: Never, + IndentWidth: 2, + TabWidth: 2, + BreakBeforeBraces: Allman, + AllowShortIfStatementsOnASingleLine: false, + IndentCaseLabels: false, + ColumnLimit: 0, + AccessModifierOffset: -4, + NamespaceIndentation: All, + FixNamespaceComments: false, + SortIncludes: true, +} diff --git a/Grid/Benchmark_ITT.cpp b/Grid/Benchmark_Grid.cpp similarity index 99% rename from Grid/Benchmark_ITT.cpp rename to Grid/Benchmark_Grid.cpp index e618aff..fd2056e 100644 --- a/Grid/Benchmark_ITT.cpp +++ b/Grid/Benchmark_Grid.cpp @@ -2,6 +2,8 @@ Copyright © 2015 Peter Boyle Copyright © 2022 Antonin Portelli +This is a refactoring of Benchmark_ITT.cpp from Grid + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 @@ -67,6 +69,7 @@ Gamma::Algebra Gmu[] = { Gamma::Algebra::GammaY, Gamma::Algebra::GammaZ, Gamma::Algebra::GammaT}; + struct controls { int Opt; @@ -76,7 +79,7 @@ struct controls class Benchmark { -public: + public: static void Decomposition(void) { diff --git a/Grid/Benchmark_comms_host_device.cpp b/Grid/Benchmark_comms_host_device.cpp index 56227c4..5a4aae4 100644 --- a/Grid/Benchmark_comms_host_device.cpp +++ b/Grid/Benchmark_comms_host_device.cpp @@ -21,231 +21,241 @@ along with this program. If not, see . using namespace std; using namespace Grid; -struct time_statistics{ +struct time_statistics +{ double mean; double err; double min; double max; - void statistics(std::vector v){ - double sum = std::accumulate(v.begin(), v.end(), 0.0); - mean = sum / v.size(); + void statistics(std::vector v) + { + double sum = std::accumulate(v.begin(), v.end(), 0.0); + mean = sum / v.size(); - std::vector diff(v.size()); - std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; }); - double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); - err = std::sqrt(sq_sum / (v.size()*(v.size() - 1))); + std::vector diff(v.size()); + std::transform(v.begin(), v.end(), diff.begin(), [=](double x) + { return x - mean; }); + double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); + err = std::sqrt(sq_sum / (v.size() * (v.size() - 1))); - auto result = std::minmax_element(v.begin(), v.end()); - min = *result.first; - max = *result.second; -} + auto result = std::minmax_element(v.begin(), v.end()); + min = *result.first; + max = *result.second; + } }; -void header(){ - std::cout <1) nmu++; + int Nloop = 250; + int nmu = 0; + int maxlat = 32; + for (int mu = 0; mu < Nd; mu++) + if (mpi_layout[mu] > 1) + nmu++; - std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl; + std::cout << GridLogMessage << "Number of iterations to average: " << Nloop << std::endl; std::vector t_time(Nloop); // time_statistics timestat; - std::cout< > xbuf(8); - std::vector > rbuf(8); + std::vector> xbuf(8); + std::vector> rbuf(8); - for(int mu=0;mu<8;mu++){ - xbuf[mu].resize(lat*lat*lat*Ls); - rbuf[mu].resize(lat*lat*lat*Ls); + for (int mu = 0; mu < 8; mu++) + { + xbuf[mu].resize(lat * lat * lat * Ls); + rbuf[mu].resize(lat * lat * lat * Ls); } - uint64_t bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); + uint64_t bytes = lat * lat * lat * Ls * sizeof(HalfSpinColourVectorD); int ncomm; - for(int mu=0;mu<4;mu++){ - if (mpi_layout[mu]>1 ) { - double start=usecond(); - for(int i=0;i 1) + { + double start = usecond(); + for (int i = 0; i < Nloop; i++) + { - ncomm=0; - - - ncomm++; - int comm_proc=1; - int xmit_to_rank; - int recv_from_rank; - - { - std::vector requests; - Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); - Grid.SendToRecvFrom((void *)&xbuf[mu][0], - xmit_to_rank, - (void *)&rbuf[mu][0], - recv_from_rank, - bytes); - } + ncomm = 0; - comm_proc = mpi_layout[mu]-1; - { - std::vector requests; - Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); - Grid.SendToRecvFrom((void *)&xbuf[mu+4][0], - xmit_to_rank, - (void *)&rbuf[mu+4][0], - recv_from_rank, - bytes); - } - } - Grid.Barrier(); - double stop=usecond(); - double mean=(stop-start)/Nloop; - double dbytes = bytes*ppn; - double xbytes = dbytes*2.0*ncomm; - double rbytes = xbytes; - double bidibytes = xbytes+rbytes; + ncomm++; + int comm_proc = 1; + int xmit_to_rank; + int recv_from_rank; - std::cout< requests; + Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); + Grid.SendToRecvFrom((void *)&xbuf[mu][0], + xmit_to_rank, + (void *)&rbuf[mu][0], + recv_from_rank, + bytes); + } + comm_proc = mpi_layout[mu] - 1; + { + std::vector requests; + Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); + Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], + xmit_to_rank, + (void *)&rbuf[mu + 4][0], + recv_from_rank, + bytes); + } + } + Grid.Barrier(); + double stop = usecond(); + double mean = (stop - start) / Nloop; + double dbytes = bytes * ppn; + double xbytes = dbytes * 2.0 * ncomm; + double rbytes = xbytes; + double bidibytes = xbytes + rbytes; - - } + std::cout << GridLogMessage << std::setw(4) << lat << "\t" << Ls << "\t" + << std::setw(11) << bytes << std::fixed << std::setprecision(1) << std::setw(7) << " " + << std::right << xbytes / mean << " " + << "\t\t" << std::setw(7) << bidibytes / mean << std::endl; + } } - - - } } - std::cout< xbuf(8); std::vector rbuf(8); - uint64_t bytes = lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); - for(int d=0;d<8;d++){ - xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); - rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); + uint64_t bytes = lat * lat * lat * Ls * sizeof(HalfSpinColourVectorD); + for (int d = 0; d < 8; d++) + { + xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); + rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); } int ncomm; - for(int mu=0;mu<4;mu++){ - if (mpi_layout[mu]>1 ) { - double start=usecond(); - for(int i=0;i 1) + { + double start = usecond(); + for (int i = 0; i < Nloop; i++) + { - ncomm=0; - - - ncomm++; - int comm_proc=1; - int xmit_to_rank; - int recv_from_rank; - - { - std::vector requests; - Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); - Grid.SendToRecvFrom((void *)&xbuf[mu][0], - xmit_to_rank, - (void *)&rbuf[mu][0], - recv_from_rank, - bytes); - } + ncomm = 0; - comm_proc = mpi_layout[mu]-1; - { - std::vector requests; - Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); - Grid.SendToRecvFrom((void *)&xbuf[mu+4][0], - xmit_to_rank, - (void *)&rbuf[mu+4][0], - recv_from_rank, - bytes); - } - } - Grid.Barrier(); - double stop=usecond(); - double mean=(stop-start)/Nloop; - double dbytes = bytes*ppn; - double xbytes = dbytes*2.0*ncomm; - double rbytes = xbytes; - double bidibytes = xbytes+rbytes; + ncomm++; + int comm_proc = 1; + int xmit_to_rank; + int recv_from_rank; - std::cout< requests; + Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); + Grid.SendToRecvFrom((void *)&xbuf[mu][0], + xmit_to_rank, + (void *)&rbuf[mu][0], + recv_from_rank, + bytes); + } + comm_proc = mpi_layout[mu] - 1; + { + std::vector requests; + Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); + Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], + xmit_to_rank, + (void *)&rbuf[mu + 4][0], + recv_from_rank, + bytes); + } + } + Grid.Barrier(); + double stop = usecond(); + double mean = (stop - start) / Nloop; + double dbytes = bytes * ppn; + double xbytes = dbytes * 2.0 * ncomm; + double rbytes = xbytes; + double bidibytes = xbytes + rbytes; - - } + std::cout << GridLogMessage << std::setw(4) << lat << "\t" << Ls << "\t" + << std::setw(11) << bytes << std::fixed << std::setprecision(1) << std::setw(7) << " " + << std::right << xbytes / mean << " " + << "\t\t" << std::setw(7) << bidibytes / mean << std::endl; + } } - for(int d=0;d<8;d++){ - acceleratorFreeDevice(xbuf[d]); - acceleratorFreeDevice(rbuf[d]); + for (int d = 0; d < 8; d++) + { + acceleratorFreeDevice(xbuf[d]); + acceleratorFreeDevice(rbuf[d]); } - - } } - - std::cout<