forked from portelli/lattice-benchmarks
		
	renaming and formatting
This commit is contained in:
		
							
								
								
									
										14
									
								
								Grid/.clang-format
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								Grid/.clang-format
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,14 @@ | |||||||
|  | { | ||||||
|  |   BasedOnStyle: LLVM, | ||||||
|  |   UseTab: Never, | ||||||
|  |   IndentWidth: 2, | ||||||
|  |   TabWidth: 2, | ||||||
|  |   BreakBeforeBraces: Allman, | ||||||
|  |   AllowShortIfStatementsOnASingleLine: false, | ||||||
|  |   IndentCaseLabels: false, | ||||||
|  |   ColumnLimit: 0, | ||||||
|  |   AccessModifierOffset: -4, | ||||||
|  |   NamespaceIndentation: All, | ||||||
|  |   FixNamespaceComments: false, | ||||||
|  |   SortIncludes: true, | ||||||
|  | } | ||||||
| @@ -2,6 +2,8 @@ | |||||||
| Copyright © 2015 Peter Boyle <paboyle@ph.ed.ac.uk> | Copyright © 2015 Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| Copyright © 2022 Antonin Portelli <antonin.portelli@me.com> | Copyright © 2022 Antonin Portelli <antonin.portelli@me.com> | ||||||
| 
 | 
 | ||||||
|  | This is a refactoring of Benchmark_ITT.cpp from Grid | ||||||
|  | 
 | ||||||
| This program is free software; you can redistribute it and/or | This program is free software; you can redistribute it and/or | ||||||
| modify it under the terms of the GNU General Public License | modify it under the terms of the GNU General Public License | ||||||
| as published by the Free Software Foundation; either version 2 | as published by the Free Software Foundation; either version 2 | ||||||
| @@ -67,6 +69,7 @@ Gamma::Algebra Gmu[] = { | |||||||
|     Gamma::Algebra::GammaY, |     Gamma::Algebra::GammaY, | ||||||
|     Gamma::Algebra::GammaZ, |     Gamma::Algebra::GammaZ, | ||||||
|     Gamma::Algebra::GammaT}; |     Gamma::Algebra::GammaT}; | ||||||
|  | 
 | ||||||
| struct controls | struct controls | ||||||
| { | { | ||||||
|   int Opt; |   int Opt; | ||||||
| @@ -76,7 +79,7 @@ struct controls | |||||||
| 
 | 
 | ||||||
| class Benchmark | class Benchmark | ||||||
| { | { | ||||||
| public: |   public: | ||||||
|   static void Decomposition(void) |   static void Decomposition(void) | ||||||
|   { |   { | ||||||
| 
 | 
 | ||||||
| @@ -21,95 +21,112 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. | |||||||
| using namespace std; | using namespace std; | ||||||
| using namespace Grid; | using namespace Grid; | ||||||
|  |  | ||||||
| struct time_statistics{ | struct time_statistics | ||||||
|  | { | ||||||
|   double mean; |   double mean; | ||||||
|   double err; |   double err; | ||||||
|   double min; |   double min; | ||||||
|   double max; |   double max; | ||||||
|  |  | ||||||
|   void statistics(std::vector<double> v){ |   void statistics(std::vector<double> v) | ||||||
|  |   { | ||||||
|     double sum = std::accumulate(v.begin(), v.end(), 0.0); |     double sum = std::accumulate(v.begin(), v.end(), 0.0); | ||||||
|     mean = sum / v.size(); |     mean = sum / v.size(); | ||||||
|  |  | ||||||
|     std::vector<double> diff(v.size()); |     std::vector<double> diff(v.size()); | ||||||
|       std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; }); |     std::transform(v.begin(), v.end(), diff.begin(), [=](double x) | ||||||
|  |                    { return x - mean; }); | ||||||
|     double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); |     double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); | ||||||
|       err = std::sqrt(sq_sum / (v.size()*(v.size() - 1))); |     err = std::sqrt(sq_sum / (v.size() * (v.size() - 1))); | ||||||
|  |  | ||||||
|     auto result = std::minmax_element(v.begin(), v.end()); |     auto result = std::minmax_element(v.begin(), v.end()); | ||||||
|     min = *result.first; |     min = *result.first; | ||||||
|     max = *result.second; |     max = *result.second; | ||||||
| } |   } | ||||||
| }; | }; | ||||||
|  |  | ||||||
| void header(){ | void header() | ||||||
|   std::cout <<GridLogMessage << " L  "<<"\t"<<" Ls  "<<"\t" |  | ||||||
|             <<std::setw(11)<<"bytes\t\t"<<"MB/s uni"<<"\t"<<"MB/s bidi"<<std::endl; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| int main (int argc, char ** argv) |  | ||||||
| { | { | ||||||
|   Grid_init(&argc,&argv); |   std::cout << GridLogMessage << " L  " | ||||||
|  |             << "\t" | ||||||
|  |             << " Ls  " | ||||||
|  |             << "\t" | ||||||
|  |             << std::setw(11) << "bytes\t\t" | ||||||
|  |             << "MB/s uni" | ||||||
|  |             << "\t" | ||||||
|  |             << "MB/s bidi" << std::endl; | ||||||
|  | }; | ||||||
|  |  | ||||||
|   Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); | int main(int argc, char **argv) | ||||||
|  | { | ||||||
|  |   Grid_init(&argc, &argv); | ||||||
|  |  | ||||||
|  |   Coordinate simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd()); | ||||||
|   Coordinate mpi_layout = GridDefaultMpi(); |   Coordinate mpi_layout = GridDefaultMpi(); | ||||||
|   int threads = GridThread::GetThreads(); |   int threads = GridThread::GetThreads(); | ||||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; |   std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; | ||||||
|  |  | ||||||
|   int Nloop=250; |   int Nloop = 250; | ||||||
|   int nmu=0; |   int nmu = 0; | ||||||
|   int maxlat=32; |   int maxlat = 32; | ||||||
|   for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; |   for (int mu = 0; mu < Nd; mu++) | ||||||
|  |     if (mpi_layout[mu] > 1) | ||||||
|  |       nmu++; | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl; |   std::cout << GridLogMessage << "Number of iterations to average: " << Nloop << std::endl; | ||||||
|   std::vector<double> t_time(Nloop); |   std::vector<double> t_time(Nloop); | ||||||
|   //  time_statistics timestat; |   //  time_statistics timestat; | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout << GridLogMessage << "====================================================================================================" << std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange from host memory "<<std::endl; |   std::cout << GridLogMessage << "= Benchmarking sequential halo exchange from host memory " << std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout << GridLogMessage << "====================================================================================================" << std::endl; | ||||||
|   header(); |   header(); | ||||||
|  |  | ||||||
|   for(int lat=8;lat<=maxlat;lat+=4){ |   for (int lat = 8; lat <= maxlat; lat += 4) | ||||||
|     for(int Ls=8;Ls<=8;Ls*=2){ |   { | ||||||
|  |     for (int Ls = 8; Ls <= 8; Ls *= 2) | ||||||
|  |     { | ||||||
|  |  | ||||||
|       Coordinate latt_size  ({lat*mpi_layout[0], |       Coordinate latt_size({lat * mpi_layout[0], | ||||||
| 	                      lat*mpi_layout[1], |                             lat * mpi_layout[1], | ||||||
|       			      lat*mpi_layout[2], |                             lat * mpi_layout[2], | ||||||
|       			      lat*mpi_layout[3]}); |                             lat * mpi_layout[3]}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian Grid(latt_size, simd_layout, mpi_layout); | ||||||
|       RealD Nrank = Grid._Nprocessors; |       RealD Nrank = Grid._Nprocessors; | ||||||
|       RealD Nnode = Grid.NodeCount(); |       RealD Nnode = Grid.NodeCount(); | ||||||
|       RealD ppn = Nrank/Nnode; |       RealD ppn = Nrank / Nnode; | ||||||
|  |  | ||||||
|       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8); |       std::vector<std::vector<HalfSpinColourVectorD>> xbuf(8); | ||||||
|       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8); |       std::vector<std::vector<HalfSpinColourVectorD>> rbuf(8); | ||||||
|  |  | ||||||
|       for(int mu=0;mu<8;mu++){ |       for (int mu = 0; mu < 8; mu++) | ||||||
| 	xbuf[mu].resize(lat*lat*lat*Ls); |       { | ||||||
| 	rbuf[mu].resize(lat*lat*lat*Ls); |         xbuf[mu].resize(lat * lat * lat * Ls); | ||||||
|  |         rbuf[mu].resize(lat * lat * lat * Ls); | ||||||
|       } |       } | ||||||
|       uint64_t bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       uint64_t bytes = lat * lat * lat * Ls * sizeof(HalfSpinColourVectorD); | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|  |  | ||||||
|       for(int mu=0;mu<4;mu++){ |       for (int mu = 0; mu < 4; mu++) | ||||||
| 	if (mpi_layout[mu]>1 ) { |       { | ||||||
| 	double start=usecond(); |         if (mpi_layout[mu] > 1) | ||||||
| 	for(int i=0;i<Nloop;i++){ |         { | ||||||
|  |           double start = usecond(); | ||||||
| 	  ncomm=0; |           for (int i = 0; i < Nloop; i++) | ||||||
|  |           { | ||||||
|  |  | ||||||
|  |             ncomm = 0; | ||||||
|  |  | ||||||
|             ncomm++; |             ncomm++; | ||||||
| 	    int comm_proc=1; |             int comm_proc = 1; | ||||||
|             int xmit_to_rank; |             int xmit_to_rank; | ||||||
|             int recv_from_rank; |             int recv_from_rank; | ||||||
|  |  | ||||||
|             { |             { | ||||||
|               std::vector<CommsRequest_t> requests; |               std::vector<CommsRequest_t> requests; | ||||||
| 	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); |               Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); | ||||||
|               Grid.SendToRecvFrom((void *)&xbuf[mu][0], |               Grid.SendToRecvFrom((void *)&xbuf[mu][0], | ||||||
|                                   xmit_to_rank, |                                   xmit_to_rank, | ||||||
|                                   (void *)&rbuf[mu][0], |                                   (void *)&rbuf[mu][0], | ||||||
| @@ -117,86 +134,84 @@ int main (int argc, char ** argv) | |||||||
|                                   bytes); |                                   bytes); | ||||||
|             } |             } | ||||||
|  |  | ||||||
| 	    comm_proc = mpi_layout[mu]-1; |             comm_proc = mpi_layout[mu] - 1; | ||||||
|             { |             { | ||||||
|               std::vector<CommsRequest_t> requests; |               std::vector<CommsRequest_t> requests; | ||||||
| 	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); |               Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); | ||||||
| 	      Grid.SendToRecvFrom((void *)&xbuf[mu+4][0], |               Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], | ||||||
|                                   xmit_to_rank, |                                   xmit_to_rank, | ||||||
| 				  (void *)&rbuf[mu+4][0], |                                   (void *)&rbuf[mu + 4][0], | ||||||
|                                   recv_from_rank, |                                   recv_from_rank, | ||||||
|                                   bytes); |                                   bytes); | ||||||
|             } |             } | ||||||
|           } |           } | ||||||
|           Grid.Barrier(); |           Grid.Barrier(); | ||||||
| 	double stop=usecond(); |           double stop = usecond(); | ||||||
|         double mean=(stop-start)/Nloop;       |           double mean = (stop - start) / Nloop; | ||||||
|       double dbytes    = bytes*ppn; |           double dbytes = bytes * ppn; | ||||||
|       double xbytes    = dbytes*2.0*ncomm; |           double xbytes = dbytes * 2.0 * ncomm; | ||||||
|           double rbytes = xbytes; |           double rbytes = xbytes; | ||||||
|       double bidibytes = xbytes+rbytes; |           double bidibytes = xbytes + rbytes; | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |  | ||||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)<<" " |  | ||||||
|                <<std::right<< xbytes/mean<<"  " |  | ||||||
|                << "\t\t"<<std::setw(7)<< bidibytes/mean<< std::endl; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |           std::cout << GridLogMessage << std::setw(4) << lat << "\t" << Ls << "\t" | ||||||
|  |                     << std::setw(11) << bytes << std::fixed << std::setprecision(1) << std::setw(7) << " " | ||||||
|  |                     << std::right << xbytes / mean << "  " | ||||||
|  |                     << "\t\t" << std::setw(7) << bidibytes / mean << std::endl; | ||||||
|  |         } | ||||||
|  |       } | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   std::cout << GridLogMessage << "====================================================================================================" << std::endl; | ||||||
|        |   std::cout << GridLogMessage << "= Benchmarking sequential halo exchange from GPU memory " << std::endl; | ||||||
|     } |   std::cout << GridLogMessage << "====================================================================================================" << std::endl; | ||||||
|   } |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange from GPU memory "<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|   header(); |   header(); | ||||||
|  |  | ||||||
|   for(int lat=8;lat<=maxlat;lat+=4){ |   for (int lat = 8; lat <= maxlat; lat += 4) | ||||||
|     for(int Ls=8;Ls<=8;Ls*=2){ |   { | ||||||
|  |     for (int Ls = 8; Ls <= 8; Ls *= 2) | ||||||
|  |     { | ||||||
|  |  | ||||||
|       Coordinate latt_size  ({lat*mpi_layout[0], |       Coordinate latt_size({lat * mpi_layout[0], | ||||||
| 	                      lat*mpi_layout[1], |                             lat * mpi_layout[1], | ||||||
|       			      lat*mpi_layout[2], |                             lat * mpi_layout[2], | ||||||
|       			      lat*mpi_layout[3]}); |                             lat * mpi_layout[3]}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian Grid(latt_size, simd_layout, mpi_layout); | ||||||
|       RealD Nrank = Grid._Nprocessors; |       RealD Nrank = Grid._Nprocessors; | ||||||
|       RealD Nnode = Grid.NodeCount(); |       RealD Nnode = Grid.NodeCount(); | ||||||
|       RealD ppn = Nrank/Nnode; |       RealD ppn = Nrank / Nnode; | ||||||
|  |  | ||||||
|  |  | ||||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); |       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); |       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||||
|  |  | ||||||
|       uint64_t bytes = lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       uint64_t bytes = lat * lat * lat * Ls * sizeof(HalfSpinColourVectorD); | ||||||
|       for(int d=0;d<8;d++){ |       for (int d = 0; d < 8; d++) | ||||||
|  |       { | ||||||
|         xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); |         xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); | ||||||
|         rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); |         rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|  |  | ||||||
|       for(int mu=0;mu<4;mu++){ |       for (int mu = 0; mu < 4; mu++) | ||||||
| 	if (mpi_layout[mu]>1 ) { |       { | ||||||
| 	double start=usecond(); |         if (mpi_layout[mu] > 1) | ||||||
| 	for(int i=0;i<Nloop;i++){ |         { | ||||||
|  |           double start = usecond(); | ||||||
| 	  ncomm=0; |           for (int i = 0; i < Nloop; i++) | ||||||
|  |           { | ||||||
|  |  | ||||||
|  |             ncomm = 0; | ||||||
|  |  | ||||||
|             ncomm++; |             ncomm++; | ||||||
| 	    int comm_proc=1; |             int comm_proc = 1; | ||||||
|             int xmit_to_rank; |             int xmit_to_rank; | ||||||
|             int recv_from_rank; |             int recv_from_rank; | ||||||
|  |  | ||||||
|             { |             { | ||||||
|               std::vector<CommsRequest_t> requests; |               std::vector<CommsRequest_t> requests; | ||||||
| 	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); |               Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); | ||||||
|               Grid.SendToRecvFrom((void *)&xbuf[mu][0], |               Grid.SendToRecvFrom((void *)&xbuf[mu][0], | ||||||
|                                   xmit_to_rank, |                                   xmit_to_rank, | ||||||
|                                   (void *)&rbuf[mu][0], |                                   (void *)&rbuf[mu][0], | ||||||
| @@ -204,48 +219,43 @@ int main (int argc, char ** argv) | |||||||
|                                   bytes); |                                   bytes); | ||||||
|             } |             } | ||||||
|  |  | ||||||
| 	    comm_proc = mpi_layout[mu]-1; |             comm_proc = mpi_layout[mu] - 1; | ||||||
|             { |             { | ||||||
|               std::vector<CommsRequest_t> requests; |               std::vector<CommsRequest_t> requests; | ||||||
| 	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); |               Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank); | ||||||
| 	      Grid.SendToRecvFrom((void *)&xbuf[mu+4][0], |               Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], | ||||||
|                                   xmit_to_rank, |                                   xmit_to_rank, | ||||||
| 				  (void *)&rbuf[mu+4][0], |                                   (void *)&rbuf[mu + 4][0], | ||||||
|                                   recv_from_rank, |                                   recv_from_rank, | ||||||
|                                   bytes); |                                   bytes); | ||||||
|             } |             } | ||||||
|           } |           } | ||||||
|           Grid.Barrier(); |           Grid.Barrier(); | ||||||
| 	double stop=usecond(); |           double stop = usecond(); | ||||||
|         double mean=(stop-start)/Nloop;       |           double mean = (stop - start) / Nloop; | ||||||
|       double dbytes    = bytes*ppn; |           double dbytes = bytes * ppn; | ||||||
|       double xbytes    = dbytes*2.0*ncomm; |           double xbytes = dbytes * 2.0 * ncomm; | ||||||
|           double rbytes = xbytes; |           double rbytes = xbytes; | ||||||
|       double bidibytes = xbytes+rbytes; |           double bidibytes = xbytes + rbytes; | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |  | ||||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)<<" " |  | ||||||
|                <<std::right<< xbytes/mean<<"  " |  | ||||||
|                << "\t\t"<<std::setw(7)<< bidibytes/mean<< std::endl; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |           std::cout << GridLogMessage << std::setw(4) << lat << "\t" << Ls << "\t" | ||||||
|  |                     << std::setw(11) << bytes << std::fixed << std::setprecision(1) << std::setw(7) << " " | ||||||
|  |                     << std::right << xbytes / mean << "  " | ||||||
|  |                     << "\t\t" << std::setw(7) << bidibytes / mean << std::endl; | ||||||
|         } |         } | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       for(int d=0;d<8;d++){ |       for (int d = 0; d < 8; d++) | ||||||
|  |       { | ||||||
|         acceleratorFreeDevice(xbuf[d]); |         acceleratorFreeDevice(xbuf[d]); | ||||||
|         acceleratorFreeDevice(rbuf[d]); |         acceleratorFreeDevice(rbuf[d]); | ||||||
|       } |       } | ||||||
|  |  | ||||||
|        |  | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   std::cout << GridLogMessage << "====================================================================================================" << std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout << GridLogMessage << "= All done; Bye Bye" << std::endl; | ||||||
|   std::cout<<GridLogMessage << "= All done; Bye Bye"<<std::endl; |   std::cout << GridLogMessage << "====================================================================================================" << std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|   Grid_finalize(); |   Grid_finalize(); | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user