Mirror of https://github.com/paboyle/Grid.git
Synced 2025-11-01 04:24:32 +00:00

Commit: Added a comms benchmark

New file: benchmarks/Grid_comms.cc (83 lines added)
benchmarks/Grid_comms.cc (new file):

@@ -0,0 +1,83 @@
+#include <Grid.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  std::vector<int> simd_layout({1,1,2,2});
+  std::vector<int> mpi_layout ({1,2,2,1});
+
+
+  std::cout << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
+  int Nloop=10;
+  for(int lat=4;lat<=16;lat+=4){
+    for(int Ls=1;Ls<=16;Ls*=2){
+
+      int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
+      double start=usecond();
+      int ncomm=0;
+      for(int i=0;i<Nloop;i++){
+        std::vector<int> latt_size ({lat,lat,lat,lat});
+
+        GridCartesian Grid(latt_size,simd_layout,mpi_layout);
+
+
+        std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
+        std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
+        std::vector<CartesianCommunicator::CommsRequest_t> requests;
+
+        ncomm=0;
+        for(int mu=0;mu<4;mu++){
+
+          if (mpi_layout[mu]>1 ) {
+
+            ncomm++;
+            int comm_proc=1;
+            int xmit_to_rank;
+            int recv_from_rank;
+
+            Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
+            Grid.SendToRecvFromBegin(requests,
+                                     (void *)&xbuf[mu][0],
+                                     xmit_to_rank,
+                                     (void *)&rbuf[mu][0],
+                                     recv_from_rank,
+                                     bytes);
+
+            comm_proc = mpi_layout[mu]-1;
+
+            Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
+            Grid.SendToRecvFromBegin(requests,
+                                     (void *)&xbuf[mu+4][0],
+                                     xmit_to_rank,
+                                     (void *)&rbuf[mu+4][0],
+                                     recv_from_rank,
+                                     bytes);
+
+          }
+        }
+
+        Grid.SendToRecvFromComplete(requests);
+        Grid.Barrier();
+      }
+
+      double stop=usecond();
+
+      double xbytes    = Nloop*bytes*2*ncomm;
+      double rbytes    = xbytes;
+      double bidibytes = xbytes+rbytes;
+
+      double time = stop-start;
+
+      std::cout << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
+    }
+
+  }
+
+
+  Grid_finalize();
+}
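For orientation, the MB/s columns printed by the new benchmark are plain byte counts divided by the elapsed time from usecond(); since usecond() reports microseconds, bytes per microsecond is numerically MB/s. Below is a minimal standalone sketch of that arithmetic, not Grid code: the 96-byte payload (2 spins x 3 colours of complex double) and the 0.5 s elapsed time are assumptions, where the real benchmark uses sizeof(HalfSpinColourVectorD) and a measured interval.

// Standalone sketch of the rate arithmetic used by Grid_comms.cc above.
#include <cstdio>

int main(void)
{
  const int    lat = 16, Ls = 16, Nloop = 10;
  const double halfSpinorBytes = 96.0;   // assumed sizeof(HalfSpinColourVectorD)
  const int    ncomm = 2;                // mpi_layout {1,2,2,1}: two partitioned dims

  double bytes   = double(lat)*lat*lat*Ls*halfSpinorBytes;  // one face buffer
  double time_us = 5.0e5;                // assumed: 0.5 s for the Nloop iterations

  // Each partitioned dimension posts two outgoing faces per iteration,
  // so uni-directional traffic is Nloop*bytes*2*ncomm; receives double it.
  double xbytes    = Nloop*bytes*2*ncomm;
  double bidibytes = 2.0*xbytes;

  // usecond() is in microseconds, so bytes/us is numerically MB/s.
  std::printf("bytes per face %.0f, uni %.1f MB/s, bidi %.1f MB/s\n",
              bytes, xbytes/time_us, bidibytes/time_us);
  return 0;
}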
In the existing Dw (Wilson Dirac operator) benchmark:

@@ -35,6 +35,7 @@ int main (int argc, char ** argv)
   LatticeFermion src(&Grid); random(pRNG,src);
   LatticeFermion result(&Grid); result=zero;
   LatticeFermion    ref(&Grid);    ref=zero;
+  LatticeFermion    err(&Grid);
   LatticeFermion    tmp(&Grid);    tmp=zero;
   LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
   std::vector<LatticeColourMatrix> U(4,&Grid);
@@ -82,12 +83,14 @@ int main (int argc, char ** argv)
   }
   double t1=usecond();
   double flops=1320*volume*ncall;
-
 
   std::cout << "Called Dw"<<std::endl;
   std::cout << "norm result "<< norm2(result)<<std::endl;
   std::cout << "norm ref    "<< norm2(ref)<<std::endl;
-  std::cout << "mflop/s = "<< flops/(t1-t0)<<std::endl;
+  std::cout << "mflop/s =   "<< flops/(t1-t0)<<std::endl;
+  err = ref -result;
+  std::cout << "norm diff   "<< norm2(err)<<std::endl;
+
 
   //  for(int ss=0;ss<10;ss++ ){
   for(int ss=0;ss<0;ss++ ){
@@ -100,8 +103,5 @@ int main (int argc, char ** argv)
     }
   }
 
-  ref = ref -result;
-  std::cout << "norm diff   "<< norm2(ref)<<std::endl;
-
   Grid_finalize();
 }
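The benchmark change above gives the correctness check its own err field instead of overwriting ref, and prints it next to the timing: the optimised result is compared against the reference by the norm of their difference, which should be at (or near) zero. A tiny self-contained illustration of the same check on plain vectors follows; the data and helper are hypothetical, not Grid code.

// Compare a result against a reference via the squared L2 norm of the
// difference, mirroring "err = ref - result; norm2(err)" above.
#include <cassert>
#include <cstdio>
#include <vector>

static double norm2(const std::vector<double> &v)
{
  double s = 0.0;
  for (double x : v) s += x*x;
  return s;
}

int main(void)
{
  std::vector<double> ref    = {1.0, 2.0, 3.0};   // hypothetical reference result
  std::vector<double> result = {1.0, 2.0, 3.0};   // hypothetical optimised result

  std::vector<double> err(ref.size());
  for (size_t i = 0; i < ref.size(); i++) err[i] = ref[i] - result[i];

  std::printf("norm diff %g\n", norm2(err));
  assert(norm2(err) < 1.0e-12);                   // agreement check
  return 0;
}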
In the CartesianCommunicator header:

@@ -21,6 +21,9 @@ class CartesianCommunicator {
 
 #ifdef GRID_COMMS_MPI
     MPI_Comm communicator;
+    typedef MPI_Request CommsRequest_t;
+#else
+    typedef int CommsRequest_t;
 #endif
 
     // Constructor
@@ -77,13 +80,20 @@ class CartesianCommunicator {
       GlobalSumVector(ptr,words);
     }
     ////////////////////////////////////////////////////////////
-    // Face exchange
+    // Face exchange, buffer swap in translational invariant way
     ////////////////////////////////////////////////////////////
     void SendToRecvFrom(void *xmit,
                         int xmit_to_rank,
                         void *recv,
                         int recv_from_rank,
                         int bytes);
+    void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
+                             void *xmit,
+                             int xmit_to_rank,
+                             void *recv,
+                             int recv_from_rank,
+                             int bytes);
+    void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
+
     ////////////////////////////////////////////////////////////
     // Barrier
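The header change introduces a split-phase face exchange: SendToRecvFromBegin posts non-blocking transfers and appends their handles to a caller-owned request list, and SendToRecvFromComplete waits on everything in that list, so several faces can be in flight at once (this is exactly how the new benchmark drives it). Below is a minimal plain-MPI sketch of the same request-list pattern; the helper names and buffer sizes are made up for illustration and this is not the Grid implementation.

// Split-phase exchange with a request list: post several non-blocking
// send/recv pairs, then wait on all of them together.
#include <mpi.h>
#include <cassert>
#include <vector>

static void exchange_begin(std::vector<MPI_Request> &list,
                           void *xmit, int dest, void *recv, int from,
                           int bytes, MPI_Comm comm)
{
  MPI_Request xrq, rrq;
  int rank; MPI_Comm_rank(comm, &rank);
  int ierr = MPI_Isend(xmit, bytes, MPI_CHAR, dest, rank, comm, &xrq);
  ierr    |= MPI_Irecv(recv, bytes, MPI_CHAR, from, from, comm, &rrq);
  assert(ierr == 0);
  list.push_back(xrq);         // both handles join the shared list
  list.push_back(rrq);
}

static void exchange_complete(std::vector<MPI_Request> &list)
{
  std::vector<MPI_Status> status(list.size());
  int ierr = MPI_Waitall((int)list.size(), &list[0], &status[0]);
  assert(ierr == 0);
}

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);
  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  std::vector<char> xbuf(1024, rank), rbuf(1024, 0);
  int right = (rank + 1) % size, left = (rank + size - 1) % size;

  std::vector<MPI_Request> requests;
  exchange_begin(requests, &xbuf[0], right, &rbuf[0], left, 1024, MPI_COMM_WORLD);
  exchange_complete(requests);   // all posted transfers finish here

  MPI_Finalize();
  return 0;
}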
In the single-node ("none") communicator implementation:

@@ -20,7 +20,7 @@ void CartesianCommunicator::GlobalSum(double &){}
 void CartesianCommunicator::GlobalSum(uint32_t &){}
 void CartesianCommunicator::GlobalSumVector(double *,int N){}
 
-// Basic Halo comms primitive
+// Basic Halo comms primitive -- should never call in single node
 void CartesianCommunicator::SendToRecvFrom(void *xmit,
                                            int dest,
                                            void *recv,
@@ -29,6 +29,19 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
 {
   exit(-1);
 }
+void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
+                                                void *xmit,
+                                                int dest,
+                                                void *recv,
+                                                int from,
+                                                int bytes)
+{
+  exit(-1);
+}
+void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
+{
+  exit(-1);
+}
 
 void CartesianCommunicator::Barrier(void)
 {
In the MPI communicator implementation:

@@ -29,37 +29,45 @@ CartesianCommunicator::CartesianCommunicator(std::vector<int> &processors)
 }
 
 void CartesianCommunicator::GlobalSum(uint32_t &u){
-  MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
+  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
+  assert(ierr==0);
 }
 void CartesianCommunicator::GlobalSum(float &f){
-  MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
+  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
+  assert(ierr==0);
 }
 void CartesianCommunicator::GlobalSumVector(float *f,int N)
 {
-  MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
+  int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
+  assert(ierr==0);
 }
 void CartesianCommunicator::GlobalSum(double &d)
 {
-  MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
+  int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
+  assert(ierr==0);
 }
 void CartesianCommunicator::GlobalSumVector(double *d,int N)
 {
-  MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
+  int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
+  assert(ierr==0);
 }
 void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
 {
-  MPI_Cart_shift(communicator,dim,shift,&source,&dest);
+  int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest);
+  assert(ierr==0);
 }
 int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
 {
   int rank;
-  MPI_Cart_rank  (communicator, &coor[0], &rank);
+  int ierr=MPI_Cart_rank  (communicator, &coor[0], &rank);
+  assert(ierr==0);
   return rank;
 }
 void  CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
 {
   coor.resize(_ndimension);
-  MPI_Cart_coords  (communicator, rank, _ndimension,&coor[0]);
+  int ierr=MPI_Cart_coords  (communicator, rank, _ndimension,&coor[0]);
+  assert(ierr==0);
 }
 
 // Basic Halo comms primitive
@@ -69,36 +77,64 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
                                            int from,
                                            int bytes)
 {
-  MPI_Request reqs[2];
-  MPI_Status OkeyDokey[2];
+  std::vector<CommsRequest_t> reqs(0);
+  SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
+  SendToRecvFromComplete(reqs);
+}
+// Basic Halo comms primitive
+void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
+                                                void *xmit,
+                                                int dest,
+                                                void *recv,
+                                                int from,
+                                                int bytes)
+{
+  MPI_Request xrq;
+  MPI_Request rrq;
   int rank = _processor;
-  MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&reqs[0]);
-  MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&reqs[1]);
-  MPI_Waitall(2,reqs,OkeyDokey);
+  int ierr;
+  ierr=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
+  ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
+
+  assert(ierr==0);
+
+  list.push_back(xrq);
+  list.push_back(rrq);
+
+}
+void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
+{
+  int nreq=list.size();
+  std::vector<MPI_Status> status(nreq);
+  int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
+
+  assert(ierr==0);
 }
 
 void CartesianCommunicator::Barrier(void)
 {
-  MPI_Barrier(communicator);
+  int ierr = MPI_Barrier(communicator);
+  assert(ierr==0);
 }
 
 void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
 {
-  MPI_Bcast(data,
-            bytes,
-            MPI_BYTE,
-            root,
-            communicator);
+  int ierr=MPI_Bcast(data,
+                     bytes,
+                     MPI_BYTE,
+                     root,
+                     communicator);
+  assert(ierr==0);
 }
 
 void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
 {
-  MPI_Bcast(data,
-            bytes,
-            MPI_BYTE,
-            root,
-            MPI_COMM_WORLD);
+  int ierr= MPI_Bcast(data,
+                      bytes,
+                      MPI_BYTE,
+                      root,
+                      MPI_COMM_WORLD);
+  assert(ierr==0);
 }
 
 }
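The MPI backend changes above also adopt a uniform error-handling idiom: every MPI call's return code is captured into ierr and asserted equal to MPI_SUCCESS (which is 0). A minimal sketch of that idiom in isolation is shown below; it is an illustration only, not Grid code. With MPI's default MPI_ERRORS_ARE_FATAL handler most failures abort inside the call, so the asserts are a cheap belt-and-braces check that also documents intent.

// Capture-and-assert error checking for MPI calls, as adopted above.
#include <mpi.h>
#include <cassert>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);

  double d = 1.0;
  int ierr = MPI_Allreduce(MPI_IN_PLACE, &d, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  assert(ierr == MPI_SUCCESS);   // MPI_SUCCESS is defined to be 0

  ierr = MPI_Barrier(MPI_COMM_WORLD);
  assert(ierr == MPI_SUCCESS);

  MPI_Finalize();
  return 0;
}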