mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Added a comms benchmark

Peter Boyle 2015-05-02 23:42:30 +01:00
parent 6a39089a43
commit ea52562527
5 changed files with 173 additions and 31 deletions

benchmarks/Grid_comms.cc (new file, +83 lines)

@@ -0,0 +1,83 @@
#include <Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;

int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);

  std::vector<int> simd_layout({1,1,2,2});
  std::vector<int> mpi_layout ({1,2,2,1});

  std::cout << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;

  int Nloop=10;
  for(int lat=4;lat<=16;lat+=4){
    for(int Ls=1;Ls<=16;Ls*=2){

      int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);

      double start=usecond();
      int ncomm=0;
      for(int i=0;i<Nloop;i++){

        std::vector<int> latt_size ({lat,lat,lat,lat});
        GridCartesian Grid(latt_size,simd_layout,mpi_layout);

        std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
        std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));

        std::vector<CartesianCommunicator::CommsRequest_t> requests;

        ncomm=0;
        for(int mu=0;mu<4;mu++){
          if (mpi_layout[mu]>1 ) {
            ncomm++;

            // Exchange with the +1 neighbour in direction mu
            int comm_proc=1;
            int xmit_to_rank;
            int recv_from_rank;
            Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
            Grid.SendToRecvFromBegin(requests,
                                     (void *)&xbuf[mu][0],
                                     xmit_to_rank,
                                     (void *)&rbuf[mu][0],
                                     recv_from_rank,
                                     bytes);

            // Exchange with the -1 neighbour (a shift of mpi_layout[mu]-1 wraps around)
            comm_proc = mpi_layout[mu]-1;
            Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
            Grid.SendToRecvFromBegin(requests,
                                     (void *)&xbuf[mu+4][0],
                                     xmit_to_rank,
                                     (void *)&rbuf[mu+4][0],
                                     recv_from_rank,
                                     bytes);
          }
        }
        Grid.SendToRecvFromComplete(requests);
        Grid.Barrier();
      }
      double stop=usecond();

      double xbytes    = Nloop*bytes*2*ncomm;
      double rbytes    = xbytes;
      double bidibytes = xbytes+rbytes;

      double time = stop-start; // usecond() reports microseconds

      std::cout << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
    }
  }

  Grid_finalize();
}
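For orientation, the two rate columns are simple arithmetic over the counters above, and come out directly in MB/s because usecond() reports microseconds. A standalone sketch of that arithmetic, assuming sizeof(HalfSpinColourVectorD) is 96 bytes (a half spinor of 2 spin x 3 colour complex doubles) and a hypothetical loop time; both assumptions are illustrative, not measured:

// Sketch of the benchmark's rate arithmetic. Assumes a 96-byte
// HalfSpinColourVectorD (2 spins x 3 colours x 16-byte complex double)
// and an invented 50000 usec loop time for illustration.
#include <cstdio>

int main(void)
{
  const int  lat = 16, Ls = 16, Nloop = 10;
  const int  ncomm = 2;                       // dims with mpi_layout[mu]>1 in {1,2,2,1}
  const long bytes = (long)lat*lat*lat*Ls*96; // one face message: 6291456 bytes

  double xbytes    = (double)Nloop*bytes*2*ncomm; // two sends per communicating dim per loop
  double bidibytes = 2.0*xbytes;                  // sends plus matching receives

  double time = 50000.0;                          // hypothetical loop time in microseconds
  std::printf("uni %.1f MB/s  bidi %.1f MB/s\n", xbytes/time, bidibytes/time);
  return 0;
}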


@@ -35,6 +35,7 @@ int main (int argc, char ** argv)
  LatticeFermion src(&Grid); random(pRNG,src);
  LatticeFermion result(&Grid); result=zero;
  LatticeFermion ref(&Grid); ref=zero;
  LatticeFermion err(&Grid);
  LatticeFermion tmp(&Grid); tmp=zero;
  LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
  std::vector<LatticeColourMatrix> U(4,&Grid);
@@ -82,12 +83,14 @@ int main (int argc, char ** argv)
  }
  double t1=usecond();
  double flops=1320*volume*ncall;
  std::cout << "Called Dw"<<std::endl;
  std::cout << "norm result "<< norm2(result)<<std::endl;
  std::cout << "norm ref "<< norm2(ref)<<std::endl;
  std::cout << "mflop/s = "<< flops/(t1-t0)<<std::endl;
  std::cout << "mflop/s = "<< flops/(t1-t0)<<std::endl;
  err = ref -result;
  std::cout << "norm diff "<< norm2(err)<<std::endl;
  // for(int ss=0;ss<10;ss++ ){
  for(int ss=0;ss<0;ss++ ){
@@ -100,8 +103,5 @@ int main (int argc, char ** argv)
  }
  }
  ref = ref -result;
  std::cout << "norm diff "<< norm2(ref)<<std::endl;
  Grid_finalize();
}
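The flops=1320*volume*ncall figure above charges the conventional 1320 floating point operations per site for an Nc=3 Wilson Dslash. A small sketch of one standard way to reach that number, counting a complex multiply as 6 flops and a complex add as 2; the breakdown is textbook accounting, not taken from this commit:

// One conventional accounting of the 1320 flops/site Wilson Dslash figure.
#include <cassert>

int main(void)
{
  const int matvec  = 3*(3*6 + 2*2); // 3x3 complex matrix x vector: 66 flops
  const int project = 2*3*2;         // spin-project 4-spinor to half spinor: 12 flops
  const int dirs    = 8;             // +/- directions in 4 dimensions
  const int accum   = 7*(4*3*2);     // sum 8 reconstructed contributions: 168 flops

  assert(dirs*(2*matvec + project) + accum == 1320); // 8*(132+12)+168
  return 0;
}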


@@ -21,6 +21,9 @@ class CartesianCommunicator {
#ifdef GRID_COMMS_MPI
    MPI_Comm communicator;
    typedef MPI_Request CommsRequest_t;
#else
    typedef int CommsRequest_t;
#endif
    // Constructor
@@ -77,13 +80,20 @@ class CartesianCommunicator {
      GlobalSumVector(ptr,words);
    }
    ////////////////////////////////////////////////////////////
    // Face exchange
    // Face exchange, buffer swap in translational invariant way
    ////////////////////////////////////////////////////////////
    void SendToRecvFrom(void *xmit,
                        int xmit_to_rank,
                        void *recv,
                        int recv_from_rank,
                        int bytes);
    void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                             void *xmit,
                             int xmit_to_rank,
                             void *recv,
                             int recv_from_rank,
                             int bytes);
    void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
    ////////////////////////////////////////////////////////////
    // Barrier

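The Begin/Complete split lets a caller queue every face exchange first and block only once. A minimal sketch of the intended calling pattern, lifted from benchmarks/Grid_comms.cc above; the toy 8^4 lattice and the {1,2,2,1} layout (which wants 4 MPI ranks) are illustrative choices:

#include <Grid.h>
using namespace Grid;
using namespace Grid::QCD;

// Queue non-blocking exchanges in every partitioned direction, then one wait-all.
int main(int argc,char **argv)
{
  Grid_init(&argc,&argv);

  std::vector<int> simd_layout({1,1,2,2});
  std::vector<int> mpi_layout ({1,2,2,1});
  std::vector<int> latt_size  ({8,8,8,8});
  GridCartesian Grid(latt_size,simd_layout,mpi_layout);

  const int words = 8*8*8;  // toy face size
  std::vector<std::vector<HalfSpinColourVectorD> > xbuf(4,std::vector<HalfSpinColourVectorD>(words));
  std::vector<std::vector<HalfSpinColourVectorD> > rbuf(4,std::vector<HalfSpinColourVectorD>(words));
  int bytes = words*sizeof(HalfSpinColourVectorD);

  std::vector<CartesianCommunicator::CommsRequest_t> requests;
  for(int mu=0;mu<4;mu++){
    if(mpi_layout[mu]>1){
      int xmit_to_rank,recv_from_rank;
      Grid.ShiftedRanks(mu,1,xmit_to_rank,recv_from_rank); // neighbour ranks for a +1 shift
      Grid.SendToRecvFromBegin(requests,
                               (void *)&xbuf[mu][0],xmit_to_rank,
                               (void *)&rbuf[mu][0],recv_from_rank,
                               bytes);                     // enqueue, returns immediately
    }
  }
  Grid.SendToRecvFromComplete(requests);                   // single wait-all over all requests
  Grid.Barrier();

  Grid_finalize();
}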

@@ -20,7 +20,7 @@ void CartesianCommunicator::GlobalSum(double &){}
void CartesianCommunicator::GlobalSum(uint32_t &){}
void CartesianCommunicator::GlobalSumVector(double *,int N){}
// Basic Halo comms primitive
// Basic Halo comms primitive -- should never call in single node
void CartesianCommunicator::SendToRecvFrom(void *xmit,
                                           int dest,
                                           void *recv,
@@ -29,6 +29,19 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
{
  exit(-1);
}
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                void *xmit,
                                                int dest,
                                                void *recv,
                                                int from,
                                                int bytes)
{
  exit(-1);
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
  exit(-1);
}
void CartesianCommunicator::Barrier(void)
{


@@ -29,37 +29,45 @@ CartesianCommunicator::CartesianCommunicator(std::vector<int> &processors)
}
void CartesianCommunicator::GlobalSum(uint32_t &u){
  MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
  assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(float &f){
  MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
  assert(ierr==0);
}
void CartesianCommunicator::GlobalSumVector(float *f,int N)
{
  MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
  int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
  assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(double &d)
{
  MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
  int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
  assert(ierr==0);
}
void CartesianCommunicator::GlobalSumVector(double *d,int N)
{
  MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
  int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
  assert(ierr==0);
}
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
{
  MPI_Cart_shift(communicator,dim,shift,&source,&dest);
  int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest);
  assert(ierr==0);
}
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
{
  int rank;
  MPI_Cart_rank (communicator, &coor[0], &rank);
  int ierr=MPI_Cart_rank (communicator, &coor[0], &rank);
  assert(ierr==0);
  return rank;
}
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
{
  coor.resize(_ndimension);
  MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]);
  int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]);
  assert(ierr==0);
}
// Basic Halo comms primitive
@@ -69,36 +77,64 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
                                           int from,
                                           int bytes)
{
  MPI_Request reqs[2];
  MPI_Status OkeyDokey[2];
  std::vector<CommsRequest_t> reqs(0);
  SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
  SendToRecvFromComplete(reqs);
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                void *xmit,
                                                int dest,
                                                void *recv,
                                                int from,
                                                int bytes)
{
  MPI_Request xrq;
  MPI_Request rrq;
  int rank = _processor;
  MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&reqs[0]);
  MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&reqs[1]);
  MPI_Waitall(2,reqs,OkeyDokey);
  int ierr;
  ierr=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
  ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
  assert(ierr==0);
  list.push_back(xrq);
  list.push_back(rrq);
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
  int nreq=list.size();
  std::vector<MPI_Status> status(nreq);
  int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
  assert(ierr==0);
}
void CartesianCommunicator::Barrier(void)
{
  MPI_Barrier(communicator);
  int ierr = MPI_Barrier(communicator);
  assert(ierr==0);
}
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
{
  MPI_Bcast(data,
            bytes,
            MPI_BYTE,
            root,
            communicator);
  int ierr=MPI_Bcast(data,
                     bytes,
                     MPI_BYTE,
                     root,
                     communicator);
  assert(ierr==0);
}
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
{
  MPI_Bcast(data,
            bytes,
            MPI_BYTE,
            root,
            MPI_COMM_WORLD);
  int ierr= MPI_Bcast(data,
                      bytes,
                      MPI_BYTE,
                      root,
                      MPI_COMM_WORLD);
  assert(ierr==0);
}
}
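For readers less familiar with the non-blocking pattern, here is the same handshake in plain MPI, independent of Grid: each rank tags its Isend with its own rank and posts an Irecv matched on the sender's rank, just as SendToRecvFromBegin does above, then a single Waitall completes both requests. A standalone sketch (the ring topology and payload are illustrative):

// Standalone sketch of the Isend/Irecv/Waitall handshake used above:
// send right, receive from left, tag = sender's rank so Irecv matches on `from`.
#include <mpi.h>
#include <vector>
#include <cstdio>

int main(int argc,char **argv)
{
  MPI_Init(&argc,&argv);

  int rank,size;
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Comm_size(MPI_COMM_WORLD,&size);

  int dest = (rank+1)%size;       // right neighbour
  int from = (rank-1+size)%size;  // left neighbour

  std::vector<char> xmit(1024,(char)rank), recv(1024,0);
  std::vector<MPI_Request> list;

  MPI_Request xrq,rrq;
  MPI_Isend(&xmit[0],(int)xmit.size(),MPI_CHAR,dest,rank,MPI_COMM_WORLD,&xrq);
  MPI_Irecv(&recv[0],(int)recv.size(),MPI_CHAR,from,from,MPI_COMM_WORLD,&rrq);
  list.push_back(xrq);
  list.push_back(rrq);

  std::vector<MPI_Status> status(list.size());
  MPI_Waitall((int)list.size(),&list[0],&status[0]);

  std::printf("rank %d received payload %d from rank %d\n",rank,(int)recv[0],from);
  MPI_Finalize();
}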