mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 19:25:56 +01:00

Added a comms benchmark

This commit is contained in:
Peter Boyle 2015-05-02 23:42:30 +01:00
parent 4a1d4f1b3c
commit f663be2a6c
5 changed files with 173 additions and 31 deletions

benchmarks/Grid_comms.cc (new file, 83 lines)

@@ -0,0 +1,83 @@
#include <Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;

int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);

  std::vector<int> simd_layout({1,1,2,2});
  std::vector<int> mpi_layout ({1,2,2,1});

  std::cout << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;

  int Nloop=10;
  for(int lat=4;lat<=16;lat+=4){
    for(int Ls=1;Ls<=16;Ls*=2){

      int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);

      double start=usecond();
      int ncomm=0;
      for(int i=0;i<Nloop;i++){

        std::vector<int> latt_size ({lat,lat,lat,lat});
        GridCartesian Grid(latt_size,simd_layout,mpi_layout);

        std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
        std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));

        std::vector<CartesianCommunicator::CommsRequest_t> requests;

        ncomm=0;
        for(int mu=0;mu<4;mu++){
          if (mpi_layout[mu]>1 ) {
            ncomm++;

            int comm_proc=1;
            int xmit_to_rank;
            int recv_from_rank;
            Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
            Grid.SendToRecvFromBegin(requests,
                                     (void *)&xbuf[mu][0],
                                     xmit_to_rank,
                                     (void *)&rbuf[mu][0],
                                     recv_from_rank,
                                     bytes);

            comm_proc = mpi_layout[mu]-1;
            Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
            Grid.SendToRecvFromBegin(requests,
                                     (void *)&xbuf[mu+4][0],
                                     xmit_to_rank,
                                     (void *)&rbuf[mu+4][0],
                                     recv_from_rank,
                                     bytes);
          }
        }
        Grid.SendToRecvFromComplete(requests);
        Grid.Barrier();
      }
      double stop=usecond();

      double xbytes = Nloop*bytes*2*ncomm;
      double rbytes = xbytes;
      double bidibytes = xbytes+rbytes;

      double time = stop-start;

      std::cout << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
    }
  }

  Grid_finalize();
}
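The benchmark divides bytes moved by elapsed microseconds, so the "MB/s uni" and "MB/s bidi" columns are effectively megabytes per second (1 byte per microsecond is 10^6 bytes per second). Below is a minimal sketch of the same timing idea in plain MPI, not part of the commit; the 1 MB payload, the loop count, and the one-dimensional ring of neighbours are illustrative assumptions standing in for the lat^3*Ls face buffers and ShiftedRanks neighbours used above.

#include <mpi.h>
#include <vector>
#include <cstdio>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);
  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  const int bytes = 1 << 20;            // assumed 1 MB payload; the benchmark uses lat^3*Ls*sizeof(HalfSpinColourVectorD)
  const int Nloop = 10;
  std::vector<char> xbuf(bytes), rbuf(bytes);

  int right = (rank + 1) % size;        // forward neighbour on a 1-d ring, playing the role of ShiftedRanks
  int left  = (rank - 1 + size) % size; // backward neighbour

  double start = MPI_Wtime();
  for (int i = 0; i < Nloop; i++) {
    MPI_Request reqs[2];
    MPI_Isend(&xbuf[0], bytes, MPI_CHAR, right, 0, MPI_COMM_WORLD, &reqs[0]);
    MPI_Irecv(&rbuf[0], bytes, MPI_CHAR, left,  0, MPI_COMM_WORLD, &reqs[1]);
    MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
    MPI_Barrier(MPI_COMM_WORLD);
  }
  double stop = MPI_Wtime();

  double usec = (stop - start) * 1.0e6;  // convert seconds to microseconds, as usecond() reports
  if (rank == 0) {
    std::printf("uni  %f MB/s\n", Nloop * (double)bytes / usec);        // bytes sent per rank
    std::printf("bidi %f MB/s\n", 2.0 * Nloop * (double)bytes / usec);  // sent plus received
  }

  MPI_Finalize();
  return 0;
}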


@@ -35,6 +35,7 @@ int main (int argc, char ** argv)
   LatticeFermion src(&Grid); random(pRNG,src);
   LatticeFermion result(&Grid); result=zero;
   LatticeFermion ref(&Grid); ref=zero;
+  LatticeFermion err(&Grid);
   LatticeFermion tmp(&Grid); tmp=zero;
   LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
   std::vector<LatticeColourMatrix> U(4,&Grid);
@@ -82,12 +83,14 @@ int main (int argc, char ** argv)
   }
   double t1=usecond();
   double flops=1320*volume*ncall;
   std::cout << "Called Dw"<<std::endl;
   std::cout << "norm result "<< norm2(result)<<std::endl;
   std::cout << "norm ref "<< norm2(ref)<<std::endl;
   std::cout << "mflop/s = "<< flops/(t1-t0)<<std::endl;
+  err = ref -result;
+  std::cout << "norm diff "<< norm2(err)<<std::endl;
   // for(int ss=0;ss<10;ss++ ){
   for(int ss=0;ss<0;ss++ ){
@@ -100,8 +103,5 @@ int main (int argc, char ** argv)
     }
   }
-  ref = ref -result;
-  std::cout << "norm diff "<< norm2(ref)<<std::endl;
   Grid_finalize();
 }
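For reference, the "mflop/s" line above divides a flop count by a usecond() interval, so the printed figure is already in Mflop/s. A standalone sketch of that unit bookkeeping, not part of the commit, with a made-up volume, call count, and timing:

#include <cstdio>

int main(void)
{
  double volume = 8.0*8.0*8.0*8.0;      // assumed 8^4 lattice volume
  int    ncall  = 1000;                 // assumed number of Dw applications
  double flops  = 1320.0*volume*ncall;  // 1320 flops per site per Wilson Dslash call, as in the benchmark
  double t0 = 0.0, t1 = 2.0e6;          // pretend usecond() reports a 2 second interval
  // flops / microseconds = (flops/1e6) / seconds = Mflop/s
  std::printf("mflop/s = %f\n", flops/(t1-t0));
  return 0;
}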


@@ -21,6 +21,9 @@ class CartesianCommunicator {
 #ifdef GRID_COMMS_MPI
     MPI_Comm communicator;
+    typedef MPI_Request CommsRequest_t;
+#else
+    typedef int CommsRequest_t;
 #endif
     // Constructor
@@ -77,13 +80,20 @@ class CartesianCommunicator {
       GlobalSumVector(ptr,words);
     }
     ////////////////////////////////////////////////////////////
-    // Face exchange
+    // Face exchange, buffer swap in translational invariant way
     ////////////////////////////////////////////////////////////
     void SendToRecvFrom(void *xmit,
                         int xmit_to_rank,
                         void *recv,
                         int recv_from_rank,
                         int bytes);
+    void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
+                             void *xmit,
+                             int xmit_to_rank,
+                             void *recv,
+                             int recv_from_rank,
+                             int bytes);
+    void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
     ////////////////////////////////////////////////////////////
     // Barrier
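A hedged usage sketch of the Begin/Complete pair declared above: build one request list, queue as many face transfers as needed, then wait on all of them at once. This mirrors how benchmarks/Grid_comms.cc drives the interface, since GridCartesian inherits the communicator; the helper function name and buffer arguments here are illustrative, not part of the commit.

#include <Grid.h>
#include <vector>
using namespace Grid;

// Hypothetical helper, not from the commit: exchange one face of 'bytes' bytes
// in the forward direction of dimension mu, following the benchmark's pattern.
void ExchangeOneFace(GridCartesian &Grid, int mu, void *sendbuf, void *recvbuf, int bytes)
{
  std::vector<CartesianCommunicator::CommsRequest_t> requests;

  int xmit_to_rank, recv_from_rank;
  Grid.ShiftedRanks(mu, 1, xmit_to_rank, recv_from_rank);   // neighbour ranks one step away in dimension mu

  // Queue the non-blocking send/recv pair; nothing has completed yet.
  Grid.SendToRecvFromBegin(requests, sendbuf, xmit_to_rank, recvbuf, recv_from_rank, bytes);

  // ... further faces could be queued on the same request list here ...

  // Wait for everything that was queued (MPI_Waitall in the MPI build).
  Grid.SendToRecvFromComplete(requests);
}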


@@ -20,7 +20,7 @@ void CartesianCommunicator::GlobalSum(double &){}
 void CartesianCommunicator::GlobalSum(uint32_t &){}
 void CartesianCommunicator::GlobalSumVector(double *,int N){}
-// Basic Halo comms primitive
+// Basic Halo comms primitive -- should never call in single node
 void CartesianCommunicator::SendToRecvFrom(void *xmit,
                                            int dest,
                                            void *recv,
@@ -29,6 +29,19 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
 {
   exit(-1);
 }
+void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
+                                                void *xmit,
+                                                int dest,
+                                                void *recv,
+                                                int from,
+                                                int bytes)
+{
+  exit(-1);
+}
+void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
+{
+  exit(-1);
+}
 void CartesianCommunicator::Barrier(void)
 {
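These stubs exist so that a build without GRID_COMMS_MPI still compiles and links the new interface; with a single rank there is no face to send anywhere, so reaching them is a logic error. A compressed sketch of the type selection that makes this work, following the header hunk above (the standalone framing is illustrative, not from the commit):

#include <vector>
#ifdef GRID_COMMS_MPI
#include <mpi.h>
typedef MPI_Request CommsRequest_t;   // real requests, later handed to MPI_Waitall
#else
typedef int CommsRequest_t;           // dummy type so std::vector<CommsRequest_t> is still a valid parameter
#endif

// Both builds can then share the same declaration; only the MPI build ever runs the body.
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                         void *xmit, int dest, void *recv, int from, int bytes);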


@@ -29,37 +29,45 @@ CartesianCommunicator::CartesianCommunicator(std::vector<int> &processors)
 }
 void CartesianCommunicator::GlobalSum(uint32_t &u){
-  MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
+  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
+  assert(ierr==0);
 }
 void CartesianCommunicator::GlobalSum(float &f){
-  MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
+  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
+  assert(ierr==0);
 }
 void CartesianCommunicator::GlobalSumVector(float *f,int N)
 {
-  MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
+  int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
+  assert(ierr==0);
 }
 void CartesianCommunicator::GlobalSum(double &d)
 {
-  MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
+  int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
+  assert(ierr==0);
 }
 void CartesianCommunicator::GlobalSumVector(double *d,int N)
 {
-  MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
+  int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
+  assert(ierr==0);
 }
 void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
 {
-  MPI_Cart_shift(communicator,dim,shift,&source,&dest);
+  int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest);
+  assert(ierr==0);
 }
 int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
 {
   int rank;
-  MPI_Cart_rank (communicator, &coor[0], &rank);
+  int ierr=MPI_Cart_rank (communicator, &coor[0], &rank);
+  assert(ierr==0);
   return rank;
 }
 void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
 {
   coor.resize(_ndimension);
-  MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]);
+  int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]);
+  assert(ierr==0);
 }
 // Basic Halo comms primitive
@@ -69,36 +77,64 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
                                            int from,
                                            int bytes)
 {
-  MPI_Request reqs[2];
-  MPI_Status OkeyDokey[2];
+  std::vector<CommsRequest_t> reqs(0);
+  SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
+  SendToRecvFromComplete(reqs);
+}
+// Basic Halo comms primitive
+void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
+                                                void *xmit,
+                                                int dest,
+                                                void *recv,
+                                                int from,
+                                                int bytes)
+{
+  MPI_Request xrq;
+  MPI_Request rrq;
   int rank = _processor;
-  MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&reqs[0]);
-  MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&reqs[1]);
-  MPI_Waitall(2,reqs,OkeyDokey);
+  int ierr;
+  ierr=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
+  ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
+  assert(ierr==0);
+  list.push_back(xrq);
+  list.push_back(rrq);
+}
+void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
+{
+  int nreq=list.size();
+  std::vector<MPI_Status> status(nreq);
+  int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
+  assert(ierr==0);
 }
 void CartesianCommunicator::Barrier(void)
 {
-  MPI_Barrier(communicator);
+  int ierr = MPI_Barrier(communicator);
+  assert(ierr==0);
 }
 void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
 {
-  MPI_Bcast(data,
+  int ierr=MPI_Bcast(data,
             bytes,
             MPI_BYTE,
             root,
             communicator);
+  assert(ierr==0);
 }
 void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
 {
-  MPI_Bcast(data,
+  int ierr= MPI_Bcast(data,
             bytes,
             MPI_BYTE,
             root,
             MPI_COMM_WORLD);
+  assert(ierr==0);
 }
 }
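One detail of the new ierr checks worth noting: MPI's default error handler is MPI_ERRORS_ARE_FATAL, so a failing call normally aborts before it can return a code, and the asserts above compare against 0, which is the value of MPI_SUCCESS. A standalone sketch, not part of the commit, of how such checks become observable if the communicator's error handler is switched to MPI_ERRORS_RETURN:

#include <mpi.h>
#include <cassert>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);

  // Ask MPI to return error codes instead of aborting, so ierr can be nonzero.
  MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

  double d = 1.0;
  int ierr = MPI_Allreduce(MPI_IN_PLACE, &d, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  assert(ierr == MPI_SUCCESS);   // same pattern as the communicator code above

  MPI_Finalize();
  return 0;
}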