mirror of https://github.com/paboyle/Grid.git synced 2025-06-12 20:27:06 +01:00

Internal SHM comms in non-simd directions working

Need to fix simd directions
commit c190221fd3 (parent 0fcd2e7188)
Author: azusayamaguchi
Date:   2016-10-22 18:14:27 +01:00

16 changed files with 1729 additions and 1739 deletions

View File

@@ -80,7 +80,6 @@ class CartesianCommunicator {
void * ShmCommBuf;
std::vector<void *> ShmCommBufs;
std::vector<void *> ShmStencilBufs;
int WorldRank;
int WorldSize;
@@ -105,6 +104,10 @@ class CartesianCommunicator {
int RankFromProcessorCoor(std::vector<int> &coor);
void ProcessorCoorFromRank(int rank,std::vector<int> &coor);
// Helper function for SHM Windows in MPI3
void *ShmBufferSelf(void);
void *ShmBuffer(int rank);
/////////////////////////////////
// Grid information queries
/////////////////////////////////
@@ -173,6 +176,16 @@ class CartesianCommunicator {
int recv_from_rank,
int bytes);
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
void StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes);
void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall)
{
SendToRecvFromComplete(waitall);
}
////////////////////////////////////////////////////////////
// Barrier
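The header changes above add a small shared-memory (SHM) buffer API to CartesianCommunicator: ShmBufferSelf() returns this rank's segment of the node-local communications window, and ShmBuffer(rank) returns a pointer into a peer rank's segment, or NULL when that rank lives on another node (the non-MPI3 back-ends below always return NULL). A minimal caller-side sketch, assuming a communicator instance named comm; the helper itself is illustrative and not part of this commit:

    #include <cstring>

    // Hypothetical use of the new accessors: write into a neighbour's window
    // directly when it is on-node, otherwise fall back to the MPI path.
    void PutToNeighbour(CartesianCommunicator &comm,int dest,void *xmit,int bytes)
    {
      void *peer = comm.ShmBuffer(dest);   // NULL unless dest shares this node
      if ( peer != NULL ) {
        memcpy(peer,xmit,bytes);           // intra-node: plain store into shared memory
      } else {
        // inter-node (or non-MPI3 build): post MPI_Isend/MPI_Irecv as before
      }
    }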

View File

@@ -67,6 +67,14 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
assert(Size==_Nprocessors);
}
void *CartesianCommunicator::ShmBufferSelf(void)
{
return NULL;
}
void *CartesianCommunicator::ShmBuffer(int rank)
{
return NULL;
}
void CartesianCommunicator::GlobalSum(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
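The context line above shows the communicator's global-reduction idiom: an in-place MPI_Allreduce over the Cartesian communicator. For reference, the same idiom in a free-standing program (all names here are ours):

    #include <mpi.h>
    #include <cstdint>
    #include <cassert>

    int main(int argc,char **argv)
    {
      MPI_Init(&argc,&argv);
      uint32_t u = 1;   // every rank contributes one
      int ierr = MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,MPI_COMM_WORLD);
      assert(ierr==0);  // u now equals the number of ranks
      MPI_Finalize();
      return 0;
    }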

View File

@@ -197,10 +197,10 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Verbose for now
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
std::cout<< "Ranks per node "<< ShmSize << std::endl;
std::cout<< "Nodes "<< GroupSize << std::endl;
std::cout<< "Ranks "<< WorldSize << std::endl;
std::cout<< "Shm CommBuf "<< ShmCommBuf << std::endl;
std::cout<<GridLogMessage<< "MPI-3 configuration: Ranks per node "<< ShmSize ;
std::cout<< " Nodes "<< GroupSize;
std::cout<< " Ranks "<< WorldSize;
std::cout<< " Shm CommBuf address"<< std::hex <<ShmCommBuf << std::dec<<std::endl;
// Done
ShmSetup=1;
@@ -208,12 +208,10 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
}
ShmCommBufs.resize(ShmSize);
ShmStencilBufs.resize(ShmSize);
for(int r=0;r<ShmSize;r++){
MPI_Aint sz;
int dsp_unit;
MPI_Win_shared_query (ShmWindow, r, &sz, &dsp_unit, &ShmCommBufs[r]);
ShmStencilBufs[r] = (void *) ((uint64_t)ShmCommBufs[r]+MAX_MPI_SHM_BYTES/4);
}
////////////////////////////////////////////////////////////////
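The loop above fills ShmCommBufs[] by asking the shared window for every local rank's base address with MPI_Win_shared_query, then derives ShmStencilBufs[] at a fixed offset (MAX_MPI_SHM_BYTES/4) inside each segment. A free-standing sketch of the underlying MPI-3 idiom, with an illustrative segment size; the split, allocation, and query calls are the same ones used here:

    #include <mpi.h>
    #include <vector>

    int main(int argc,char **argv)
    {
      MPI_Init(&argc,&argv);
      MPI_Comm ShmComm;   // all ranks that can share memory, i.e. one node
      MPI_Comm_split_type(MPI_COMM_WORLD,MPI_COMM_TYPE_SHARED,0,MPI_INFO_NULL,&ShmComm);
      int ShmRank,ShmSize;
      MPI_Comm_rank(ShmComm,&ShmRank);
      MPI_Comm_size(ShmComm,&ShmSize);

      MPI_Aint bytes = 1<<20;   // illustrative; Grid uses MAX_MPI_SHM_BYTES
      void *ShmCommBuf;
      MPI_Win ShmWindow;
      MPI_Win_allocate_shared(bytes,1,MPI_INFO_NULL,ShmComm,&ShmCommBuf,&ShmWindow);

      std::vector<void *> ShmCommBufs(ShmSize);
      for(int r=0;r<ShmSize;r++){   // each rank learns every local peer's base pointer
        MPI_Aint sz; int dsp_unit;
        MPI_Win_shared_query(ShmWindow,r,&sz,&dsp_unit,&ShmCommBufs[r]);
      }

      MPI_Win_free(&ShmWindow);
      MPI_Finalize();
      return 0;
    }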
@@ -240,6 +238,7 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
ShmCoor.resize(_ndimension);
GroupCoor.resize(_ndimension);
WorldCoor.resize(_ndimension);
for(int l2=0;l2<log2size;l2++){
while ( WorldDims[dim] / ShmDims[dim] <= 1 ) dim=(dim+1)%_ndimension;
ShmDims[dim]*=2;
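This hunk is truncated by the diff view; the loop it belongs to distributes the node-local rank count across the lattice dimensions, doubling one Shm dimension per factor of two and skipping dimensions that cannot be halved further. Reconstructed as a free-standing sketch (assuming ShmSize = 2^log2size and that the world dimensions carry enough factors of two to absorb them all):

    #include <vector>

    // Sketch: factor the node-local rank count across the processor grid.
    std::vector<int> SplitShmDims(const std::vector<int> &WorldDims,int log2size)
    {
      int ndimension = WorldDims.size();
      std::vector<int> ShmDims(ndimension,1);
      int dim=0;
      for(int l2=0;l2<log2size;l2++){
        // advance past dimensions that are already fully split
        while ( WorldDims[dim] / ShmDims[dim] <= 1 ) dim=(dim+1)%ndimension;
        ShmDims[dim]*=2;   // place this factor of two in dimension "dim"
      }
      return ShmDims;
    }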
@@ -347,6 +346,21 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
}
}
void *CartesianCommunicator::ShmBufferSelf(void)
{
return ShmCommBufs[ShmRank];
}
void *CartesianCommunicator::ShmBuffer(int rank)
{
int gpeer = GroupRanks[rank];
if (gpeer == MPI_UNDEFINED){
return NULL;
} else {
return ShmCommBufs[gpeer];
}
}
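ShmBuffer() leans on GroupRanks[], a table mapping world ranks to node-local ranks, with MPI_UNDEFINED marking off-node ranks. The table is built elsewhere in this file; a hedged sketch of how such a mapping can be produced with MPI_Group_translate_ranks:

    #include <mpi.h>
    #include <vector>

    // Sketch: world rank -> node-local rank; off-node ranks come back MPI_UNDEFINED.
    std::vector<int> MakeGroupRanks(MPI_Comm world,MPI_Comm ShmComm)
    {
      int WorldSize;
      MPI_Comm_size(world,&WorldSize);
      MPI_Group WorldGroup,ShmGroup;
      MPI_Comm_group(world,&WorldGroup);
      MPI_Comm_group(ShmComm,&ShmGroup);
      std::vector<int> WorldRanks(WorldSize),GroupRanks(WorldSize);
      for(int r=0;r<WorldSize;r++) WorldRanks[r]=r;
      MPI_Group_translate_ranks(WorldGroup,WorldSize,&WorldRanks[0],ShmGroup,&GroupRanks[0]);
      return GroupRanks;
    }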
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
@@ -355,13 +369,11 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
int from,
int bytes)
{
#undef SHM_USE_BCOPY
MPI_Request xrq;
MPI_Request rrq;
static int sequence;
int rank = _processor;
int ierr;
int tag;
int check;
@@ -370,6 +382,7 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
assert(from != _processor);
int gdest = GroupRanks[dest];
int gfrom = GroupRanks[from];
int gme = GroupRanks[_processor];
sequence++;
@@ -379,30 +392,23 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
int small = (bytes<MAX_MPI_SHM_BYTES);
#ifndef SHM_USE_BCOPY
typedef vRealD T;
int words = bytes/sizeof(T);
assert(((size_t)bytes &(sizeof(T)-1))==0);
// assert(((size_t)xmit &(sizeof(T)-1))==0);
// assert(((size_t)recv &(sizeof(T)-1))==0);
#endif
assert(((size_t)bytes &(sizeof(T)-1))==0);
assert(gme == ShmRank);
// std::cerr << "proc dest from gme gdest "<<_processor<<" "<<dest <<" "<< from <<" "<<gme<<" "<< gdest<<std::endl; Barrier();
if ( small && (dest !=MPI_UNDEFINED) ) {
if ( small && (gdest !=MPI_UNDEFINED) ) {
assert(gme != gdest);
#ifdef SHM_USE_BCOPY
bcopy(xmit,to_ptr,bytes);
#else
T *ip = (T *)xmit;
T *op = (T *)to_ptr;
PARALLEL_FOR_LOOP
PARALLEL_FOR_LOOP
for(int w=0;w<words;w++) {
vstream(op[w],ip[w]);
}
#endif
bcopy(&_processor,&to_ptr[bytes],sizeof(_processor));
bcopy(& sequence,&to_ptr[bytes+4],sizeof(sequence));
} else {
@@ -411,24 +417,17 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
list.push_back(xrq);
}
// std::cout << "Syncing "<<std::endl; Barrier();
MPI_Win_sync (ShmWindow);
MPI_Barrier (ShmComm);
MPI_Win_sync (ShmWindow);
// std::cout << "Receiving "<<std::endl; Barrier();
if (small && (from !=MPI_UNDEFINED) ) {
#ifdef SHM_USE_BCOPY
bcopy(from_ptr,recv,bytes);
#else
if (small && (gfrom !=MPI_UNDEFINED) ) {
T *ip = (T *)from_ptr;
T *op = (T *)recv;
PARALLEL_FOR_LOOP
PARALLEL_FOR_LOOP
for(int w=0;w<words;w++) {
vstream(op[w],ip[w]);
}
#endif
bcopy(&from_ptr[bytes] ,&tag ,sizeof(tag));
bcopy(&from_ptr[bytes+4],&check,sizeof(check));
assert(check==sequence);
@@ -438,28 +437,52 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
assert(ierr==0);
list.push_back(rrq);
}
// std::cout << "Syncing"<<std::endl; Barrier();
MPI_Win_sync (ShmWindow);
MPI_Barrier (ShmComm);
MPI_Win_sync (ShmWindow);
}
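Both sides of the exchange close with the same MPI_Win_sync / MPI_Barrier / MPI_Win_sync sandwich: the first sync publishes this rank's stores to the window, the barrier brings every node-local rank to the same point, and the second sync makes the peers' stores visible before anyone reads. Wrapped as a helper it might read as follows (illustrative; MPI_Win_sync requires an open passive-target epoch, e.g. via MPI_Win_lock_all, which this sketch assumes):

    // Hypothetical fence around the pattern used above.
    static inline void ShmFence(MPI_Win ShmWindow,MPI_Comm ShmComm)
    {
      MPI_Win_sync (ShmWindow);   // flush my stores to the shared window
      MPI_Barrier  (ShmComm);     // rendezvous of all node-local ranks
      MPI_Win_sync (ShmWindow);   // pick up the peers' stores
    }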
#if 0
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
MPI_Request xrq;
MPI_Request rrq;
int rank = _processor;
int ierr;
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);
list.push_back(xrq);
list.push_back(rrq);
#endif
int ierr;
assert(dest != _processor);
assert(from != _processor);
int gdest = GroupRanks[dest];
int gfrom = GroupRanks[from];
int gme = GroupRanks[_processor];
assert(gme == ShmRank);
if ( gdest == MPI_UNDEFINED ) {
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
assert(ierr==0);
list.push_back(xrq);
}
if ( gfrom ==MPI_UNDEFINED) {
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);
list.push_back(rrq);
}
MPI_Win_sync (ShmWindow);
MPI_Barrier (ShmComm);
MPI_Win_sync (ShmWindow);
}
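The new StencilSendToRecvFromBegin only posts MPI requests for off-node partners (gdest or gfrom equal to MPI_UNDEFINED); on-node traffic is meant to flow through the shared window, so the request list may come back empty. A hedged call-site sketch, assuming xmit/recv halo buffers and the rank variables are already set up:

    std::vector<CommsRequest_t> reqs;
    comm.StencilSendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
    // ... compute on interior sites while any off-node transfers proceed ...
    comm.StencilSendToRecvFromComplete(reqs);  // currently forwards to SendToRecvFromComplete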
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
int nreq=list.size();

View File

@@ -33,6 +33,14 @@ void CartesianCommunicator::Init(int *argc, char *** arv)
}
int Rank(void ){ return 0; };
void *CartesianCommunicator::ShmBufferSelf(void)
{
return NULL;
}
void *CartesianCommunicator::ShmBuffer(int rank)
{
return NULL;
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{

View File

@@ -50,6 +50,14 @@ typedef struct HandShake_t {
static Vector< HandShake > XConnections;
static Vector< HandShake > RConnections;
void *CartesianCommunicator::ShmBufferSelf(void)
{
return NULL;
}
void *CartesianCommunicator::ShmBuffer(int rank)
{
return NULL;
}
void CartesianCommunicator::Init(int *argc, char ***argv) {
shmem_init();
XConnections.resize(shmem_n_pes());
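For completeness, the SHMEM back-end initialization shown above uses standard OpenSHMEM entry points; a minimal stand-alone equivalent looks like this (header name and finalize call vary by implementation vintage):

    #include <shmem.h>

    int main(void)
    {
      shmem_init();               // join the PE set, analogous to MPI_Init
      int npes = shmem_n_pes();   // number of processing elements
      int me   = shmem_my_pe();   // this PE's index
      (void)npes; (void)me;
      shmem_finalize();
      return 0;
    }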