mirror of https://github.com/paboyle/Grid.git
mpi, mpi3, shmem all compile.
mpi, mpi3 pass single-node multi-rank runs.
@@ -141,7 +141,7 @@ public:
    if ( bcast != ptr ) {
      std::printf("inconsistent alloc pe %d %lx %lx \n",shmem_my_pe(),bcast,ptr);std::fflush(stdout);
      BACKTRACEFILE();
      //      BACKTRACEFILE();
      exit(0);
    }
    assert( bcast == (void *) ptr);

@@ -32,8 +32,6 @@

 #include <Grid/stencil/Lebesgue.h>   // subdir aggregate

const int ShmDirectCopy = 1;

 //////////////////////////////////////////////////////////////////////////////////////////
 // Must not lose sight that goal is to be able to construct really efficient
 // gather to a point stencil code. CSHIFT is not the best way, so need

@@ -170,13 +168,13 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
    reqs.resize(Packets.size());
    commtime-=usecond();
    for(int i=0;i<Packets.size();i++){
      if( ShmDirectCopy ) {
        _grid->StencilSendToRecvFromBegin(reqs[i],
                                          Packets[i].send_buf,
                                          Packets[i].to_rank,
                                          Packets[i].recv_buf,
                                          Packets[i].from_rank,
                                          Packets[i].bytes);
        /*
      }else{
        _grid->SendToRecvFromBegin(reqs[i],
                                   Packets[i].send_buf,
@@ -185,17 +183,19 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
                                   Packets[i].from_rank,
                                   Packets[i].bytes);
      }
        */
    }
    commtime+=usecond();
  }
  void CommunicateComplete(std::vector<std::vector<CommsRequest_t> > &reqs)
  {
    commtime-=usecond();

    for(int i=0;i<Packets.size();i++){
      if( ShmDirectCopy ) 
      //      if( ShmDirectCopy ) 
        _grid->StencilSendToRecvFromComplete(reqs[i]);
      else 
        _grid->SendToRecvFromComplete(reqs[i]);
        //      else 
        //      _grid->SendToRecvFromComplete(reqs[i]);
    }
    commtime+=usecond();
  }
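
Taken together, the two CommunicateBegin/CommunicateComplete hunks drop the ShmDirectCopy branch from the stencil: both routines now call the Stencil* entry points unconditionally, and the plain SendToRecvFrom path is left commented out. The choice between a direct shared-memory copy and a real MPI transfer therefore has to be made inside the communicator. The sketch below uses member names taken from this diff, but the body is an assumption about that contract, not Grid's actual implementation.

#include <cstring>
#include <vector>

// Sketch only: how a communicator could route one halo transfer once the caller
// no longer branches on ShmDirectCopy. CommsRequest_t is stood in for by int.
struct CommSketch {
  // Assumed to return a locally mapped alias of p inside rank's shared window,
  // or NULL when rank is not reachable through shared memory.
  void *ShmBufferTranslate(int rank, void *p);
  void  SendToRecvFromBegin(std::vector<int> &reqs, void *xmit, int dest,
                            void *recv, int from, int bytes);

  void StencilSendToRecvFromBegin(std::vector<int> &reqs, void *xmit, int dest,
                                  void *recv, int from, int bytes)
  {
    void *peer = ShmBufferTranslate(dest, recv);      // NULL means dest is off-node
    if (peer) std::memcpy(peer, xmit, bytes);         // direct copy into the neighbour's buffer
    else      SendToRecvFromBegin(reqs, xmit, dest, recv, from, bytes); // ordinary MPI path
  }
};
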
@@ -253,8 +253,6 @@ PARALLEL_FOR_LOOP
  // Flat vector, change layout for cache friendly.
  Vector<StencilEntry>  _entries;

  inline StencilEntry * GetEntry(int &ptype,int point,int osite) { ptype = _permute_type[point]; return & _entries[point+_npoints*osite]; }

  void PrecomputeByteOffsets(void){
    for(int i=0;i<_entries.size();i++){
      if( _entries[i]._is_local ) {
@@ -265,9 +263,7 @@ PARALLEL_FOR_LOOP
    }
  };

  inline uint64_t Touch(int ent) {
    //   _mm_prefetch((char *)&_entries[ent],_MM_HINT_T0);
  }
  inline StencilEntry * GetEntry(int &ptype,int point,int osite) { ptype = _permute_type[point]; return & _entries[point+_npoints*osite]; }
  inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) {
    uint64_t cbase = (uint64_t)&u_recv_buf_p[0];
    local = _entries[ent]._is_local;

@@ -685,7 +681,9 @@ PARALLEL_FOR_LOOP
    _grid->StencilBarrier();
    HaloGather(source,compress);
    this->CommunicateBegin(reqs);
    _grid->StencilBarrier();
    this->CommunicateComplete(reqs);
    _grid->StencilBarrier();
    CommsMerge(); // spins
  }

@@ -823,11 +821,13 @@ PARALLEL_FOR_LOOP

        cobj *send_buf = (cobj *)_grid->ShmBufferTranslate(xmit_to_rank,u_recv_buf_p);
        if ( (ShmDirectCopy==0)||send_buf==NULL ) { 
          cobj *send_buf = u_send_buf_p;
        if ( (send_buf==NULL) ) { 
          send_buf = u_send_buf_p;
        }

        //      std::cout << " send_bufs  "<<std::hex<< send_buf <<" ubp "<<u_send_buf_p <<std::dec<<std::endl;
        t_data-=usecond();
        assert(u_send_buf_p!=NULL);
        assert(send_buf!=NULL);
        Gather_plane_simple_table         (face_table[face_idx],rhs,send_buf,compress,u_comm_offset,so);  face_idx++;
        t_data+=usecond();

@@ -931,7 +931,8 @@ PARALLEL_FOR_LOOP
            _grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank); 

            scalar_object *shm = (scalar_object *) _grid->ShmBufferTranslate(recv_from_rank,sp);
            if ((ShmDirectCopy==0)||(shm==NULL)) { 
            //      if ((ShmDirectCopy==0)||(shm==NULL)) { 
            if (shm==NULL) { 
              shm = rp;
            } 

@@ -69,7 +69,7 @@ int                      CartesianCommunicator::ProcessorCount(void)    { return
////////////////////////////////////////////////////////////////////////////////
// very VERY rarely (Log, serial RNG) we need world without a grid
////////////////////////////////////////////////////////////////////////////////
int  CartesianCommunicator::RankWorld(void) { return WorldRank; };
int  CartesianCommunicator::RankWorld(void){ return WorldRank; };
int CartesianCommunicator::Ranks    (void) { return WorldSize; };
int CartesianCommunicator::Nodes    (void) { return GroupSize; };
int CartesianCommunicator::Cores    (void) { return ShmSize;   };

@@ -108,22 +108,22 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque
{
  SendToRecvFromComplete(waitall);
}
void StencilBarrier(void){};
void CartesianCommunicator::StencilBarrier(void){};

commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector;

void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; }

void *CartesianCommunicator::ShmBuffer(int rank) {
  if (rank != ShmRank ) return NULL;
  else                  return ShmCommBuf;
  return NULL;
}
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { 
  if (rank != ShmRank ) return NULL;
  else                  return local_p;
  return NULL;
}
void CartesianCommunicator::ShmInitGeneric(void){
  ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES);
  ShmCommBuf=(void *)&ShmBufStorageVector[0];
  std::cout << "allocated persistent buffer"<<std::hex << ShmCommBuf << std::dec<<std::endl;
}

#endif
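
This hunk is the shared-memory-free fallback communicator: every rank is alone in its shared-memory group, so ShmBuffer and ShmBufferTranslate hand back the local buffer or pointer only for the caller's own rank and NULL for everyone else, and ShmInitGeneric simply backs ShmCommBuf with a heap vector of MAX_MPI_SHM_BYTES. A NULL return is exactly what tells callers such as the gather code above to fall back to their own send buffer. Below is a self-contained model of that contract, with hypothetical names.

#include <cstddef>
#include <vector>

// Illustrative stand-in for the generic single-rank-per-node communicator above.
struct GenericCommModel {
  int ShmRank = 0;                        // our rank within the shared-memory group
  std::vector<unsigned char> storage;     // plays the role of ShmBufStorageVector
  void *ShmCommBuf = nullptr;

  void ShmInitGeneric(std::size_t bytes) {   // cf. ShmInitGeneric() in the hunk above
    storage.resize(bytes);
    ShmCommBuf = storage.data();
  }
  // Only our own rank is reachable "through shared memory"; anyone else gets NULL,
  // which tells the caller to take the ordinary send/receive path instead.
  void *ShmBuffer(int rank)                   { return rank == ShmRank ? ShmCommBuf : nullptr; }
  void *ShmBufferTranslate(int rank, void *p) { return rank == ShmRank ? p          : nullptr; }
};
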
@@ -400,7 +400,9 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
                                                int from,
                                                int bytes)
{
#if 1
#if 0
  this->StencilBarrier();

  MPI_Request xrq;
  MPI_Request rrq;

@@ -440,9 +442,6 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
PARALLEL_FOR_LOOP 
    for(int w=0;w<words;w++) {
      op[w]=ip[w];
      if ( w == 0 ) { 
        //      std::cout << " xmit "<< ShmRank <<" -> "<< gdest<<" " <<std::hex<<op[w]<<std::dec<<std::endl;
      }
    }

    bcopy(&_processor,&to_ptr[bytes],sizeof(_processor));
@@ -452,20 +451,15 @@ PARALLEL_FOR_LOOP
    assert(ierr==0);
    list.push_back(xrq);
  }

  MPI_Win_sync (ShmWindow);   
  MPI_Barrier  (ShmComm);
  MPI_Win_sync (ShmWindow);   

  this->StencilBarrier();

  if (small && (gfrom !=MPI_UNDEFINED) ) {
    T *ip = (T *)from_ptr;
    T *op = (T *)recv;
PARALLEL_FOR_LOOP 
    for(int w=0;w<words;w++) {
      op[w]=ip[w];
      if ( w == 0 ) { 
        //      std::cout << " recv "<< ShmRank <<" <- "<< gfrom<<" " <<std::hex<<op[w]<<std::dec<<std::endl;
      }
    }
    bcopy(&from_ptr[bytes]  ,&tag  ,sizeof(tag));
    bcopy(&from_ptr[bytes+4],&check,sizeof(check));
@@ -477,9 +471,8 @@ PARALLEL_FOR_LOOP
    list.push_back(rrq);
  }

  MPI_Win_sync (ShmWindow);   
  MPI_Barrier  (ShmComm);
  MPI_Win_sync (ShmWindow);   
  this->StencilBarrier();

#else
  MPI_Request xrq;
  MPI_Request rrq;
@@ -528,9 +521,6 @@ void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_
    list.push_back(rrq);
  }

  StencilBarrier();

}
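
In the MPI3 hunks above, the explicit MPI_Win_sync / MPI_Barrier / MPI_Win_sync sequences around the small-message shared-window copies are collapsed into single StencilBarrier() calls. The commit does not show StencilBarrier's body; a plausible reading, stated here only as an assumption inferred from the lines it replaces, is that it wraps exactly that window-synchronisation pattern.

#include <mpi.h>

// Assumed shape of the barrier, inferred from the removed MPI_Win_sync/MPI_Barrier
// lines above; not shown in this commit. Only meaningful inside a passive-target
// (MPI_Win_lock_all) epoch on ShmWindow.
void StencilBarrierSketch(MPI_Win ShmWindow, MPI_Comm ShmComm)
{
  MPI_Win_sync(ShmWindow);   // make this rank's stores to the shared window visible
  MPI_Barrier(ShmComm);      // every rank on the node reaches the same point
  MPI_Win_sync(ShmWindow);   // pick up the other ranks' stores before continuing
}
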
@@ -47,8 +47,8 @@ void CartesianCommunicator::Init(int *argc, char *** arv)
  WorldSize = 1;
  ShmRank=0;
  ShmSize=1;
  GroupRank=_WorldRank;
  GroupSize=_WorldSize;
  GroupRank=WorldRank;
  GroupSize=WorldSize;
  Slave    =0;
  ShmInitGeneric();
}

@@ -44,13 +44,16 @@ namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////////////
// Info that is setup once and indept of cartesian layout
///////////////////////////////////////////////////////////////////////////////////////////////////
int CartesianCommunicator::ShmRank;
int CartesianCommunicator::ShmSize;
int CartesianCommunicator::GroupRank;
int CartesianCommunicator::GroupSize;
int CartesianCommunicator::WorldRank;
int CartesianCommunicator::WorldSize;
int CartesianCommunicator::Slave;

typedef struct HandShake_t { 
  uint64_t seq_local;
  uint64_t seq_remote;
} HandShake;

static Vector< HandShake > XConnections;
static Vector< HandShake > RConnections;

void CartesianCommunicator::Init(int *argc, char ***argv) {
  shmem_init();
@@ -62,37 +65,17 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
    RConnections[pe].seq_local = 0;
    RConnections[pe].seq_remote= 0;
  }
  WorldSize = shmem_n_pes();
  WorldRank = shmem_my_pe();
  ShmRank=0;
  ShmSize=1;
  GroupRank=WorldRank;
  GroupSize=WorldSize;
  Slave    =0;
  shmem_barrier_all();
  ShmInitGeneric();
}

// Should error check all MPI calls.
void CartesianCommunicator::Init(int *argc, char ***argv) {
  int flag;
  MPI_Initialized(&flag); // needed to coexist with other libs apparently
  if ( !flag ) {
    MPI_Init(argc,argv);
    MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
    MPI_Comm_rank(communicator_world,&_WorldRank);
    MPI_Comm_size(communicator_world,&_WorldSize);
    _ShmRank=0;
    _ShmSize=1;
    _GroupRank=_WorldRank;
    _GroupSize=_WorldSize;
    _Slave    =0;
  }
}

typedef struct HandShake_t { 
  uint64_t seq_local;
  uint64_t seq_remote;
} HandShake;

static Vector< HandShake > XConnections;
static Vector< HandShake > RConnections;

CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
  _ndimension = processors.size();

@@ -261,12 +244,9 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,

  if ( _processor == sender ) {

    printf("Sender SHMEM pt2pt %d -> %d\n",sender,receiver);
    // Check he has posted a receive
    while(SendSeq->seq_remote == SendSeq->seq_local);

    printf("Sender receive %d posted\n",sender,receiver);

    // Advance our send count
    seq = ++(SendSeq->seq_local);

@@ -275,26 +255,19 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
    shmem_putmem(recv,xmit,bytes,receiver);
    shmem_fence();

    printf("Sender sent payload %d\n",seq);
    //Notify him we're done
    shmem_putmem((void *)&(RecvSeq->seq_remote),&seq,sizeof(seq),receiver);
    shmem_fence();
    printf("Sender ringing door bell  %d\n",seq);
  }
  if ( _processor == receiver ) {

    printf("Receiver SHMEM pt2pt %d->%d\n",sender,receiver);
    // Post a receive
    seq = ++(RecvSeq->seq_local);
    shmem_putmem((void *)&(SendSeq->seq_remote),&seq,sizeof(seq),sender);

    printf("Receiver Opening letter box %d\n",seq);

    // Now wait until he has advanced our reception counter
    while(RecvSeq->seq_remote != RecvSeq->seq_local);

    printf("Receiver Got the mail %d\n",seq);
  }
}
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user