Mirror of https://github.com/paboyle/Grid.git (synced 2024-11-10 07:55:35 +00:00)

Commit b94478fa51 (parent b6a65059a2)

    mpi, mpi3, shmem all compile.
    mpi, mpi3 pass single node multi-rank
@@ -141,7 +141,7 @@ public:
 
     if ( bcast != ptr ) {
       std::printf("inconsistent alloc pe %d %lx %lx \n",shmem_my_pe(),bcast,ptr);std::fflush(stdout);
-      BACKTRACEFILE();
+      // BACKTRACEFILE();
       exit(0);
     }
     assert( bcast == (void *) ptr);
@@ -32,8 +32,6 @@
 
 #include <Grid/stencil/Lebesgue.h> // subdir aggregate
 
-const int ShmDirectCopy = 1;
-
 //////////////////////////////////////////////////////////////////////////////////////////
 // Must not lose sight that goal is to be able to construct really efficient
 // gather to a point stencil code. CSHIFT is not the best way, so need
@@ -170,13 +168,13 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
     reqs.resize(Packets.size());
     commtime-=usecond();
     for(int i=0;i<Packets.size();i++){
-      if( ShmDirectCopy ) {
       _grid->StencilSendToRecvFromBegin(reqs[i],
                                         Packets[i].send_buf,
                                         Packets[i].to_rank,
                                         Packets[i].recv_buf,
                                         Packets[i].from_rank,
                                         Packets[i].bytes);
+      /*
       }else{
       _grid->SendToRecvFromBegin(reqs[i],
                                  Packets[i].send_buf,
@@ -185,17 +183,19 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
                                  Packets[i].from_rank,
                                  Packets[i].bytes);
       }
+      */
     }
     commtime+=usecond();
   }
   void CommunicateComplete(std::vector<std::vector<CommsRequest_t> > &reqs)
   {
     commtime-=usecond();
+
     for(int i=0;i<Packets.size();i++){
-      if( ShmDirectCopy )
+      //      if( ShmDirectCopy )
       _grid->StencilSendToRecvFromComplete(reqs[i]);
-      else
-      _grid->SendToRecvFromComplete(reqs[i]);
+      //      else
+      //      _grid->SendToRecvFromComplete(reqs[i]);
     }
     commtime+=usecond();
   }
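Aside: the effect of the two hunks above is that the compile-time ShmDirectCopy switch is gone and CommunicateBegin/CommunicateComplete always call the stencil-specific communicator entry points; the plain SendToRecvFrom branch is only commented out, not yet deleted. Stripped of the commented-out code and timers, the intended shape of the two methods (a sketch using the names from the diff, not a verbatim copy of the file) is:

    // CommunicateBegin: one StencilSendToRecvFromBegin per packet.
    void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs) {
      reqs.resize(Packets.size());
      for (int i = 0; i < Packets.size(); i++) {
        _grid->StencilSendToRecvFromBegin(reqs[i],
                                          Packets[i].send_buf, Packets[i].to_rank,
                                          Packets[i].recv_buf, Packets[i].from_rank,
                                          Packets[i].bytes);
      }
    }
    // CommunicateComplete: wait on the matching requests.
    void CommunicateComplete(std::vector<std::vector<CommsRequest_t> > &reqs) {
      for (int i = 0; i < Packets.size(); i++) {
        _grid->StencilSendToRecvFromComplete(reqs[i]);
      }
    }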
@@ -253,8 +253,6 @@ PARALLEL_FOR_LOOP
   // Flat vector, change layout for cache friendly.
   Vector<StencilEntry> _entries;
 
-  inline StencilEntry * GetEntry(int &ptype,int point,int osite) { ptype = _permute_type[point]; return & _entries[point+_npoints*osite]; }
-
   void PrecomputeByteOffsets(void){
     for(int i=0;i<_entries.size();i++){
       if( _entries[i]._is_local ) {
@@ -265,9 +263,7 @@ PARALLEL_FOR_LOOP
     }
   };
 
-  inline uint64_t Touch(int ent) {
-    //    _mm_prefetch((char *)&_entries[ent],_MM_HINT_T0);
-  }
+  inline StencilEntry * GetEntry(int &ptype,int point,int osite) { ptype = _permute_type[point]; return & _entries[point+_npoints*osite]; }
   inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) {
     uint64_t cbase = (uint64_t)&u_recv_buf_p[0];
     local = _entries[ent]._is_local;
@@ -685,7 +681,9 @@ PARALLEL_FOR_LOOP
     _grid->StencilBarrier();
     HaloGather(source,compress);
     this->CommunicateBegin(reqs);
+    _grid->StencilBarrier();
     this->CommunicateComplete(reqs);
+    _grid->StencilBarrier();
     CommsMerge(); // spins
   }
 
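Aside: the two added StencilBarrier() calls bracket the completion phase. A plausible reading, given the direct shared-memory copies introduced above, is that no rank may merge its halos or reuse its buffers until every node-local neighbour has finished writing into the shared receive buffers. The resulting exchange sequence for this method is:

    _grid->StencilBarrier();         // sync the shared-memory node before packing
    HaloGather(source,compress);     // pack faces into the (shared) send buffers
    this->CommunicateBegin(reqs);    // direct shm copies and/or MPI sends
    _grid->StencilBarrier();         // added: all intra-node copies issued
    this->CommunicateComplete(reqs); // wait for off-node traffic
    _grid->StencilBarrier();         // added: all data visible before merging
    CommsMerge();                    // spins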
@@ -823,11 +821,13 @@ PARALLEL_FOR_LOOP
 
 
           cobj *send_buf = (cobj *)_grid->ShmBufferTranslate(xmit_to_rank,u_recv_buf_p);
-          if ( (ShmDirectCopy==0)||send_buf==NULL ) {
-            cobj *send_buf = u_send_buf_p;
+          if ( (send_buf==NULL) ) {
+            send_buf = u_send_buf_p;
           }
+          // std::cout << " send_bufs "<<std::hex<< send_buf <<" ubp "<<u_send_buf_p <<std::dec<<std::endl;
           t_data-=usecond();
+          assert(u_send_buf_p!=NULL);
+          assert(send_buf!=NULL);
           Gather_plane_simple_table (face_table[face_idx],rhs,send_buf,compress,u_comm_offset,so); face_idx++;
           t_data+=usecond();
 
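Aside: the gather path now decides between direct copy and network transfer purely from the pointer returned by ShmBufferTranslate: a non-NULL return is a directly addressable view of the destination rank's receive buffer, while NULL means the peer sits outside the shared-memory domain (or the build has no shared window) and the face is packed into the rank's own send buffer instead. A minimal sketch of the fallback idiom, using the same names as the hunk above:

    // cobj: compressed site object; u_send_buf_p / u_recv_buf_p: persistent comm buffers.
    cobj *send_buf = (cobj *)_grid->ShmBufferTranslate(xmit_to_rank, u_recv_buf_p);
    if (send_buf == NULL) {
      send_buf = u_send_buf_p;   // no shared window to that rank: gather locally, send via MPI
    }
    assert(send_buf != NULL);
    Gather_plane_simple_table(face_table[face_idx], rhs, send_buf, compress, u_comm_offset, so);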
@@ -931,7 +931,8 @@ PARALLEL_FOR_LOOP
           _grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
 
           scalar_object *shm = (scalar_object *) _grid->ShmBufferTranslate(recv_from_rank,sp);
-          if ((ShmDirectCopy==0)||(shm==NULL)) {
+          // if ((ShmDirectCopy==0)||(shm==NULL)) {
+          if (shm==NULL) {
             shm = rp;
           }
 
@@ -69,7 +69,7 @@ int CartesianCommunicator::ProcessorCount(void) { return
 ////////////////////////////////////////////////////////////////////////////////
 // very VERY rarely (Log, serial RNG) we need world without a grid
 ////////////////////////////////////////////////////////////////////////////////
-int CartesianCommunicator::RankWorld(void) { return WorldRank; };
+int CartesianCommunicator::RankWorld(void){ return WorldRank; };
 int CartesianCommunicator::Ranks (void) { return WorldSize; };
 int CartesianCommunicator::Nodes (void) { return GroupSize; };
 int CartesianCommunicator::Cores (void) { return ShmSize; };
@@ -108,22 +108,22 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque
 {
   SendToRecvFromComplete(waitall);
 }
-void StencilBarrier(void){};
+void CartesianCommunicator::StencilBarrier(void){};
 
 commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector;
 
 void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; }
 
 void *CartesianCommunicator::ShmBuffer(int rank) {
-  if (rank != ShmRank ) return NULL;
-  else return ShmCommBuf;
+  return NULL;
 }
 void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) {
-  if (rank != ShmRank ) return NULL;
-  else return local_p;
+  return NULL;
 }
 void CartesianCommunicator::ShmInitGeneric(void){
   ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES);
   ShmCommBuf=(void *)&ShmBufStorageVector[0];
+  std::cout << "allocated persistent buffer"<<std::hex << ShmCommBuf << std::dec<<std::endl;
 }
 
 #endif
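Aside: in this generic (no shared window) communicator, ShmBuffer and ShmBufferTranslate now return NULL unconditionally, so any caller probing for a shared-memory peer falls back to the ordinary send/receive path. A hedged usage sketch (variable names are illustrative, not taken from the file):

    void *peer = grid->ShmBufferTranslate(to_rank, grid->ShmBufferSelf());
    if (peer != NULL) {
      memcpy(peer, xmit, bytes);   // same node: write straight into the peer's buffer
    } else {
      // otherwise fall back to plain MPI point-to-point
      grid->SendToRecvFromBegin(list, xmit, to_rank, recv, from_rank, bytes);
    }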
@@ -400,7 +400,9 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
                                                 int from,
                                                 int bytes)
 {
-#if 1
+#if 0
+  this->StencilBarrier();
+
   MPI_Request xrq;
   MPI_Request rrq;
 
@@ -440,9 +442,6 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
 PARALLEL_FOR_LOOP
     for(int w=0;w<words;w++) {
       op[w]=ip[w];
-      if ( w == 0 ) {
-        // std::cout << " xmit "<< ShmRank <<" -> "<< gdest<<" " <<std::hex<<op[w]<<std::dec<<std::endl;
-      }
     }
 
     bcopy(&_processor,&to_ptr[bytes],sizeof(_processor));
@@ -453,9 +452,7 @@ PARALLEL_FOR_LOOP
       list.push_back(xrq);
     }
 
-    MPI_Win_sync (ShmWindow);
-    MPI_Barrier  (ShmComm);
-    MPI_Win_sync (ShmWindow);
+    this->StencilBarrier();
 
     if (small && (gfrom !=MPI_UNDEFINED) ) {
      T *ip = (T *)from_ptr;
@@ -463,9 +460,6 @@ PARALLEL_FOR_LOOP
 PARALLEL_FOR_LOOP
     for(int w=0;w<words;w++) {
      op[w]=ip[w];
-      if ( w == 0 ) {
-        // std::cout << " recv "<< ShmRank <<" <- "<< gfrom<<" " <<std::hex<<op[w]<<std::dec<<std::endl;
-      }
     }
     bcopy(&from_ptr[bytes] ,&tag ,sizeof(tag));
     bcopy(&from_ptr[bytes+4],&check,sizeof(check));
@@ -477,9 +471,8 @@ PARALLEL_FOR_LOOP
       list.push_back(rrq);
     }
 
-    MPI_Win_sync (ShmWindow);
-    MPI_Barrier  (ShmComm);
-    MPI_Win_sync (ShmWindow);
+    this->StencilBarrier();
+
 #else
     MPI_Request xrq;
     MPI_Request rrq;
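Aside: the inline MPI_Win_sync / MPI_Barrier / MPI_Win_sync sequences are folded into this->StencilBarrier(). The new StencilBarrier body for the MPI-3 communicator is not shown in this diff; assuming it simply wraps the sequence it replaces, it would look roughly like:

    // Assumed implementation, inferred from the removed lines above
    // (ShmWindow / ShmComm are the MPI-3 shared window and node-local communicator).
    void CartesianCommunicator::StencilBarrier(void)
    {
      MPI_Win_sync (ShmWindow);   // flush our view of the shared window
      MPI_Barrier  (ShmComm);     // wait for every rank on the node
      MPI_Win_sync (ShmWindow);   // pick up the peers' writes
    }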
@@ -528,9 +521,6 @@ void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_
     list.push_back(rrq);
   }
 
-
-  StencilBarrier();
-
 }
 
 
@@ -47,8 +47,8 @@ void CartesianCommunicator::Init(int *argc, char *** arv)
   WorldSize = 1;
   ShmRank=0;
   ShmSize=1;
-  GroupRank=_WorldRank;
-  GroupSize=_WorldSize;
+  GroupRank=WorldRank;
+  GroupSize=WorldSize;
   Slave =0;
   ShmInitGeneric();
 }
@@ -44,13 +44,16 @@ namespace Grid {
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 // Info that is setup once and indept of cartesian layout
 ///////////////////////////////////////////////////////////////////////////////////////////////////
-int CartesianCommunicator::ShmRank;
-int CartesianCommunicator::ShmSize;
-int CartesianCommunicator::GroupRank;
-int CartesianCommunicator::GroupSize;
-int CartesianCommunicator::WorldRank;
-int CartesianCommunicator::WorldSize;
-int CartesianCommunicator::Slave;
+typedef struct HandShake_t {
+  uint64_t seq_local;
+  uint64_t seq_remote;
+} HandShake;
+
+static Vector< HandShake > XConnections;
+static Vector< HandShake > RConnections;
 
 
 void CartesianCommunicator::Init(int *argc, char ***argv) {
   shmem_init();
@@ -62,37 +65,17 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
     RConnections[pe].seq_local = 0;
     RConnections[pe].seq_remote= 0;
   }
+  WorldSize = shmem_n_pes();
+  WorldRank = shmem_my_pe();
+  ShmRank=0;
+  ShmSize=1;
+  GroupRank=WorldRank;
+  GroupSize=WorldSize;
+  Slave =0;
   shmem_barrier_all();
   ShmInitGeneric();
 }
 
 
-// Should error check all MPI calls.
-void CartesianCommunicator::Init(int *argc, char ***argv) {
-  int flag;
-  MPI_Initialized(&flag); // needed to coexist with other libs apparently
-  if ( !flag ) {
-    MPI_Init(argc,argv);
-    MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
-    MPI_Comm_rank(communicator_world,&_WorldRank);
-    MPI_Comm_size(communicator_world,&_WorldSize);
-    _ShmRank=0;
-    _ShmSize=1;
-    _GroupRank=_WorldRank;
-    _GroupSize=_WorldSize;
-    _Slave =0;
-  }
-}
-
-
-typedef struct HandShake_t {
-  uint64_t seq_local;
-  uint64_t seq_remote;
-} HandShake;
-
-static Vector< HandShake > XConnections;
-static Vector< HandShake > RConnections;
-
 CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 {
   _ndimension = processors.size();
@@ -261,12 +244,9 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
 
   if ( _processor == sender ) {
 
-    printf("Sender SHMEM pt2pt %d -> %d\n",sender,receiver);
     // Check he has posted a receive
     while(SendSeq->seq_remote == SendSeq->seq_local);
 
-    printf("Sender receive %d posted\n",sender,receiver);
-
     // Advance our send count
     seq = ++(SendSeq->seq_local);
 
@@ -275,26 +255,19 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
     shmem_putmem(recv,xmit,bytes,receiver);
     shmem_fence();
 
-    printf("Sender sent payload %d\n",seq);
     //Notify him we're done
     shmem_putmem((void *)&(RecvSeq->seq_remote),&seq,sizeof(seq),receiver);
     shmem_fence();
-    printf("Sender ringing door bell %d\n",seq);
   }
   if ( _processor == receiver ) {
 
-    printf("Receiver SHMEM pt2pt %d->%d\n",sender,receiver);
     // Post a receive
     seq = ++(RecvSeq->seq_local);
     shmem_putmem((void *)&(SendSeq->seq_remote),&seq,sizeof(seq),sender);
 
-    printf("Receiver Opening letter box %d\n",seq);
-
-
     // Now wait until he has advanced our reception counter
     while(RecvSeq->seq_remote != RecvSeq->seq_local);
 
-    printf("Receiver Got the mail %d\n",seq);
   }
 }
 
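Aside: with the printf chatter removed, what remains of SendRecvPacket is a small sequence-number handshake over symmetric memory. A condensed sketch of the protocol (SendSeq/RecvSeq point at the HandShake slots for this sender/receiver pair; the two halves run in different processes):

    typedef struct HandShake_t {
      uint64_t seq_local;    // messages this side has posted/sent
      uint64_t seq_remote;   // last sequence number the peer has acknowledged
    } HandShake;

    // Sender: wait for a posted receive, push the payload, then notify.
    while (SendSeq->seq_remote == SendSeq->seq_local);       // spin until the receiver posts
    uint64_t seq = ++(SendSeq->seq_local);
    shmem_putmem(recv, xmit, bytes, receiver);               // payload into the peer's buffer
    shmem_fence();                                           // order the payload before the notify
    shmem_putmem((void *)&(RecvSeq->seq_remote), &seq, sizeof(seq), receiver);
    shmem_fence();

    // Receiver: post the receive, then spin until the sender's notify arrives.
    uint64_t rseq = ++(RecvSeq->seq_local);
    shmem_putmem((void *)&(SendSeq->seq_remote), &rseq, sizeof(rseq), sender);
    while (RecvSeq->seq_remote != RecvSeq->seq_local);       // payload has landed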