mirror of
https://github.com/paboyle/Grid.git
synced 2025-09-19 17:51:04 +01:00
Merge branch 'develop' into feature/dwf-multirhs
This commit is contained in:
@@ -37,11 +37,12 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/shm.h>
|
||||
#include <sys/mman.h>
|
||||
//#include <zlib.h>
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#include <zlib.h>
|
||||
#ifdef HAVE_NUMAIF_H
|
||||
#include <numaif.h>
|
||||
#endif
|
||||
|
||||
|
||||
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -197,7 +198,46 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
|
||||
ShmCommBuf = 0;
|
||||
ShmCommBufs.resize(ShmSize);
|
||||
|
||||
#if 1
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbf and others map filesystems as mappable huge pages
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef GRID_MPI3_SHMMMAP
|
||||
char shm_name [NAME_MAX];
|
||||
for(int r=0;r<ShmSize;r++){
|
||||
|
||||
size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES;
|
||||
sprintf(shm_name,GRID_SHM_PATH "/Grid_mpi3_shm_%d_%d",GroupRank,r);
|
||||
//sprintf(shm_name,"/var/lib/hugetlbfs/group/wheel/pagesize-2MB/" "Grid_mpi3_shm_%d_%d",GroupRank,r);
|
||||
// printf("Opening file %s \n",shm_name);
|
||||
int fd=open(shm_name,O_RDWR|O_CREAT,0666);
|
||||
if ( fd == -1) {
|
||||
printf("open %s failed\n",shm_name);
|
||||
perror("open hugetlbfs");
|
||||
exit(0);
|
||||
}
|
||||
int mmap_flag = MAP_SHARED ;
|
||||
#ifdef MAP_POPULATE
|
||||
mmap_flag|=MAP_POPULATE;
|
||||
#endif
|
||||
#ifdef MAP_HUGETLB
|
||||
if ( Hugepages ) mmap_flag |= MAP_HUGETLB;
|
||||
#endif
|
||||
void *ptr = (void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);
|
||||
if ( ptr == (void *)MAP_FAILED ) {
|
||||
printf("mmap %s failed\n",shm_name);
|
||||
perror("failed mmap"); assert(0);
|
||||
}
|
||||
assert(((uint64_t)ptr&0x3F)==0);
|
||||
ShmCommBufs[r] =ptr;
|
||||
|
||||
}
|
||||
#endif
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// POSIX SHMOPEN ; as far as I know Linux does not allow EXPLICIT HugePages with this case
|
||||
// tmpfs (Larry Meadows says) does not support explicit huge page, and this is used for
|
||||
// the posix shm virtual file system
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef GRID_MPI3_SHMOPEN
|
||||
char shm_name [NAME_MAX];
|
||||
if ( ShmRank == 0 ) {
|
||||
for(int r=0;r<ShmSize;r++){
|
||||
@@ -210,11 +250,39 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
|
||||
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
|
||||
if ( fd < 0 ) { perror("failed shm_open"); assert(0); }
|
||||
ftruncate(fd, size);
|
||||
|
||||
int mmap_flag = MAP_SHARED;
|
||||
#ifdef MAP_POPULATE
|
||||
mmap_flag |= MAP_POPULATE;
|
||||
#endif
|
||||
#ifdef MAP_HUGETLB
|
||||
if (Hugepages) mmap_flag |= MAP_HUGETLB;
|
||||
#endif
|
||||
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
|
||||
|
||||
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||
if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); }
|
||||
if ( ptr == (void * )MAP_FAILED ) { perror("failed mmap"); assert(0); }
|
||||
assert(((uint64_t)ptr&0x3F)==0);
|
||||
ShmCommBufs[r] =ptr;
|
||||
|
||||
// Experiments; Experiments; Try to force numa domain on the shm segment if we have numaif.h
|
||||
#if 0
|
||||
//#ifdef HAVE_NUMAIF_H
|
||||
int status;
|
||||
int flags=MPOL_MF_MOVE;
|
||||
#ifdef KNL
|
||||
int nodes=1; // numa domain == MCDRAM
|
||||
// Find out if in SNC2,SNC4 mode ?
|
||||
#else
|
||||
int nodes=r; // numa domain == MPI ID
|
||||
#endif
|
||||
unsigned long count=1;
|
||||
for(uint64_t page=0;page<size;page+=4096){
|
||||
void *pages = (void *) ( page + (uint64_t)ptr );
|
||||
uint64_t *cow_it = (uint64_t *)pages; *cow_it = 1;
|
||||
ierr= move_pages(0,count, &pages,&nodes,&status,flags);
|
||||
if (ierr && (page==0)) perror("numa relocate command failed");
|
||||
}
|
||||
#endif
|
||||
ShmCommBufs[r] =ptr;
|
||||
|
||||
}
|
||||
}
|
||||
@@ -236,21 +304,32 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
|
||||
ShmCommBufs[r] =ptr;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
#endif
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SHMGET SHMAT and SHM_HUGETLB flag
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef GRID_MPI3_SHMGET
|
||||
std::vector<int> shmids(ShmSize);
|
||||
|
||||
if ( ShmRank == 0 ) {
|
||||
for(int r=0;r<ShmSize;r++){
|
||||
size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES;
|
||||
key_t key = 0x4545 + r;
|
||||
if ((shmids[r]= shmget(key,size, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
|
||||
key_t key = IPC_PRIVATE;
|
||||
int flags = IPC_CREAT | SHM_R | SHM_W;
|
||||
#ifdef SHM_HUGETLB
|
||||
if (Hugepages) flags|=SHM_HUGETLB;
|
||||
#endif
|
||||
if ((shmids[r]= shmget(key,size, flags)) ==-1) {
|
||||
int errsv = errno;
|
||||
printf("Errno %d\n",errsv);
|
||||
printf("key %d\n",key);
|
||||
printf("size %lld\n",size);
|
||||
printf("flags %d\n",flags);
|
||||
perror("shmget");
|
||||
exit(1);
|
||||
} else {
|
||||
printf("shmid: 0x%x\n", shmids[r]);
|
||||
}
|
||||
printf("shmid: 0x%x\n", shmids[r]);
|
||||
}
|
||||
}
|
||||
MPI_Barrier(ShmComm);
|
||||
@@ -384,8 +463,14 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
{
|
||||
int ierr;
|
||||
communicator=communicator_world;
|
||||
|
||||
_ndimension = processors.size();
|
||||
|
||||
communicator_halo.resize (2*_ndimension);
|
||||
for(int i=0;i<_ndimension*2;i++){
|
||||
MPI_Comm_dup(communicator,&communicator_halo[i]);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Assert power of two shm_size.
|
||||
////////////////////////////////////////////////////////////////
|
||||
@@ -608,13 +693,27 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
|
||||
}
|
||||
}
|
||||
|
||||
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int dest,
|
||||
void *recv,
|
||||
int from,
|
||||
int bytes)
|
||||
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
|
||||
int dest,
|
||||
void *recv,
|
||||
int from,
|
||||
int bytes,int dir)
|
||||
{
|
||||
std::vector<CommsRequest_t> list;
|
||||
double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir);
|
||||
StencilSendToRecvFromComplete(list,dir);
|
||||
return offbytes;
|
||||
}
|
||||
|
||||
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int dest,
|
||||
void *recv,
|
||||
int from,
|
||||
int bytes,int dir)
|
||||
{
|
||||
assert(dir < communicator_halo.size());
|
||||
|
||||
MPI_Request xrq;
|
||||
MPI_Request rrq;
|
||||
|
||||
@@ -633,26 +732,26 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
|
||||
gfrom = MPI_UNDEFINED;
|
||||
#endif
|
||||
if ( gfrom ==MPI_UNDEFINED) {
|
||||
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
|
||||
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[dir],&rrq);
|
||||
assert(ierr==0);
|
||||
list.push_back(rrq);
|
||||
off_node_bytes+=bytes;
|
||||
}
|
||||
|
||||
if ( gdest == MPI_UNDEFINED ) {
|
||||
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
||||
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[dir],&xrq);
|
||||
assert(ierr==0);
|
||||
list.push_back(xrq);
|
||||
off_node_bytes+=bytes;
|
||||
}
|
||||
|
||||
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
|
||||
this->StencilSendToRecvFromComplete(list);
|
||||
this->StencilSendToRecvFromComplete(list,dir);
|
||||
}
|
||||
|
||||
return off_node_bytes;
|
||||
}
|
||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall)
|
||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
|
||||
{
|
||||
SendToRecvFromComplete(waitall);
|
||||
}
|
||||
|
Reference in New Issue
Block a user