mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Update to use shared memory to contain the stencil comms buffers
Tested on 2.1.1.1 1.2.1.1 4.1.1.1 1.4.1.1 2.2.1.1 subnode decompositions
This commit is contained in:
parent
ea25a4d9ac
commit
b6a65059a2
@ -156,6 +156,7 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||
err = ref-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
assert (norm2(err)< 1.0e-5 );
|
||||
Dw.Report();
|
||||
}
|
||||
|
||||
@ -208,7 +209,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
|
||||
|
||||
RealF sum=0;
|
||||
RealD sum=0;
|
||||
for(int x=0;x<latt4[0];x++){
|
||||
for(int y=0;y<latt4[1];y++){
|
||||
for(int z=0;z<latt4[2];z++){
|
||||
@ -226,12 +227,12 @@ int main (int argc, char ** argv)
|
||||
}
|
||||
}}}}}
|
||||
std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl;
|
||||
assert (sum< 1.0e-5 );
|
||||
|
||||
|
||||
if (1) {
|
||||
|
||||
LatticeFermion sr_eo(sFGrid);
|
||||
LatticeFermion serr(sFGrid);
|
||||
|
||||
LatticeFermion ssrc_e (sFrbGrid);
|
||||
LatticeFermion ssrc_o (sFrbGrid);
|
||||
@ -243,8 +244,6 @@ int main (int argc, char ** argv)
|
||||
|
||||
setCheckerboard(sr_eo,ssrc_o);
|
||||
setCheckerboard(sr_eo,ssrc_e);
|
||||
serr = sr_eo-ssrc;
|
||||
std::cout<<GridLogMessage << "EO src norm diff "<< norm2(serr)<<std::endl;
|
||||
|
||||
sr_e = zero;
|
||||
sr_o = zero;
|
||||
@ -272,9 +271,18 @@ int main (int argc, char ** argv)
|
||||
pickCheckerboard(Even,ssrc_e,sresult);
|
||||
pickCheckerboard(Odd ,ssrc_o,sresult);
|
||||
ssrc_e = ssrc_e - sr_e;
|
||||
RealD error = norm2(ssrc_e);
|
||||
|
||||
std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<< " vec nrm"<<norm2(sr_e) <<std::endl;
|
||||
ssrc_o = ssrc_o - sr_o;
|
||||
|
||||
error+= norm2(ssrc_o);
|
||||
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<< " vec nrm"<<norm2(sr_o) <<std::endl;
|
||||
if(error>1.0e-5) {
|
||||
setCheckerboard(ssrc,ssrc_o);
|
||||
setCheckerboard(ssrc,ssrc_e);
|
||||
std::cout<< ssrc << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -306,7 +314,7 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||
err = ref-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
|
||||
assert(norm2(err)<1.0e-5);
|
||||
LatticeFermion src_e (FrbGrid);
|
||||
LatticeFermion src_o (FrbGrid);
|
||||
LatticeFermion r_e (FrbGrid);
|
||||
@ -349,11 +357,14 @@ int main (int argc, char ** argv)
|
||||
|
||||
err = r_eo-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
assert(norm2(err)<1.0e-5);
|
||||
|
||||
pickCheckerboard(Even,src_e,err);
|
||||
pickCheckerboard(Odd,src_o,err);
|
||||
std::cout<<GridLogMessage << "norm diff even "<< norm2(src_e)<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm diff odd "<< norm2(src_o)<<std::endl;
|
||||
assert(norm2(src_e)<1.0e-5);
|
||||
assert(norm2(src_o)<1.0e-5);
|
||||
|
||||
|
||||
}
|
||||
|
14
lib/Init.cc
14
lib/Init.cc
@ -171,14 +171,17 @@ std::string GridCmdVectorIntToString(const std::vector<int> & vec){
|
||||
/////////////////////////////////////////////////////////
|
||||
//
|
||||
/////////////////////////////////////////////////////////
|
||||
static int Grid_is_initialised = 0;
|
||||
|
||||
|
||||
void Grid_init(int *argc,char ***argv)
|
||||
{
|
||||
GridLogger::StopWatch.Start();
|
||||
|
||||
CartesianCommunicator::Init(argc,argv);
|
||||
|
||||
// Parse command line args.
|
||||
|
||||
GridLogger::StopWatch.Start();
|
||||
|
||||
std::string arg;
|
||||
std::vector<std::string> logstreams;
|
||||
std::string defaultLog("Error,Warning,Message,Performance");
|
||||
@ -216,11 +219,14 @@ void Grid_init(int *argc,char ***argv)
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
|
||||
LebesgueOrder::UseLebesgueOrder=1;
|
||||
}
|
||||
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){
|
||||
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--cacheblocking");
|
||||
GridCmdOptionIntVector(arg,LebesgueOrder::Block);
|
||||
}
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--timestamp") ){
|
||||
GridLogTimestamp(1);
|
||||
}
|
||||
|
||||
GridParseLayout(*argv,*argc,
|
||||
Grid_default_latt,
|
||||
Grid_default_mpi);
|
||||
@ -274,6 +280,8 @@ void Grid_init(int *argc,char ***argv)
|
||||
std::cout << "GNU General Public License for more details."<<std::endl;
|
||||
std::cout << COL_BACKGROUND <<std::endl;
|
||||
std::cout << std::endl;
|
||||
|
||||
Grid_is_initialised = 1;
|
||||
}
|
||||
|
||||
|
||||
|
@ -33,6 +33,7 @@ namespace Grid {
|
||||
|
||||
void Grid_init(int *argc,char ***argv);
|
||||
void Grid_finalize(void);
|
||||
|
||||
// internal, controlled with --handle
|
||||
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr);
|
||||
void Grid_debug_handler_init(void);
|
||||
@ -44,6 +45,7 @@ namespace Grid {
|
||||
const std::vector<int> &GridDefaultMpi(void);
|
||||
const int &GridThreads(void) ;
|
||||
void GridSetThreads(int t) ;
|
||||
void GridLogTimestamp(int);
|
||||
|
||||
// Common parsing chores
|
||||
std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option);
|
||||
|
@ -34,8 +34,13 @@ directory
|
||||
namespace Grid {
|
||||
|
||||
GridStopWatch Logger::StopWatch;
|
||||
int Logger::timestamp;
|
||||
std::ostream Logger::devnull(0);
|
||||
|
||||
// Library-level switch for the elapsed-time column in log output.
// Forwards the flag unchanged to Logger::Timestamp(); this is the function
// Grid_init calls when it sees the --timestamp option.
void GridLogTimestamp(int on)
{
  Logger::Timestamp(on);
}
|
||||
|
||||
Colours GridLogColours(0);
|
||||
GridLogger GridLogError(1, "Error", GridLogColours, "RED");
|
||||
GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW");
|
||||
|
59
lib/Log.h
59
lib/Log.h
@ -37,10 +37,11 @@
|
||||
#include <execinfo.h>
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
namespace Grid {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Dress the output; use std::chrono for time stamping via the StopWatch class
|
||||
int Rank(void); // used for early stage debug before library init
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
class Colours{
|
||||
@ -55,7 +56,6 @@ public:
|
||||
|
||||
void Active(bool activate){
|
||||
is_active=activate;
|
||||
|
||||
if (is_active){
|
||||
colour["BLACK"] ="\033[30m";
|
||||
colour["RED"] ="\033[31m";
|
||||
@ -66,21 +66,18 @@ public:
|
||||
colour["CYAN"] ="\033[36m";
|
||||
colour["WHITE"] ="\033[37m";
|
||||
colour["NORMAL"] ="\033[0;39m";
|
||||
} else {
|
||||
colour["BLACK"] ="";
|
||||
colour["RED"] ="";
|
||||
colour["GREEN"] ="";
|
||||
colour["YELLOW"]="";
|
||||
colour["BLUE"] ="";
|
||||
colour["PURPLE"]="";
|
||||
colour["CYAN"] ="";
|
||||
colour["WHITE"] ="";
|
||||
colour["NORMAL"]="";
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
} else {
|
||||
colour["BLACK"] ="";
|
||||
colour["RED"] ="";
|
||||
colour["GREEN"] ="";
|
||||
colour["YELLOW"]="";
|
||||
colour["BLUE"] ="";
|
||||
colour["PURPLE"]="";
|
||||
colour["CYAN"] ="";
|
||||
colour["WHITE"] ="";
|
||||
colour["NORMAL"]="";
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@ -88,6 +85,7 @@ class Logger {
|
||||
protected:
|
||||
Colours &Painter;
|
||||
int active;
|
||||
static int timestamp;
|
||||
std::string name, topName;
|
||||
std::string COLOUR;
|
||||
|
||||
@ -99,25 +97,28 @@ public:
|
||||
std::string evidence() {return Painter.colour["YELLOW"];}
|
||||
std::string colour() {return Painter.colour[COLOUR];}
|
||||
|
||||
Logger(std::string topNm, int on, std::string nm, Colours& col_class, std::string col)
|
||||
: active(on),
|
||||
name(nm),
|
||||
topName(topNm),
|
||||
Painter(col_class),
|
||||
COLOUR(col){} ;
|
||||
Logger(std::string topNm, int on, std::string nm, Colours& col_class, std::string col) : active(on),
|
||||
name(nm),
|
||||
topName(topNm),
|
||||
Painter(col_class),
|
||||
COLOUR(col) {} ;
|
||||
|
||||
void Active(int on) {active = on;};
|
||||
int isActive(void) {return active;};
|
||||
static void Timestamp(int on) {timestamp = on;};
|
||||
|
||||
friend std::ostream& operator<< (std::ostream& stream, Logger& log){
|
||||
|
||||
if ( log.active ) {
|
||||
StopWatch.Stop();
|
||||
GridTime now = StopWatch.Elapsed();
|
||||
StopWatch.Start();
|
||||
stream << log.background()<< log.topName << log.background()<< " : ";
|
||||
stream << log.colour() <<std::setw(14) << std::left << log.name << log.background() << " : ";
|
||||
stream << log.evidence()<< now << log.background() << " : " << log.colour();
|
||||
if ( log.timestamp ) {
|
||||
StopWatch.Stop();
|
||||
GridTime now = StopWatch.Elapsed();
|
||||
StopWatch.Start();
|
||||
stream << log.evidence()<< now << log.background() << " : " ;
|
||||
}
|
||||
stream << log.colour();
|
||||
return stream;
|
||||
} else {
|
||||
return devnull;
|
||||
@ -149,7 +150,7 @@ extern void * Grid_backtrace_buffer[_NBACKTRACE];
|
||||
|
||||
#define BACKTRACEFILE() {\
|
||||
char string[20]; \
|
||||
std::sprintf(string,"backtrace.%d",Rank()); \
|
||||
std::sprintf(string,"backtrace.%d",CartesianCommunicator::RankWorld()); \
|
||||
std::FILE * fp = std::fopen(string,"w"); \
|
||||
BACKTRACEFP(fp)\
|
||||
std::fclose(fp); \
|
||||
|
@ -1,18 +1,22 @@
|
||||
extra_sources=
|
||||
if BUILD_COMMS_MPI
|
||||
extra_sources+=communicator/Communicator_mpi.cc
|
||||
extra_sources+=communicator/Communicator_base.cc
|
||||
endif
|
||||
|
||||
if BUILD_COMMS_MPI3
|
||||
extra_sources+=communicator/Communicator_mpi3.cc
|
||||
extra_sources+=communicator/Communicator_base.cc
|
||||
endif
|
||||
|
||||
if BUILD_COMMS_SHMEM
|
||||
extra_sources+=communicator/Communicator_shmem.cc
|
||||
extra_sources+=communicator/Communicator_base.cc
|
||||
endif
|
||||
|
||||
if BUILD_COMMS_NONE
|
||||
extra_sources+=communicator/Communicator_none.cc
|
||||
extra_sources+=communicator/Communicator_base.cc
|
||||
endif
|
||||
|
||||
#
|
||||
|
@ -290,10 +290,11 @@ PARALLEL_FOR_LOOP
|
||||
// Unified Comms buffers for all directions
|
||||
///////////////////////////////////////////////////////////
|
||||
// Vectors that live on the symmetric heap in case of SHMEM
|
||||
std::vector<commVector<scalar_object> > u_simd_send_buf_hide;
|
||||
std::vector<commVector<scalar_object> > u_simd_recv_buf_hide;
|
||||
commVector<cobj> u_send_buf;
|
||||
commVector<cobj> u_recv_buf_hide;
|
||||
// std::vector<commVector<scalar_object> > u_simd_send_buf_hide;
|
||||
// std::vector<commVector<scalar_object> > u_simd_recv_buf_hide;
|
||||
// commVector<cobj> u_send_buf_hide;
|
||||
// commVector<cobj> u_recv_buf_hide;
|
||||
|
||||
// These are used; either SHM objects or refs to the above symmetric heap vectors
|
||||
// depending on comms target
|
||||
cobj* u_recv_buf_p;
|
||||
@ -439,36 +440,19 @@ PARALLEL_FOR_LOOP
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
uint8_t *shm_ptr = (uint8_t *)_grid->ShmBufferSelf();
|
||||
_grid->ShmBufferFreeAll();
|
||||
|
||||
u_simd_send_buf.resize(Nsimd);
|
||||
u_simd_recv_buf.resize(Nsimd);
|
||||
|
||||
u_send_buf.resize(_unified_buffer_size);
|
||||
|
||||
if( ShmDirectCopy && shm_ptr != NULL ) {
|
||||
|
||||
u_recv_buf_p=(cobj *)shm_ptr; shm_ptr+= _unified_buffer_size*sizeof(cobj);
|
||||
for(int l=0;l<Nsimd;l++){
|
||||
u_simd_send_buf[l] = (scalar_object *)shm_ptr; shm_ptr += _unified_buffer_size*sizeof(scalar_object);
|
||||
u_simd_recv_buf[l] = (scalar_object *)shm_ptr; shm_ptr += _unified_buffer_size*sizeof(scalar_object);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
u_recv_buf_hide.resize(_unified_buffer_size);
|
||||
u_simd_send_buf_hide.resize(Nsimd,commVector<scalar_object>(_unified_buffer_size));
|
||||
u_simd_recv_buf_hide.resize(Nsimd,commVector<scalar_object>(_unified_buffer_size));
|
||||
|
||||
u_recv_buf_p=&u_recv_buf_hide[0];
|
||||
for(int l=0;l<Nsimd;l++){
|
||||
u_simd_send_buf[l] = & u_simd_send_buf_hide[l][0];
|
||||
u_simd_recv_buf[l] = & u_simd_recv_buf_hide[l][0];
|
||||
}
|
||||
u_send_buf_p=(cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj));
|
||||
u_recv_buf_p=(cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj));
|
||||
for(int l=0;l<Nsimd;l++){
|
||||
u_simd_recv_buf[l] = (scalar_object *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(scalar_object));
|
||||
u_simd_send_buf[l] = (scalar_object *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(scalar_object));
|
||||
}
|
||||
|
||||
PrecomputeByteOffsets();
|
||||
|
||||
}
|
||||
|
||||
void Local (int point, int dimension,int shiftpm,int cbmask)
|
||||
@ -698,6 +682,7 @@ PARALLEL_FOR_LOOP
|
||||
calls++;
|
||||
Mergers.resize(0);
|
||||
Packets.resize(0);
|
||||
_grid->StencilBarrier();
|
||||
HaloGather(source,compress);
|
||||
this->CommunicateBegin(reqs);
|
||||
this->CommunicateComplete(reqs);
|
||||
@ -836,19 +821,17 @@ PARALLEL_FOR_LOOP
|
||||
// try the direct copy if possible
|
||||
/////////////////////////////////////////////////////////
|
||||
|
||||
cobj *u_send_buf_p = &u_send_buf[0];
|
||||
if (ShmDirectCopy) {
|
||||
cobj *shm = (cobj *) _grid->ShmBuffer(xmit_to_rank);
|
||||
if ( shm!=NULL) {
|
||||
u_send_buf_p = shm;
|
||||
}
|
||||
|
||||
cobj *send_buf = (cobj *)_grid->ShmBufferTranslate(xmit_to_rank,u_recv_buf_p);
|
||||
if ( (ShmDirectCopy==0)||send_buf==NULL ) {
|
||||
cobj *send_buf = u_send_buf_p;
|
||||
}
|
||||
|
||||
t_data-=usecond();
|
||||
Gather_plane_simple_table (face_table[face_idx],rhs,u_send_buf_p,compress,u_comm_offset,so); face_idx++;
|
||||
Gather_plane_simple_table (face_table[face_idx],rhs,send_buf,compress,u_comm_offset,so); face_idx++;
|
||||
t_data+=usecond();
|
||||
|
||||
AddPacket((void *)&u_send_buf_p[u_comm_offset],
|
||||
AddPacket((void *)&send_buf[u_comm_offset],
|
||||
(void *)&u_recv_buf_p[u_comm_offset],
|
||||
xmit_to_rank,
|
||||
recv_from_rank,
|
||||
@ -947,18 +930,16 @@ PARALLEL_FOR_LOOP
|
||||
|
||||
_grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
||||
|
||||
|
||||
AddPacket((void *)sp,(void *)rp,xmit_to_rank,recv_from_rank,bytes);
|
||||
|
||||
auto shm_or_rp = rp;
|
||||
if (ShmDirectCopy) {
|
||||
scalar_object *shm = (scalar_object *) _grid->ShmBufferTranslate(xmit_to_rank,sp);
|
||||
if ( shm!=NULL) {
|
||||
shm_or_rp = shm;
|
||||
}
|
||||
}
|
||||
scalar_object *shm = (scalar_object *) _grid->ShmBufferTranslate(recv_from_rank,sp);
|
||||
if ((ShmDirectCopy==0)||(shm==NULL)) {
|
||||
shm = rp;
|
||||
}
|
||||
|
||||
rpointers[i] = shm_or_rp;
|
||||
// if Direct, StencilSendToRecvFrom will suppress copy to a peer on node
|
||||
// assuming above pointer flip
|
||||
AddPacket((void *)sp,(void *)rp,xmit_to_rank,recv_from_rank,bytes);
|
||||
|
||||
rpointers[i] = shm;
|
||||
|
||||
} else {
|
||||
|
||||
|
132
lib/communicator/Communicator_base.cc
Normal file
132
lib/communicator/Communicator_base.cc
Normal file
@ -0,0 +1,132 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/communicator/Communicator_base.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include "Grid.h"
|
||||
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Info that is set up once and is independent of the cartesian layout
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Storage for the class-wide (static) communicator state.

// Rank / size pairs reported by RankWorld()/Ranks(), NodeRank()/Nodes() and
// CoreRank()/Cores() respectively.
int CartesianCommunicator::WorldRank;
int CartesianCommunicator::WorldSize;
int CartesianCommunicator::GroupRank;
int CartesianCommunicator::GroupSize;
int CartesianCommunicator::ShmRank;
int CartesianCommunicator::ShmSize;

int CartesianCommunicator::Slave;

// Base address of the comms buffer returned by ShmBufferSelf().
void * CartesianCommunicator::ShmCommBuf;
|
||||
|
||||
/////////////////////////////////
|
||||
// Alloc, free shmem region
|
||||
/////////////////////////////////
|
||||
// Carve `bytes` out of the comms buffer with a simple bump allocator.
//
// Returns the current heap_top as a pointer and advances heap_top by `bytes`.
// No alignment padding is applied (the rounding below is disabled), so
// consecutive allocations are packed back to back.
// The running total heap_bytes is asserted to stay below MAX_MPI_SHM_BYTES.
// Individual allocations cannot be released; see ShmBufferFreeAll().
void *CartesianCommunicator::ShmBufferMalloc(size_t bytes){
  //  bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
  const size_t allocation = heap_top;   // start of the region handed to the caller
  heap_top    = allocation + bytes;
  heap_bytes += bytes;
  assert(heap_bytes < MAX_MPI_SHM_BYTES);
  return (void *)allocation;
}
|
||||
// Release every region handed out by ShmBufferMalloc() in one go.
//
// Rewinds heap_top to the start of this rank's buffer (ShmBufferSelf()) and
// zeroes the byte counter, so the next ShmBufferMalloc() starts from the base.
//
// Returns NULL. The declared return type is void*, so a value must be
// returned: flowing off the end of a non-void function is undefined behaviour.
// The result carries no information and callers may ignore it.
void *CartesianCommunicator::ShmBufferFreeAll(void) {
  heap_top  =(size_t)ShmBufferSelf();
  heap_bytes=0;
  return NULL;
}
|
||||
|
||||
/////////////////////////////////
|
||||
// Grid information queries
|
||||
/////////////////////////////////
|
||||
int CartesianCommunicator::IsBoss(void) { return _processor==0; };
|
||||
int CartesianCommunicator::BossRank(void) { return 0; };
|
||||
int CartesianCommunicator::ThisRank(void) { return _processor; };
|
||||
const std::vector<int> & CartesianCommunicator::ThisProcessorCoor(void) { return _processor_coor; };
|
||||
const std::vector<int> & CartesianCommunicator::ProcessorGrid(void) { return _processors; };
|
||||
int CartesianCommunicator::ProcessorCount(void) { return _Nprocessors; };
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// very VERY rarely (Log, serial RNG) we need world without a grid
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
int CartesianCommunicator::RankWorld(void) { return WorldRank; };
|
||||
int CartesianCommunicator::Ranks (void) { return WorldSize; };
|
||||
int CartesianCommunicator::Nodes (void) { return GroupSize; };
|
||||
int CartesianCommunicator::Cores (void) { return ShmSize; };
|
||||
int CartesianCommunicator::NodeRank (void) { return GroupRank; };
|
||||
int CartesianCommunicator::CoreRank (void) { return ShmRank; };
|
||||
|
||||
void CartesianCommunicator::GlobalSum(ComplexF &c)
|
||||
{
|
||||
GlobalSumVector((float *)&c,2);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSumVector(ComplexF *c,int N)
|
||||
{
|
||||
GlobalSumVector((float *)c,2*N);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSum(ComplexD &c)
|
||||
{
|
||||
GlobalSumVector((double *)&c,2);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
|
||||
{
|
||||
GlobalSumVector((double *)c,2*N);
|
||||
}
|
||||
|
||||
#ifndef GRID_COMMS_MPI3
|
||||
|
||||
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes)
|
||||
{
|
||||
SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
|
||||
}
|
||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall)
|
||||
{
|
||||
SendToRecvFromComplete(waitall);
|
||||
}
|
||||
void StencilBarrier(void){};
|
||||
|
||||
commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector;
|
||||
|
||||
void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; }
|
||||
void *CartesianCommunicator::ShmBuffer(int rank) {
|
||||
if (rank != ShmRank ) return NULL;
|
||||
else return ShmCommBuf;
|
||||
}
|
||||
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) {
|
||||
if (rank != ShmRank ) return NULL;
|
||||
else return local_p;
|
||||
}
|
||||
// Set up the comms buffer for builds without GRID_COMMS_MPI3.
// Sizes ShmBufStorageVector to MAX_MPI_SHM_BYTES and publishes the address of
// its first byte as ShmCommBuf.
void CartesianCommunicator::ShmInitGeneric(void)
{
  ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES);
  uint8_t *base = &ShmBufStorageVector[0];
  ShmCommBuf = (void *)base;
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
@ -40,169 +40,188 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#ifdef GRID_COMMS_SHMEM
|
||||
#include <mpp/shmem.h>
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
class CartesianCommunicator {
|
||||
public:
|
||||
|
||||
// 65536 ranks per node adequate for now
|
||||
// 128MB shared memory for comms, enough for 48^4 local vol comms
|
||||
// Give external control (command line override?) of this
|
||||
|
||||
static const int MAXLOG2RANKSPERNODE = 16;
|
||||
static const uint64_t MAX_MPI_SHM_BYTES = 128*1024*1024;
|
||||
|
||||
// Communicator should know nothing of the physics grid, only processor grid.
|
||||
|
||||
int _Nprocessors; // How many in all
|
||||
std::vector<int> _processors; // Which dimensions get relayed out over processors lanes.
|
||||
int _processor; // linear processor rank
|
||||
std::vector<int> _processor_coor; // linear processor coordinate
|
||||
unsigned long _ndimension;
|
||||
int _Nprocessors; // How many in all
|
||||
std::vector<int> _processors; // Which dimensions get relayed out over processors lanes.
|
||||
int _processor; // linear processor rank
|
||||
std::vector<int> _processor_coor; // linear processor coordinate
|
||||
unsigned long _ndimension;
|
||||
|
||||
#ifdef GRID_COMMS_MPI
|
||||
MPI_Comm communicator;
|
||||
typedef MPI_Request CommsRequest_t;
|
||||
#elif GRID_COMMS_MPI3
|
||||
int shm_mode;
|
||||
|
||||
MPI_Comm communicator;
|
||||
typedef MPI_Request CommsRequest_t;
|
||||
|
||||
const int MAXLOG2RANKSPERNODE = 16; // 65536 ranks per node adequate for now
|
||||
const uint64_t MAX_MPI_SHM_BYTES = 256*1024*1024; // 256MB shared memory for comms enought for 48^4 local vol comms
|
||||
|
||||
std::vector<int> WorldDims;
|
||||
std::vector<int> GroupDims;
|
||||
std::vector<int> ShmDims;
|
||||
|
||||
std::vector<int> GroupCoor;
|
||||
std::vector<int> ShmCoor;
|
||||
std::vector<int> WorldCoor;
|
||||
|
||||
static std::vector<int> GroupRanks;
|
||||
static std::vector<int> MyGroup;
|
||||
static int ShmSetup;
|
||||
static MPI_Win ShmWindow;
|
||||
static MPI_Comm ShmComm;
|
||||
|
||||
void * ShmCommBuf;
|
||||
std::vector<void *> ShmCommBufs;
|
||||
|
||||
int WorldRank;
|
||||
int WorldSize;
|
||||
|
||||
static int ShmRank;
|
||||
static int ShmSize;
|
||||
static int GroupSize;
|
||||
static int GroupRank;
|
||||
|
||||
std::vector<int> LexicographicToWorldRank;
|
||||
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3)
|
||||
MPI_Comm communicator;
|
||||
static MPI_Comm communicator_world;
|
||||
typedef MPI_Request CommsRequest_t;
|
||||
#else
|
||||
typedef int CommsRequest_t;
|
||||
typedef int CommsRequest_t;
|
||||
#endif
|
||||
|
||||
static void Init(int *argc, char ***argv);
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Helper functionality for SHM Windows common to all other impls
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Longer term; drop this in favour of a master / slave model with
|
||||
// cartesian communicator on a subset of ranks, slave ranks controlled
|
||||
// by group leader with data xfer via shared memory
|
||||
////////////////////////////////////////////////////////////////////
|
||||
#ifdef GRID_COMMS_MPI3
|
||||
std::vector<int> WorldDims;
|
||||
std::vector<int> GroupDims;
|
||||
std::vector<int> ShmDims;
|
||||
|
||||
std::vector<int> GroupCoor;
|
||||
std::vector<int> ShmCoor;
|
||||
std::vector<int> WorldCoor;
|
||||
|
||||
static std::vector<int> GroupRanks;
|
||||
static std::vector<int> MyGroup;
|
||||
static int ShmSetup;
|
||||
static MPI_Win ShmWindow;
|
||||
static MPI_Comm ShmComm;
|
||||
|
||||
std::vector<int> LexicographicToWorldRank;
|
||||
|
||||
static std::vector<void *> ShmCommBufs;
|
||||
#else
|
||||
static void ShmInitGeneric(void);
|
||||
static commVector<uint8_t> ShmBufStorageVector;
|
||||
#endif
|
||||
static void * ShmCommBuf;
|
||||
size_t heap_top;
|
||||
size_t heap_bytes;
|
||||
void *ShmBufferSelf(void);
|
||||
void *ShmBuffer(int rank);
|
||||
void *ShmBufferTranslate(int rank,void * local_p);
|
||||
void *ShmBufferMalloc(size_t bytes);
|
||||
void *ShmBufferFreeAll(void) ;
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Must call in Grid startup
|
||||
////////////////////////////////////////////////
|
||||
static void Init(int *argc, char ***argv);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Constructor of any given grid
|
||||
////////////////////////////////////////////////
|
||||
CartesianCommunicator(const std::vector<int> &pdimensions_in);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Wraps MPI_Cart routines, or implements equivalent on other impls
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
void ShiftedRanks(int dim,int shift,int & source, int & dest);
|
||||
int RankFromProcessorCoor(std::vector<int> &coor);
|
||||
void ProcessorCoorFromRank(int rank,std::vector<int> &coor);
|
||||
|
||||
/////////////////////////////////
|
||||
// Grid information and queries
|
||||
/////////////////////////////////
|
||||
static int ShmRank;
|
||||
static int ShmSize;
|
||||
static int GroupSize;
|
||||
static int GroupRank;
|
||||
static int WorldRank;
|
||||
static int WorldSize;
|
||||
static int Slave;
|
||||
|
||||
int IsBoss(void) ;
|
||||
int BossRank(void) ;
|
||||
int ThisRank(void) ;
|
||||
const std::vector<int> & ThisProcessorCoor(void) ;
|
||||
const std::vector<int> & ProcessorGrid(void) ;
|
||||
int ProcessorCount(void) ;
|
||||
static int Ranks (void);
|
||||
static int Nodes (void);
|
||||
static int Cores (void);
|
||||
static int NodeRank (void);
|
||||
static int CoreRank (void);
|
||||
|
||||
// Constructor
|
||||
CartesianCommunicator(const std::vector<int> &pdimensions_in);
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// very VERY rarely (Log, serial RNG) we need world without a grid
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
static int RankWorld(void) ;
|
||||
static void BroadcastWorld(int root,void* data, int bytes);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Reduction
|
||||
////////////////////////////////////////////////////////////
|
||||
void GlobalSum(RealF &);
|
||||
void GlobalSumVector(RealF *,int N);
|
||||
void GlobalSum(RealD &);
|
||||
void GlobalSumVector(RealD *,int N);
|
||||
void GlobalSum(uint32_t &);
|
||||
void GlobalSum(uint64_t &);
|
||||
void GlobalSum(ComplexF &c);
|
||||
void GlobalSumVector(ComplexF *c,int N);
|
||||
void GlobalSum(ComplexD &c);
|
||||
void GlobalSumVector(ComplexD *c,int N);
|
||||
|
||||
template<class obj> void GlobalSum(obj &o){
|
||||
typedef typename obj::scalar_type scalar_type;
|
||||
int words = sizeof(obj)/sizeof(scalar_type);
|
||||
scalar_type * ptr = (scalar_type *)& o;
|
||||
GlobalSumVector(ptr,words);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Face exchange, buffer swap in translational invariant way
|
||||
////////////////////////////////////////////////////////////
|
||||
void SendToRecvFrom(void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
|
||||
void SendRecvPacket(void *xmit,
|
||||
void *recv,
|
||||
int xmit_to_rank,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
|
||||
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
|
||||
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
|
||||
|
||||
// Wraps MPI_Cart routines
|
||||
void ShiftedRanks(int dim,int shift,int & source, int & dest);
|
||||
int RankFromProcessorCoor(std::vector<int> &coor);
|
||||
void ProcessorCoorFromRank(int rank,std::vector<int> &coor);
|
||||
void StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
|
||||
void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
|
||||
void StencilBarrier(void);
|
||||
|
||||
// Helper function for SHM Windows in MPI3
|
||||
void *ShmBufferSelf(void);
|
||||
void *ShmBuffer(int rank);
|
||||
|
||||
/////////////////////////////////
|
||||
// Grid information queries
|
||||
/////////////////////////////////
|
||||
int IsBoss(void) { return _processor==0; };
|
||||
int BossRank(void) { return 0; };
|
||||
int ThisRank(void) { return _processor; };
|
||||
const std::vector<int> & ThisProcessorCoor(void) { return _processor_coor; };
|
||||
const std::vector<int> & ProcessorGrid(void) { return _processors; };
|
||||
int ProcessorCount(void) { return _Nprocessors; };
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Reduction
|
||||
////////////////////////////////////////////////////////////
|
||||
void GlobalSum(RealF &);
|
||||
void GlobalSumVector(RealF *,int N);
|
||||
|
||||
void GlobalSum(RealD &);
|
||||
void GlobalSumVector(RealD *,int N);
|
||||
|
||||
void GlobalSum(uint32_t &);
|
||||
void GlobalSum(uint64_t &);
|
||||
|
||||
void GlobalSum(ComplexF &c)
|
||||
{
|
||||
GlobalSumVector((float *)&c,2);
|
||||
}
|
||||
void GlobalSumVector(ComplexF *c,int N)
|
||||
{
|
||||
GlobalSumVector((float *)c,2*N);
|
||||
}
|
||||
|
||||
void GlobalSum(ComplexD &c)
|
||||
{
|
||||
GlobalSumVector((double *)&c,2);
|
||||
}
|
||||
void GlobalSumVector(ComplexD *c,int N)
|
||||
{
|
||||
GlobalSumVector((double *)c,2*N);
|
||||
}
|
||||
|
||||
template<class obj> void GlobalSum(obj &o){
|
||||
typedef typename obj::scalar_type scalar_type;
|
||||
int words = sizeof(obj)/sizeof(scalar_type);
|
||||
scalar_type * ptr = (scalar_type *)& o;
|
||||
GlobalSumVector(ptr,words);
|
||||
}
|
||||
////////////////////////////////////////////////////////////
|
||||
// Face exchange, buffer swap in translational invariant way
|
||||
////////////////////////////////////////////////////////////
|
||||
void SendToRecvFrom(void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
|
||||
void SendRecvPacket(void *xmit,
|
||||
void *recv,
|
||||
int xmit_to_rank,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
|
||||
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
|
||||
void StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall)
|
||||
{
|
||||
SendToRecvFromComplete(waitall);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Barrier
|
||||
////////////////////////////////////////////////////////////
|
||||
void Barrier(void);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Broadcast a buffer and composite larger
|
||||
////////////////////////////////////////////////////////////
|
||||
void Broadcast(int root,void* data, int bytes);
|
||||
template<class obj> void Broadcast(int root,obj &data)
|
||||
////////////////////////////////////////////////////////////
|
||||
// Barrier
|
||||
////////////////////////////////////////////////////////////
|
||||
void Barrier(void);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Broadcast a buffer and composite larger
|
||||
////////////////////////////////////////////////////////////
|
||||
void Broadcast(int root,void* data, int bytes);
|
||||
|
||||
template<class obj> void Broadcast(int root,obj &data)
|
||||
{
|
||||
Broadcast(root,(void *)&data,sizeof(data));
|
||||
};
|
||||
|
||||
static void BroadcastWorld(int root,void* data, int bytes);
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -30,19 +30,28 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// Should error check all MPI calls.
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Info that is set up once and is independent of the Cartesian layout
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
MPI_Comm CartesianCommunicator::communicator_world;
|
||||
|
||||
// Should error check all MPI calls.
|
||||
void CartesianCommunicator::Init(int *argc, char ***argv) {
|
||||
int flag;
|
||||
MPI_Initialized(&flag); // needed to coexist with other libs apparently
|
||||
if ( !flag ) {
|
||||
MPI_Init(argc,argv);
|
||||
}
|
||||
}
|
||||
|
||||
int Rank(void) {
|
||||
int pe;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD,&pe);
|
||||
return pe;
|
||||
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
|
||||
MPI_Comm_rank(communicator_world,&WorldRank);
|
||||
MPI_Comm_size(communicator_world,&WorldSize);
|
||||
ShmRank=0;
|
||||
ShmSize=1;
|
||||
GroupRank=WorldRank;
|
||||
GroupSize=WorldSize;
|
||||
Slave =0;
|
||||
ShmInitGeneric();
|
||||
}
|
||||
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
@ -54,7 +63,7 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
_processors = processors;
|
||||
_processor_coor.resize(_ndimension);
|
||||
|
||||
MPI_Cart_create(MPI_COMM_WORLD, _ndimension,&_processors[0],&periodic[0],1,&communicator);
|
||||
MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&periodic[0],1,&communicator);
|
||||
MPI_Comm_rank(communicator,&_processor);
|
||||
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
|
||||
|
||||
@ -67,15 +76,6 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
|
||||
assert(Size==_Nprocessors);
|
||||
}
|
||||
void *CartesianCommunicator::ShmBufferSelf(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
void *CartesianCommunicator::ShmBuffer(int rank)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
@ -194,14 +194,17 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
||||
communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// Should only be used prior to Grid Init finished.
|
||||
// Check for this?
|
||||
///////////////////////////////////////////////////////
|
||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||
{
|
||||
int ierr= MPI_Bcast(data,
|
||||
bytes,
|
||||
MPI_BYTE,
|
||||
root,
|
||||
MPI_COMM_WORLD);
|
||||
communicator_world);
|
||||
assert(ierr==0);
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
@ -33,26 +32,197 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
namespace Grid {
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Info that is set up once and is independent of the Cartesian layout
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
int CartesianCommunicator::ShmSetup = 0;
|
||||
|
||||
// Global used by Init and nowhere else. How to hide?
|
||||
int Rank(void) {
|
||||
int pe;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD,&pe);
|
||||
return pe;
|
||||
MPI_Comm CartesianCommunicator::communicator_world;
|
||||
MPI_Comm CartesianCommunicator::ShmComm;
|
||||
MPI_Win CartesianCommunicator::ShmWindow;
|
||||
|
||||
std::vector<int> CartesianCommunicator::GroupRanks;
|
||||
std::vector<int> CartesianCommunicator::MyGroup;
|
||||
std::vector<void *> CartesianCommunicator::ShmCommBufs;
|
||||
|
||||
void *CartesianCommunicator::ShmBufferSelf(void)
|
||||
{
|
||||
return ShmCommBufs[ShmRank];
|
||||
}
|
||||
// Should error check all MPI calls.
|
||||
// Return the shared-memory comms buffer belonging to `rank`, or NULL when the
// GroupRanks table holds MPI_UNDEFINED for that rank.
// `rank` is used directly as an index into GroupRanks; no bounds check is done.
// NOTE(review): GroupRanks appears to be filled by MPI_Group_translate_ranks
// (world group -> shared-memory group), so MPI_UNDEFINED presumably means
// "not on this node" - confirm which rank numbering callers pass in.
void *CartesianCommunicator::ShmBuffer(int rank)
|
||||
{
|
||||
// Peer's rank within the shared-memory group, or MPI_UNDEFINED.
int gpeer = GroupRanks[rank];
|
||||
if (gpeer == MPI_UNDEFINED){
|
||||
return NULL;
|
||||
} else {
|
||||
return ShmCommBufs[gpeer];
|
||||
}
|
||||
}
|
||||
// Translate a pointer inside this rank's shared-memory buffer into the address
// at the same byte offset inside the buffer of `rank`.
// Returns NULL when GroupRanks[rank] is MPI_UNDEFINED.
// `local_p` is not validated: the offset is computed relative to
// ShmCommBufs[ShmRank] whatever the pointer's origin.
// NOTE(review): presumably callers only pass pointers that lie within their
// own ShmCommBufs[ShmRank] region - confirm, since an unrelated pointer yields
// a meaningless remote address.
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p)
|
||||
{
|
||||
int gpeer = GroupRanks[rank];
|
||||
if (gpeer == MPI_UNDEFINED){
|
||||
return NULL;
|
||||
} else {
|
||||
// Byte offset of local_p from the start of our own buffer ...
uint64_t offset = (uint64_t)local_p - (uint64_t)ShmCommBufs[ShmRank];
|
||||
// ... re-applied to the base address of the peer's buffer.
uint64_t remote = (uint64_t)ShmCommBufs[gpeer]+offset;
|
||||
return (void *) remote;
|
||||
}
|
||||
}
|
||||
|
||||
void CartesianCommunicator::Init(int *argc, char ***argv) {
|
||||
int flag;
|
||||
MPI_Initialized(&flag); // needed to coexist with other libs apparently
|
||||
if ( !flag ) {
|
||||
MPI_Init(argc,argv);
|
||||
}
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Want to implement some magic ... Group sub-cubes into those on same node
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
|
||||
MPI_Comm_rank(communicator_world,&WorldRank);
|
||||
MPI_Comm_size(communicator_world,&WorldSize);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Split into groups that can share memory
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
MPI_Comm_split_type(communicator_world, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&ShmComm);
|
||||
MPI_Comm_rank(ShmComm ,&ShmRank);
|
||||
MPI_Comm_size(ShmComm ,&ShmSize);
|
||||
GroupSize = WorldSize/ShmSize;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// find world ranks in our SHM group (i.e. which ranks are on our node)
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
MPI_Group WorldGroup, ShmGroup;
|
||||
MPI_Comm_group (communicator_world, &WorldGroup);
|
||||
MPI_Comm_group (ShmComm, &ShmGroup);
|
||||
|
||||
std::vector<int> world_ranks(WorldSize);
|
||||
GroupRanks.resize(WorldSize);
|
||||
MyGroup.resize(ShmSize);
|
||||
for(int r=0;r<WorldSize;r++) world_ranks[r]=r;
|
||||
|
||||
MPI_Group_translate_ranks (WorldGroup,WorldSize,&world_ranks[0],ShmGroup, &GroupRanks[0]);
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Identify who is in my group and nominate the leader
|
||||
///////////////////////////////////////////////////////////////////
|
||||
int g=0;
|
||||
for(int rank=0;rank<WorldSize;rank++){
|
||||
if(GroupRanks[rank]!=MPI_UNDEFINED){
|
||||
assert(g<ShmSize);
|
||||
MyGroup[g++] = rank;
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(MyGroup.begin(),MyGroup.end(),std::less<int>());
|
||||
int myleader = MyGroup[0];
|
||||
|
||||
std::vector<int> leaders_1hot(WorldSize,0);
|
||||
std::vector<int> leaders_group(GroupSize,0);
|
||||
leaders_1hot [ myleader ] = 1;
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// global sum leaders over comm world
|
||||
///////////////////////////////////////////////////////////////////
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator_world);
|
||||
assert(ierr==0);
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// find the group leaders world rank
|
||||
///////////////////////////////////////////////////////////////////
|
||||
int group=0;
|
||||
for(int l=0;l<WorldSize;l++){
|
||||
if(leaders_1hot[l]){
|
||||
leaders_group[group++] = l;
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Identify the rank of the group in which I (and my leader) live
|
||||
///////////////////////////////////////////////////////////////////
|
||||
GroupRank=-1;
|
||||
for(int g=0;g<GroupSize;g++){
|
||||
if (myleader == leaders_group[g]){
|
||||
GroupRank=g;
|
||||
}
|
||||
}
|
||||
assert(GroupRank!=-1);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// allocate the shared window for our group
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
ShmCommBuf = 0;
|
||||
ierr = MPI_Win_allocate_shared(MAX_MPI_SHM_BYTES,1,MPI_INFO_NULL,ShmComm,&ShmCommBuf,&ShmWindow);
|
||||
assert(ierr==0);
|
||||
// KNL hack -- force to numa-domain 1 in flat
|
||||
#if 0
|
||||
for(uint64_t page=0;page<MAX_MPI_SHM_BYTES;page+=4096){
|
||||
void *pages = (void *) ( page + ShmCommBuf );
|
||||
int status;
|
||||
int flags=MPOL_MF_MOVE_ALL;
|
||||
int nodes=1; // numa domain == MCDRAM
|
||||
unsigned long count=1;
|
||||
ierr= move_pages(0,count, &pages,&nodes,&status,flags);
|
||||
if (ierr && (page==0)) perror("numa relocate command failed");
|
||||
}
|
||||
#endif
|
||||
MPI_Win_lock_all (MPI_MODE_NOCHECK, ShmWindow);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Plan: allocate a fixed SHM region. Scratch that is just used via some scheme during stencil comms, with no allocate free.
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
ShmCommBufs.resize(ShmSize);
|
||||
for(int r=0;r<ShmSize;r++){
|
||||
MPI_Aint sz;
|
||||
int dsp_unit;
|
||||
MPI_Win_shared_query (ShmWindow, r, &sz, &dsp_unit, &ShmCommBufs[r]);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Verbose for now
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
if (WorldRank == 0){
|
||||
std::cout<<GridLogMessage<< "Grid MPI-3 configuration: detected ";
|
||||
std::cout<< WorldSize << " Ranks " ;
|
||||
std::cout<< GroupSize << " Nodes " ;
|
||||
std::cout<< ShmSize << " with ranks-per-node "<<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage <<"Grid MPI-3 configuration: allocated shared memory region of size ";
|
||||
std::cout<<std::hex << MAX_MPI_SHM_BYTES <<" ShmCommBuf address = "<<ShmCommBuf << std::dec<<std::endl;
|
||||
|
||||
for(int g=0;g<GroupSize;g++){
|
||||
std::cout<<GridLogMessage<<" Node "<<g<<" led by MPI rank "<<leaders_group[g]<<std::endl;
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage<<" Boss Node Shm Pointers are {";
|
||||
for(int g=0;g<ShmSize;g++){
|
||||
std::cout<<std::hex<<ShmCommBufs[g]<<std::dec;
|
||||
if(g!=ShmSize-1) std::cout<<",";
|
||||
else std::cout<<"}"<<std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for(int g=0;g<GroupSize;g++){
|
||||
if ( (ShmRank == 0) && (GroupRank==g) ) std::cout<<GridLogMessage<<"["<<g<<"] Node Group "<<g<<" is ranks {";
|
||||
for(int r=0;r<ShmSize;r++){
|
||||
if ( (ShmRank == 0) && (GroupRank==g) ) {
|
||||
std::cout<<MyGroup[r];
|
||||
if(r<ShmSize-1) std::cout<<",";
|
||||
else std::cout<<"}"<<std::endl;
|
||||
}
|
||||
MPI_Barrier(communicator_world);
|
||||
}
|
||||
}
|
||||
|
||||
assert(ShmSetup==0); ShmSetup=1;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Want to implement some magic ... Group sub-cubes into those on same node
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
||||
{
|
||||
std::vector<int> coor = _processor_coor;
|
||||
@ -80,139 +250,13 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &c
|
||||
rank = LexicographicToWorldRank[rank];
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Info that is setup once and indept of cartesian layout
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
int CartesianCommunicator::ShmSetup = 0;
|
||||
int CartesianCommunicator::ShmRank;
|
||||
int CartesianCommunicator::ShmSize;
|
||||
int CartesianCommunicator::GroupRank;
|
||||
int CartesianCommunicator::GroupSize;
|
||||
MPI_Comm CartesianCommunicator::ShmComm;
|
||||
MPI_Win CartesianCommunicator::ShmWindow;
|
||||
std::vector<int> CartesianCommunicator::GroupRanks;
|
||||
std::vector<int> CartesianCommunicator::MyGroup;
|
||||
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
{
|
||||
|
||||
_ndimension = processors.size();
|
||||
|
||||
WorldDims = processors;
|
||||
|
||||
communicator = MPI_COMM_WORLD;
|
||||
MPI_Comm_rank(communicator,&WorldRank);
|
||||
MPI_Comm_size(communicator,&WorldSize);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Plan: allocate a fixed SHM region. Scratch that is just used via some scheme during stencil comms, with no allocate free.
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Does every grid need one, or could we share across all grids via a singleton/guard?
|
||||
int ierr;
|
||||
|
||||
if ( !ShmSetup ) {
|
||||
communicator=communicator_world;
|
||||
|
||||
MPI_Comm_split_type(communicator, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&ShmComm);
|
||||
MPI_Comm_rank(ShmComm ,&ShmRank);
|
||||
MPI_Comm_size(ShmComm ,&ShmSize);
|
||||
GroupSize = WorldSize/ShmSize;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// find world ranks in our SHM group (i.e. which ranks are on our node)
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
MPI_Group WorldGroup, ShmGroup;
|
||||
MPI_Comm_group (communicator, &WorldGroup);
|
||||
MPI_Comm_group (ShmComm, &ShmGroup);
|
||||
|
||||
std::vector<int> world_ranks(WorldSize);
|
||||
GroupRanks.resize(WorldSize);
|
||||
MyGroup.resize(ShmSize);
|
||||
for(int r=0;r<WorldSize;r++) world_ranks[r]=r;
|
||||
|
||||
MPI_Group_translate_ranks (WorldGroup,WorldSize,&world_ranks[0],ShmGroup, &GroupRanks[0]);
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Identify who is in my group and noninate the leader
|
||||
///////////////////////////////////////////////////////////////////
|
||||
int g=0;
|
||||
for(int rank=0;rank<WorldSize;rank++){
|
||||
if(GroupRanks[rank]!=MPI_UNDEFINED){
|
||||
assert(g<ShmSize);
|
||||
MyGroup[g++] = rank;
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(MyGroup.begin(),MyGroup.end(),std::greater<int>());
|
||||
int myleader = MyGroup[0];
|
||||
|
||||
std::vector<int> leaders_1hot(WorldSize,0);
|
||||
std::vector<int> leaders_group(GroupSize,0);
|
||||
leaders_1hot [ myleader ] = 1;
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// global sum leaders over comm world
|
||||
///////////////////////////////////////////////////////////////////
|
||||
ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// find the group leaders world rank
|
||||
///////////////////////////////////////////////////////////////////
|
||||
int group=0;
|
||||
for(int l=0;l<WorldSize;l++){
|
||||
if(leaders_1hot[l]){
|
||||
leaders_group[group++] = l;
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Identify the rank of the group in which I (and my leader) live
|
||||
///////////////////////////////////////////////////////////////////
|
||||
GroupRank=-1;
|
||||
for(int g=0;g<GroupSize;g++){
|
||||
if (myleader == leaders_group[g]){
|
||||
GroupRank=g;
|
||||
}
|
||||
}
|
||||
assert(GroupRank!=-1);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// allocate the shared window for our group
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
ShmCommBuf = 0;
|
||||
ierr = MPI_Win_allocate_shared(MAX_MPI_SHM_BYTES,1,MPI_INFO_NULL,ShmComm,&ShmCommBuf,&ShmWindow);
|
||||
assert(ierr==0);
|
||||
for(uint64_t page=0;page<MAX_MPI_SHM_BYTES;page+=4096){
|
||||
void *pages = (void *) ( page + ShmCommBuf );
|
||||
int status;
|
||||
int flags=MPOL_MF_MOVE_ALL;
|
||||
int nodes=1; // numa domain == MCDRAM
|
||||
unsigned long count=1;
|
||||
ierr= move_pages(0,count, &pages,&nodes,&status,flags);
|
||||
if (ierr && (page==0)) perror("numa relocate command failed");
|
||||
}
|
||||
MPI_Win_lock_all (MPI_MODE_NOCHECK, ShmWindow);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Verbose for now
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
std::cout<<GridLogMessage<< "MPI-3 configuration: Ranks per node "<< ShmSize ;
|
||||
std::cout<< " Nodes "<< GroupSize;
|
||||
std::cout<< " Ranks "<< WorldSize;
|
||||
std::cout<< " Shm CommBuf address"<< std::hex <<ShmCommBuf << std::dec<<std::endl;
|
||||
|
||||
// Done
|
||||
ShmSetup=1;
|
||||
|
||||
}
|
||||
|
||||
ShmCommBufs.resize(ShmSize);
|
||||
for(int r=0;r<ShmSize;r++){
|
||||
MPI_Aint sz;
|
||||
int dsp_unit;
|
||||
MPI_Win_shared_query (ShmWindow, r, &sz, &dsp_unit, &ShmCommBufs[r]);
|
||||
}
|
||||
_ndimension = processors.size();
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Assert power of two shm_size.
|
||||
@ -232,6 +276,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
////////////////////////////////////////////////////////////////
|
||||
int dim = 0;
|
||||
|
||||
std::vector<int> WorldDims = processors;
|
||||
|
||||
ShmDims.resize(_ndimension,1);
|
||||
GroupDims.resize(_ndimension);
|
||||
|
||||
@ -346,21 +392,6 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void *CartesianCommunicator::ShmBufferSelf(void)
|
||||
{
|
||||
return ShmCommBufs[ShmRank];
|
||||
}
|
||||
void *CartesianCommunicator::ShmBuffer(int rank)
|
||||
{
|
||||
int gpeer = GroupRanks[rank];
|
||||
if (gpeer == MPI_UNDEFINED){
|
||||
return NULL;
|
||||
} else {
|
||||
return ShmCommBufs[gpeer];
|
||||
}
|
||||
}
|
||||
|
||||
// Basic Halo comms primitive
|
||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
@ -369,6 +400,7 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
|
||||
int from,
|
||||
int bytes)
|
||||
{
|
||||
#if 1
|
||||
MPI_Request xrq;
|
||||
MPI_Request rrq;
|
||||
|
||||
@ -387,12 +419,11 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
|
||||
|
||||
sequence++;
|
||||
|
||||
char *to_ptr = (char *)ShmCommBufs[gdest];
|
||||
char *from_ptr = (char *)ShmCommBufs[ShmRank];
|
||||
|
||||
int small = (bytes<MAX_MPI_SHM_BYTES);
|
||||
|
||||
typedef vRealD T;
|
||||
typedef uint64_t T;
|
||||
int words = bytes/sizeof(T);
|
||||
|
||||
assert(((size_t)bytes &(sizeof(T)-1))==0);
|
||||
@ -400,13 +431,18 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
|
||||
|
||||
if ( small && (gdest !=MPI_UNDEFINED) ) {
|
||||
|
||||
char *to_ptr = (char *)ShmCommBufs[gdest];
|
||||
|
||||
assert(gme != gdest);
|
||||
|
||||
T *ip = (T *)xmit;
|
||||
T *op = (T *)to_ptr;
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int w=0;w<words;w++) {
|
||||
vstream(op[w],ip[w]);
|
||||
op[w]=ip[w];
|
||||
if ( w == 0 ) {
|
||||
// std::cout << " xmit "<< ShmRank <<" -> "<< gdest<<" " <<std::hex<<op[w]<<std::dec<<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
bcopy(&_processor,&to_ptr[bytes],sizeof(_processor));
|
||||
@ -426,7 +462,10 @@ PARALLEL_FOR_LOOP
|
||||
T *op = (T *)recv;
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int w=0;w<words;w++) {
|
||||
vstream(op[w],ip[w]);
|
||||
op[w]=ip[w];
|
||||
if ( w == 0 ) {
|
||||
// std::cout << " recv "<< ShmRank <<" <- "<< gfrom<<" " <<std::hex<<op[w]<<std::dec<<std::endl;
|
||||
}
|
||||
}
|
||||
bcopy(&from_ptr[bytes] ,&tag ,sizeof(tag));
|
||||
bcopy(&from_ptr[bytes+4],&check,sizeof(check));
|
||||
@ -441,6 +480,19 @@ PARALLEL_FOR_LOOP
|
||||
MPI_Win_sync (ShmWindow);
|
||||
MPI_Barrier (ShmComm);
|
||||
MPI_Win_sync (ShmWindow);
|
||||
#else
|
||||
MPI_Request xrq;
|
||||
MPI_Request rrq;
|
||||
int rank = _processor;
|
||||
int ierr;
|
||||
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
||||
ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
|
||||
|
||||
assert(ierr==0);
|
||||
|
||||
list.push_back(xrq);
|
||||
list.push_back(rrq);
|
||||
#endif
|
||||
}
|
||||
|
||||
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
@ -476,19 +528,29 @@ void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_
|
||||
list.push_back(rrq);
|
||||
}
|
||||
|
||||
|
||||
StencilBarrier();
|
||||
|
||||
}
|
||||
|
||||
|
||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||
{
|
||||
SendToRecvFromComplete(list);
|
||||
}
|
||||
|
||||
// Barrier over the shared-memory communicator, bracketed by MPI_Win_sync
// calls on the shared window: sync, barrier on ShmComm, sync again.
// NOTE(review): the sync/barrier/sync sequence is presumably there so that
// stores into the shared window made before the barrier are visible to the
// other ranks in ShmComm afterwards - confirm against the MPI-3 RMA memory model.
void CartesianCommunicator::StencilBarrier(void)
|
||||
{
|
||||
MPI_Win_sync (ShmWindow);
|
||||
MPI_Barrier (ShmComm);
|
||||
MPI_Win_sync (ShmWindow);
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Wait for every outstanding request in `list` with a single MPI_Waitall and
// assert that the call returned 0.
// The list itself is not cleared here.
// NOTE(review): `&list[0]` and `&status[0]` index element 0 even when the list
// is empty - confirm callers never pass an empty request list, or guard nreq==0.
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||
{
|
||||
int nreq=list.size();
|
||||
// One status slot per request, as MPI_Waitall requires.
std::vector<MPI_Status> status(nreq);
|
||||
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
|
||||
|
||||
assert(ierr==0);
|
||||
}
|
||||
|
||||
@ -514,7 +576,7 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||
bytes,
|
||||
MPI_BYTE,
|
||||
root,
|
||||
MPI_COMM_WORLD);
|
||||
communicator_world);
|
||||
assert(ierr==0);
|
||||
}
|
||||
|
||||
|
@ -28,18 +28,29 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include "Grid.h"
|
||||
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Info that is set up once and is independent of the Cartesian layout
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
int CartesianCommunicator::ShmRank;
|
||||
int CartesianCommunicator::ShmSize;
|
||||
int CartesianCommunicator::GroupRank;
|
||||
int CartesianCommunicator::GroupSize;
|
||||
int CartesianCommunicator::WorldRank;
|
||||
int CartesianCommunicator::WorldSize;
|
||||
int CartesianCommunicator::Slave;
|
||||
void * CartesianCommunicator::ShmCommBuf;
|
||||
commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector;
|
||||
|
||||
void CartesianCommunicator::Init(int *argc, char *** arv)
|
||||
{
|
||||
}
|
||||
|
||||
int Rank(void ){ return 0; };
|
||||
void *CartesianCommunicator::ShmBufferSelf(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
void *CartesianCommunicator::ShmBuffer(int rank)
|
||||
{
|
||||
return NULL;
|
||||
WorldRank = 0;
|
||||
WorldSize = 1;
|
||||
ShmRank=0;
|
||||
ShmSize=1;
|
||||
GroupRank=_WorldRank;
|
||||
GroupSize=_WorldSize;
|
||||
Slave =0;
|
||||
ShmInitGeneric();
|
||||
}
|
||||
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
@ -97,30 +108,16 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &
|
||||
assert(0);
|
||||
}
|
||||
|
||||
void CartesianCommunicator::Barrier(void)
|
||||
{
|
||||
}
|
||||
|
||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
||||
{
|
||||
}
|
||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
void CartesianCommunicator::Barrier(void){}
|
||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {}
|
||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { }
|
||||
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor) { return 0;}
|
||||
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor){ assert(0);}
|
||||
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
||||
{
|
||||
source =0;
|
||||
dest=0;
|
||||
}
|
||||
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -39,25 +39,19 @@ namespace Grid {
|
||||
BACKTRACEFILE(); \
|
||||
}\
|
||||
}
|
||||
int Rank(void) {
|
||||
return shmem_my_pe();
|
||||
}
|
||||
typedef struct HandShake_t {
|
||||
uint64_t seq_local;
|
||||
uint64_t seq_remote;
|
||||
} HandShake;
|
||||
|
||||
static Vector< HandShake > XConnections;
|
||||
static Vector< HandShake > RConnections;
|
||||
|
||||
void *CartesianCommunicator::ShmBufferSelf(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
void *CartesianCommunicator::ShmBuffer(int rank)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Info that is set up once and is independent of the Cartesian layout
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
int CartesianCommunicator::ShmRank;
|
||||
int CartesianCommunicator::ShmSize;
|
||||
int CartesianCommunicator::GroupRank;
|
||||
int CartesianCommunicator::GroupSize;
|
||||
int CartesianCommunicator::WorldRank;
|
||||
int CartesianCommunicator::WorldSize;
|
||||
int CartesianCommunicator::Slave;
|
||||
|
||||
void CartesianCommunicator::Init(int *argc, char ***argv) {
|
||||
shmem_init();
|
||||
XConnections.resize(shmem_n_pes());
|
||||
@ -69,7 +63,36 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
|
||||
RConnections[pe].seq_remote= 0;
|
||||
}
|
||||
shmem_barrier_all();
|
||||
ShmInitGeneric();
|
||||
}
|
||||
|
||||
|
||||
// Should error check all MPI calls.
|
||||
void CartesianCommunicator::Init(int *argc, char ***argv) {
|
||||
int flag;
|
||||
MPI_Initialized(&flag); // needed to coexist with other libs apparently
|
||||
if ( !flag ) {
|
||||
MPI_Init(argc,argv);
|
||||
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
|
||||
MPI_Comm_rank(communicator_world,&_WorldRank);
|
||||
MPI_Comm_size(communicator_world,&_WorldSize);
|
||||
_ShmRank=0;
|
||||
_ShmSize=1;
|
||||
_GroupRank=_WorldRank;
|
||||
_GroupSize=_WorldSize;
|
||||
_Slave =0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
typedef struct HandShake_t {
|
||||
uint64_t seq_local;
|
||||
uint64_t seq_remote;
|
||||
} HandShake;
|
||||
|
||||
static Vector< HandShake > XConnections;
|
||||
static Vector< HandShake > RConnections;
|
||||
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
{
|
||||
_ndimension = processors.size();
|
||||
|
Loading…
Reference in New Issue
Block a user