mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Shmem comms [NO MPI] target added. The dwf test runs and passes.
Not really shaken out to my satisfaction, though, as I want more tests done, so don't declare it as working. But I am committing my current state while I try a few experiments.
This commit is contained in:
parent
294dbf1bf0
commit
41c2b09184
2
configure
vendored
2
configure
vendored
@ -5898,7 +5898,7 @@ _ACEOF
|
||||
echo Configuring for SHMEM communications
|
||||
|
||||
cat >>confdefs.h <<\_ACEOF
|
||||
#define GRID_COMMS_MPI 1
|
||||
#define GRID_COMMS_SHMEM 1
|
||||
_ACEOF
|
||||
|
||||
;;
|
||||
|
@ -180,7 +180,7 @@ case ${ac_COMMS} in
|
||||
;;
|
||||
shmem)
|
||||
echo Configuring for SHMEM communications
|
||||
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_SHMEM] )
|
||||
AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] )
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
|
||||
|
@ -75,18 +75,30 @@ public:
|
||||
|
||||
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
|
||||
|
||||
pointer allocate(size_type __n, const void* = 0)
|
||||
pointer allocate(size_type __n, const void* _p= 0)
|
||||
{
|
||||
#ifdef GRID_COMMS_SHMEM
|
||||
|
||||
_Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64);
|
||||
|
||||
|
||||
#define PARANOID_SYMMETRIC_HEAP
|
||||
#ifdef PARANOID_SYMMETRIC_HEAP
|
||||
static void * bcast;
|
||||
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
|
||||
|
||||
shmem_barrier_all();
|
||||
_Tp *ptr = (_Tp *) shmem_align(128,__n*sizeof(_Tp));
|
||||
shmem_barrier_all();
|
||||
bcast = (void *) _Tp;
|
||||
bcast = (void *) ptr;
|
||||
shmem_broadcast32((void *)&bcast,(void *)&bcast,sizeof(void *)/4,0,0,0,shmem_n_pes(),psync);
|
||||
assert( bcast == (void *) _Tp);
|
||||
|
||||
if ( bcast != ptr ) {
|
||||
std::printf("inconsistent alloc pe %d %lx %lx \n",shmem_my_pe(),bcast,ptr);std::fflush(stdout);
|
||||
BACKTRACEFILE();
|
||||
exit(0);
|
||||
}
|
||||
|
||||
assert( bcast == (void *) ptr);
|
||||
|
||||
#endif
|
||||
#else
|
||||
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
|
@ -15,12 +15,15 @@
|
||||
/* EMPTY_SIMD only for DEBUGGING */
|
||||
#undef EMPTY_SIMD
|
||||
|
||||
/* GRID_COMMS_SHMEM */
|
||||
/* GRID_COMMS_MPI */
|
||||
#undef GRID_COMMS_MPI
|
||||
|
||||
/* GRID_COMMS_NONE */
|
||||
#undef GRID_COMMS_NONE
|
||||
|
||||
/* GRID_COMMS_SHMEM */
|
||||
#undef GRID_COMMS_SHMEM
|
||||
|
||||
/* GRID_DEFAULT_PRECISION is DOUBLE */
|
||||
#undef GRID_DEFAULT_PRECISION_DOUBLE
|
||||
|
||||
|
@ -37,4 +37,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#ifdef GRID_COMMS_MPI
|
||||
#include <cshift/Cshift_mpi.h>
|
||||
#endif
|
||||
|
||||
#ifdef GRID_COMMS_SHMEM
|
||||
#include <cshift/Cshift_mpi.h> // uses same implementation of communicator
|
||||
#endif
|
||||
#endif
|
||||
|
@ -47,9 +47,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
#define __X86_64
|
||||
|
||||
#ifdef HAVE_EXECINFO_H
|
||||
#include <execinfo.h>
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
@ -174,9 +171,8 @@ std::string GridCmdVectorIntToString(const std::vector<int> & vec){
|
||||
/////////////////////////////////////////////////////////
|
||||
void Grid_init(int *argc,char ***argv)
|
||||
{
|
||||
#ifdef GRID_COMMS_MPI
|
||||
MPI_Init(argc,argv);
|
||||
#endif
|
||||
CartesianCommunicator::Init(argc,argv);
|
||||
|
||||
// Parse command line args.
|
||||
|
||||
GridLogger::StopWatch.Start();
|
||||
@ -284,7 +280,6 @@ double usecond(void) {
|
||||
return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
|
||||
}
|
||||
|
||||
#define _NBACKTRACE (256)
|
||||
void * Grid_backtrace_buffer[_NBACKTRACE];
|
||||
|
||||
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
|
||||
|
@ -78,13 +78,16 @@ void GridLogConfigure(std::vector<std::string> &logstreams)
|
||||
////////////////////////////////////////////////////////////
|
||||
void Grid_quiesce_nodes(void)
|
||||
{
|
||||
#ifdef GRID_COMMS_MPI
|
||||
int me;
|
||||
#ifdef GRID_COMMS_MPI
|
||||
MPI_Comm_rank(MPI_COMM_WORLD,&me);
|
||||
#endif
|
||||
#ifdef GRID_COMMS_SHMEM
|
||||
me = shmem_my_pe();
|
||||
#endif
|
||||
if ( me ) {
|
||||
std::cout.setstate(std::ios::badbit);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void Grid_unquiesce_nodes(void)
|
||||
|
36
lib/Log.h
36
lib/Log.h
@ -29,9 +29,15 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LOG_H
|
||||
#define GRID_LOG_H
|
||||
|
||||
#ifdef HAVE_EXECINFO_H
|
||||
#include <execinfo.h>
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// Dress the output; use std::chrono for time stamping via the StopWatch class
|
||||
int Rank(void); // used for early stage debug before library init
|
||||
|
||||
|
||||
class Logger {
|
||||
@ -89,5 +95,35 @@ extern GridLogger GridLogPerformance;
|
||||
extern GridLogger GridLogIterative ;
|
||||
extern GridLogger GridLogIntegrator ;
|
||||
|
||||
|
||||
#define _NBACKTRACE (256)
|
||||
extern void * Grid_backtrace_buffer[_NBACKTRACE];
|
||||
|
||||
#ifdef HAVE_EXECINFO_H
|
||||
#define BACKTRACEFILE() {\
|
||||
char string[20]; \
|
||||
std::sprintf(string,"backtrace.%d",Rank()); \
|
||||
std::FILE * fp = std::fopen(string,"w"); \
|
||||
BACKTRACEFP(fp)\
|
||||
std::fclose(fp); \
|
||||
}
|
||||
#define BACKTRACE() BACKTRACE(std::stdout)
|
||||
#define BACKTRACEFP(fp) { \
|
||||
int symbols = backtrace (Grid_backtrace_buffer,_NBACKTRACE);\
|
||||
char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);\
|
||||
for (int i = 0; i < symbols; i++){\
|
||||
std::fprintf (fp,"BackTrace Strings: %d %s\n",i, strings[i]); std::fflush(fp); \
|
||||
}\
|
||||
}
|
||||
#else
|
||||
#define BACKTRACE() BACKTRACE(std::stdout);
|
||||
|
||||
#define BACKTRACEFP(fp) { \
|
||||
for (int i = 0; i < 4; i++){\
|
||||
std::fprintf (fp,"BT %d %lx\n",i, __builtin_return_address(i); std::fflush(fp); \
|
||||
}\
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -114,15 +114,19 @@ namespace Grid {
|
||||
}
|
||||
|
||||
void Communicate(void ) {
|
||||
typedef CartesianCommunicator::CommsRequest_t CommsRequest_t;
|
||||
std::vector<CommsRequest_t> reqs(0);
|
||||
commtime-=usecond();
|
||||
for(int i=0;i<Packets.size();i++){
|
||||
_grid->SendToRecvFrom(Packets[i].send_buf,
|
||||
_grid->SendToRecvFromBegin(reqs,
|
||||
Packets[i].send_buf,
|
||||
Packets[i].to_rank,
|
||||
Packets[i].recv_buf,
|
||||
Packets[i].from_rank,
|
||||
Packets[i].bytes);
|
||||
Packets[i].done = 1;
|
||||
}
|
||||
_grid->SendToRecvFromComplete(reqs);
|
||||
commtime+=usecond();
|
||||
}
|
||||
|
||||
@ -648,7 +652,7 @@ PARALLEL_FOR_LOOP
|
||||
int recv_from_rank;
|
||||
int xmit_to_rank;
|
||||
_grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
assert (xmit_to_rank != _grid->ThisRank());
|
||||
assert (xmit_to_rank != _grid->ThisRank());
|
||||
assert (recv_from_rank != _grid->ThisRank());
|
||||
|
||||
// FIXME Implement asynchronous send & also avoid buffer copy
|
||||
|
@ -56,6 +56,8 @@ class CartesianCommunicator {
|
||||
typedef int CommsRequest_t;
|
||||
#endif
|
||||
|
||||
static void Init(int *argc, char ***argv);
|
||||
|
||||
// Constructor
|
||||
CartesianCommunicator(const std::vector<int> &pdimensions_in);
|
||||
|
||||
|
@ -31,6 +31,15 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
namespace Grid {
|
||||
|
||||
// Should error check all MPI calls.
|
||||
void CartesianCommunicator::Init(int *argc, char ***argv) {
|
||||
MPI_Init(argc,argv);
|
||||
}
|
||||
|
||||
int Rank(void) {
|
||||
int pe;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD,&pe);
|
||||
return pe;
|
||||
}
|
||||
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
{
|
||||
|
@ -28,6 +28,12 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include "Grid.h"
|
||||
namespace Grid {
|
||||
|
||||
void CartesianCommunicator::Init(int *argc, char *** arv)
|
||||
{
|
||||
}
|
||||
|
||||
int Rank(void ){ return 0 };
|
||||
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
{
|
||||
_processors = processors;
|
||||
|
@ -31,7 +31,20 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
namespace Grid {
|
||||
|
||||
// Should error check all MPI calls.
|
||||
#define SHMEM_VET(addr)
|
||||
|
||||
#define SHMEM_VET_DEBUG(addr) { \
|
||||
if ( ! shmem_addr_accessible(addr,_processor) ) {\
|
||||
std::fprintf(stderr,"%d Inaccessible shmem address %lx %s %s\n",_processor,addr,__FUNCTION__,#addr); \
|
||||
BACKTRACEFILE(); \
|
||||
}\
|
||||
}
|
||||
int Rank(void) {
|
||||
return shmem_my_pe();
|
||||
}
|
||||
void CartesianCommunicator::Init(int *argc, char ***argv) {
|
||||
shmem_init();
|
||||
}
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
{
|
||||
_ndimension = processors.size();
|
||||
@ -41,8 +54,6 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
_processors = processors;
|
||||
_processor_coor.resize(_ndimension);
|
||||
|
||||
// shmem_init_thread(SHMEM_THREAD_FUNNELED);
|
||||
start_pes(0);
|
||||
_processor = shmem_my_pe();
|
||||
|
||||
Lexicographic::CoorFromIndex(_processor_coor,_processor,_processors);
|
||||
@ -50,10 +61,10 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
for(int i=0;i<_ndimension;i++){
|
||||
_Nprocessors*=_processors[i];
|
||||
}
|
||||
if ( _processor == 0 ) {
|
||||
printf("I'm running SHMEM communications %d \n",_processor);
|
||||
}
|
||||
|
||||
int Size = shmem_n_pes();
|
||||
|
||||
|
||||
assert(Size==_Nprocessors);
|
||||
}
|
||||
|
||||
@ -85,6 +96,12 @@ void CartesianCommunicator::GlobalSumVector(float *f,int N)
|
||||
static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
||||
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
|
||||
|
||||
// Inefficient, but don't want to dynamic alloc
|
||||
if ( shmem_addr_accessible(f,_processor) ){
|
||||
shmem_float_sum_to_all(f,f,N,0,0,_Nprocessors,llwrk,psync);
|
||||
return;
|
||||
}
|
||||
|
||||
for(int i=0;i<N;i++){
|
||||
source = f[i];
|
||||
shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
|
||||
@ -108,6 +125,11 @@ void CartesianCommunicator::GlobalSumVector(double *d,int N)
|
||||
static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
||||
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
|
||||
|
||||
if ( shmem_addr_accessible(d,_processor) ){
|
||||
shmem_double_sum_to_all(d,d,N,0,0,_Nprocessors,llwrk,psync);
|
||||
return;
|
||||
}
|
||||
|
||||
for(int i=0;i<N;i++){
|
||||
source = d[i];
|
||||
shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
|
||||
@ -117,12 +139,13 @@ void CartesianCommunicator::GlobalSumVector(double *d,int N)
|
||||
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
||||
{
|
||||
std::vector<int> coor = _processor_coor;
|
||||
|
||||
assert(std::abs(shift) <_processors[dim]);
|
||||
|
||||
coor[dim] = (coor[dim] + shift + _processors[dim])%_processors[dim];
|
||||
coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim];
|
||||
Lexicographic::IndexFromCoor(coor,source,_processors);
|
||||
|
||||
coor[dim] = (coor[dim] - shift + _processors[dim])%_processors[dim];
|
||||
coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim];
|
||||
Lexicographic::IndexFromCoor(coor,dest,_processors);
|
||||
|
||||
}
|
||||
@ -144,6 +167,8 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
||||
int from,
|
||||
int bytes)
|
||||
{
|
||||
SHMEM_VET(xmit);
|
||||
SHMEM_VET(recv);
|
||||
std::vector<CommsRequest_t> reqs(0);
|
||||
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
|
||||
SendToRecvFromComplete(reqs);
|
||||
@ -171,6 +196,9 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
|
||||
int from,
|
||||
int bytes)
|
||||
{
|
||||
SHMEM_VET(xmit);
|
||||
SHMEM_VET(recv);
|
||||
// shmem_putmem_nb(recv,xmit,bytes,dest,NULL);
|
||||
shmem_putmem(recv,xmit,bytes,dest);
|
||||
}
|
||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||
@ -185,14 +213,37 @@ void CartesianCommunicator::Barrier(void)
|
||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
||||
{
|
||||
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
|
||||
static uint32_t word;
|
||||
uint32_t *array = (uint32_t *) data;
|
||||
assert( (bytes % 4)==0);
|
||||
shmem_broadcast32(data,data,bytes/4,root,0,0,_Nprocessors,psync);
|
||||
int words = bytes/4;
|
||||
|
||||
for(int w=0;w<words;w++){
|
||||
word = array[w];
|
||||
shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync);
|
||||
if ( shmem_my_pe() != root ) {
|
||||
array[w] = word;
|
||||
}
|
||||
shmem_barrier_all();
|
||||
}
|
||||
|
||||
}
|
||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||
{
|
||||
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
|
||||
static uint32_t word;
|
||||
uint32_t *array = (uint32_t *) data;
|
||||
assert( (bytes % 4)==0);
|
||||
shmem_broadcast32(data,data,bytes/4,root,0,0,shmem_n_pes(),psync);
|
||||
int words = bytes/4;
|
||||
|
||||
for(int w=0;w<words;w++){
|
||||
word = array[w];
|
||||
shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync);
|
||||
if ( shmem_my_pe() != root ) {
|
||||
array[w]= word;
|
||||
}
|
||||
shmem_barrier_all();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -191,8 +191,9 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
||||
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
|
||||
int words = sizeof(vobj)/sizeof(vector_type);
|
||||
|
||||
std::vector<std::vector<scalar_object> > send_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
|
||||
std::vector<std::vector<scalar_object> > recv_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
|
||||
std::vector<Vector<scalar_object> > send_buf_extract(Nsimd,Vector<scalar_object>(buffer_size) );
|
||||
std::vector<Vector<scalar_object> > recv_buf_extract(Nsimd,Vector<scalar_object>(buffer_size) );
|
||||
|
||||
int bytes = buffer_size*sizeof(scalar_object);
|
||||
|
||||
std::vector<scalar_object *> pointers(Nsimd); //
|
||||
|
Loading…
Reference in New Issue
Block a user