1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 01:05:38 +01:00

Adding a better-controlled threading class, preparing to enforce deterministic reduction.
This commit is contained in:
Peter Boyle 2015-05-11 18:59:03 +01:00
parent f5dcca7b1b
commit 22d384b07d
14 changed files with 199 additions and 97 deletions

View File

@ -8,11 +8,8 @@ int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size;
std::vector<int> simd_layout;
std::vector<int> mpi_layout;
GridParseLayout(argv,argc,latt_size,simd_layout,mpi_layout);
std::vector<int> simd_layout = GridDefaultSimd();
std::vector<int> mpi_layout = GridDefaultMpi();
int Nloop=10;
int nmu=0;

View File

@ -8,17 +8,14 @@ int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> tmp_latt_size;
std::vector<int> simd_layout;
std::vector<int> mpi_layout;
GridParseLayout(argv,argc,tmp_latt_size,simd_layout,mpi_layout);
const int Nvec=8;
typedef Lattice< iVector< vReal,Nvec> > LatticeVec;
int Nloop=1000;
std::vector<int> simd_layout = GridDefaultSimd();
std::vector<int> mpi_layout = GridDefaultMpi();
std::cout << "===================================================================================================="<<std::endl;
std::cout << "= Benchmarking fused AXPY bandwidth"<<std::endl;
std::cout << "===================================================================================================="<<std::endl;

View File

@ -20,13 +20,12 @@ int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size;
std::vector<int> simd_layout;
std::vector<int> mpi_layout;
GridParseLayout(argv,argc,latt_size,simd_layout,mpi_layout);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd();
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid);

View File

@ -45,8 +45,11 @@
#include <Grid_aligned_allocator.h>
#include <Grid_simd.h>
#include <Grid_math.h>
#include <Grid_threads.h>
#include <Grid_cartesian.h>
#include <Grid_math.h>
#include <Grid_lattice.h>
#include <Grid_comparison.h>
#include <Grid_cshift.h>
@ -60,6 +63,7 @@ namespace Grid {
void Grid_init(int *argc,char ***argv);
void Grid_finalize(void);
// internal, controlled with --handle
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr);
void Grid_debug_handler_init(void);
void Grid_quiesce_nodes(void);
@ -68,6 +72,11 @@ namespace Grid {
// C++11 time facilities better?
double usecond(void);
const std::vector<int> &GridDefaultSimd(void);
const std::vector<int> &GridDefaultLatt(void);
const std::vector<int> &GridDefaultMpi(void);
const int &GridThreads(void) ;
void GridSetThreads(int t) ;
// Common parsing chores
std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option);
@ -75,8 +84,8 @@ namespace Grid {
void GridParseIntVector(std::string &str,std::vector<int> & vec);
void GridParseLayout(char **argv,int argc,
std::vector<int> &simd,
std::vector<int> &latt,
std::vector<int> &simd,
std::vector<int> &mpi);

View File

@ -1,5 +1,5 @@
/****************************************************************************/
/* PAB: Signal magic. Processor state dump is x86-64 specific */
/* pab: Signal magic. Processor state dump is x86-64 specific */
/****************************************************************************/
#include <stdlib.h>
@ -23,23 +23,25 @@
namespace Grid {
// Put std::cout into the badbit state on every MPI rank other than rank 0.
// Compiles to an empty function unless GRID_COMMS_MPI is defined.
void Grid_quiesce_nodes(void)
{
#if defined(GRID_COMMS_MPI)
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  if ( rank != 0 ) std::cout.setstate(std::ios::badbit);
#endif
}
// Reset the error state of std::cout (undoing the badbit set by
// Grid_quiesce_nodes). Does nothing unless GRID_COMMS_MPI is defined.
void Grid_unquiesce_nodes(void)
{
#if defined(GRID_COMMS_MPI)
  std::cout.clear(std::ios::goodbit);
#endif
}
//////////////////////////////////////////////////////
// Convenience accessors for the standard, command line
// argument driven, parallelism controls.
//////////////////////////////////////////////////////

// File-local storage for the default layouts.
static std::vector<int> Grid_default_simd;
static std::vector<int> Grid_default_latt;
static std::vector<int> Grid_default_mpi;

// Out-of-line storage for the GridThread static thread count.
int GridThread::_threads;

// Read-only view of the default SIMD layout.
const std::vector<int>& GridDefaultSimd(void)
{
  return Grid_default_simd;
}

// Read-only view of the default lattice dimensions.
const std::vector<int>& GridDefaultLatt(void)
{
  return Grid_default_latt;
}

// Read-only view of the default MPI layout.
const std::vector<int>& GridDefaultMpi(void)
{
  return Grid_default_mpi;
}
////////////////////////////////////////////////////////////
// Command line parsing assist for stock controls
////////////////////////////////////////////////////////////
std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option)
{
char ** itr = std::find(begin, end, option);
@ -53,15 +55,6 @@ bool GridCmdOptionExists(char** begin, char** end, const std::string& option)
{
return std::find(begin, end, option) != end;
}
// Library start-up: initialise MPI (when built with GRID_COMMS_MPI) and then
// call Grid_quiesce_nodes().
void Grid_init(int *argc,char ***argv)
{
#if defined(GRID_COMMS_MPI)
  MPI_Init(argc,argv);
#endif

  // Parse command line args.
  Grid_quiesce_nodes();
}
void GridCmdOptionIntVector(std::string &str,std::vector<int> & vec)
{
@ -70,7 +63,7 @@ void GridCmdOptionIntVector(std::string &str,std::vector<int> & vec)
int i;
while (ss >> i){
vec.push_back(i);
if (ss.peek() == ',')
if(std::ispunct(ss.peek()))
ss.ignore();
}
return;
@ -94,38 +87,74 @@ void GridParseLayout(char **argv,int argc,
simd=std::vector<int>({1,2,2,2});
#endif
GridThread::SetMaxThreads();
std::string arg;
if( GridCmdOptionExists(argv,argv+argc,"--mpi") ){
arg = GridCmdOptionPayload(argv,argv+argc,"--mpi");
GridCmdOptionIntVector(arg,mpi);
std::cout<<"MPI ";
for(int i=0;i<mpi.size();i++){
std::cout<<mpi[i]<<" ";
}
std::cout<<std::endl;
}
if( GridCmdOptionExists(argv,argv+argc,"--simd") ){
arg= GridCmdOptionPayload(argv,argv+argc,"--simd");
GridCmdOptionIntVector(arg,simd);
std::cout<<"SIMD ";
for(int i=0;i<simd.size();i++){
std::cout<<simd[i]<<" ";
}
std::cout<<std::endl;
}
if( GridCmdOptionExists(argv,argv+argc,"--grid") ){
arg= GridCmdOptionPayload(argv,argv+argc,"--grid");
GridCmdOptionIntVector(arg,latt);
std::cout<<"Grid ";
for(int i=0;i<latt.size();i++){
std::cout<<latt[i]<<" ";
}
std::cout<<std::endl;
}
if( GridCmdOptionExists(argv,argv+argc,"--omp") ){
std::vector<int> ompthreads(0);
arg= GridCmdOptionPayload(argv,argv+argc,"--omp");
GridCmdOptionIntVector(arg,ompthreads);
assert(ompthreads.size()==1);
GridThread::SetThreads(ompthreads[0]);
}
}
/////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////
// Library start-up.
//  1. Initialise MPI when built with GRID_COMMS_MPI.
//  2. Call Grid_debug_handler_init() if "--debug-signals" is on the command line.
//  3. Call Grid_quiesce_nodes() unless "--debug-stdout" is on the command line.
//  4. Parse the layout options into the file-level default latt/simd/mpi vectors.
void Grid_init(int *argc,char ***argv)
{
#if defined(GRID_COMMS_MPI)
  MPI_Init(argc,argv);
#endif

  // Option scan range; read only after MPI_Init has been handed argc/argv.
  char **args_begin = *argv;
  char **args_end   = args_begin + *argc;

  const bool want_signal_handler = GridCmdOptionExists(args_begin,args_end,"--debug-signals");
  if ( want_signal_handler ) {
    Grid_debug_handler_init();
  }

  const bool keep_all_stdout = GridCmdOptionExists(args_begin,args_end,"--debug-stdout");
  if ( !keep_all_stdout ) {
    Grid_quiesce_nodes();
  }

  GridParseLayout(args_begin,*argc,
                  Grid_default_latt,
                  Grid_default_simd,
                  Grid_default_mpi);
}
////////////////////////////////////////////////////////////
// Verbose limiter on MPI tasks
////////////////////////////////////////////////////////////
// Put std::cout into the badbit state on every MPI rank other than rank 0.
// Compiles to an empty function unless GRID_COMMS_MPI is defined.
void Grid_quiesce_nodes(void)
{
#if defined(GRID_COMMS_MPI)
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  if ( rank != 0 ) std::cout.setstate(std::ios::badbit);
#endif
}
// Reset the error state of std::cout (undoing the badbit set by
// Grid_quiesce_nodes). Does nothing unless GRID_COMMS_MPI is defined.
void Grid_unquiesce_nodes(void)
{
#if defined(GRID_COMMS_MPI)
  std::cout.clear(std::ios::goodbit);
#endif
}
void Grid_finalize(void)
{
@ -146,14 +175,14 @@ void * Grid_backtrace_buffer[_NBACKTRACE];
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
{
printf("Caught signal %d\n",si->si_signo);
printf(" mem address %lx\n",(uint64_t)si->si_addr);
printf(" mem address %llx\n",(unsigned long long)si->si_addr);
printf(" code %d\n",si->si_code);
#ifdef __X86_64
ucontext_t * uc= (ucontext_t *)ptr;
struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext;
printf(" instruction %llx\n",(uint64_t)sc->rip);
#define REG(A) printf(" %s %lx\n",#A, sc-> A);
printf(" instruction %llx\n",(unsigned long long)sc->rip);
#define REG(A) printf(" %s %lx\n",#A,sc-> A);
REG(rdi);
REG(rsi);
REG(rbp);

80
lib/Grid_threads.h Normal file
View File

@ -0,0 +1,80 @@
#ifndef GRID_THREADS_H
#define GRID_THREADS_H
#ifdef HAVE_OPENMP
#include <omp.h>
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for")
#define PARALLEL_NESTED_LOOP(n) _Pragma("omp parallel for collapse(" #n ")")
#else
#define PARALLEL_FOR_LOOP
#define PARALLEL_NESTED_LOOP(n)
#endif
namespace Grid {
// GridThread: static thread-count bookkeeping and work-sharing helpers,
// introduced to gain deterministic, bit reproducible reduction.
// Everything is static; perhaps just a namespace is required.
class GridThread {
public:
  // Thread count used by every helper below. Storage is defined out of line.
  // It must be set (SetThreads / SetMaxThreads) before GetWork is called,
  // because GetWork divides by it.
  static int _threads;

  // Request `thr` threads, capped at omp_get_max_threads().
  // Without OpenMP the request is ignored and the count is fixed at 1.
  static void SetThreads(int thr) {
#ifdef HAVE_OPENMP
    // omp_set_num_threads() requires a positive argument, and GetWork()
    // divides by _threads, so a non-positive request is raised to 1.
    if ( thr < 1 ) thr = 1;
    _threads = MIN(thr,omp_get_max_threads()) ;
    omp_set_num_threads(_threads);
#else
    _threads = 1;
#endif
  }

  // Use as many threads as OpenMP reports available (1 without OpenMP).
  static void SetMaxThreads(void) {
#ifdef HAVE_OPENMP
    _threads = omp_get_max_threads();
    omp_set_num_threads(_threads);
#else
    _threads = 1;
#endif
  }

  // Current thread count.
  static int GetThreads(void) { return _threads; }

  // Returns the thread count; presumably the number of slots the scratch
  // array passed to ThreadSum needs (one per thread) -- confirm with callers.
  static int SumArraySize(void) { return _threads; }

  // Split `nwork` items into contiguous per-thread chunks.
  //   nwork  : total number of work items
  //   me     : index of the calling thread
  //   mywork : (out) number of items assigned to thread `me`
  //   myoff  : (out) index of the first item assigned to thread `me`
  // Chunk sizes differ by at most one; the threads with index >= backfill
  // receive the extra item. A thread index >= _threads gets no work.
  static void GetWork(int nwork, int me, int & mywork, int & myoff){
    int basework = nwork/_threads;
    // First thread index that receives basework+1 items
    // (equals _threads when nwork divides evenly, i.e. nobody does).
    int backfill = _threads-(nwork%_threads);
    if ( me >= _threads ) {
      mywork = myoff = 0;
    } else {
      mywork = (nwork+me)/_threads;
      myoff  = basework * me;
      // Shift past the one extra item held by each of the threads
      // backfill .. me-1.
      if ( me > backfill )
        myoff+= (me-backfill);
    }
    return;
  }

  // Synchronise via ThreadBarrier(), store the returned thread index in
  // `me`, then compute that thread's share of `nwork` as in GetWork.
  static void GetWorkBarrier(int nwork, int &me, int & mywork, int & myoff){
    me = ThreadBarrier();
    GetWork(nwork,me,mywork,myoff);
  }

  // Executes an OpenMP barrier and returns the calling thread's number;
  // without OpenMP simply returns 0.
  static int ThreadBarrier(void) {
#ifdef HAVE_OPENMP
#pragma omp barrier
    return omp_get_thread_num();
#else
    return 0;
#endif
  }

  // Sum `val` across threads.
  //   sum_array : scratch storage indexed by thread; must hold at least
  //               _threads entries (not checked here)
  //   val       : (in) this thread's contribution, (out) the total
  //   me        : index of the calling thread
  // Each thread publishes its value, then after a barrier every thread
  // accumulates the entries in the same index order 0.._threads-1.
  template<class obj> static void ThreadSum( std::vector<obj> &sum_array,obj &val,int me){
    sum_array[me] = val;
    val=zero;
    ThreadBarrier();
    for(int i=0;i<_threads;i++) val+= sum_array[i];
    // Second barrier: presumably keeps any thread from overwriting
    // sum_array (e.g. in a following call) while others still read it.
    ThreadBarrier();
  }
};
}
#endif

View File

@ -14,7 +14,7 @@ namespace Grid{
// int _processor; // linear processor rank
// std::vector<int> _processor_coor; // linear processor rank
//////////////////////////////////////////////////////////////////////
class GridBase : public CartesianCommunicator {
class GridBase : public CartesianCommunicator , public GridThread {
public:
@ -22,7 +22,8 @@ public:
template<class object> friend class Lattice;
GridBase(std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {};
// Physics Grid information.
std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes.
std::vector<int> _fdimensions;// Global dimensions of array prior to cb removal

View File

@ -46,7 +46,7 @@ public:
};
GridRedBlackCartesian(std::vector<int> &dimensions,
std::vector<int> &simd_layout,
std::vector<int> &processor_grid) : GridBase(processor_grid)
std::vector<int> &processor_grid ) : GridBase(processor_grid)
{
///////////////////////
// Grid information

View File

@ -8,11 +8,10 @@ int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> simd_layout;
std::vector<int> mpi_layout;
std::vector<int> latt_size;
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd();
std::vector<int> mpi_layout = GridDefaultMpi();
GridParseLayout(argv,argc,latt_size,simd_layout,mpi_layout);
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
GridParallelRNG FineRNG(&Fine); FineRNG.SeedRandomDevice();

View File

@ -14,12 +14,10 @@ int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size;
std::vector<int> simd_layout;
std::vector<int> mpi_layout;
GridParseLayout(argv,argc,latt_size,simd_layout,mpi_layout);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd();
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridParallelRNG pRNG(&Grid);

View File

@ -25,11 +25,10 @@ int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size;
std::vector<int> simd_layout;
std::vector<int> mpi_layout;
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd();
std::vector<int> mpi_layout = GridDefaultMpi();
GridParseLayout(argv,argc,latt_size,simd_layout,mpi_layout);
latt_size.resize(4);
#ifdef AVX512

View File

@ -10,12 +10,9 @@ int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> tmp_latt_size;
std::vector<int> simd_layout;
std::vector<int> mpi_layout;
GridParseLayout(argv,argc,tmp_latt_size,simd_layout,mpi_layout);
std::vector<int> simd_layout = GridDefaultSimd();
std::vector<int> mpi_layout = GridDefaultMpi();
std::vector<int> latt_size ({16,16,16,32});
std::vector<int> clatt_size ({4,4,4,8});
int orthodir=3;

View File

@ -106,11 +106,9 @@ int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size;
std::vector<int> simd_layout;
std::vector<int> mpi_layout;
GridParseLayout(argv,argc,latt_size,simd_layout,mpi_layout);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd();
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<int> seeds({1,2,3,4});

View File

@ -8,11 +8,10 @@ int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size;
std::vector<int> simd_layout;
std::vector<int> mpi_layout;
GridParseLayout(argv,argc,latt_size,simd_layout,mpi_layout);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd();
std::vector<int> mpi_layout = GridDefaultMpi();
double volume = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];