mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Adding a better controlled threading class, preparing to
force in deterministic reduction.
This commit is contained in:
parent
f5dcca7b1b
commit
22d384b07d
@ -8,11 +8,8 @@ int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> latt_size;
|
||||
std::vector<int> simd_layout;
|
||||
std::vector<int> mpi_layout;
|
||||
|
||||
GridParseLayout(argv,argc,latt_size,simd_layout,mpi_layout);
|
||||
std::vector<int> simd_layout = GridDefaultSimd();
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
int Nloop=10;
|
||||
int nmu=0;
|
||||
|
@ -8,17 +8,14 @@ int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> tmp_latt_size;
|
||||
std::vector<int> simd_layout;
|
||||
std::vector<int> mpi_layout;
|
||||
|
||||
GridParseLayout(argv,argc,tmp_latt_size,simd_layout,mpi_layout);
|
||||
|
||||
const int Nvec=8;
|
||||
typedef Lattice< iVector< vReal,Nvec> > LatticeVec;
|
||||
|
||||
int Nloop=1000;
|
||||
|
||||
std::vector<int> simd_layout = GridDefaultSimd();
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
std::cout << "===================================================================================================="<<std::endl;
|
||||
std::cout << "= Benchmarking fused AXPY bandwidth"<<std::endl;
|
||||
std::cout << "===================================================================================================="<<std::endl;
|
||||
|
@ -20,13 +20,12 @@ int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> latt_size;
|
||||
std::vector<int> simd_layout;
|
||||
std::vector<int> mpi_layout;
|
||||
|
||||
GridParseLayout(argv,argc,latt_size,simd_layout,mpi_layout);
|
||||
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout = GridDefaultSimd();
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
std::vector<int> seeds({1,2,3,4});
|
||||
|
||||
GridParallelRNG pRNG(&Grid);
|
||||
|
13
lib/Grid.h
13
lib/Grid.h
@ -45,8 +45,11 @@
|
||||
|
||||
#include <Grid_aligned_allocator.h>
|
||||
#include <Grid_simd.h>
|
||||
#include <Grid_math.h>
|
||||
#include <Grid_threads.h>
|
||||
|
||||
#include <Grid_cartesian.h>
|
||||
|
||||
#include <Grid_math.h>
|
||||
#include <Grid_lattice.h>
|
||||
#include <Grid_comparison.h>
|
||||
#include <Grid_cshift.h>
|
||||
@ -60,6 +63,7 @@ namespace Grid {
|
||||
|
||||
void Grid_init(int *argc,char ***argv);
|
||||
void Grid_finalize(void);
|
||||
// internal, controlled with --debug-signals
|
||||
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr);
|
||||
void Grid_debug_handler_init(void);
|
||||
void Grid_quiesce_nodes(void);
|
||||
@ -68,6 +72,11 @@ namespace Grid {
|
||||
// C++11 time facilities better?
|
||||
double usecond(void);
|
||||
|
||||
const std::vector<int> &GridDefaultSimd(void);
|
||||
const std::vector<int> &GridDefaultLatt(void);
|
||||
const std::vector<int> &GridDefaultMpi(void);
|
||||
const int &GridThreads(void) ;
|
||||
void GridSetThreads(int t) ;
|
||||
|
||||
// Common parsing chores
|
||||
std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option);
|
||||
@ -75,8 +84,8 @@ namespace Grid {
|
||||
void GridParseIntVector(std::string &str,std::vector<int> & vec);
|
||||
|
||||
void GridParseLayout(char **argv,int argc,
|
||||
std::vector<int> &simd,
|
||||
std::vector<int> &latt,
|
||||
std::vector<int> &simd,
|
||||
std::vector<int> &mpi);
|
||||
|
||||
|
||||
|
119
lib/Grid_init.cc
119
lib/Grid_init.cc
@ -1,5 +1,5 @@
|
||||
/****************************************************************************/
|
||||
/* PAB: Signal magic. Processor state dump is x86-64 specific */
|
||||
/* pab: Signal magic. Processor state dump is x86-64 specific */
|
||||
/****************************************************************************/
|
||||
|
||||
#include <stdlib.h>
|
||||
@ -23,23 +23,25 @@
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// Silence std::cout on every MPI rank except rank 0 by putting the stream
// into the bad state, so only one process produces console output.
// Compiles to an empty function when GRID_COMMS_MPI is not defined.
void Grid_quiesce_nodes(void)
{
#ifdef GRID_COMMS_MPI
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  if ( rank != 0 ) {
    // badbit makes subsequent insertions into std::cout do nothing.
    std::cout.setstate(std::ios::badbit);
  }
#endif
}
|
||||
// Undo Grid_quiesce_nodes: reset std::cout's error state so that output is
// produced again on this process. No-op when GRID_COMMS_MPI is not defined.
void Grid_unquiesce_nodes(void)
{
#ifdef GRID_COMMS_MPI
  std::cout.clear(std::ios::goodbit);
#endif
}
|
||||
//////////////////////////////////////////////////////
|
||||
// Convenience functions to access standard command line arg
|
||||
// driven parallelism controls
|
||||
//////////////////////////////////////////////////////
|
||||
static std::vector<int> Grid_default_simd;
|
||||
static std::vector<int> Grid_default_latt;
|
||||
static std::vector<int> Grid_default_mpi;
|
||||
|
||||
int GridThread::_threads;
|
||||
|
||||
|
||||
const std::vector<int> &GridDefaultSimd(void) {return Grid_default_simd;};
|
||||
const std::vector<int> &GridDefaultLatt(void) {return Grid_default_latt;};
|
||||
const std::vector<int> &GridDefaultMpi(void) {return Grid_default_mpi;};
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Command line parsing assist for stock controls
|
||||
////////////////////////////////////////////////////////////
|
||||
std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option)
|
||||
{
|
||||
char ** itr = std::find(begin, end, option);
|
||||
@ -53,15 +55,6 @@ bool GridCmdOptionExists(char** begin, char** end, const std::string& option)
|
||||
{
|
||||
return std::find(begin, end, option) != end;
|
||||
}
|
||||
void Grid_init(int *argc,char ***argv)
|
||||
{
|
||||
#ifdef GRID_COMMS_MPI
|
||||
MPI_Init(argc,argv);
|
||||
#endif
|
||||
// Parse command line args.
|
||||
Grid_quiesce_nodes();
|
||||
|
||||
}
|
||||
|
||||
void GridCmdOptionIntVector(std::string &str,std::vector<int> & vec)
|
||||
{
|
||||
@ -70,7 +63,7 @@ void GridCmdOptionIntVector(std::string &str,std::vector<int> & vec)
|
||||
int i;
|
||||
while (ss >> i){
|
||||
vec.push_back(i);
|
||||
if (ss.peek() == ',')
|
||||
if(std::ispunct(ss.peek()))
|
||||
ss.ignore();
|
||||
}
|
||||
return;
|
||||
@ -94,39 +87,75 @@ void GridParseLayout(char **argv,int argc,
|
||||
simd=std::vector<int>({1,2,2,2});
|
||||
#endif
|
||||
|
||||
GridThread::SetMaxThreads();
|
||||
|
||||
std::string arg;
|
||||
if( GridCmdOptionExists(argv,argv+argc,"--mpi") ){
|
||||
arg = GridCmdOptionPayload(argv,argv+argc,"--mpi");
|
||||
GridCmdOptionIntVector(arg,mpi);
|
||||
std::cout<<"MPI ";
|
||||
for(int i=0;i<mpi.size();i++){
|
||||
std::cout<<mpi[i]<<" ";
|
||||
}
|
||||
std::cout<<std::endl;
|
||||
}
|
||||
if( GridCmdOptionExists(argv,argv+argc,"--simd") ){
|
||||
arg= GridCmdOptionPayload(argv,argv+argc,"--simd");
|
||||
GridCmdOptionIntVector(arg,simd);
|
||||
std::cout<<"SIMD ";
|
||||
for(int i=0;i<simd.size();i++){
|
||||
std::cout<<simd[i]<<" ";
|
||||
}
|
||||
std::cout<<std::endl;
|
||||
}
|
||||
if( GridCmdOptionExists(argv,argv+argc,"--grid") ){
|
||||
arg= GridCmdOptionPayload(argv,argv+argc,"--grid");
|
||||
GridCmdOptionIntVector(arg,latt);
|
||||
std::cout<<"Grid ";
|
||||
for(int i=0;i<latt.size();i++){
|
||||
std::cout<<latt[i]<<" ";
|
||||
}
|
||||
std::cout<<std::endl;
|
||||
}
|
||||
if( GridCmdOptionExists(argv,argv+argc,"--omp") ){
|
||||
std::vector<int> ompthreads(0);
|
||||
arg= GridCmdOptionPayload(argv,argv+argc,"--omp");
|
||||
GridCmdOptionIntVector(arg,ompthreads);
|
||||
assert(ompthreads.size()==1);
|
||||
GridThread::SetThreads(ompthreads[0]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////////////////////
|
||||
void Grid_init(int *argc,char ***argv)
|
||||
{
|
||||
#ifdef GRID_COMMS_MPI
|
||||
MPI_Init(argc,argv);
|
||||
#endif
|
||||
// Parse command line args.
|
||||
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
|
||||
Grid_debug_handler_init();
|
||||
}
|
||||
if( !GridCmdOptionExists(*argv,*argv+*argc,"--debug-stdout") ){
|
||||
Grid_quiesce_nodes();
|
||||
}
|
||||
GridParseLayout(*argv,*argc,
|
||||
Grid_default_latt,
|
||||
Grid_default_simd,
|
||||
Grid_default_mpi);
|
||||
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Verbose limiter on MPI tasks
|
||||
////////////////////////////////////////////////////////////
|
||||
// Silence std::cout on every MPI rank except rank 0 by putting the stream
// into the bad state, so only one process produces console output.
// Compiles to an empty function when GRID_COMMS_MPI is not defined.
void Grid_quiesce_nodes(void)
{
#ifdef GRID_COMMS_MPI
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  if ( rank != 0 ) {
    // badbit makes subsequent insertions into std::cout do nothing.
    std::cout.setstate(std::ios::badbit);
  }
#endif
}
|
||||
// Undo Grid_quiesce_nodes: reset std::cout's error state so that output is
// produced again on this process. No-op when GRID_COMMS_MPI is not defined.
void Grid_unquiesce_nodes(void)
{
#ifdef GRID_COMMS_MPI
  std::cout.clear(std::ios::goodbit);
#endif
}
|
||||
|
||||
|
||||
void Grid_finalize(void)
|
||||
{
|
||||
#ifdef GRID_COMMS_MPI
|
||||
@ -146,14 +175,14 @@ void * Grid_backtrace_buffer[_NBACKTRACE];
|
||||
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
|
||||
{
|
||||
printf("Caught signal %d\n",si->si_signo);
|
||||
printf(" mem address %lx\n",(uint64_t)si->si_addr);
|
||||
printf(" mem address %llx\n",(unsigned long long)si->si_addr);
|
||||
printf(" code %d\n",si->si_code);
|
||||
|
||||
#ifdef __X86_64
|
||||
ucontext_t * uc= (ucontext_t *)ptr;
|
||||
struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext;
|
||||
printf(" instruction %llx\n",(uint64_t)sc->rip);
|
||||
#define REG(A) printf(" %s %lx\n",#A, sc-> A);
|
||||
printf(" instruction %llx\n",(unsigned long long)sc->rip);
|
||||
#define REG(A) printf(" %s %lx\n",#A,sc-> A);
|
||||
REG(rdi);
|
||||
REG(rsi);
|
||||
REG(rbp);
|
||||
|
80
lib/Grid_threads.h
Normal file
80
lib/Grid_threads.h
Normal file
@ -0,0 +1,80 @@
|
||||
#ifndef GRID_THREADS_H
|
||||
#define GRID_THREADS_H
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#include <omp.h>
|
||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for")
|
||||
#define PARALLEL_NESTED_LOOP(n) _Pragma("omp parallel for collapse(" #n ")")
|
||||
#else
|
||||
#define PARALLEL_FOR_LOOP
|
||||
#define PARALLEL_NESTED_LOOP(n)
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// Introduce a class to gain deterministic bit reproducible reduction.
|
||||
// make static; perhaps just a namespace is required.
|
||||
|
||||
class GridThread {
|
||||
public:
|
||||
static int _threads;
|
||||
|
||||
static void SetThreads(int thr) {
|
||||
#ifdef HAVE_OPENMP
|
||||
_threads = MIN(thr,omp_get_max_threads()) ;
|
||||
omp_set_num_threads(_threads);
|
||||
#else
|
||||
_threads = 1;
|
||||
#endif
|
||||
};
|
||||
static void SetMaxThreads(void) {
|
||||
#ifdef HAVE_OPENMP
|
||||
_threads = omp_get_max_threads();
|
||||
omp_set_num_threads(_threads);
|
||||
#else
|
||||
_threads = 1;
|
||||
#endif
|
||||
};
|
||||
static int GetThreads(void) { return _threads; };
|
||||
static int SumArraySize(void) {return _threads;};
|
||||
|
||||
static void GetWork(int nwork, int me, int & mywork, int & myoff){
|
||||
int basework = nwork/_threads;
|
||||
int backfill = _threads-(nwork%_threads);
|
||||
if ( me >= _threads ) {
|
||||
mywork = myoff = 0;
|
||||
} else {
|
||||
mywork = (nwork+me)/_threads;
|
||||
myoff = basework * me;
|
||||
if ( me > backfill )
|
||||
myoff+= (me-backfill);
|
||||
}
|
||||
return;
|
||||
};
|
||||
|
||||
static void GetWorkBarrier(int nwork, int &me, int & mywork, int & myoff){
|
||||
me = ThreadBarrier();
|
||||
GetWork(nwork,me,mywork,myoff);
|
||||
};
|
||||
|
||||
static int ThreadBarrier(void) {
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp barrier
|
||||
return omp_get_thread_num();
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
};
|
||||
|
||||
template<class obj> static void ThreadSum( std::vector<obj> &sum_array,obj &val,int me){
|
||||
sum_array[me] = val;
|
||||
val=zero;
|
||||
ThreadBarrier();
|
||||
for(int i=0;i<_threads;i++) val+= sum_array[i];
|
||||
ThreadBarrier();
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
@ -14,7 +14,7 @@ namespace Grid{
|
||||
// int _processor; // linear processor rank
|
||||
// std::vector<int> _processor_coor; // linear processor rank
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
class GridBase : public CartesianCommunicator {
|
||||
class GridBase : public CartesianCommunicator , public GridThread {
|
||||
|
||||
public:
|
||||
|
||||
@ -23,6 +23,7 @@ public:
|
||||
|
||||
GridBase(std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {};
|
||||
|
||||
|
||||
// Physics Grid information.
|
||||
std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes.
|
||||
std::vector<int> _fdimensions;// Global dimensions of array prior to cb removal
|
||||
|
@ -46,7 +46,7 @@ public:
|
||||
};
|
||||
GridRedBlackCartesian(std::vector<int> &dimensions,
|
||||
std::vector<int> &simd_layout,
|
||||
std::vector<int> &processor_grid) : GridBase(processor_grid)
|
||||
std::vector<int> &processor_grid ) : GridBase(processor_grid)
|
||||
{
|
||||
///////////////////////
|
||||
// Grid information
|
||||
|
@ -8,11 +8,10 @@ int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> simd_layout;
|
||||
std::vector<int> mpi_layout;
|
||||
std::vector<int> latt_size;
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout = GridDefaultSimd();
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
GridParseLayout(argv,argc,latt_size,simd_layout,mpi_layout);
|
||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
GridParallelRNG FineRNG(&Fine); FineRNG.SeedRandomDevice();
|
||||
|
@ -14,11 +14,9 @@ int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> latt_size;
|
||||
std::vector<int> simd_layout;
|
||||
std::vector<int> mpi_layout;
|
||||
|
||||
GridParseLayout(argv,argc,latt_size,simd_layout,mpi_layout);
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout = GridDefaultSimd();
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
|
@ -25,11 +25,10 @@ int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> latt_size;
|
||||
std::vector<int> simd_layout;
|
||||
std::vector<int> mpi_layout;
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout = GridDefaultSimd();
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
GridParseLayout(argv,argc,latt_size,simd_layout,mpi_layout);
|
||||
latt_size.resize(4);
|
||||
|
||||
#ifdef AVX512
|
||||
|
@ -10,12 +10,9 @@ int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> tmp_latt_size;
|
||||
std::vector<int> simd_layout;
|
||||
std::vector<int> mpi_layout;
|
||||
|
||||
GridParseLayout(argv,argc,tmp_latt_size,simd_layout,mpi_layout);
|
||||
|
||||
std::vector<int> simd_layout = GridDefaultSimd();
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
std::vector<int> latt_size ({16,16,16,32});
|
||||
std::vector<int> clatt_size ({4,4,4,8});
|
||||
int orthodir=3;
|
||||
|
@ -106,11 +106,9 @@ int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> latt_size;
|
||||
std::vector<int> simd_layout;
|
||||
std::vector<int> mpi_layout;
|
||||
|
||||
GridParseLayout(argv,argc,latt_size,simd_layout,mpi_layout);
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout = GridDefaultSimd();
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
std::vector<int> seeds({1,2,3,4});
|
||||
|
@ -8,11 +8,10 @@ int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> latt_size;
|
||||
std::vector<int> simd_layout;
|
||||
std::vector<int> mpi_layout;
|
||||
|
||||
GridParseLayout(argv,argc,latt_size,simd_layout,mpi_layout);
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout = GridDefaultSimd();
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
double volume = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user