mirror of
https://github.com/paboyle/Grid.git
synced 2026-06-23 20:23:17 +01:00
Assertion updates to macros (mostly) with backtrace.
Wilson flow to include options for DBW2, Iwasaki, Symanzik. View logging for data assurance.
This commit is contained in:
@@ -47,6 +47,7 @@ int32_t FlightRecorder::CsumLoggingCounter;
|
||||
int32_t FlightRecorder::NormLoggingCounter;
|
||||
int32_t FlightRecorder::ReductionLoggingCounter;
|
||||
uint64_t FlightRecorder::ErrorCounter;
|
||||
|
||||
std::vector<double> FlightRecorder::NormLogVector;
|
||||
std::vector<double> FlightRecorder::ReductionLogVector;
|
||||
std::vector<uint64_t> FlightRecorder::CsumLogVector;
|
||||
@@ -89,7 +90,7 @@ void FlightRecorder::SetLoggingMode(FlightRecorder::LoggingMode_t mode)
|
||||
Truncate();
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
GRID_ASSERT(0);
|
||||
}
|
||||
}
|
||||
bool FlightRecorder::StepLog(const char *name)
|
||||
@@ -260,7 +261,7 @@ void FlightRecorder::ReductionLog(double local,double global)
|
||||
global, local, ReductionLogVector[ReductionLoggingCounter]); fflush(stderr);
|
||||
BACKTRACEFP(stderr);
|
||||
|
||||
if ( !ContinueOnFail ) assert(0);
|
||||
if ( !ContinueOnFail ) GRID_ASSERT(0);
|
||||
|
||||
ErrorCounter++;
|
||||
} else {
|
||||
@@ -308,7 +309,7 @@ void FlightRecorder::xmitLog(void *buf,uint64_t bytes)
|
||||
_xor, XmitLogVector[XmitLoggingCounter]); fflush(stderr);
|
||||
BACKTRACEFP(stderr);
|
||||
|
||||
if ( !ContinueOnFail ) assert(0);
|
||||
if ( !ContinueOnFail ) GRID_ASSERT(0);
|
||||
|
||||
ErrorCounter++;
|
||||
} else {
|
||||
@@ -354,7 +355,7 @@ void FlightRecorder::recvLog(void *buf,uint64_t bytes,int rank)
|
||||
_xor, RecvLogVector[RecvLoggingCounter],rank); fflush(stderr);
|
||||
BACKTRACEFP(stderr);
|
||||
|
||||
if ( !ContinueOnFail ) assert(0);
|
||||
if ( !ContinueOnFail ) GRID_ASSERT(0);
|
||||
|
||||
ErrorCounter++;
|
||||
} else {
|
||||
@@ -379,8 +380,12 @@ std::vector<ViewLogger::Entry_t> ViewLogger::LogVector;
|
||||
|
||||
void ViewLogger::Begin() { Enabled = true; LogVector.resize(0); }
|
||||
void ViewLogger::End() { Enabled = false; }
|
||||
|
||||
void ViewLogger::Log(const char* filename, int line, int index, int mode, void* data, uint64_t bytes)
|
||||
#ifdef GRID_LOG_VIEWS_FENCEPOST
|
||||
void ViewLogger::LogOpen(const char* filename, int line, int index, int mode, void* data, uint64_t bytes)
|
||||
{
|
||||
ViewLogger::LogClose(filename,line,index,mode,data,bytes);
|
||||
}
|
||||
void ViewLogger::LogClose(const char* filename, int line, int index, int mode, void* data, uint64_t bytes)
|
||||
{
|
||||
if (!Enabled)
|
||||
return;
|
||||
@@ -416,6 +421,27 @@ void ViewLogger::Log(const char* filename, int line, int index, int mode, void*
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
void ViewLogger::LogOpen(const char* filename, int line, int index, int mode, void* data, uint64_t bytes){ }
|
||||
void ViewLogger::LogClose(const char* filename, int line, int index, int mode, void* data, uint64_t bytes)
|
||||
{
|
||||
if (!Enabled)
|
||||
return;
|
||||
|
||||
if (bytes < sizeof(uint64_t)) return;
|
||||
|
||||
#ifdef GRID_SYCL
|
||||
uint64_t *u_data = (uint64_t *)data;
|
||||
switch (mode) {
|
||||
case AcceleratorWrite:
|
||||
case AcceleratorWriteDiscard:
|
||||
uint64_t csum = checksum_gpu(u_data,bytes/sizeof(uint64_t));
|
||||
FlightRecorder::CsumLog(csum);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -56,7 +56,8 @@ public:
|
||||
static std::vector<Entry_t> LogVector;
|
||||
static void Begin();
|
||||
static void End();
|
||||
static void Log(const char* filename, int line, int index, int mode, void* data, uint64_t bytes);
|
||||
static void LogOpen(const char* filename, int line, int index, int mode, void* data, uint64_t bytes);
|
||||
static void LogClose(const char* filename, int line, int index, int mode, void* data, uint64_t bytes);
|
||||
};
|
||||
#endif
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
+58
-40
@@ -85,6 +85,8 @@ feenableexcept (unsigned int excepts)
|
||||
#define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
|
||||
#endif
|
||||
|
||||
void * Grid_backtrace_buffer[_NBACKTRACE];
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
@@ -118,7 +120,7 @@ const Coordinate GridDefaultSimd(int dims,int nsimd)
|
||||
layout[d]=1;
|
||||
}
|
||||
}
|
||||
assert(nn==1);
|
||||
GRID_ASSERT(nn==1);
|
||||
return layout;
|
||||
}
|
||||
|
||||
@@ -213,14 +215,14 @@ void GridParseLayout(char **argv,int argc,
|
||||
#endif
|
||||
arg= GridCmdOptionPayload(argv,argv+argc,"--threads");
|
||||
GridCmdOptionIntVector(arg,ompthreads);
|
||||
assert(ompthreads.size()==1);
|
||||
GRID_ASSERT(ompthreads.size()==1);
|
||||
GridThread::SetThreads(ompthreads[0]);
|
||||
}
|
||||
if( GridCmdOptionExists(argv,argv+argc,"--accelerator-threads") ){
|
||||
std::vector<int> gputhreads(0);
|
||||
arg= GridCmdOptionPayload(argv,argv+argc,"--accelerator-threads");
|
||||
GridCmdOptionIntVector(arg,gputhreads);
|
||||
assert(gputhreads.size()==1);
|
||||
GRID_ASSERT(gputhreads.size()==1);
|
||||
acceleratorThreads(gputhreads[0]);
|
||||
}
|
||||
|
||||
@@ -232,7 +234,7 @@ void GridParseLayout(char **argv,int argc,
|
||||
}
|
||||
// Copy back into coordinate format
|
||||
int nd = mpi.size();
|
||||
assert(latt.size()==nd);
|
||||
GRID_ASSERT(latt.size()==nd);
|
||||
latt_c.resize(nd);
|
||||
mpi_c.resize(nd);
|
||||
for(int d=0;d<nd;d++){
|
||||
@@ -315,8 +317,8 @@ std::vector<dlRegion> dlMap;
|
||||
|
||||
void Grid_init(int *argc,char ***argv)
|
||||
{
|
||||
|
||||
assert(Grid_is_initialised == 0);
|
||||
|
||||
GRID_ASSERT(Grid_is_initialised == 0);
|
||||
|
||||
GridLogger::GlobalStopWatch.Start();
|
||||
|
||||
@@ -361,24 +363,6 @@ void Grid_init(int *argc,char ***argv)
|
||||
GlobalSharedMemory::Hugepages = 1;
|
||||
}
|
||||
|
||||
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
|
||||
Grid_debug_handler_init();
|
||||
}
|
||||
// Sleep n-seconds at end of handler
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--signal-delay") ){
|
||||
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--signal-delay");
|
||||
GridCmdOptionInt(arg,signal_delay);
|
||||
}
|
||||
// periodic wakeup with stack trace printed
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-heartbeat") ){
|
||||
Grid_debug_heartbeat();
|
||||
}
|
||||
// periodic wakeup with empty handler (interrupts some system calls)
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--heartbeat") ){
|
||||
Grid_heartbeat();
|
||||
}
|
||||
|
||||
#if defined(A64FX)
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-overlap") ){
|
||||
std::cout << "Option --comms-overlap currently not supported on QPACE4. Exiting." << std::endl;
|
||||
@@ -418,7 +402,7 @@ void Grid_init(int *argc,char ***argv)
|
||||
std::ostringstream fname;
|
||||
|
||||
int rank = CartesianCommunicator::RankWorld();
|
||||
int radix=64;
|
||||
int radix=32;
|
||||
char* root = getenv("GRID_STDOUT_ROOT");
|
||||
if (root) {
|
||||
fname << root ;
|
||||
@@ -430,8 +414,11 @@ void Grid_init(int *argc,char ***argv)
|
||||
fname << "/";
|
||||
fname<<"Grid.stdout.";
|
||||
fname<<CartesianCommunicator::RankWorld();
|
||||
|
||||
std::cout << " Reconnecting stdout to "<<fname.str()<<std::endl;
|
||||
|
||||
fp=freopen(fname.str().c_str(),"w",stdout);
|
||||
assert(fp!=(FILE *)NULL);
|
||||
GRID_ASSERT(fp!=(FILE *)NULL);
|
||||
|
||||
std::ostringstream ename;
|
||||
if (root){
|
||||
@@ -440,12 +427,14 @@ void Grid_init(int *argc,char ***argv)
|
||||
ename << (rank/radix)*radix << "/";
|
||||
ename<<"Grid.stderr.";
|
||||
ename<<CartesianCommunicator::RankWorld();
|
||||
std::cout << " Reconnecting stderr to "<<ename.str()<<std::endl;
|
||||
fp=freopen(ename.str().c_str(),"w",stderr);
|
||||
assert(fp!=(FILE *)NULL);
|
||||
GRID_ASSERT(fp!=(FILE *)NULL);
|
||||
}
|
||||
fileno_stdout = fileno(stdout);
|
||||
fileno_stderr = fileno(stderr) ;
|
||||
|
||||
dup2(fileno_stdout, STDOUT_FILENO);
|
||||
dup2(fileno_stderr, STDERR_FILENO);
|
||||
////////////////////////////////////////////////////
|
||||
// OK to use GridLogMessage etc from here on
|
||||
////////////////////////////////////////////////////
|
||||
@@ -578,7 +567,7 @@ void Grid_init(int *argc,char ***argv)
|
||||
}
|
||||
|
||||
////////////////////////////////////
|
||||
// Debug and performance options
|
||||
// Performance options
|
||||
////////////////////////////////////
|
||||
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-unroll") ){
|
||||
@@ -601,6 +590,10 @@ void Grid_init(int *argc,char ***argv)
|
||||
StaggeredKernelsStatic::Comms = StaggeredKernelsStatic::CommsThenCompute;
|
||||
}
|
||||
|
||||
////////////////////////////////
|
||||
// Timestamping or not
|
||||
////////////////////////////////
|
||||
|
||||
CartesianCommunicator::nCommThreads = 1;
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--notimestamp") ){
|
||||
GridLogTimestamp(0);
|
||||
@@ -608,18 +601,13 @@ void Grid_init(int *argc,char ***argv)
|
||||
GridLogTimestamp(1);
|
||||
}
|
||||
|
||||
////////////////////////////////
|
||||
// Default layout
|
||||
////////////////////////////////
|
||||
GridParseLayout(*argv,*argc,
|
||||
Grid_default_latt,
|
||||
Grid_default_mpi);
|
||||
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--flightrecorder") ){
|
||||
std::cout << GridLogMessage <<" Enabling flight recorder " <<std::endl;
|
||||
FlightRecorder::SetLoggingMode(FlightRecorder::LoggingModeRecord);
|
||||
FlightRecorder::PrintEntireLog = 1;
|
||||
FlightRecorder::ChecksumComms = 1;
|
||||
FlightRecorder::ChecksumCommsSend=1;
|
||||
}
|
||||
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){
|
||||
std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n";
|
||||
std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl;
|
||||
@@ -629,6 +617,36 @@ void Grid_init(int *argc,char ***argv)
|
||||
std::cout<<GridLogMessage<<"\tvComplexF : "<<sizeof(vComplexF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexF::Nsimd()))<<std::endl;
|
||||
std::cout<<GridLogMessage<<"\tvComplexD : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl;
|
||||
}
|
||||
|
||||
////////////////////////////////////
|
||||
// Debug options
|
||||
////////////////////////////////////
|
||||
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
|
||||
Grid_debug_handler_init();
|
||||
}
|
||||
// Sleep n-seconds at end of handler
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--signal-delay") ){
|
||||
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--signal-delay");
|
||||
GridCmdOptionInt(arg,signal_delay);
|
||||
}
|
||||
// periodic wakeup with stack trace printed
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-heartbeat") ){
|
||||
Grid_debug_heartbeat();
|
||||
}
|
||||
// periodic wakeup with empty handler (interrupts some system calls)
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--heartbeat") ){
|
||||
Grid_heartbeat();
|
||||
}
|
||||
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--flightrecorder") ){
|
||||
std::cout << GridLogMessage <<" Enabling flight recorder " <<std::endl;
|
||||
FlightRecorder::SetLoggingMode(FlightRecorder::LoggingModeRecord);
|
||||
FlightRecorder::PrintEntireLog = 1;
|
||||
FlightRecorder::ChecksumComms = 1;
|
||||
FlightRecorder::ChecksumCommsSend=1;
|
||||
}
|
||||
|
||||
Grid_is_initialised = 1;
|
||||
}
|
||||
|
||||
@@ -657,7 +675,6 @@ void GridLogLayout() {
|
||||
std::cout << GridLogMessage << "\tMPI tasks : "<< GridCmdVectorIntToString(GridDefaultMpi()) << std::endl;
|
||||
}
|
||||
|
||||
void * Grid_backtrace_buffer[_NBACKTRACE];
|
||||
#define SIGLOG(A) ::write(fileno_stderr,A,strlen(A));
|
||||
|
||||
void sig_print_dig(uint32_t dig)
|
||||
@@ -841,8 +858,8 @@ void Grid_heartbeat(void)
|
||||
|
||||
// repeating 10s heartbeat
|
||||
struct itimerval it_val;
|
||||
it_val.it_value.tv_sec = 10;
|
||||
it_val.it_value.tv_usec = 1000;
|
||||
it_val.it_value.tv_sec = 0;
|
||||
it_val.it_value.tv_usec = 10000;
|
||||
it_val.it_interval = it_val.it_value;
|
||||
setitimer(ITIMER_REAL, &it_val, NULL);
|
||||
}
|
||||
@@ -859,6 +876,7 @@ void Grid_debug_handler_init(void)
|
||||
sa.sa_flags = SA_SIGINFO;
|
||||
sigaction(SIGTRAP,&sa,NULL);
|
||||
sigaction(SIGILL,&sa,NULL);
|
||||
sigaction(SIGABRT,&sa,NULL); // SigABRT backtrace
|
||||
#ifndef GRID_SYCL
|
||||
sigaction(SIGSEGV,&sa,NULL); // SYCL is using SIGSEGV
|
||||
sigaction(SIGBUS,&sa,NULL);
|
||||
|
||||
@@ -31,7 +31,7 @@ namespace Grid{
|
||||
static accelerator_inline void IndexFromCoor (const coor_t& coor,int &index,const coor_t &dims){
|
||||
int64_t index64;
|
||||
IndexFromCoor(coor,index64,dims);
|
||||
assert(index64<2*1024*1024*1024LL);
|
||||
GRID_ASSERT(index64<2*1024*1024*1024LL);
|
||||
index = (int) index64;
|
||||
}
|
||||
|
||||
@@ -52,7 +52,7 @@ namespace Grid{
|
||||
if ( index64>=2*1024*1024*1024LL ){
|
||||
// std::cout << " IndexFromCoorReversed " << coor<<" index " << index64<< " dims "<<dims<<std::endl;
|
||||
}
|
||||
assert(index64<2*1024*1024*1024LL);
|
||||
GRID_ASSERT(index64<2*1024*1024*1024LL);
|
||||
index = (int) index64;
|
||||
}
|
||||
template<class coor_t>
|
||||
|
||||
Reference in New Issue
Block a user