1
0
mirror of https://github.com/paboyle/Grid.git synced 2026-06-23 20:23:17 +01:00

Assertion updates to macros (mostly) with backtrace.

Wilson flow to include options for DBW2, Iwasaki, Symanzik.
View logging for data assurance
This commit is contained in:
2025-08-07 15:48:38 +00:00
parent 41f344bbd3
commit 9e6a4a4737
306 changed files with 2013 additions and 1928 deletions
+32 -6
View File
@@ -47,6 +47,7 @@ int32_t FlightRecorder::CsumLoggingCounter;
// Out-of-class definitions of FlightRecorder's static data members.
// Per-category logging counters (ReductionLoggingCounter is used elsewhere in
// this file as the index into ReductionLogVector).
int32_t FlightRecorder::NormLoggingCounter;
int32_t FlightRecorder::ReductionLoggingCounter;
// Incremented by the logging routines when a value mismatches its logged entry
// and ContinueOnFail allows execution to carry on.
uint64_t FlightRecorder::ErrorCounter;
// Logged values, one vector per category.
std::vector<double> FlightRecorder::NormLogVector;
std::vector<double> FlightRecorder::ReductionLogVector;
std::vector<uint64_t> FlightRecorder::CsumLogVector;
@@ -89,7 +90,7 @@ void FlightRecorder::SetLoggingMode(FlightRecorder::LoggingMode_t mode)
Truncate();
break;
default:
assert(0);
GRID_ASSERT(0);
}
}
bool FlightRecorder::StepLog(const char *name)
@@ -260,7 +261,7 @@ void FlightRecorder::ReductionLog(double local,double global)
global, local, ReductionLogVector[ReductionLoggingCounter]); fflush(stderr);
BACKTRACEFP(stderr);
if ( !ContinueOnFail ) assert(0);
if ( !ContinueOnFail ) GRID_ASSERT(0);
ErrorCounter++;
} else {
@@ -308,7 +309,7 @@ void FlightRecorder::xmitLog(void *buf,uint64_t bytes)
_xor, XmitLogVector[XmitLoggingCounter]); fflush(stderr);
BACKTRACEFP(stderr);
if ( !ContinueOnFail ) assert(0);
if ( !ContinueOnFail ) GRID_ASSERT(0);
ErrorCounter++;
} else {
@@ -354,7 +355,7 @@ void FlightRecorder::recvLog(void *buf,uint64_t bytes,int rank)
_xor, RecvLogVector[RecvLoggingCounter],rank); fflush(stderr);
BACKTRACEFP(stderr);
if ( !ContinueOnFail ) assert(0);
if ( !ContinueOnFail ) GRID_ASSERT(0);
ErrorCounter++;
} else {
@@ -379,8 +380,12 @@ std::vector<ViewLogger::Entry_t> ViewLogger::LogVector;
// Start a logging session: switch logging on and drop any entries left in LogVector.
void ViewLogger::Begin()
{
  Enabled = true;
  LogVector.clear();
}
// Stop the logging session; LogVector is left untouched.
void ViewLogger::End()
{
  Enabled = false;
}
void ViewLogger::Log(const char* filename, int line, int index, int mode, void* data, uint64_t bytes)
#ifdef GRID_LOG_VIEWS_FENCEPOST
void ViewLogger::LogOpen(const char* filename, int line, int index, int mode, void* data, uint64_t bytes)
{
ViewLogger::LogClose(filename,line,index,mode,data,bytes);
}
void ViewLogger::LogClose(const char* filename, int line, int index, int mode, void* data, uint64_t bytes)
{
if (!Enabled)
return;
@@ -416,6 +421,27 @@ void ViewLogger::Log(const char* filename, int line, int index, int mode, void*
}
}
}
#else
// Non-fencepost build: nothing is recorded when a view is opened.
void ViewLogger::LogOpen(const char* filename, int line, int index, int mode, void* data, uint64_t bytes)
{
  // Intentionally empty.
}
// Non-fencepost build: record a checksum of a view's data when the view is closed.
//
// Nothing happens unless logging is enabled and the buffer holds at least one
// 64-bit word. In GRID_SYCL builds, a view closed in AcceleratorWrite or
// AcceleratorWriteDiscard mode is checksummed over bytes/sizeof(uint64_t) words
// (the integer division leaves out any trailing partial word) and the result is
// passed to FlightRecorder::CsumLog. Other modes, and non-SYCL builds, log
// nothing. filename, line and index are not used in this variant.
void ViewLogger::LogClose(const char* filename, int line, int index, int mode, void* data, uint64_t bytes)
{
  if (!Enabled || bytes < sizeof(uint64_t)) {
    return;
  }
#ifdef GRID_SYCL
  const bool acceleratorWrote = (mode == AcceleratorWrite) || (mode == AcceleratorWriteDiscard);
  if (acceleratorWrote) {
    uint64_t *words = (uint64_t *)data;
    uint64_t csum = checksum_gpu(words,bytes/sizeof(uint64_t));
    FlightRecorder::CsumLog(csum);
  }
#endif
}
#endif
#endif
+2 -1
View File
@@ -56,7 +56,8 @@ public:
static std::vector<Entry_t> LogVector;
static void Begin();
static void End();
static void Log(const char* filename, int line, int index, int mode, void* data, uint64_t bytes);
// Presumably called when a view is opened (confirm against callers). The
// definitions in this commit make it a no-op, except when
// GRID_LOG_VIEWS_FENCEPOST is defined, where it forwards to LogClose.
static void LogOpen(const char* filename, int line, int index, int mode, void* data, uint64_t bytes);
// Presumably called when a view is closed (confirm against callers). Returns
// immediately when logging is not enabled; `data` points at `bytes` bytes of
// view data and `mode` is the view's access mode.
static void LogClose(const char* filename, int line, int index, int mode, void* data, uint64_t bytes);
};
#endif
NAMESPACE_END(Grid);
+58 -40
View File
@@ -85,6 +85,8 @@ feenableexcept (unsigned int excepts)
#define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
#endif
void * Grid_backtrace_buffer[_NBACKTRACE];
NAMESPACE_BEGIN(Grid);
//////////////////////////////////////////////////////
@@ -118,7 +120,7 @@ const Coordinate GridDefaultSimd(int dims,int nsimd)
layout[d]=1;
}
}
assert(nn==1);
GRID_ASSERT(nn==1);
return layout;
}
@@ -213,14 +215,14 @@ void GridParseLayout(char **argv,int argc,
#endif
arg= GridCmdOptionPayload(argv,argv+argc,"--threads");
GridCmdOptionIntVector(arg,ompthreads);
assert(ompthreads.size()==1);
GRID_ASSERT(ompthreads.size()==1);
GridThread::SetThreads(ompthreads[0]);
}
if( GridCmdOptionExists(argv,argv+argc,"--accelerator-threads") ){
std::vector<int> gputhreads(0);
arg= GridCmdOptionPayload(argv,argv+argc,"--accelerator-threads");
GridCmdOptionIntVector(arg,gputhreads);
assert(gputhreads.size()==1);
GRID_ASSERT(gputhreads.size()==1);
acceleratorThreads(gputhreads[0]);
}
@@ -232,7 +234,7 @@ void GridParseLayout(char **argv,int argc,
}
// Copy back into coordinate format
int nd = mpi.size();
assert(latt.size()==nd);
GRID_ASSERT(latt.size()==nd);
latt_c.resize(nd);
mpi_c.resize(nd);
for(int d=0;d<nd;d++){
@@ -315,8 +317,8 @@ std::vector<dlRegion> dlMap;
void Grid_init(int *argc,char ***argv)
{
assert(Grid_is_initialised == 0);
GRID_ASSERT(Grid_is_initialised == 0);
GridLogger::GlobalStopWatch.Start();
@@ -361,24 +363,6 @@ void Grid_init(int *argc,char ***argv)
GlobalSharedMemory::Hugepages = 1;
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
Grid_debug_handler_init();
}
// Sleep n-seconds at end of handler
if( GridCmdOptionExists(*argv,*argv+*argc,"--signal-delay") ){
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--signal-delay");
GridCmdOptionInt(arg,signal_delay);
}
// periodic wakeup with stack trace printed
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-heartbeat") ){
Grid_debug_heartbeat();
}
// periodic wakeup with empty handler (interrupts some system calls)
if( GridCmdOptionExists(*argv,*argv+*argc,"--heartbeat") ){
Grid_heartbeat();
}
#if defined(A64FX)
if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-overlap") ){
std::cout << "Option --comms-overlap currently not supported on QPACE4. Exiting." << std::endl;
@@ -418,7 +402,7 @@ void Grid_init(int *argc,char ***argv)
std::ostringstream fname;
int rank = CartesianCommunicator::RankWorld();
int radix=64;
int radix=32;
char* root = getenv("GRID_STDOUT_ROOT");
if (root) {
fname << root ;
@@ -430,8 +414,11 @@ void Grid_init(int *argc,char ***argv)
fname << "/";
fname<<"Grid.stdout.";
fname<<CartesianCommunicator::RankWorld();
std::cout << " Reconnecting stdout to "<<fname.str()<<std::endl;
fp=freopen(fname.str().c_str(),"w",stdout);
assert(fp!=(FILE *)NULL);
GRID_ASSERT(fp!=(FILE *)NULL);
std::ostringstream ename;
if (root){
@@ -440,12 +427,14 @@ void Grid_init(int *argc,char ***argv)
ename << (rank/radix)*radix << "/";
ename<<"Grid.stderr.";
ename<<CartesianCommunicator::RankWorld();
std::cout << " Reconnecting stderr to "<<ename.str()<<std::endl;
fp=freopen(ename.str().c_str(),"w",stderr);
assert(fp!=(FILE *)NULL);
GRID_ASSERT(fp!=(FILE *)NULL);
}
fileno_stdout = fileno(stdout);
fileno_stderr = fileno(stderr) ;
dup2(fileno_stdout, STDOUT_FILENO);
dup2(fileno_stderr, STDERR_FILENO);
////////////////////////////////////////////////////
// OK to use GridLogMessage etc from here on
////////////////////////////////////////////////////
@@ -578,7 +567,7 @@ void Grid_init(int *argc,char ***argv)
}
////////////////////////////////////
// Debug and performance options
// Performance options
////////////////////////////////////
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-unroll") ){
@@ -601,6 +590,10 @@ void Grid_init(int *argc,char ***argv)
StaggeredKernelsStatic::Comms = StaggeredKernelsStatic::CommsThenCompute;
}
////////////////////////////////
// Timestamping or not
////////////////////////////////
CartesianCommunicator::nCommThreads = 1;
if( GridCmdOptionExists(*argv,*argv+*argc,"--notimestamp") ){
GridLogTimestamp(0);
@@ -608,18 +601,13 @@ void Grid_init(int *argc,char ***argv)
GridLogTimestamp(1);
}
////////////////////////////////
// Default layout
////////////////////////////////
GridParseLayout(*argv,*argc,
Grid_default_latt,
Grid_default_mpi);
if( GridCmdOptionExists(*argv,*argv+*argc,"--flightrecorder") ){
std::cout << GridLogMessage <<" Enabling flight recorder " <<std::endl;
FlightRecorder::SetLoggingMode(FlightRecorder::LoggingModeRecord);
FlightRecorder::PrintEntireLog = 1;
FlightRecorder::ChecksumComms = 1;
FlightRecorder::ChecksumCommsSend=1;
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){
std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n";
std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl;
@@ -629,6 +617,36 @@ void Grid_init(int *argc,char ***argv)
std::cout<<GridLogMessage<<"\tvComplexF : "<<sizeof(vComplexF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexF::Nsimd()))<<std::endl;
std::cout<<GridLogMessage<<"\tvComplexD : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl;
}
////////////////////////////////////
// Debug options
////////////////////////////////////
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
Grid_debug_handler_init();
}
// Sleep n-seconds at end of handler
if( GridCmdOptionExists(*argv,*argv+*argc,"--signal-delay") ){
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--signal-delay");
GridCmdOptionInt(arg,signal_delay);
}
// periodic wakeup with stack trace printed
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-heartbeat") ){
Grid_debug_heartbeat();
}
// periodic wakeup with empty handler (interrupts some system calls)
if( GridCmdOptionExists(*argv,*argv+*argc,"--heartbeat") ){
Grid_heartbeat();
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--flightrecorder") ){
std::cout << GridLogMessage <<" Enabling flight recorder " <<std::endl;
FlightRecorder::SetLoggingMode(FlightRecorder::LoggingModeRecord);
FlightRecorder::PrintEntireLog = 1;
FlightRecorder::ChecksumComms = 1;
FlightRecorder::ChecksumCommsSend=1;
}
Grid_is_initialised = 1;
}
@@ -657,7 +675,6 @@ void GridLogLayout() {
std::cout << GridLogMessage << "\tMPI tasks : "<< GridCmdVectorIntToString(GridDefaultMpi()) << std::endl;
}
void * Grid_backtrace_buffer[_NBACKTRACE];
// Write the NUL-terminated string A directly to the descriptor fileno_stderr
// with ::write (no stdio buffering involved).
// NOTE: A is evaluated twice, and the expansion already ends in ';'.
#define SIGLOG(A) ::write(fileno_stderr,A,strlen(A));
void sig_print_dig(uint32_t dig)
@@ -841,8 +858,8 @@ void Grid_heartbeat(void)
// repeating heartbeat; the final values assigned below (tv_sec = 0, tv_usec = 10000) give a 10 ms period
struct itimerval it_val;
it_val.it_value.tv_sec = 10;
it_val.it_value.tv_usec = 1000;
it_val.it_value.tv_sec = 0;
it_val.it_value.tv_usec = 10000;
it_val.it_interval = it_val.it_value;
setitimer(ITIMER_REAL, &it_val, NULL);
}
@@ -859,6 +876,7 @@ void Grid_debug_handler_init(void)
sa.sa_flags = SA_SIGINFO;
sigaction(SIGTRAP,&sa,NULL);
sigaction(SIGILL,&sa,NULL);
sigaction(SIGABRT,&sa,NULL); // SigABRT backtrace
#ifndef GRID_SYCL
sigaction(SIGSEGV,&sa,NULL); // SYCL is using SIGSEGV
sigaction(SIGBUS,&sa,NULL);
+2 -2
View File
@@ -31,7 +31,7 @@ namespace Grid{
static accelerator_inline void IndexFromCoor (const coor_t& coor,int &index,const coor_t &dims){
int64_t index64;
IndexFromCoor(coor,index64,dims);
assert(index64<2*1024*1024*1024LL);
GRID_ASSERT(index64<2*1024*1024*1024LL);
index = (int) index64;
}
@@ -52,7 +52,7 @@ namespace Grid{
if ( index64>=2*1024*1024*1024LL ){
// std::cout << " IndexFromCoorReversed " << coor<<" index " << index64<< " dims "<<dims<<std::endl;
}
assert(index64<2*1024*1024*1024LL);
GRID_ASSERT(index64<2*1024*1024*1024LL);
index = (int) index64;
}
template<class coor_t>