mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Flight recorder, resurrecting the "world famous" Britney test
This commit is contained in:
parent
b92dfcc8d3
commit
60b7f6c99d
@ -90,129 +90,6 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
static Coordinate Grid_default_latt;
|
static Coordinate Grid_default_latt;
|
||||||
static Coordinate Grid_default_mpi;
|
static Coordinate Grid_default_mpi;
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////
|
|
||||||
// Grid Norm logging for repro testing
|
|
||||||
///////////////////////////////////////////////////////
|
|
||||||
// Currently selected logging mode; compared against the GridNormLoggingMode*
// constants by GridNormLog() and GridMPINormLog().
int GridNormLoggingMode;
|
|
||||||
// Number of GridNormLog() calls seen since the mode was last set; used as the
// index into GridNormLogVector / GridCsumLogVector when verifying.
int32_t GridNormLoggingCounter;
|
|
||||||
// Number of GridMPINormLog() calls seen since the mode was last set; used as
// the index into GridMPINormLogVector when verifying.
int32_t GridMPINormLoggingCounter;
|
|
||||||
// Norm values appended by GridNormLog() in record mode.
std::vector<double> GridNormLogVector;
|
|
||||||
// Reduction results appended by GridMPINormLog() in record mode.
std::vector<double> GridMPINormLogVector;
|
|
||||||
// Checksums appended by GridNormLog() in record mode, one per recorded norm.
std::vector<uint32_t> GridCsumLogVector;
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////
// Select the norm logging mode.
//  Print / Record / Verify : forwarded to the dedicated setter for that mode.
//  None                    : stores the mode, zeroes both counters and
//                            discards all recorded norms and checksums.
//  anything else           : assert(0).
///////////////////////////////////////////////////////
void SetGridNormLoggingMode(GridNormLoggingMode_t mode)
{
  if ( mode == GridNormLoggingModePrint ) {
    SetGridNormLoggingModePrint();
    return;
  }
  if ( mode == GridNormLoggingModeRecord ) {
    SetGridNormLoggingModeRecord();
    return;
  }
  if ( mode == GridNormLoggingModeVerify ) {
    SetGridNormLoggingModeVerify();
    return;
  }
  if ( mode == GridNormLoggingModeNone ) {
    // Switching logging off also throws away any recorded history.
    GridNormLoggingMode       = mode;
    GridNormLoggingCounter    = 0;
    GridMPINormLoggingCounter = 0;
    GridNormLogVector.clear();
    GridCsumLogVector.clear();
    GridMPINormLogVector.clear();
    return;
  }
  // Unknown mode value
  assert(0);
}
|
|
||||||
|
|
||||||
void SetGridNormLoggingModePrint(void)
|
|
||||||
{
|
|
||||||
std::cout << " GridNormLogging Reproducibility logging set to print output " <<std::endl;
|
|
||||||
GridNormLoggingCounter = 0;
|
|
||||||
GridMPINormLoggingCounter=0;
|
|
||||||
GridNormLogVector.resize(0);
|
|
||||||
GridCsumLogVector.resize(0);
|
|
||||||
GridMPINormLogVector.resize(0);
|
|
||||||
GridNormLoggingMode = GridNormLoggingModePrint;
|
|
||||||
}
|
|
||||||
void SetGridNormLoggingModeRecord(void)
|
|
||||||
{
|
|
||||||
std::cout << " GridNormLogging Reproducibility logging set to RECORD " <<std::endl;
|
|
||||||
GridNormLoggingCounter = 0;
|
|
||||||
GridMPINormLoggingCounter=0;
|
|
||||||
GridNormLogVector.resize(0);
|
|
||||||
GridCsumLogVector.resize(0);
|
|
||||||
GridMPINormLogVector.resize(0);
|
|
||||||
GridNormLoggingMode = GridNormLoggingModeRecord;
|
|
||||||
}
|
|
||||||
void SetGridNormLoggingModeVerify(void)
|
|
||||||
{
|
|
||||||
std::cout << " GridNormLogging Reproducibility logging set to VERIFY " << GridNormLogVector.size()<< " log entries "<<std::endl;
|
|
||||||
GridNormLoggingCounter = 0;
|
|
||||||
GridMPINormLoggingCounter=0;
|
|
||||||
GridNormLoggingMode = GridNormLoggingModeVerify;
|
|
||||||
}
|
|
||||||
void GridNormLog(double value,uint32_t csum)
|
|
||||||
{
|
|
||||||
if(GridNormLoggingMode == GridNormLoggingModePrint) {
|
|
||||||
std::cerr<<"GridNormLog : "<< GridNormLoggingCounter <<" " << std::hexfloat << value << " csum " <<std::hex<<csum<<std::dec <<std::endl;
|
|
||||||
GridNormLoggingCounter++;
|
|
||||||
}
|
|
||||||
if(GridNormLoggingMode == GridNormLoggingModeRecord) {
|
|
||||||
GridNormLogVector.push_back(value);
|
|
||||||
GridCsumLogVector.push_back(csum);
|
|
||||||
GridNormLoggingCounter++;
|
|
||||||
}
|
|
||||||
if(GridNormLoggingMode == GridNormLoggingModeVerify) {
|
|
||||||
assert(GridNormLoggingCounter < GridNormLogVector.size());
|
|
||||||
if ( (value != GridNormLogVector[GridNormLoggingCounter])
|
|
||||||
|| (csum!=GridCsumLogVector[GridNormLoggingCounter]) ) {
|
|
||||||
std::cerr << " Oops got norm "<< std::hexfloat<<value<<" expect "<<GridNormLogVector[GridNormLoggingCounter] <<std::endl;
|
|
||||||
std::cerr << " Oops got csum "<< std::hex<<csum<<" expect "<<GridCsumLogVector[GridNormLoggingCounter] <<std::endl;
|
|
||||||
fprintf(stderr,"%s:%d Oops, I did it again! Reproduce failure for norm %d/%zu %.16e %.16e %x %x\n",
|
|
||||||
GridHostname(),
|
|
||||||
GlobalSharedMemory::WorldShmRank,
|
|
||||||
GridNormLoggingCounter,GridNormLogVector.size(),
|
|
||||||
value, GridNormLogVector[GridNormLoggingCounter],
|
|
||||||
csum, GridCsumLogVector[GridNormLoggingCounter]); fflush(stderr);
|
|
||||||
assert(0); // Force takedown of job
|
|
||||||
}
|
|
||||||
if ( GridNormLogVector.size()==GridNormLoggingCounter ) {
|
|
||||||
std::cout << " GridNormLogging : Verified entire sequence of "<<GridNormLoggingCounter<<" norms "<<std::endl;
|
|
||||||
}
|
|
||||||
GridNormLoggingCounter++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void GridMPINormLog(double local,double result)
|
|
||||||
{
|
|
||||||
if(GridNormLoggingMode == GridNormLoggingModePrint) {
|
|
||||||
std::cerr<<"GridMPINormLog : "<< GridMPINormLoggingCounter <<" " << std::hexfloat << local << " -> " <<result <<std::endl;
|
|
||||||
GridMPINormLoggingCounter++;
|
|
||||||
}
|
|
||||||
if(GridNormLoggingMode == GridNormLoggingModeRecord) {
|
|
||||||
std::cerr<<"GridMPINormLog RECORDING : "<< GridMPINormLoggingCounter <<" " << std::hexfloat << local << "-> "<< result <<std::endl;
|
|
||||||
GridMPINormLogVector.push_back(result);
|
|
||||||
GridMPINormLoggingCounter++;
|
|
||||||
}
|
|
||||||
if(GridNormLoggingMode == GridNormLoggingModeVerify) {
|
|
||||||
std::cerr<<"GridMPINormLog : "<< GridMPINormLoggingCounter <<" " << std::hexfloat << local << "-> "<< result <<std::endl;
|
|
||||||
assert(GridMPINormLoggingCounter < GridMPINormLogVector.size());
|
|
||||||
if ( result != GridMPINormLogVector[GridMPINormLoggingCounter] ) {
|
|
||||||
fprintf(stderr,"%s:%d MPI_Allreduce did it again! Reproduce failure for norm %d/%zu glb %.16e lcl %.16e hist %.16e\n",
|
|
||||||
GridHostname(),
|
|
||||||
GlobalSharedMemory::WorldShmRank,
|
|
||||||
GridMPINormLoggingCounter,GridMPINormLogVector.size(),
|
|
||||||
result, local, GridMPINormLogVector[GridMPINormLoggingCounter]); fflush(stderr);
|
|
||||||
assert(0); // Force takedown of job
|
|
||||||
}
|
|
||||||
if ( GridMPINormLogVector.size()==GridMPINormLoggingCounter ) {
|
|
||||||
std::cout << " GridMPINormLogging : Verified entire sequence of "<<GridMPINormLoggingCounter<<" norms "<<std::endl;
|
|
||||||
}
|
|
||||||
GridMPINormLoggingCounter++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int GridThread::_threads =1;
|
int GridThread::_threads =1;
|
||||||
int GridThread::_hyperthreads=1;
|
int GridThread::_hyperthreads=1;
|
||||||
int GridThread::_cores=1;
|
int GridThread::_cores=1;
|
||||||
|
Loading…
Reference in New Issue
Block a user