mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-13 01:05:36 +00:00
Hadrons: A2A vector write can fail and retry
This commit is contained in:
parent
bccfd4cbb3
commit
10fc263675
@ -83,9 +83,14 @@ public:
|
|||||||
Record(void): index(0) {}
|
Record(void): index(0) {}
|
||||||
};
|
};
|
||||||
public:
|
public:
|
||||||
|
// maxRetry meaning:
|
||||||
|
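// -1: don't read back to check (default) -- NOTE(review): write() skips the read-back only when maxRetry < -1, so -1 currently behaves like 0; confirm which is intended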
|
||||||
|
// 0: read to check, and fail with an I/O error (HADRONS_ERROR) in case of failure
|
||||||
|
// n > 0: read to check, retry to write n times before crashing
|
||||||
template <typename Field>
|
template <typename Field>
|
||||||
static void write(const std::string fileStem, std::vector<Field> &vec,
|
static void write(const std::string fileStem, std::vector<Field> &vec,
|
||||||
const bool multiFile, const int trajectory = -1);
|
const bool multiFile, const int trajectory = -1,
|
||||||
|
const int maxRetry = -1);
|
||||||
template <typename Field>
|
template <typename Field>
|
||||||
static void read(std::vector<Field> &vec, const std::string fileStem,
|
static void read(std::vector<Field> &vec, const std::string fileStem,
|
||||||
const bool multiFile, const int trajectory = -1);
|
const bool multiFile, const int trajectory = -1);
|
||||||
@ -258,12 +263,13 @@ void A2AVectorsSchurDiagTwo<FImpl>::makeHighModeW5D(FermionField &wout_4d,
|
|||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
template <typename Field>
|
template <typename Field>
|
||||||
void A2AVectorsIo::write(const std::string fileStem, std::vector<Field> &vec,
|
void A2AVectorsIo::write(const std::string fileStem, std::vector<Field> &vec,
|
||||||
const bool multiFile, const int trajectory)
|
const bool multiFile, const int trajectory, const int maxRetry)
|
||||||
{
|
{
|
||||||
Record record;
|
Record record;
|
||||||
GridBase *grid = vec[0]._grid;
|
GridBase *grid = vec[0]._grid;
|
||||||
ScidacWriter binWriter(grid->IsBoss());
|
ScidacWriter binWriter(grid->IsBoss());
|
||||||
std::string filename = vecFilename(fileStem, trajectory, multiFile);
|
std::string filename = vecFilename(fileStem, trajectory, multiFile);
|
||||||
|
Field buf(grid);
|
||||||
|
|
||||||
if (multiFile)
|
if (multiFile)
|
||||||
{
|
{
|
||||||
@ -271,27 +277,86 @@ void A2AVectorsIo::write(const std::string fileStem, std::vector<Field> &vec,
|
|||||||
|
|
||||||
for (unsigned int i = 0; i < vec.size(); ++i)
|
for (unsigned int i = 0; i < vec.size(); ++i)
|
||||||
{
|
{
|
||||||
fullFilename = filename + "/elem" + std::to_string(i) + ".bin";
|
int status = GridLimeReader::LIME_READ_FAILURE, attempt = std::max(0, maxRetry);
|
||||||
|
|
||||||
LOG(Message) << "Writing vector " << i << std::endl;
|
while ((status != GridLimeReader::LIME_READ_SUCCESS) and (attempt >= 0))
|
||||||
makeFileDir(fullFilename, grid);
|
{
|
||||||
binWriter.open(fullFilename);
|
fullFilename = filename + "/elem" + std::to_string(i) + ".bin";
|
||||||
record.index = i;
|
|
||||||
binWriter.writeScidacFieldRecord(vec[i], record);
|
LOG(Message) << "Writing vector " << i << std::endl;
|
||||||
binWriter.close();
|
makeFileDir(fullFilename, grid);
|
||||||
|
binWriter.open(fullFilename);
|
||||||
|
record.index = i;
|
||||||
|
binWriter.writeScidacFieldRecord(vec[i], record);
|
||||||
|
binWriter.close();
|
||||||
|
if (maxRetry < -1)
|
||||||
|
{
|
||||||
|
status = GridLimeReader::LIME_READ_SUCCESS;
|
||||||
|
}
|
||||||
|
else if (attempt >= 0)
|
||||||
|
{
|
||||||
|
ScidacReader binReader;
|
||||||
|
|
||||||
|
LOG(Message) << "Reading back vector " << i
|
||||||
|
<< " (" << attempt << " attempt(s) left)" << std::endl;
|
||||||
|
binReader.open(fullFilename);
|
||||||
|
status = binReader.readScidacFieldRecord(buf, record, false);
|
||||||
|
if (status != GridLimeReader::LIME_READ_SUCCESS)
|
||||||
|
{
|
||||||
|
LOG(Message) << "Read failure" << std::endl;
|
||||||
|
}
|
||||||
|
attempt--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (status != GridLimeReader::LIME_READ_SUCCESS)
|
||||||
|
{
|
||||||
|
HADRONS_ERROR(Io, "I/O error while writing vector " + std::to_string(i));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
makeFileDir(filename, grid);
|
int status = GridLimeReader::LIME_READ_FAILURE, attempt = std::max(0, maxRetry);
|
||||||
binWriter.open(filename);
|
|
||||||
for (unsigned int i = 0; i < vec.size(); ++i)
|
while ((status != GridLimeReader::LIME_READ_SUCCESS) and (attempt >= 0))
|
||||||
{
|
{
|
||||||
LOG(Message) << "Writing vector " << i << std::endl;
|
makeFileDir(filename, grid);
|
||||||
record.index = i;
|
binWriter.open(filename);
|
||||||
binWriter.writeScidacFieldRecord(vec[i], record);
|
for (unsigned int i = 0; i < vec.size(); ++i)
|
||||||
|
{
|
||||||
|
LOG(Message) << "Writing vector " << i << std::endl;
|
||||||
|
record.index = i;
|
||||||
|
binWriter.writeScidacFieldRecord(vec[i], record);
|
||||||
|
}
|
||||||
|
binWriter.close();
|
||||||
|
if (maxRetry < -1)
|
||||||
|
{
|
||||||
|
status = GridLimeReader::LIME_READ_SUCCESS;
|
||||||
|
}
|
||||||
|
else if (attempt >= 0)
|
||||||
|
{
|
||||||
|
ScidacReader binReader;
|
||||||
|
|
||||||
|
binReader.open(filename);
|
||||||
|
LOG(Message) << "Reading back vector set ("
|
||||||
|
<< attempt << " attempt(s) left)" << std::endl;
|
||||||
|
for (unsigned int i = 0; i < vec.size(); ++i)
|
||||||
|
{
|
||||||
|
LOG(Message) << "Reading vector " << i << std::endl;
|
||||||
|
status = binReader.readScidacFieldRecord(buf, record, false);
|
||||||
|
if (status != GridLimeReader::LIME_READ_SUCCESS)
|
||||||
|
{
|
||||||
|
LOG(Message) << "Read failure" << std::endl;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
attempt--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (status != GridLimeReader::LIME_READ_SUCCESS)
|
||||||
|
{
|
||||||
|
HADRONS_ERROR(Io, "I/O error while writing vector set");
|
||||||
}
|
}
|
||||||
binWriter.close();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -44,16 +44,24 @@ BEGIN_HADRONS_NAMESPACE
|
|||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
BEGIN_MODULE_NAMESPACE(MSolver)
|
BEGIN_MODULE_NAMESPACE(MSolver)
|
||||||
|
|
||||||
|
class A2AVectorsIoPar: Serializable
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
GRID_SERIALIZABLE_CLASS_MEMBERS(A2AVectorsIoPar,
|
||||||
|
std::string, filestem,
|
||||||
|
bool, multiFile,
|
||||||
|
int, maxRetry);
|
||||||
|
};
|
||||||
|
|
||||||
class A2AVectorsPar: Serializable
|
class A2AVectorsPar: Serializable
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
GRID_SERIALIZABLE_CLASS_MEMBERS(A2AVectorsPar,
|
GRID_SERIALIZABLE_CLASS_MEMBERS(A2AVectorsPar,
|
||||||
std::string, noise,
|
std::string, noise,
|
||||||
std::string, action,
|
std::string, action,
|
||||||
std::string, eigenPack,
|
std::string, eigenPack,
|
||||||
std::string, solver,
|
std::string, solver,
|
||||||
std::string, output,
|
A2AVectorsIoPar, output);
|
||||||
bool, multiFile);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename FImpl, typename Pack>
|
template <typename FImpl, typename Pack>
|
||||||
@ -240,13 +248,17 @@ void TA2AVectors<FImpl, Pack>::execute(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// I/O if necessary
|
// I/O if necessary
|
||||||
if (!par().output.empty())
|
if (!par().output.filestem.empty())
|
||||||
{
|
{
|
||||||
startTimer("V I/O");
|
startTimer("V I/O");
|
||||||
A2AVectorsIo::write(par().output + "_v", v, par().multiFile, vm().getTrajectory());
|
A2AVectorsIo::write(par().output.filestem + "_v", v,
|
||||||
|
par().output.multiFile, vm().getTrajectory(),
|
||||||
|
par().output.maxRetry);
|
||||||
stopTimer("V I/O");
|
stopTimer("V I/O");
|
||||||
startTimer("W I/O");
|
startTimer("W I/O");
|
||||||
A2AVectorsIo::write(par().output + "_w", w, par().multiFile, vm().getTrajectory());
|
A2AVectorsIo::write(par().output.filestem + "_w", w,
|
||||||
|
par().output.multiFile, vm().getTrajectory(),
|
||||||
|
par().output.maxRetry);
|
||||||
stopTimer("W I/O");
|
stopTimer("W I/O");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user