mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Hadrons: A2A vector write can fail and retry
This commit is contained in:
		@@ -83,9 +83,14 @@ public:
 | 
			
		||||
        Record(void): index(0) {}
 | 
			
		||||
    };
 | 
			
		||||
public:
 | 
			
		||||
    // maxRetry meaning:
 | 
			
		||||
    // -1: don't read back to check (default)
 | 
			
		||||
    // 0: read to check, and crash (assert) in case of failure
 | 
			
		||||
    // n > 0: read to check, retry to write n times before crashing
 | 
			
		||||
    template <typename Field>
 | 
			
		||||
    static void write(const std::string fileStem, std::vector<Field> &vec, 
 | 
			
		||||
                      const bool multiFile, const int trajectory = -1);
 | 
			
		||||
                      const bool multiFile, const int trajectory = -1,
 | 
			
		||||
                      const int maxRetry = -1);
 | 
			
		||||
    template <typename Field>
 | 
			
		||||
    static void read(std::vector<Field> &vec, const std::string fileStem,
 | 
			
		||||
                     const bool multiFile, const int trajectory = -1);
 | 
			
		||||
@@ -258,12 +263,13 @@ void A2AVectorsSchurDiagTwo<FImpl>::makeHighModeW5D(FermionField &wout_4d,
 | 
			
		||||
 ******************************************************************************/
 | 
			
		||||
template <typename Field>
 | 
			
		||||
void A2AVectorsIo::write(const std::string fileStem, std::vector<Field> &vec, 
 | 
			
		||||
                         const bool multiFile, const int trajectory)
 | 
			
		||||
                         const bool multiFile, const int trajectory, const int maxRetry)
 | 
			
		||||
{
 | 
			
		||||
    Record       record;
 | 
			
		||||
    GridBase     *grid = vec[0]._grid;
 | 
			
		||||
    ScidacWriter binWriter(grid->IsBoss());
 | 
			
		||||
    std::string  filename = vecFilename(fileStem, trajectory, multiFile);
 | 
			
		||||
    Field        buf(grid);
 | 
			
		||||
 | 
			
		||||
    if (multiFile)
 | 
			
		||||
    {
 | 
			
		||||
@@ -271,27 +277,86 @@ void A2AVectorsIo::write(const std::string fileStem, std::vector<Field> &vec,
 | 
			
		||||
 | 
			
		||||
        for (unsigned int i = 0; i < vec.size(); ++i)
 | 
			
		||||
        {
 | 
			
		||||
            fullFilename = filename + "/elem" + std::to_string(i) + ".bin";
 | 
			
		||||
            int status = GridLimeReader::LIME_READ_FAILURE, attempt = std::max(0, maxRetry);
 | 
			
		||||
 | 
			
		||||
            LOG(Message) << "Writing vector " << i << std::endl;
 | 
			
		||||
            makeFileDir(fullFilename, grid);
 | 
			
		||||
            binWriter.open(fullFilename);
 | 
			
		||||
            record.index = i;
 | 
			
		||||
            binWriter.writeScidacFieldRecord(vec[i], record);
 | 
			
		||||
            binWriter.close();
 | 
			
		||||
            while ((status != GridLimeReader::LIME_READ_SUCCESS) and (attempt >= 0))
 | 
			
		||||
            {
 | 
			
		||||
                fullFilename = filename + "/elem" + std::to_string(i) + ".bin";
 | 
			
		||||
 | 
			
		||||
                LOG(Message) << "Writing vector " << i << std::endl;
 | 
			
		||||
                makeFileDir(fullFilename, grid);
 | 
			
		||||
                binWriter.open(fullFilename);
 | 
			
		||||
                record.index = i;
 | 
			
		||||
                binWriter.writeScidacFieldRecord(vec[i], record);
 | 
			
		||||
                binWriter.close();
 | 
			
		||||
                if (maxRetry < -1)
 | 
			
		||||
                {
 | 
			
		||||
                    status = GridLimeReader::LIME_READ_SUCCESS;
 | 
			
		||||
                }
 | 
			
		||||
                else if (attempt >= 0)
 | 
			
		||||
                {
 | 
			
		||||
                    ScidacReader binReader;
 | 
			
		||||
 | 
			
		||||
                    LOG(Message) << "Reading back vector " << i 
 | 
			
		||||
                                 << " (" << attempt << " attempt(s) left)" << std::endl;
 | 
			
		||||
                    binReader.open(fullFilename);
 | 
			
		||||
                    status = binReader.readScidacFieldRecord(buf, record, false);
 | 
			
		||||
                    if (status != GridLimeReader::LIME_READ_SUCCESS)
 | 
			
		||||
                    {
 | 
			
		||||
                        LOG(Message) << "Read failure" << std::endl;
 | 
			
		||||
                    }
 | 
			
		||||
                    attempt--;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            if (status != GridLimeReader::LIME_READ_SUCCESS)
 | 
			
		||||
            {
 | 
			
		||||
                HADRONS_ERROR(Io, "I/O error while writing vector " + std::to_string(i));
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
    {
 | 
			
		||||
        makeFileDir(filename, grid);
 | 
			
		||||
        binWriter.open(filename);
 | 
			
		||||
        for (unsigned int i = 0; i < vec.size(); ++i)
 | 
			
		||||
        int status = GridLimeReader::LIME_READ_FAILURE, attempt = std::max(0, maxRetry);
 | 
			
		||||
 | 
			
		||||
        while ((status != GridLimeReader::LIME_READ_SUCCESS) and (attempt >= 0))
 | 
			
		||||
        {
 | 
			
		||||
            LOG(Message) << "Writing vector " << i << std::endl;
 | 
			
		||||
            record.index = i;
 | 
			
		||||
            binWriter.writeScidacFieldRecord(vec[i], record);
 | 
			
		||||
            makeFileDir(filename, grid);
 | 
			
		||||
            binWriter.open(filename);
 | 
			
		||||
            for (unsigned int i = 0; i < vec.size(); ++i)
 | 
			
		||||
            {
 | 
			
		||||
                LOG(Message) << "Writing vector " << i << std::endl;
 | 
			
		||||
                record.index = i;
 | 
			
		||||
                binWriter.writeScidacFieldRecord(vec[i], record);
 | 
			
		||||
            }
 | 
			
		||||
            binWriter.close();
 | 
			
		||||
            if (maxRetry < -1)
 | 
			
		||||
            {
 | 
			
		||||
                status = GridLimeReader::LIME_READ_SUCCESS;
 | 
			
		||||
            }
 | 
			
		||||
            else if (attempt >= 0)
 | 
			
		||||
            {
 | 
			
		||||
                ScidacReader binReader;
 | 
			
		||||
 | 
			
		||||
                binReader.open(filename);
 | 
			
		||||
                LOG(Message) << "Reading back vector set ("
 | 
			
		||||
                             << attempt << " attempt(s) left)" << std::endl;
 | 
			
		||||
                for (unsigned int i = 0; i < vec.size(); ++i)
 | 
			
		||||
                {
 | 
			
		||||
                    LOG(Message) << "Reading vector " << i << std::endl;
 | 
			
		||||
                    status = binReader.readScidacFieldRecord(buf, record, false);
 | 
			
		||||
                    if (status != GridLimeReader::LIME_READ_SUCCESS)
 | 
			
		||||
                    {
 | 
			
		||||
                        LOG(Message) << "Read failure" << std::endl;
 | 
			
		||||
                        break;
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
                attempt--;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        if (status != GridLimeReader::LIME_READ_SUCCESS)
 | 
			
		||||
        {
 | 
			
		||||
            HADRONS_ERROR(Io, "I/O error while writing vector set");
 | 
			
		||||
        }
 | 
			
		||||
        binWriter.close();
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -44,16 +44,24 @@ BEGIN_HADRONS_NAMESPACE
 | 
			
		||||
 ******************************************************************************/
 | 
			
		||||
BEGIN_MODULE_NAMESPACE(MSolver)
 | 
			
		||||
 | 
			
		||||
class A2AVectorsIoPar: Serializable
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(A2AVectorsIoPar,
 | 
			
		||||
                                  std::string, filestem,
 | 
			
		||||
                                  bool,        multiFile,
 | 
			
		||||
                                  int,         maxRetry);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class A2AVectorsPar: Serializable
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(A2AVectorsPar,
 | 
			
		||||
                                  std::string, noise,
 | 
			
		||||
                                  std::string, action,
 | 
			
		||||
                                  std::string, eigenPack,
 | 
			
		||||
                                  std::string, solver,
 | 
			
		||||
                                  std::string, output,
 | 
			
		||||
                                  bool,        multiFile);
 | 
			
		||||
                                  std::string,     noise,
 | 
			
		||||
                                  std::string,     action,
 | 
			
		||||
                                  std::string,     eigenPack,
 | 
			
		||||
                                  std::string,     solver,
 | 
			
		||||
                                  A2AVectorsIoPar, output);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <typename FImpl, typename Pack>
 | 
			
		||||
@@ -240,13 +248,17 @@ void TA2AVectors<FImpl, Pack>::execute(void)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // I/O if necessary
 | 
			
		||||
    if (!par().output.empty())
 | 
			
		||||
    if (!par().output.filestem.empty())
 | 
			
		||||
    {
 | 
			
		||||
        startTimer("V I/O");
 | 
			
		||||
        A2AVectorsIo::write(par().output + "_v", v, par().multiFile, vm().getTrajectory());
 | 
			
		||||
        A2AVectorsIo::write(par().output.filestem + "_v", v, 
 | 
			
		||||
                            par().output.multiFile, vm().getTrajectory(), 
 | 
			
		||||
                            par().output.maxRetry);
 | 
			
		||||
        stopTimer("V I/O");
 | 
			
		||||
        startTimer("W I/O");
 | 
			
		||||
        A2AVectorsIo::write(par().output + "_w", w, par().multiFile, vm().getTrajectory());
 | 
			
		||||
        A2AVectorsIo::write(par().output.filestem + "_w", w, 
 | 
			
		||||
                            par().output.multiFile, vm().getTrajectory(), 
 | 
			
		||||
                            par().output.maxRetry);
 | 
			
		||||
        stopTimer("W I/O");
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user