diff --git a/Grid/parallelIO/BinaryIO.cc b/Grid/parallelIO/BinaryIO.cc new file mode 100644 index 00000000..221a7fe8 --- /dev/null +++ b/Grid/parallelIO/BinaryIO.cc @@ -0,0 +1,3 @@ +#include + +int Grid::BinaryIO::latticeWriteMaxRetry = -1; diff --git a/Grid/parallelIO/BinaryIO.h b/Grid/parallelIO/BinaryIO.h index a60fe962..ac82af11 100644 --- a/Grid/parallelIO/BinaryIO.h +++ b/Grid/parallelIO/BinaryIO.h @@ -81,6 +81,7 @@ inline void removeWhitespace(std::string &key) /////////////////////////////////////////////////////////////////////////////////////////////////// class BinaryIO { public: + static int latticeWriteMaxRetry; ///////////////////////////////////////////////////////////////////////////// // more byte manipulation helpers @@ -583,6 +584,8 @@ PARALLEL_CRITICAL typedef typename vobj::Realified::scalar_type word; word w=0; GridBase *grid = Umu._grid; uint64_t lsites = grid->lSites(); + int attemptsLeft = std::max(0, BinaryIO::latticeWriteMaxRetry); + bool checkWrite = (BinaryIO::latticeWriteMaxRetry >= 0); std::vector scalardata(lsites); std::vector iodata(lsites); // Munge, checksum, byte order in here @@ -598,8 +601,28 @@ PARALLEL_CRITICAL grid->Barrier(); timer.Stop(); - IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC, - nersc_csum,scidac_csuma,scidac_csumb); + while (attemptsLeft >= 0) + { + grid->Barrier(); + IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC, + nersc_csum,scidac_csuma,scidac_csumb); + if (checkWrite) + { + std::vector ckiodata(lsites); + uint32_t cknersc_csum, ckscidac_csuma, ckscidac_csumb; + + std::cout << GridLogMessage << "writeLatticeObject: read back object to check" << std::endl; + grid->Barrier(); + IOobject(w,grid,ckiodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC, + cknersc_csum,ckscidac_csuma,ckscidac_csumb); + if ((cknersc_csum != nersc_csum) or (ckscidac_csuma != scidac_csuma) or (ckscidac_csumb != scidac_csumb)) + { + std::cout << GridLogMessage << "writeLatticeObject: checksum failure in test read (" << attemptsLeft << " write attempt(s) remaining)" << std::endl; + } + } + attemptsLeft--; + } + std::cout<