From 0064685bd7e522493933ef91abbf113a271140d1 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Thu, 10 May 2018 17:15:31 +0100 Subject: [PATCH] Added Scidac format with checksums to RNG files --- lib/parallelIO/BinaryIO.h | 36 +++- lib/parallelIO/IldgIO.h | 157 +++++++++++++++++- .../hmc/checkpointers/ScidacCheckpointer.h | 14 +- 3 files changed, 199 insertions(+), 8 deletions(-) diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index a60fe962..ae7cfa0c 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -362,6 +362,13 @@ PARALLEL_CRITICAL ierr=MPI_File_open(grid->communicator,(char *) file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); assert(ierr==0); ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); assert(ierr==0); ierr=MPI_File_read_all(fh, &iodata[0], 1, localArray, &status); assert(ierr==0); + + MPI_Offset os; + MPI_File_get_position(fh, &os); + MPI_File_get_byte_offset(fh, os, &disp); + offset = disp; + + MPI_File_close(&fh); MPI_Type_free(&fileArray); MPI_Type_free(&localArray); @@ -370,11 +377,13 @@ PARALLEL_CRITICAL #endif } else { std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : " - << iodata.size() * sizeof(fobj) << " bytes" << std::endl; + << iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl; std::ifstream fin; - fin.open(file, std::ios::binary | std::ios::in); - if (control & BINARYIO_MASTER_APPEND) + fin.open(file, std::ios::binary | std::ios::in); + if (0)//control & BINARYIO_MASTER_APPEND) { + // Note Guido. Crosscheck this for the RNG case + // why the negative offset? fin.seekg(-sizeof(fobj), fin.end); } else @@ -382,6 +391,7 @@ PARALLEL_CRITICAL fin.seekg(offset + myrank * lsites * sizeof(fobj)); } fin.read((char *)&iodata[0], iodata.size() * sizeof(fobj)); + offset = fin.tellg(); assert(fin.fail() == 0); fin.close(); } @@ -638,6 +648,11 @@ PARALLEL_CRITICAL IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC, nersc_csum,scidac_csuma,scidac_csumb); + std::cout << GridLogMessage << "RNG file nersc_checksum " << std::hex << nersc_csum << std::dec << std::endl; + std::cout << GridLogMessage << "RNG file scidac_checksuma " << std::hex << scidac_csuma << std::dec << std::endl; + std::cout << GridLogMessage << "RNG file scidac_checksumb " << std::hex << scidac_csumb << std::dec << std::endl; + + timer.Start(); parallel_for(uint64_t lidx=0;lidx tmp(RngStateCount); @@ -656,6 +671,11 @@ PARALLEL_CRITICAL serial.SetState(tmp,0); } + std::cout << GridLogMessage << "RNG file checksum t " << std::hex << nersc_csum_tmp << std::dec << std::endl; + std::cout << GridLogMessage << "RNG file checksuma t " << std::hex << scidac_csuma_tmp << std::dec << std::endl; + std::cout << GridLogMessage << "RNG file checksumb t " << std::hex << scidac_csumb_tmp << std::dec << std::endl; + + nersc_csum = nersc_csum + nersc_csum_tmp; scidac_csuma = scidac_csuma ^ scidac_csuma_tmp; scidac_csumb = scidac_csumb ^ scidac_csumb_tmp; @@ -706,6 +726,11 @@ PARALLEL_CRITICAL IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC, nersc_csum,scidac_csuma,scidac_csumb); + + std::cout << GridLogMessage << "RNG file checksum " << std::hex << nersc_csum << std::dec << std::endl; + std::cout << GridLogMessage << "RNG file checksuma " << std::hex << scidac_csuma << std::dec << std::endl; + std::cout << GridLogMessage << "RNG file checksumb " << std::hex << scidac_csumb << std::dec << std::endl; + iodata.resize(1); { std::vector tmp(RngStateCount); @@ -715,6 +740,11 @@ PARALLEL_CRITICAL IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_MASTER_APPEND, nersc_csum_tmp,scidac_csuma_tmp,scidac_csumb_tmp); + std::cout << GridLogMessage << "RNG file checksum t " << std::hex << nersc_csum_tmp << std::dec << std::endl; + std::cout << GridLogMessage << "RNG file checksuma t " << std::hex << scidac_csuma_tmp << std::dec << std::endl; + std::cout << GridLogMessage << "RNG file checksumb t " << std::hex << scidac_csumb_tmp << std::dec << std::endl; + + nersc_csum = nersc_csum + nersc_csum_tmp; scidac_csuma = scidac_csuma ^ scidac_csuma_tmp; scidac_csumb = scidac_csumb ^ scidac_csumb_tmp; diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index 90c05546..d4a123bb 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -238,10 +238,52 @@ class GridLimeReader : public BinaryIO { // Verify checksums ///////////////////////////////////////////// assert(scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb)==1); + std::cout << GridLogMessage<< " readLimeLatticeBinaryObject checksums match ! " < RNGstate; + + uint64_t PayloadSize = sizeof(RNGstate) * (pRNG._grid->_gsites+1); + + assert(PayloadSize == file_bytes);// Must match or user error + uint64_t offset= ftello(File); + std::cout << GridLogDebug << " ReadLatticeObject from offset "<IsBoss() ); + + const int RngStateCount = GridSerialRNG::RngStateCount; + typedef std::array RNGstate; + + //////////////////////////////////////////// + // Create record header + //////////////////////////////////////////// + int err; + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + uint64_t PayloadSize = sizeof(RNGstate) * (grid->_gsites+1); + std::cout << GridLogDebug << "Computed payload size " << PayloadSize << std::endl; + if ( boss_node ) { + createLimeRecordHeader(record_name, 0, 0, PayloadSize); + fflush(File); + } + + //////////////////////////////////////////////// + // Check all nodes agree on file position + //////////////////////////////////////////////// + uint64_t offset1; + if ( boss_node ) { + offset1 = ftello(File); + } + grid->Broadcast(0,(void *)&offset1,sizeof(offset1)); + + /////////////////////////////////////////// + // The above is collective. Write by other means into the binary record + /////////////////////////////////////////// + uint64_t offset = offset1; + BinaryIO::writeRNG(sRNG, pRNG,filename, offset, nersc_csum,scidac_csuma,scidac_csumb); + + /////////////////////////////////////////// + // Wind forward and close the record + /////////////////////////////////////////// + if ( boss_node ) { + fseek(File,0,SEEK_END); + uint64_t offset2 = ftello(File); + std::cout << GridLogDebug << " now at offset "<=0); + } + //////////////////////////////////////// + // Write checksum element, propagaing forward from the BinaryIO + // Always pair a checksum with a binary object, and close message + //////////////////////////////////////// + scidacChecksum checksum; + std::stringstream streama; streama << std::hex << scidac_csuma; + std::stringstream streamb; streamb << std::hex << scidac_csumb; + checksum.suma= streama.str(); + checksum.sumb= streamb.str(); + if ( boss_node ) { + writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM)); + } + } }; class ScidacWriter : public GridLimeWriter { @@ -445,6 +559,27 @@ class ScidacWriter : public GridLimeWriter { writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML)); } } + + void writeScidacRNGRecord(GridSerialRNG &sRNG, GridParallelRNG &pRNG) + { + GridBase *grid = pRNG._grid; + FieldMetaData header; + + header.floating_point = "IEEE64BIG"; + header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac + GridMetaData(grid,header); + MachineCharacteristics(header); + + ////////////////////////////////////////////// + // Fill the Lime file record by record + ////////////////////////////////////////////// + if ( this->boss_node ) { + writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message + } + // Collective call + writeLimeRNGObject(sRNG, pRNG,std::string(ILDG_BINARY_DATA)); // Closes message with checksum + } + //////////////////////////////////////////////// // Write generic lattice field in scidac format //////////////////////////////////////////////// @@ -473,6 +608,7 @@ class ScidacWriter : public GridLimeWriter { // Collective call writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA)); // Closes message with checksum } + }; @@ -486,8 +622,27 @@ class ScidacReader : public GridLimeReader { readLimeObject(_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML)); readLimeObject(_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML)); } + //////////////////////////////////////////////// - // Write generic lattice field in scidac format + // Read RNGobject in scidac format + //////////////////////////////////////////////// + void readScidacRNGRecord(GridSerialRNG &sRNG, GridParallelRNG &pRNG) + { + GridBase * grid = pRNG._grid; + + //////////////////////////////////////// + // fill the Grid header + //////////////////////////////////////// + FieldMetaData header; + + ////////////////////////////////////////////// + // Fill the Lime file record by record + ////////////////////////////////////////////// + readLimeObject(header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message + readLimeRNGObject(sRNG, pRNG, std::string(ILDG_BINARY_DATA)); + } + //////////////////////////////////////////////// + // Read generic lattice field in scidac format //////////////////////////////////////////////// template void readScidacFieldRecord(Lattice &field,userRecord &_userRecord) diff --git a/lib/qcd/hmc/checkpointers/ScidacCheckpointer.h b/lib/qcd/hmc/checkpointers/ScidacCheckpointer.h index 0867b882..e0b180fb 100644 --- a/lib/qcd/hmc/checkpointers/ScidacCheckpointer.h +++ b/lib/qcd/hmc/checkpointers/ScidacCheckpointer.h @@ -80,8 +80,12 @@ class ScidacHmcCheckpointer : public BaseHmcCheckpointer { this->build_filenames(traj, Params, config, rng); GridBase *grid = U._grid; uint32_t nersc_csum,scidac_csuma,scidac_csumb; - BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); ScidacWriter _ScidacWriter(grid->IsBoss()); + _ScidacWriter.open(rng); + _ScidacWriter.writeScidacRNGRecord(sRNG, pRNG); + _ScidacWriter.close(); + + //BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); _ScidacWriter.open(config); _ScidacWriter.writeScidacFieldRecord(U, MData); _ScidacWriter.close(); @@ -102,10 +106,12 @@ class ScidacHmcCheckpointer : public BaseHmcCheckpointer { uint32_t nersc_csum,scidac_csuma,scidac_csumb; - BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); - - Metadata md_content; ScidacReader _ScidacReader; + //BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); + _ScidacReader.open(rng); + _ScidacReader.readScidacRNGRecord(sRNG, pRNG); + _ScidacReader.close(); + Metadata md_content; _ScidacReader.open(config); _ScidacReader.readScidacFieldRecord(U,md_content); // format from the header _ScidacReader.close();