Mirror of https://github.com/paboyle/Grid.git, synced 2024-09-20 17:25:37 +01:00

4GB clean the offsets in parallel IO for multifile records

Author:  Peter Boyle, 2018-03-16 21:54:56 +00:00
Commit:  888838473a (parent 01568b0e62)
3 changed files with 53 additions and 38 deletions
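The common thread in all three files is widening 32-bit offsets and site counts (Integer / int) to uint64_t so that byte offsets past 4 GiB do not silently wrap. A minimal standalone sketch of the failure mode (not Grid code; the 8 GiB payload size is a hypothetical value chosen to show the wrap):

// Standalone sketch (not Grid code): a 32-bit byte offset wraps once a record
// grows past 4 GiB, while a 64-bit offset is preserved exactly.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t payload_bytes = 1ULL << 33;          // hypothetical 8 GiB payload
  const uint32_t offset32 = (uint32_t)payload_bytes;  // wraps to 0
  const uint64_t offset64 = payload_bytes;            // exact
  std::printf("32-bit offset after 8 GiB: %u\n", offset32);
  std::printf("64-bit offset after 8 GiB: %llu\n", (unsigned long long)offset64);
  assert(offset64 == payload_bytes && offset32 != payload_bytes);
  return 0;
}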

File 1 of 3 (class BinaryIO)

@@ -91,7 +91,7 @@ class BinaryIO {
     typedef typename vobj::scalar_object sobj;
     GridBase *grid = lat._grid;
-    int lsites = grid->lSites();
+    uint64_t lsites = grid->lSites();
     std::vector<sobj> scalardata(lsites);
     unvectorizeToLexOrdArray(scalardata,lat);
@@ -160,7 +160,9 @@ class BinaryIO {
     /*
      * Scidac csum is rather more heavyweight
+     * FIXME -- 128^3 x 256 x 16 will overflow.
      */
     int global_site;
     Lexicographic::CoorFromIndex(coor,local_site,local_vol);
@@ -261,7 +263,7 @@ class BinaryIO {
                        GridBase *grid,
                        std::vector<fobj> &iodata,
                        std::string file,
-                       Integer offset,
+                       uint64_t offset,
                        const std::string &format, int control,
                        uint32_t &nersc_csum,
                        uint32_t &scidac_csuma,
@@ -523,7 +525,7 @@ class BinaryIO {
   static inline void readLatticeObject(Lattice<vobj> &Umu,
                                        std::string file,
                                        munger munge,
-                                       Integer offset,
+                                       uint64_t offset,
                                        const std::string &format,
                                        uint32_t &nersc_csum,
                                        uint32_t &scidac_csuma,
@@ -533,7 +535,7 @@ class BinaryIO {
     typedef typename vobj::Realified::scalar_type word;    word w=0;
     GridBase *grid = Umu._grid;
-    int lsites = grid->lSites();
+    uint64_t lsites = grid->lSites();
     std::vector<sobj> scalardata(lsites);
     std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
@@ -544,7 +546,7 @@ class BinaryIO {
     GridStopWatch timer;
     timer.Start();
-    parallel_for(int x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
+    parallel_for(uint64_t x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
     vectorizeFromLexOrdArray(scalardata,Umu);
     grid->Barrier();
@@ -560,7 +562,7 @@ class BinaryIO {
   static inline void writeLatticeObject(Lattice<vobj> &Umu,
                                         std::string file,
                                         munger munge,
-                                        Integer offset,
+                                        uint64_t offset,
                                         const std::string &format,
                                         uint32_t &nersc_csum,
                                         uint32_t &scidac_csuma,
@@ -569,7 +571,7 @@ class BinaryIO {
     typedef typename vobj::scalar_object sobj;
     typedef typename vobj::Realified::scalar_type word;    word w=0;
     GridBase *grid = Umu._grid;
-    int lsites = grid->lSites();
+    uint64_t lsites = grid->lSites();
     std::vector<sobj> scalardata(lsites);
     std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
@@ -580,7 +582,7 @@ class BinaryIO {
     GridStopWatch timer; timer.Start();
     unvectorizeToLexOrdArray(scalardata,Umu);
-    parallel_for(int x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
+    parallel_for(uint64_t x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
     grid->Barrier();
     timer.Stop();
@@ -597,7 +599,7 @@ class BinaryIO {
   static inline void readRNG(GridSerialRNG &serial,
                              GridParallelRNG &parallel,
                              std::string file,
-                             Integer offset,
+                             uint64_t offset,
                              uint32_t &nersc_csum,
                              uint32_t &scidac_csuma,
                              uint32_t &scidac_csumb)
@@ -610,8 +612,8 @@ class BinaryIO {
     std::string format = "IEEE32BIG";
     GridBase *grid = parallel._grid;
-    int gsites = grid->gSites();
-    int lsites = grid->lSites();
+    uint64_t gsites = grid->gSites();
+    uint64_t lsites = grid->lSites();
     uint32_t nersc_csum_tmp   = 0;
     uint32_t scidac_csuma_tmp = 0;
@@ -626,7 +628,7 @@ class BinaryIO {
                               nersc_csum,scidac_csuma,scidac_csumb);
     timer.Start();
-    parallel_for(int lidx=0;lidx<lsites;lidx++){
+    parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
       std::vector<RngStateType> tmp(RngStateCount);
       std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin());
       parallel.SetState(tmp,lidx);
@@ -659,7 +661,7 @@ class BinaryIO {
   static inline void writeRNG(GridSerialRNG &serial,
                               GridParallelRNG &parallel,
                               std::string file,
-                              Integer offset,
+                              uint64_t offset,
                               uint32_t &nersc_csum,
                               uint32_t &scidac_csuma,
                               uint32_t &scidac_csumb)
@@ -670,8 +672,8 @@ class BinaryIO {
     typedef std::array<RngStateType,RngStateCount> RNGstate;
     GridBase *grid = parallel._grid;
-    int gsites = grid->gSites();
-    int lsites = grid->lSites();
+    uint64_t gsites = grid->gSites();
+    uint64_t lsites = grid->lSites();
     uint32_t nersc_csum_tmp;
     uint32_t scidac_csuma_tmp;
@@ -684,7 +686,7 @@ class BinaryIO {
     timer.Start();
     std::vector<RNGstate> iodata(lsites);
-    parallel_for(int lidx=0;lidx<lsites;lidx++){
+    parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
       std::vector<RngStateType> tmp(RngStateCount);
       parallel.GetState(tmp,lidx);
       std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin());
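The FIXME added above ("128^3 x 256 x 16 will overflow") is plain arithmetic: 128^3 x 256 x 16 = 2^21 x 2^8 x 2^4 = 2^33 = 8,589,934,592, which exceeds the 32-bit range of 2^32 - 1 = 4,294,967,295, so a 32-bit index over that product would wrap. A compile-time check of the same numbers (standalone sketch, not Grid code; it makes no claim about what each factor represents in the checksum):

// Standalone sketch: the product named in the FIXME exceeds the 32-bit range.
#include <cstdint>

constexpr uint64_t kProduct = 128ULL * 128 * 128 * 256 * 16;  // 2^33 = 8589934592
static_assert(kProduct > UINT32_MAX,
              "a 32-bit index over this product would overflow");

int main() { return 0; }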

File 2 of 3 (class GridLimeWriter)

@@ -337,6 +337,20 @@ class GridLimeWriter : public BinaryIO {
   template<class vobj>
   void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
   {
+    ////////////////////////////////////////////////////////////////////
+    // NB: FILE and iostream are jointly writing disjoint sequences in the
+    // the same file through different file handles (integer units).
+    //
+    // These are both buffered, so why I think this code is right is as follows.
+    //
+    //  i)   write record header to FILE *File, telegraphing the size; flush.
+    //  ii)  ftello reads the offset from FILE *File .
+    //  iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
+    //       Closes iostream and flushes.
+    //  iv)  fseek on FILE * to end of this disjoint section.
+    //  v)   Continue writing scidac record.
+    ////////////////////////////////////////////////////////////////////
+
     ////////////////////////////////////////////
     // Create record header
     ////////////////////////////////////////////
@@ -350,25 +364,24 @@ class GridLimeWriter : public BinaryIO {
     //    std::cout << "W Gsites " <<field._grid->_gsites<<std::endl;
     //    std::cout << "W Payload expected " <<PayloadSize<<std::endl;
-    ////////////////////////////////////////////////////////////////////
-    // NB: FILE and iostream are jointly writing disjoint sequences in the
-    // the same file through different file handles (integer units).
-    //
-    // These are both buffered, so why I think this code is right is as follows.
-    //
-    //  i)   write record header to FILE *File, telegraphing the size.
-    //  ii)  ftello reads the offset from FILE *File .
-    //  iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
-    //       Closes iostream and flushes.
-    //  iv)  fseek on FILE * to end of this disjoint section.
-    //  v)   Continue writing scidac record.
-    ////////////////////////////////////////////////////////////////////
-    uint64_t offset = ftello(File);
-    //    std::cout << " Writing to offset "<<offset << std::endl;
+    fflush(File);
+
+    ///////////////////////////////////////////
+    // Write by other means into the binary record
+    ///////////////////////////////////////////
+    uint64_t offset1 = ftello(File);    // std::cout << " Writing to offset "<<offset1 << std::endl;
     std::string format = getFormatString<vobj>();
     BinarySimpleMunger<sobj,sobj> munge;
-    BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset,  format,nersc_csum,scidac_csuma,scidac_csumb);
-    //    fseek(File,0,SEEK_END); offset = ftello(File);std::cout << " offset now "<<offset << std::endl;
+    BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb);
+
+    ///////////////////////////////////////////
+    // Wind forward and close the record
+    ///////////////////////////////////////////
+    fseek(File,0,SEEK_END);
+    uint64_t offset2 = ftello(File);    // std::cout << " now at offset "<<offset2 << std::endl;
+    assert((offset2-offset1) == PayloadSize);
     err=limeWriterCloseRecord(LimeW);  assert(err>=0);
     ////////////////////////////////////////
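The comment block moved to the top of writeLimeLatticeBinaryObject describes a two-handle scheme: write and flush the record header through FILE *File, read back the payload start with ftello, let an independent handle seek to that offset and write the binary payload, then wind the FILE * stream forward with fseek so the LIME record can be closed. A minimal standalone sketch of that choreography (not Grid's implementation: it uses std::ofstream in place of Grid's iostream/MPI-IO writer and POSIX ftello as the Grid code does; the file name and payload are placeholders):

// Standalone sketch of the two-handle write pattern described in the comment.
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <vector>

int main() {
  const char *fname = "record.bin";                 // hypothetical file name
  std::vector<char> payload(1024, 'x');             // hypothetical binary payload

  // i) write the record header through the FILE* handle and flush it to disk.
  FILE *File = std::fopen(fname, "w");
  assert(File != nullptr);
  std::fprintf(File, "HEADER payload=%zu\n", payload.size());
  std::fflush(File);

  // ii) ftello tells us the offset at which the payload must begin.
  uint64_t offset1 = (uint64_t)ftello(File);

  // iii) an independent handle seeks to that offset and writes the payload.
  {
    std::ofstream out(fname, std::ios::binary | std::ios::in | std::ios::out);
    out.seekp((std::streamoff)offset1);
    out.write(payload.data(), (std::streamsize)payload.size());
  } // ofstream closed and flushed here

  // iv) wind the FILE* forward past the disjoint section and check the size.
  std::fseek(File, 0, SEEK_END);
  uint64_t offset2 = (uint64_t)ftello(File);
  assert(offset2 - offset1 == payload.size());

  // v) the caller would continue writing the next (scidac) record from here.
  std::fclose(File);
  return 0;
}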

File 3 of 3 (namespace Grid)

@@ -57,7 +57,7 @@ namespace Grid {
   // for the header-reader
   static inline int readHeader(std::string file,GridBase *grid,  FieldMetaData &field)
   {
-    int offset=0;
+    uint64_t offset=0;
     std::map<std::string,std::string> header;
     std::string line;
@@ -139,7 +139,7 @@ namespace Grid {
     typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
     GridBase *grid = Umu._grid;
-    int offset = readHeader(file,Umu._grid,header);
+    uint64_t offset = readHeader(file,Umu._grid,header);
     FieldMetaData clone(header);
@@ -236,7 +236,7 @@ namespace Grid {
     GaugeStatistics(Umu,header);
     MachineCharacteristics(header);
-    int offset;
+    uint64_t offset;
     truncate(file);
@@ -278,7 +278,7 @@ namespace Grid {
     header.plaquette=0.0;
     MachineCharacteristics(header);
-    int offset;
+    uint64_t offset;
 #ifdef RNG_RANLUX
     header.floating_point = std::string("UINT64");
@@ -313,7 +313,7 @@ namespace Grid {
     GridBase *grid = parallel._grid;
-    int offset = readHeader(file,grid,header);
+    uint64_t offset = readHeader(file,grid,header);
     FieldMetaData clone(header);