diff --git a/lib/communicator/SharedMemoryMPI.cc b/lib/communicator/SharedMemoryMPI.cc
index 45edbb07..1fa84dfb 100644
--- a/lib/communicator/SharedMemoryMPI.cc
+++ b/lib/communicator/SharedMemoryMPI.cc
@@ -226,6 +226,48 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 };
 #endif // MMAP
 
+#ifdef GRID_MPI3_SHM_NONE
+void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
+{
+  std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
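The remainder of the GRID_MPI3_SHM_NONE allocator is not reproduced above. As a rough standalone sketch of the idea (nothing is actually shared between ranks, so each rank simply maps its own anonymous pages), the following is assumed code, not the committed Grid implementation; the names SharedMemoryAllocateNone, WorldShmSize, WorldShmCommBufs and ShmAllocBytes are placeholders, and the real member function also receives the flags argument shown in the hunk above, which the sketch ignores.

// Hedged sketch only: per-rank anonymous mmap, no inter-rank sharing.
// All names below are placeholders, not Grid's actual members.
#include <sys/mman.h>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <vector>

static int                 WorldShmSize = 1;     // ranks on this node (placeholder)
static std::vector<void *> WorldShmCommBufs;     // one buffer per rank (placeholder)
static uint64_t            ShmAllocBytes = 0;

void SharedMemoryAllocateNone(uint64_t bytes)
{
  std::cout << "SharedMemoryAllocate " << bytes
            << " MMAP anonymous implementation" << std::endl;

  WorldShmCommBufs.resize(WorldShmSize);
  for (int r = 0; r < WorldShmSize; r++) {
    // Anonymous, private pages: visible to this rank only, zero-initialised.
    void *ptr = mmap(NULL, bytes, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (ptr == MAP_FAILED) { perror("mmap"); exit(EXIT_FAILURE); }
    WorldShmCommBufs[r] = ptr;
  }
  ShmAllocBytes = bytes;
}

int main()
{
  SharedMemoryAllocateNone(1024ULL * 1024ULL);   // exercise the sketch with 1 MiB
  std::cout << "allocated " << ShmAllocBytes << " bytes per rank" << std::endl;
  return 0;
}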
diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h
--- a/lib/parallelIO/BinaryIO.h
+++ b/lib/parallelIO/BinaryIO.h
-      int lsites = grid->lSites();
+      uint64_t lsites = grid->lSites();
       std::vector<sobj> scalardata(lsites);
       unvectorizeToLexOrdArray(scalardata,lat);
@@ -160,7 +160,9 @@ class BinaryIO {
       /*
        * Scidac csum is rather more heavyweight
+       * FIXME -- 128^3 x 256 x 16 will overflow.
        */
+      int global_site;
 
       Lexicographic::CoorFromIndex(coor,local_site,local_vol);
@@ -261,7 +263,7 @@ class BinaryIO {
                           GridBase *grid,
                           std::vector<fobj> &iodata,
                           std::string file,
-                          Integer offset,
+                          uint64_t offset,
                           const std::string &format, int control,
                           uint32_t &nersc_csum,
                           uint32_t &scidac_csuma,
@@ -523,7 +525,7 @@ class BinaryIO {
   static inline void readLatticeObject(Lattice<vobj> &Umu,
                                        std::string file,
                                        munger munge,
-                                       Integer offset,
+                                       uint64_t offset,
                                        const std::string &format,
                                        uint32_t &nersc_csum,
                                        uint32_t &scidac_csuma,
@@ -533,7 +535,7 @@ class BinaryIO {
     typedef typename vobj::Realified::scalar_type word;    word w=0;
     GridBase *grid = Umu._grid;
-    int lsites = grid->lSites();
+    uint64_t lsites = grid->lSites();
 
     std::vector<sobj> scalardata(lsites);
     std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
@@ -544,7 +546,7 @@ class BinaryIO {
     GridStopWatch timer;
     timer.Start();
 
-    parallel_for(int x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
+    parallel_for(uint64_t x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
 
     vectorizeFromLexOrdArray(scalardata,Umu);
     grid->Barrier();
@@ -560,7 +562,7 @@ class BinaryIO {
   static inline void writeLatticeObject(Lattice<vobj> &Umu,
                                         std::string file,
                                         munger munge,
-                                        Integer offset,
+                                        uint64_t offset,
                                         const std::string &format,
                                         uint32_t &nersc_csum,
                                         uint32_t &scidac_csuma,
@@ -569,7 +571,7 @@ class BinaryIO {
     typedef typename vobj::scalar_object sobj;
     typedef typename vobj::Realified::scalar_type word;    word w=0;
     GridBase *grid = Umu._grid;
-    int lsites = grid->lSites();
+    uint64_t lsites = grid->lSites();
 
     std::vector<sobj> scalardata(lsites);
     std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
@@ -580,7 +582,7 @@ class BinaryIO {
     GridStopWatch timer; timer.Start();
     unvectorizeToLexOrdArray(scalardata,Umu);
-    parallel_for(int x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
+    parallel_for(uint64_t x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
     grid->Barrier();
     timer.Stop();
@@ -597,7 +599,7 @@ class BinaryIO {
   static inline void readRNG(GridSerialRNG &serial,
                              GridParallelRNG &parallel,
                              std::string file,
-                             Integer offset,
+                             uint64_t offset,
                              uint32_t &nersc_csum,
                              uint32_t &scidac_csuma,
                              uint32_t &scidac_csumb)
@@ -610,8 +612,8 @@ class BinaryIO {
     std::string format = "IEEE32BIG";
 
     GridBase *grid = parallel._grid;
-    int gsites = grid->gSites();
-    int lsites = grid->lSites();
+    uint64_t gsites = grid->gSites();
+    uint64_t lsites = grid->lSites();
 
     uint32_t nersc_csum_tmp   = 0;
     uint32_t scidac_csuma_tmp = 0;
@@ -626,7 +628,7 @@ class BinaryIO {
               nersc_csum,scidac_csuma,scidac_csumb);
 
     timer.Start();
-    parallel_for(int lidx=0;lidx<lsites;lidx++){
+    parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
       std::vector<RngStateType> tmp(RngStateCount);
       std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin());
       parallel.SetState(tmp,lidx);
@@ -659,7 +661,7 @@ class BinaryIO {
   static inline void writeRNG(GridSerialRNG &serial,
                               GridParallelRNG &parallel,
                               std::string file,
-                              Integer offset,
+                              uint64_t offset,
                               uint32_t &nersc_csum,
                               uint32_t &scidac_csuma,
                               uint32_t &scidac_csumb)
@@ -670,8 +672,8 @@ class BinaryIO {
     typedef std::array<RngStateType,RngStateCount> RNGstate;
 
     GridBase *grid = parallel._grid;
-    int gsites = grid->gSites();
-    int lsites = grid->lSites();
+    uint64_t gsites = grid->gSites();
+    uint64_t lsites = grid->lSites();
 
     uint32_t nersc_csum_tmp;
     uint32_t scidac_csuma_tmp;
@@ -684,7 +686,7 @@ class BinaryIO {
     timer.Start();
     std::vector<RNGstate> iodata(lsites);
-    parallel_for(int lidx=0;lidx<lsites;lidx++){
+    parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
      std::vector<RngStateType> tmp(RngStateCount);
      parallel.GetState(tmp,lidx);
      std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin());
diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h
index b0bd7e2c..b81d1e43 100644
--- a/lib/parallelIO/IldgIO.h
+++ b/lib/parallelIO/IldgIO.h
@@ -337,6 +337,20 @@ class GridLimeWriter : public BinaryIO {
   template<class vobj>
   void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
   {
+    ////////////////////////////////////////////////////////////////////
+    // NB: FILE and iostream are jointly writing disjoint sequences in the
+    // the same file through different file handles (integer units).
+    //
+    // These are both buffered, so why I think this code is right is as follows.
+    //
+    // i)  write record header to FILE *File, telegraphing the size; flush
+    // ii) ftello reads the offset from FILE *File .
+    // iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
+    //      Closes iostream and flushes.
+    // iv) fseek on FILE * to end of this disjoint section.
+    //  v) Continue writing scidac record.
+    ////////////////////////////////////////////////////////////////////
+
     ////////////////////////////////////////////
     // Create record header
     ////////////////////////////////////////////
@@ -350,25 +364,24 @@ class GridLimeWriter : public BinaryIO {
     //    std::cout << "W Gsites " <<field._grid->_gsites<<std::endl;
+    uint64_t offset1 = ftello(File);
     std::string format = getFormatString<vobj>();
     BinarySimpleMunger<sobj,sobj> munge;
-    BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
-    // fseek(File,0,SEEK_END); offset = ftello(File);std::cout << " offset now "<<offset<<std::endl;
+    BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb);
+
+    ///////////////////////////////////////////
+    // Wind forward and close the record
+    ///////////////////////////////////////////
+    fseek(File,0,SEEK_END);
+    uint64_t offset2 = ftello(File);     // std::cout << " now at offset "<<offset2<<std::endl;
 
     err=limeWriterCloseRecord(LimeW);  assert(err>=0);
 
     ////////////////////////////////////////
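The comment block added to writeLimeLatticeBinaryObject above describes a two-handle write pattern: the buffered FILE* owns the LIME record framing, while BinaryIO::writeLatticeObject (iostream / MPI-IO) drops the payload into the same file at a known offset. Below is a minimal standalone illustration of steps (i) through (v), assuming POSIX ftello/fseek and using std::fstream in place of Grid's parallel writer; the file name and header format are made up for the example and are not part of the patch.

#include <cstdio>
#include <cstdint>
#include <cassert>
#include <fstream>
#include <string>
#include <vector>

// Toy two-handle write: a buffered FILE* frames the record, a second
// independent handle writes the payload bytes at a known offset.
int main()
{
  const std::string filename = "toy_record.bin";
  const std::vector<char> payload(1024, 'x');          // stand-in for the lattice data

  FILE *File = std::fopen(filename.c_str(), "w+");
  assert(File != nullptr);

  // (i) write a header through the FILE* handle, telegraphing the payload size; flush
  std::fprintf(File, "record-header payload=%zu\n", payload.size());
  std::fflush(File);

  // (ii) read the current offset from the same FILE*
  uint64_t offset1 = ftello(File);

  // (iii) an independent handle seeks to that offset and writes the payload directly
  {
    std::fstream direct(filename, std::ios::in | std::ios::out | std::ios::binary);
    direct.seekp(offset1);
    direct.write(payload.data(), payload.size());
  } // closed and flushed here

  // (iv) wind the FILE* forward past the bytes written behind its back
  fseek(File, 0, SEEK_END);
  uint64_t offset2 = ftello(File);
  assert(offset2 - offset1 == payload.size());

  // (v) carry on writing the trailer / next record through the FILE* as normal
  std::fprintf(File, "record-trailer\n");
  std::fclose(File);
  return 0;
}

The size check mirrors the idea behind the wind-forward step: if the independent writer put its bytes anywhere other than the telegraphed disjoint section, comparing the before and after offsets exposes it before the next record is written.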
diff --git a/lib/parallelIO/NerscIO.h b/lib/parallelIO/NerscIO.h
index 786839f2..e2c2bc39 100644
--- a/lib/parallelIO/NerscIO.h
+++ b/lib/parallelIO/NerscIO.h
@@ -57,7 +57,7 @@ namespace Grid {
 // for the header-reader
 static inline int readHeader(std::string file,GridBase *grid,  FieldMetaData &field)
 {
-  int offset=0;
+  uint64_t offset=0;
   std::map<std::string,std::string> header;
   std::string line;
@@ -139,7 +139,7 @@ namespace Grid {
   typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
 
   GridBase *grid = Umu._grid;
-  int offset = readHeader(file,Umu._grid,header);
+  uint64_t offset = readHeader(file,Umu._grid,header);
 
   FieldMetaData clone(header);
@@ -236,7 +236,7 @@ namespace Grid {
   GaugeStatistics(Umu,header);
   MachineCharacteristics(header);
 
-  int offset;
+  uint64_t offset;
 
   truncate(file);
@@ -278,7 +278,7 @@ namespace Grid {
   header.plaquette=0.0;
   MachineCharacteristics(header);
 
-  int offset;
+  uint64_t offset;
 
 #ifdef RNG_RANLUX
   header.floating_point = std::string("UINT64");
@@ -313,7 +313,7 @@ namespace Grid {
   GridBase *grid = parallel._grid;
 
-  int offset = readHeader(file,grid,header);
+  uint64_t offset = readHeader(file,grid,header);
 
   FieldMetaData clone(header);
diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc
index 3dff4b90..b55b66d9 100644
--- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc
+++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc
@@ -180,7 +180,6 @@ int main (int argc, char ** argv) {
   GridCartesian         * CoarseGrid4   = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
   GridRedBlackCartesian * CoarseGrid4rb = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
   GridCartesian         * CoarseGrid5   = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);
-  GridRedBlackCartesian * CoarseGrid5rb = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid5);
 
   // Gauge field
   LatticeGaugeField Umu(UGrid);
@@ -206,7 +205,7 @@ int main (int argc, char ** argv) {
   const int nbasis= 60;
   assert(nbasis==Ns1);
-  LocalCoherenceLanczosScidac<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5rb,HermOp,Odd);
+  LocalCoherenceLanczosScidac<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,HermOp,Odd);
   std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl;
 
   assert( (Params.doFine)||(Params.doFineRead));
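Most of the hunks above replace int/Integer with uint64_t for site counts and file offsets; the FIXME added in BinaryIO.h spells out why: a 128^3 x 256 lattice with Ls=16 already has 2^33 sites, beyond the reach of a 32-bit signed index, and byte offsets into a gauge configuration overflow far sooner. The small standalone program below works through the arithmetic; the 576 bytes/site figure assumes a double-precision SU(3) gauge field (4 links of 3x3 complex doubles) and is an illustration, not a number taken from the patch.

#include <cstdint>
#include <climits>
#include <iostream>

int main()
{
  // Worked numbers behind the FIXME: a 128^3 x 256 lattice with Ls=16.
  const uint64_t vol4d = 128ULL*128*128*256;   // 2^29 four-dimensional sites
  const uint64_t sites = vol4d*16;             // 2^33 five-dimensional sites

  std::cout << "5d sites          : " << sites << "\n";
  std::cout << "fits in int32?    : "
            << (sites <= (uint64_t)INT32_MAX ? "yes" : "no") << "\n";

  // Byte offsets blow past 2^31 even in 4d: assuming a double-precision
  // gauge field of 4 links x 3x3 complex doubles = 576 bytes per site.
  const uint64_t gauge_bytes = vol4d*576;      // ~3.1e11 bytes
  std::cout << "gauge field bytes : " << gauge_bytes
            << "  (INT32_MAX = " << INT32_MAX << ")\n";
  return 0;
}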