Merge branch 'develop' into feature/hadrons

2025-09-18 09:11:04 +01:00 · 2018-03-19 13:30:21 +00:00
parent 5a31e747c9 6c6d43eb4e
commit 41d6cab033
5 changed files with 97 additions and 41 deletions
--- a/lib/communicator/SharedMemoryMPI.cc
+++ b/lib/communicator/SharedMemoryMPI.cc
@@ -226,6 +226,48 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 };
 #endif // MMAP
 #ifdef GRID_MPI3_SHM_NONE
 ////////////////////////////////////////////////////////////////////////////////////////////
 // Anonymous mmap implementation of the communication buffer allocation.
 //
 //  bytes : size of the buffer to map.
 //  flags : when non-zero, MAP_HUGETLB is requested (only if the platform defines it).
 //
 // Preconditions (asserted): _ShmSetup==1, _ShmAlloc==0 and WorldShmSize==1.
 // On success WorldShmCommBufs[0] holds the mapping, _ShmAlloc is set to 1 and
 // _ShmAllocBytes records the size.
 ////////////////////////////////////////////////////////////////////////////////////////////
 void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 {
  std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
  assert(_ShmSetup==1);
  assert(_ShmAlloc==0);
  //////////////////////////////////////////////////////////////////////////////////////////////////////////
  // allocate the shared windows for our group
  //////////////////////////////////////////////////////////////////////////////////////////////////////////
  MPI_Barrier(WorldShmComm);
  WorldShmCommBufs.resize(WorldShmSize);
  ////////////////////////////////////////////////////////////////////////////////////////////
  // The mapping is anonymous: there is no backing file and therefore no name to build.
  // This variant only supports a single rank in the shared memory group.
  ////////////////////////////////////////////////////////////////////////////////////////////
  assert(WorldShmSize == 1);
  for(int r=0;r<WorldShmSize;r++){
    int mmap_flag = MAP_SHARED |MAP_ANONYMOUS ;
 #ifdef MAP_POPULATE
    mmap_flag|=MAP_POPULATE;
 #endif
 #ifdef MAP_HUGETLB
    if ( flags ) mmap_flag |= MAP_HUGETLB;
 #endif
    // No file descriptor is associated with an anonymous mapping; pass -1 and do not close it.
    void *ptr = (void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag,-1, 0);
    if ( ptr == (void *)MAP_FAILED ) {
      // Previously printed an uninitialised name buffer here; there is no name to report.
      printf("mmap anonymous failed\n");
      perror("failed mmap");      assert(0);
    }
    // The buffer must be 64-byte aligned.
    assert(((uint64_t)ptr&0x3F)==0);
    WorldShmCommBufs[r] =ptr;
    std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
  }
  _ShmAlloc=1;
  _ShmAllocBytes  = bytes;
 };
 #endif // GRID_MPI3_SHM_NONE
 #ifdef GRID_MPI3_SHMOPEN
 ////////////////////////////////////////////////////////////////////////////////////////////
 // POSIX SHMOPEN ; as far as I know Linux does not allow EXPLICIT HugePages with this case
@@ -246,7 +288,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
      size_t size = bytes;
-      sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldNode,r);
+      sprintf(shm_name,"/myGrid_mpi3_shm_%d_%d",WorldNode,r);
      shm_unlink(shm_name);
      int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
--- a/lib/parallelIO/BinaryIO.h
+++ b/lib/parallelIO/BinaryIO.h
@@ -91,7 +91,7 @@ class BinaryIO {
    typedef typename vobj::scalar_object sobj;
    GridBase *grid = lat._grid;
-    int lsites = grid->lSites();
+    uint64_t lsites = grid->lSites();
    std::vector<sobj> scalardata(lsites); 
    unvectorizeToLexOrdArray(scalardata,lat);    
@@ -160,7 +160,9 @@ class BinaryIO {
 	/* 
 	 * Scidac csum  is rather more heavyweight
 	 * FIXME -- 128^3 x 256 x 16 will overflow.
 	 */
 	int global_site;
 	Lexicographic::CoorFromIndex(coor,local_site,local_vol);
@@ -261,7 +263,7 @@ class BinaryIO {
 			      GridBase *grid,
 			      std::vector<fobj> &iodata,
 			      std::string file,
-			      Integer offset,
+			      uint64_t offset,
 			      const std::string &format, int control,
 			      uint32_t &nersc_csum,
 			      uint32_t &scidac_csuma,
@@ -523,7 +525,7 @@ class BinaryIO {
  static inline void readLatticeObject(Lattice<vobj> &Umu,
 				       std::string file,
 				       munger munge,
-				       Integer offset,
+				       uint64_t offset,
 				       const std::string &format,
 				       uint32_t &nersc_csum,
 				       uint32_t &scidac_csuma,
@@ -533,7 +535,7 @@ class BinaryIO {
    typedef typename vobj::Realified::scalar_type word;    word w=0;
    GridBase *grid = Umu._grid;
-    int lsites = grid->lSites();
+    uint64_t lsites = grid->lSites();
    std::vector<sobj> scalardata(lsites); 
    std::vector<fobj>     iodata(lsites); // Munge, checksum, byte order in here
@@ -544,7 +546,7 @@ class BinaryIO {
    GridStopWatch timer; 
    timer.Start();
-    parallel_for(int x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
+    parallel_for(uint64_t x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
    vectorizeFromLexOrdArray(scalardata,Umu);    
    grid->Barrier();
@@ -560,7 +562,7 @@ class BinaryIO {
    static inline void writeLatticeObject(Lattice<vobj> &Umu,
 					  std::string file,
 					  munger munge,
-					  Integer offset,
+					  uint64_t offset,
 					  const std::string &format,
 					  uint32_t &nersc_csum,
 					  uint32_t &scidac_csuma,
@@ -569,7 +571,7 @@ class BinaryIO {
    typedef typename vobj::scalar_object sobj;
    typedef typename vobj::Realified::scalar_type word;    word w=0;
    GridBase *grid = Umu._grid;
-    int lsites = grid->lSites();
+    uint64_t lsites = grid->lSites();
    std::vector<sobj> scalardata(lsites); 
    std::vector<fobj>     iodata(lsites); // Munge, checksum, byte order in here
@@ -580,7 +582,7 @@ class BinaryIO {
    GridStopWatch timer; timer.Start();
    unvectorizeToLexOrdArray(scalardata,Umu);    
-    parallel_for(int x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
+    parallel_for(uint64_t x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
    grid->Barrier();
    timer.Stop();
@@ -597,7 +599,7 @@ class BinaryIO {
  static inline void readRNG(GridSerialRNG &serial,
 			     GridParallelRNG &parallel,
 			     std::string file,
-			     Integer offset,
+			     uint64_t offset,
 			     uint32_t &nersc_csum,
 			     uint32_t &scidac_csuma,
 			     uint32_t &scidac_csumb)
@@ -610,8 +612,8 @@ class BinaryIO {
    std::string format = "IEEE32BIG";
    GridBase *grid = parallel._grid;
-    int gsites = grid->gSites();
+    uint64_t gsites = grid->gSites();
-    int lsites = grid->lSites();
+    uint64_t lsites = grid->lSites();
    uint32_t nersc_csum_tmp   = 0;
    uint32_t scidac_csuma_tmp = 0;
@@ -626,7 +628,7 @@ class BinaryIO {
 	     nersc_csum,scidac_csuma,scidac_csumb);
    timer.Start();
-    parallel_for(int lidx=0;lidx<lsites;lidx++){
+    parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
      std::vector<RngStateType> tmp(RngStateCount);
      std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin());
      parallel.SetState(tmp,lidx);
@@ -659,7 +661,7 @@ class BinaryIO {
  static inline void writeRNG(GridSerialRNG &serial,
 			      GridParallelRNG &parallel,
 			      std::string file,
-			      Integer offset,
+			      uint64_t offset,
 			      uint32_t &nersc_csum,
 			      uint32_t &scidac_csuma,
 			      uint32_t &scidac_csumb)
@@ -670,8 +672,8 @@ class BinaryIO {
    typedef std::array<RngStateType,RngStateCount> RNGstate;
    GridBase *grid = parallel._grid;
-    int gsites = grid->gSites();
+    uint64_t gsites = grid->gSites();
-    int lsites = grid->lSites();
+    uint64_t lsites = grid->lSites();
    uint32_t nersc_csum_tmp;
    uint32_t scidac_csuma_tmp;
@@ -684,7 +686,7 @@ class BinaryIO {
    timer.Start();
    std::vector<RNGstate> iodata(lsites);
-    parallel_for(int lidx=0;lidx<lsites;lidx++){
+    parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
      std::vector<RngStateType> tmp(RngStateCount);
      parallel.GetState(tmp,lidx);
      std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin());
--- a/lib/parallelIO/IldgIO.h
+++ b/lib/parallelIO/IldgIO.h
@@ -337,6 +337,20 @@ class GridLimeWriter : public BinaryIO {
  template<class vobj>
  void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
  {
    ////////////////////////////////////////////////////////////////////
    // NB: FILE and iostream are jointly writing disjoint sequences in
    // the same file through different file handles (integer units).
    // 
    // These are both buffered, so why I think this code is right is as follows.
    //
    // i)  write record header to FILE *File, telegraphing the size; flush
    // ii) ftello reads the offset from FILE *File . 
    // iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
    //      Closes iostream and flushes.
    // iv) fseek on FILE * to end of this disjoint section.
    //  v) Continue writing scidac record.
    ////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////
    // Create record header
    ////////////////////////////////////////////
@@ -350,25 +364,24 @@ class GridLimeWriter : public BinaryIO {
    //    std::cout << "W Gsites "           <<field._grid->_gsites<<std::endl;
    //    std::cout << "W Payload expected " <<PayloadSize<<std::endl;
-    ////////////////////////////////////////////////////////////////////
+    fflush(File);
-    // NB: FILE and iostream are jointly writing disjoint sequences in the
+
-    // the same file through different file handles (integer units).
+    ///////////////////////////////////////////
-    // 
+    // Write by other means into the binary record
-    // These are both buffered, so why I think this code is right is as follows.
+    ///////////////////////////////////////////
-    //
+    uint64_t offset1 = ftello(File);    //    std::cout << " Writing to offset "<<offset1 << std::endl;
    // i)  write record header to FILE *File, telegraphing the size. 
    // ii) ftello reads the offset from FILE *File .
    // iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
    //      Closes iostream and flushes.
    // iv) fseek on FILE * to end of this disjoint section.
    //  v) Continue writing scidac record.
    ////////////////////////////////////////////////////////////////////
    uint64_t offset = ftello(File);
    //    std::cout << " Writing to offset "<<offset << std::endl;
    std::string format = getFormatString<vobj>();
    BinarySimpleMunger<sobj,sobj> munge;
-    BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
+    BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb);
-    //    fseek(File,0,SEEK_END);    offset = ftello(File);std::cout << " offset now "<<offset << std::endl;
+
    ///////////////////////////////////////////
    // Wind forward and close the record
    ///////////////////////////////////////////
    fseek(File,0,SEEK_END);             
    uint64_t offset2 = ftello(File);     //    std::cout << " now at offset "<<offset2 << std::endl;
    assert((offset2-offset1) == PayloadSize);
    err=limeWriterCloseRecord(LimeW);  assert(err>=0);
    ////////////////////////////////////////
--- a/lib/parallelIO/NerscIO.h
+++ b/lib/parallelIO/NerscIO.h
@@ -57,7 +57,7 @@ namespace Grid {
      // for the header-reader
      static inline int readHeader(std::string file,GridBase *grid,  FieldMetaData &field)
      {
-      int offset=0;
+      uint64_t offset=0;
      std::map<std::string,std::string> header;
      std::string line;
@@ -139,7 +139,7 @@ namespace Grid {
      typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
      GridBase *grid = Umu._grid;
-      int offset = readHeader(file,Umu._grid,header);
+      uint64_t offset = readHeader(file,Umu._grid,header);
      FieldMetaData clone(header);
@@ -236,7 +236,7 @@ namespace Grid {
 	GaugeStatistics(Umu,header);
 	MachineCharacteristics(header);
-	int offset;
+	uint64_t offset;
 	truncate(file);
@@ -278,7 +278,7 @@ namespace Grid {
 	header.plaquette=0.0;
 	MachineCharacteristics(header);
-	int offset;
+	uint64_t offset;
 #ifdef RNG_RANLUX
 	header.floating_point = std::string("UINT64");
@@ -313,7 +313,7 @@ namespace Grid {
 	GridBase *grid = parallel._grid;
-	int offset = readHeader(file,grid,header);
+	uint64_t offset = readHeader(file,grid,header);
 	FieldMetaData clone(header);
--- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc
+++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc
@@ -180,7 +180,6 @@ int main (int argc, char ** argv) {
  GridCartesian         * CoarseGrid4    = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
  GridRedBlackCartesian * CoarseGrid4rb  = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
  GridCartesian         * CoarseGrid5    = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);
  GridRedBlackCartesian * CoarseGrid5rb  = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid5);
  // Gauge field
  LatticeGaugeField Umu(UGrid);
@@ -206,7 +205,7 @@ int main (int argc, char ** argv) {
  const int nbasis= 60;
  assert(nbasis==Ns1);
-  LocalCoherenceLanczosScidac<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5rb,HermOp,Odd);
+  LocalCoherenceLanczosScidac<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,HermOp,Odd);
  std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl;
  assert( (Params.doFine)||(Params.doFineRead));