From e504260f3d8381376f9de60028b41c6685e0b93c Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 22 Jun 2017 18:53:11 +0100 Subject: [PATCH] Able to run a test job splitting into multiple MPI subdomains. --- lib/algorithms/iterative/ConjugateGradient.h | 4 +- lib/cartesian/Cartesian_base.h | 3 - lib/cartesian/Cartesian_full.h | 2 +- lib/communicator/Communicator_mpi.cc | 54 +++++---- lib/parallelIO/IldgIO.h | 115 ++++++++++++++++--- lib/parallelIO/IldgIOtypes.h | 4 + lib/parallelIO/MetaData.h | 1 + lib/qcd/utils/SpaceTimeGrid.cc | 19 +-- 8 files changed, 138 insertions(+), 64 deletions(-) diff --git a/lib/algorithms/iterative/ConjugateGradient.h b/lib/algorithms/iterative/ConjugateGradient.h index ed453161..5c968e04 100644 --- a/lib/algorithms/iterative/ConjugateGradient.h +++ b/lib/algorithms/iterative/ConjugateGradient.h @@ -52,8 +52,8 @@ class ConjugateGradient : public OperatorFunction { MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv){}; - void operator()(LinearOperatorBase &Linop, const Field &src, - Field &psi) { + void operator()(LinearOperatorBase &Linop, const Field &src, Field &psi) { + psi.checkerboard = src.checkerboard; conformable(psi, src); diff --git a/lib/cartesian/Cartesian_base.h b/lib/cartesian/Cartesian_base.h index 25041d17..a7719ec4 100644 --- a/lib/cartesian/Cartesian_base.h +++ b/lib/cartesian/Cartesian_base.h @@ -211,9 +211,6 @@ public: assert(lidx & gcoor,int & gidx){ gidx=0; int mult=1; diff --git a/lib/cartesian/Cartesian_full.h b/lib/cartesian/Cartesian_full.h index bced0791..eb388f84 100644 --- a/lib/cartesian/Cartesian_full.h +++ b/lib/cartesian/Cartesian_full.h @@ -67,7 +67,7 @@ public: GridCartesian(const std::vector &dimensions, const std::vector &simd_layout, const std::vector &processor_grid, - GridCartesian &parent) : GridBase(processor_grid,parent) + const GridCartesian &parent) : GridBase(processor_grid,parent) { Init(dimensions,simd_layout,processor_grid); } diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index 28a270a0..7879f518 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -56,46 +56,52 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { InitFromMPICommunicator(processors,communicator_world); - std::cout << "Passed communicator world to a new communicator" < &processors,const CartesianCommunicator &parent) { _ndimension = processors.size(); assert(_ndimension = parent._ndimension); - + ////////////////////////////////////////////////////////////////////////////////////////////////////// // split the communicator ////////////////////////////////////////////////////////////////////////////////////////////////////// - std::vector ratio(_ndimension); - std::vector rcoor(_ndimension); - std::vector scoor(_ndimension); + int Nparent; + MPI_Comm_size(parent.communicator,&Nparent); - int Nsubcomm=1; - int Nsubrank=1; + int childsize=1; for(int d=0;d<_ndimension;d++) { - ratio[d] = parent._processors[d] / processors[d]; - rcoor[d] = parent._processor_coor[d] / processors[d]; - scoor[d] = parent._processor_coor[d] % processors[d]; - assert(ratio[d] * processors[d] == parent._processors[d]); // must exactly subdivide - Nsubcomm *= ratio[d]; - Nsubrank *= processors[d]; + childsize *= processors[d]; } + int Nchild = Nparent/childsize; + assert (childsize * Nchild == Nparent); - int rlex, slex; - Lexicographic::IndexFromCoor(rcoor,rlex,ratio); - Lexicographic::IndexFromCoor(scoor,slex,processors); + int prank; MPI_Comm_rank(parent.communicator,&prank); + int crank = prank % childsize; + int ccomm = prank / childsize; MPI_Comm comm_split; - if ( Nsubcomm > 1 ) { - int ierr= MPI_Comm_split(communicator_world, rlex, slex,&comm_split); + if ( Nchild > 1 ) { + + std::cout << GridLogMessage<<"Child communicator of "<< parent.communicator< &processors, ////////////////////////////////////////////////////////////////////////////////////////////////////// void CartesianCommunicator::InitFromMPICommunicator(const std::vector &processors, MPI_Comm communicator_base) { - if ( communicator_base != communicator_world ) { - std::cout << "Cartesian communicator created with a non-world communicator"<(); - // std::cout << " Lorentz N/S/V/M : " << _LorentzN<<" "<<_LorentzScalar<<"/"<<_LorentzVector<<"/"<<_LorentzMatrix<_gsites; + + // std::cout << "R sizeof(sobj)= " <_gsites< munge; - BinaryIO::readLatticeObject< sobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); + BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); ///////////////////////////////////////////// // Insist checksum is next record ///////////////////////////////////////////// - readLimeObject(scidacChecksum_,std::string("scidacChecksum"),record_name); + readLimeObject(scidacChecksum_,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM)); ///////////////////////////////////////////// // Verify checksums @@ -242,9 +252,14 @@ class GridLimeReader : public BinaryIO { // should this be a do while; can we miss a first record?? while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { + // std::cout << GridLogMessage<< " readLimeObject seeking "<< record_name <<" found record :" < xmlc(nbytes+1,'\0'); limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR); XmlReader RD(&xmlc[0],""); @@ -302,14 +317,18 @@ class GridLimeWriter : public BinaryIO { write(WR,object_name,object); xmlstring = WR.XmlString(); } + // std::cout << "WriteLimeObject" << record_name <(record_name.c_str()), nbytes); + assert(h!= NULL); err=limeWriteRecordHeader(h, LimeW); assert(err>=0); err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0); err=limeWriterCloseRecord(LimeW); assert(err>=0); limeDestroyHeader(h); + // std::cout << " File offset is now"<_gsites; createLimeRecordHeader(record_name, 0, 0, PayloadSize); + + // std::cout << "W sizeof(sobj)" <_gsites<(); BinarySimpleMunger munge; BinaryIO::writeLatticeObject(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); @@ -354,7 +379,7 @@ class GridLimeWriter : public BinaryIO { checksum.suma= streama.str(); checksum.sumb= streamb.str(); std::cout << GridLogMessage<<" writing scidac checksums "< + template void writeScidacFieldRecord(Lattice &field,userRecord _userRecord) { - typedef typename vobj::scalar_object sobj; - uint64_t nbytes; GridBase * grid = field._grid; //////////////////////////////////////// @@ -397,6 +420,66 @@ class ScidacWriter : public GridLimeWriter { } }; + +class ScidacReader : public GridLimeReader { + public: + + template + void readScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile) + { + scidacFile _scidacFile(grid); + readLimeObject(_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML)); + readLimeObject(_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML)); + } + //////////////////////////////////////////////// + // Write generic lattice field in scidac format + //////////////////////////////////////////////// + template + void readScidacFieldRecord(Lattice &field,userRecord &_userRecord) + { + typedef typename vobj::scalar_object sobj; + GridBase * grid = field._grid; + + //////////////////////////////////////// + // fill the Grid header + //////////////////////////////////////// + FieldMetaData header; + scidacRecord _scidacRecord; + scidacFile _scidacFile; + + ////////////////////////////////////////////// + // Fill the Lime file record by record + ////////////////////////////////////////////// + readLimeObject(header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message + readLimeObject(_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML)); + readLimeObject(_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML)); + readLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA)); + } + void skipPastBinaryRecord(void) { + std::string rec_name(ILDG_BINARY_DATA); + while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { + if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) ) ) { + skipPastObjectRecord(std::string(SCIDAC_CHECKSUM)); + return; + } + } + } + void skipPastObjectRecord(std::string rec_name) { + while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { + if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) ) ) { + return; + } + } + } + void skipScidacFieldRecord() { + skipPastObjectRecord(std::string(GRID_FORMAT)); + skipPastObjectRecord(std::string(SCIDAC_RECORD_XML)); + skipPastObjectRecord(std::string(SCIDAC_PRIVATE_RECORD_XML)); + skipPastBinaryRecord(); + } +}; + + class IldgWriter : public ScidacWriter { public: @@ -425,8 +508,6 @@ class IldgWriter : public ScidacWriter { typedef iLorentzColourMatrix vobj; typedef typename vobj::scalar_object sobj; - uint64_t nbytes; - //////////////////////////////////////// // fill the Grid header //////////////////////////////////////// diff --git a/lib/parallelIO/IldgIOtypes.h b/lib/parallelIO/IldgIOtypes.h index c3a5321c..53664b49 100644 --- a/lib/parallelIO/IldgIOtypes.h +++ b/lib/parallelIO/IldgIOtypes.h @@ -64,6 +64,10 @@ namespace Grid { // file compatability, so should be correct to assume the undocumented but defacto file structure. ///////////////////////////////////////////////////////////////////////////////// +struct emptyUserRecord : Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(emptyUserRecord,int,dummy); +}; + //////////////////////// // Scidac private file xml // 1.1416 16 16 32 0 diff --git a/lib/parallelIO/MetaData.h b/lib/parallelIO/MetaData.h index 6d45d0a5..8fc0d777 100644 --- a/lib/parallelIO/MetaData.h +++ b/lib/parallelIO/MetaData.h @@ -104,6 +104,7 @@ namespace Grid { header.nd = nd; header.dimension.resize(nd); header.boundary.resize(nd); + header.data_start = 0; for(int d=0;d_fdimensions[d]; } diff --git a/lib/qcd/utils/SpaceTimeGrid.cc b/lib/qcd/utils/SpaceTimeGrid.cc index cbbe0aee..b2b5d9c8 100644 --- a/lib/qcd/utils/SpaceTimeGrid.cc +++ b/lib/qcd/utils/SpaceTimeGrid.cc @@ -60,7 +60,7 @@ GridCartesian *SpaceTimeGrid::makeFiveDimGrid(int Ls,const GridCartesian simd5.push_back(FourDimGrid->_simd_layout[d]); mpi5.push_back(FourDimGrid->_processors[d]); } - return new GridCartesian(latt5,simd5,mpi5); + return new GridCartesian(latt5,simd5,mpi5,*FourDimGrid); } @@ -68,15 +68,8 @@ GridRedBlackCartesian *SpaceTimeGrid::makeFiveDimRedBlackGrid(int Ls,const GridC { int N4=FourDimGrid->_ndimension; int cbd=1; - // std::vector latt5(1,Ls); - // std::vector simd5(1,1); - // std::vector mpi5(1,1); std::vector cb5(1,0); - for(int d=0;d_fdimensions[d]); - // simd5.push_back(FourDimGrid->_simd_layout[d]); - // mpi5.push_back(FourDimGrid->_processors[d]); cb5.push_back( 1); } GridCartesian *tmp = makeFiveDimGrid(Ls,FourDimGrid); @@ -100,7 +93,7 @@ GridCartesian *SpaceTimeGrid::makeFiveDimDWFGrid(int Ls,const GridCartes simd5.push_back(1); mpi5.push_back(FourDimGrid->_processors[d]); } - return new GridCartesian(latt5,simd5,mpi5); + return new GridCartesian(latt5,simd5,mpi5,*FourDimGrid); } /////////////////////////////////////////////////// // Interface is inefficient and forces the deletion @@ -111,15 +104,7 @@ GridRedBlackCartesian *SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(int Ls,const Gr int N4=FourDimGrid->_ndimension; int cbd=1; std::vector cb5(1,0); - // int nsimd = FourDimGrid->Nsimd(); - // std::vector latt5(1,Ls); - // std::vector simd5(1,nsimd); - // std::vector mpi5(1,1); - for(int d=0;d_fdimensions[d]); - // simd5.push_back(1); - // mpi5.push_back(FourDimGrid->_processors[d]); cb5.push_back(1); } GridCartesian *tmp = makeFiveDimDWFGrid(Ls,FourDimGrid);