mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Able to run a test job splitting into multiple MPI subdomains.

paboyle 2017-06-22 18:53:11 +01:00
parent 5e4bea8f20
commit e504260f3d
8 changed files with 138 additions and 64 deletions

View File

@@ -52,8 +52,8 @@ class ConjugateGradient : public OperatorFunction<Field> {
       MaxIterations(maxit),
       ErrorOnNoConverge(err_on_no_conv){};
-  void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
-                  Field &psi) {
+  void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
     psi.checkerboard = src.checkerboard;
     conformable(psi, src);

View File

@@ -211,9 +211,6 @@ public:
     assert(lidx<lSites());
     Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
   }
   void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){
     gidx=0;
     int mult=1;
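For reference, the coordinate/index mapping behind GlobalCoorToGlobalIndex and Lexicographic::CoorFromIndex above is plain mixed-radix arithmetic with the first dimension running fastest. A standalone sketch of the same convention (dimension sizes are illustrative):

// Standalone sketch of the lexicographic coordinate <-> index mapping.
#include <cassert>
#include <vector>

static void IndexFromCoor(const std::vector<int> &coor, int &idx, const std::vector<int> &dims) {
  idx = 0;
  int mult = 1;
  for (size_t d = 0; d < dims.size(); d++) { idx += mult * coor[d]; mult *= dims[d]; }
}

static void CoorFromIndex(std::vector<int> &coor, int idx, const std::vector<int> &dims) {
  coor.resize(dims.size());
  for (size_t d = 0; d < dims.size(); d++) { coor[d] = idx % dims[d]; idx /= dims[d]; }
}

int main(void) {
  std::vector<int> dims = {4, 4, 4, 8};   // illustrative local grid
  std::vector<int> coor = {1, 2, 3, 5}, back;
  int idx;
  IndexFromCoor(coor, idx, dims);
  CoorFromIndex(back, idx, dims);
  assert(back == coor);                   // round trip is the identity
  return 0;
}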

View File

@@ -67,7 +67,7 @@ public:
     GridCartesian(const std::vector<int> &dimensions,
                   const std::vector<int> &simd_layout,
                   const std::vector<int> &processor_grid,
-                  GridCartesian &parent) : GridBase(processor_grid,parent)
+                  const GridCartesian &parent) : GridBase(processor_grid,parent)
     {
       Init(dimensions,simd_layout,processor_grid);
     }
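A hedged usage sketch of the parent-aware constructor changed above. All sizes are made up for illustration, the run would need as many MPI ranks as the parent decomposition (16 here), and the parent rank count must be an exact multiple of the child's:

#include <Grid/Grid.h>
using namespace Grid;

int main(int argc, char **argv) {
  Grid_init(&argc, &argv);

  // Illustrative sizes only: a 2.2.2.2 parent decomposition (16 ranks)
  // split into children that each use a 1.1.2.2 processor grid (4 ranks).
  std::vector<int> latt  = {16, 16, 16, 32};
  std::vector<int> simd  = GridDefaultSimd(4, vComplex::Nsimd());
  std::vector<int> mpi   = {2, 2, 2, 2};
  std::vector<int> split = {1, 1, 2, 2};

  GridCartesian Parent(latt, simd, mpi);
  GridCartesian Child (latt, simd, split, Parent); // constructor taking a parent grid

  Grid_finalize();
}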

View File

@@ -56,46 +56,52 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
 CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 {
   InitFromMPICommunicator(processors,communicator_world);
-  std::cout << "Passed communicator world to a new communicator" <<std::endl;
+  //  std::cout << "Passed communicator world to a new communicator" <<communicator<<std::endl;
 }
 CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
 {
   _ndimension = processors.size();
   assert(_ndimension = parent._ndimension);
   //////////////////////////////////////////////////////////////////////////////////////////////////////
   // split the communicator
   //////////////////////////////////////////////////////////////////////////////////////////////////////
-  std::vector<int> ratio(_ndimension);
-  std::vector<int> rcoor(_ndimension);
-  std::vector<int> scoor(_ndimension);
-  int Nsubcomm=1;
-  int Nsubrank=1;
+  int Nparent;
+  MPI_Comm_size(parent.communicator,&Nparent);
+  int childsize=1;
   for(int d=0;d<_ndimension;d++) {
-    ratio[d] = parent._processors[d] / processors[d];
-    rcoor[d] = parent._processor_coor[d] / processors[d];
-    scoor[d] = parent._processor_coor[d] % processors[d];
-    assert(ratio[d] * processors[d] == parent._processors[d]); // must exactly subdivide
-    Nsubcomm *= ratio[d];
-    Nsubrank *= processors[d];
+    childsize *= processors[d];
   }
-  int rlex, slex;
-  Lexicographic::IndexFromCoor(rcoor,rlex,ratio);
-  Lexicographic::IndexFromCoor(scoor,slex,processors);
+  int Nchild = Nparent/childsize;
+  assert (childsize * Nchild == Nparent);
+  int prank; MPI_Comm_rank(parent.communicator,&prank);
+  int crank = prank % childsize;
+  int ccomm = prank / childsize;
   MPI_Comm comm_split;
-  if ( Nsubcomm > 1 ) {
-    int ierr= MPI_Comm_split(communicator_world, rlex, slex,&comm_split);
+  if ( Nchild > 1 ) {
+    std::cout << GridLogMessage<<"Child communicator of "<< parent.communicator<<std::endl;
+    std::cout << GridLogMessage<<" parent grid ";
+    for(int d=0;d<parent._processors.size();d++)  std::cout << parent._processors[d] << " ";
+    std::cout<<std::endl;
+    std::cout << GridLogMessage<<" child grid ";
+    for(int d=0;d<processors.size();d++)  std::cout << processors[d] << " ";
+    std::cout<<std::endl;
+    int ierr= MPI_Comm_split(parent.communicator, ccomm,crank,&comm_split);
     assert(ierr==0);
     //////////////////////////////////////////////////////////////////////////////////////////////////////
     // Declare victory
     //////////////////////////////////////////////////////////////////////////////////////////////////////
-    std::cout << "Divided communicator "<< parent._Nprocessors<<" into "
-              <<Nsubcomm <<" communicators with " << Nsubrank << " ranks"<<std::endl;
+    std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
+              <<Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
   } else {
-    comm_split = communicator_world;
+    comm_split=parent.communicator;
+    //  std::cout << "Passed parental communicator to a new communicator" <<std::endl;
   }
   //////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -110,9 +116,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
 //////////////////////////////////////////////////////////////////////////////////////////////////////
 void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
 {
-  if ( communicator_base != communicator_world ) {
-    std::cout << "Cartesian communicator created with a non-world communicator"<<std::endl;
-  }
+  //  if ( communicator_base != communicator_world ) {
+  //    std::cout << "Cartesian communicator created with a non-world communicator"<<std::endl;
+  //  }
   _ndimension = processors.size();
   _processor_coor.resize(_ndimension);
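For orientation, the split above is plain MPI_Comm_split with color = parent rank / childsize and key = parent rank % childsize, so consecutive parent ranks land in the same child communicator. A standalone sketch of the same decomposition, with childsize chosen purely for illustration (Grid derives it from the child processor grid and asserts that it divides the parent size exactly):

#include <mpi.h>
#include <iostream>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);

  int Nparent, prank;
  MPI_Comm_size(MPI_COMM_WORLD, &Nparent);
  MPI_Comm_rank(MPI_COMM_WORLD, &prank);

  int childsize = 2;              // e.g. a 1.1.1.2 child processor grid
  int crank = prank % childsize;  // key  : rank inside the child communicator
  int ccomm = prank / childsize;  // color: which child communicator

  MPI_Comm comm_split;
  MPI_Comm_split(MPI_COMM_WORLD, ccomm, crank, &comm_split);

  int csize, cme;
  MPI_Comm_size(comm_split, &csize);
  MPI_Comm_rank(comm_split, &cme);
  std::cout << "world rank " << prank << " -> child " << ccomm
            << ", rank " << cme << " of " << csize << std::endl;

  MPI_Comm_free(&comm_split);
  MPI_Finalize();
  return 0;
}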

View File

@@ -84,10 +84,6 @@ namespace QCD {
     stream << "GRID_";
     stream << ScidacWordMnemonic<stype>();
-    //    std::cout << " Lorentz N/S/V/M : " << _LorentzN<<" "<<_LorentzScalar<<"/"<<_LorentzVector<<"/"<<_LorentzMatrix<<std::endl;
-    //    std::cout << " Spin    N/S/V/M : " << _SpinN   <<" "<<_SpinScalar   <<"/"<<_SpinVector   <<"/"<<_SpinMatrix<<std::endl;
-    //    std::cout << " Colour  N/S/V/M : " << _ColourN <<" "<<_ColourScalar <<"/"<<_ColourVector <<"/"<<_ColourMatrix<<std::endl;
     if ( _LorentzVector )   stream << "_LorentzVector"<<_LorentzN;
     if ( _LorentzMatrix )   stream << "_LorentzMatrix"<<_LorentzN;
@@ -210,19 +206,33 @@ class GridLimeReader : public BinaryIO {
     while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
-      std::cout << GridLogMessage << limeReaderType(LimeR) <<std::endl;
-      if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
+      uint64_t file_bytes =limeReaderBytes(LimeR);
+      //      std::cout << GridLogMessage << limeReaderType(LimeR) << " "<< file_bytes <<" bytes "<<std::endl;
+      //      std::cout << GridLogMessage<< " readLimeObject seeking "<< record_name <<" found record :" <<limeReaderType(LimeR) <<std::endl;
+      if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
+        //        std::cout << GridLogMessage<< " readLimeLatticeBinaryObject matches ! " <<std::endl;
+        uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites;
+        //        std::cout << "R sizeof(sobj)= " <<sizeof(sobj)<<std::endl;
+        //        std::cout << "R Gsites " <<field._grid->_gsites<<std::endl;
+        //        std::cout << "R Payload expected " <<PayloadSize<<std::endl;
+        //        std::cout << "R file size " <<file_bytes <<std::endl;
+        assert(PayloadSize == file_bytes);// Must match or user error
         off_t offset= ftell(File);
+        //        std::cout << " ReadLatticeObject from offset "<<offset << std::endl;
         BinarySimpleMunger<sobj,sobj> munge;
-        BinaryIO::readLatticeObject< sobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
+        BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
         /////////////////////////////////////////////
         // Insist checksum is next record
         /////////////////////////////////////////////
-        readLimeObject(scidacChecksum_,std::string("scidacChecksum"),record_name);
+        readLimeObject(scidacChecksum_,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
         /////////////////////////////////////////////
         // Verify checksums
@@ -242,9 +252,14 @@ class GridLimeReader : public BinaryIO {
     // should this be a do while; can we miss a first record??
     while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
+      //      std::cout << GridLogMessage<< " readLimeObject seeking "<< record_name <<" found record :" <<limeReaderType(LimeR) <<std::endl;
       uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration)
-      if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
+      if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
+        //        std::cout << GridLogMessage<< " readLimeObject matches ! " <<std::endl;
         std::vector<char> xmlc(nbytes+1,'\0');
         limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
         XmlReader RD(&xmlc[0],"");
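The scan-and-match loop above is the standard c-lime reader idiom: walk the records, compare the record type against the name you want, then read the payload. A standalone sketch using the lime API directly (file name and record type are illustrative):

#include <lime.h>
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>
#include <iostream>

int main(void) {
  FILE *fp = fopen("example.lime", "r");              // hypothetical input file
  if (!fp) return 1;
  LimeReader *LimeR = limeCreateReader(fp);
  std::string record_name = "scidac-record-xml";      // illustrative record type

  while (limeReaderNextRecord(LimeR) == LIME_SUCCESS) {
    n_uint64_t nbytes = limeReaderBytes(LimeR);       // size of this record
    if (!strncmp(limeReaderType(LimeR), record_name.c_str(), strlen(record_name.c_str()))) {
      std::vector<char> xmlc(nbytes + 1, '\0');       // NUL-terminate for printing
      limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
      std::cout << &xmlc[0] << std::endl;             // the record's XML payload
      break;
    }
  }
  limeDestroyReader(LimeR);
  fclose(fp);
  return 0;
}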
@@ -302,14 +317,18 @@ class GridLimeWriter : public BinaryIO {
       write(WR,object_name,object);
       xmlstring = WR.XmlString();
     }
+    //    std::cout << "WriteLimeObject" << record_name <<std::endl;
     uint64_t nbytes = xmlstring.size();
+    //    std::cout << " xmlstring "<< nbytes<< " " << xmlstring <<std::endl;
     int err;
-    LimeRecordHeader *h = limeCreateHeader(MB, ME,(char *)record_name.c_str(), nbytes); assert(h!= NULL);
+    LimeRecordHeader *h = limeCreateHeader(MB, ME,const_cast<char *>(record_name.c_str()), nbytes);
+    assert(h!= NULL);
     err=limeWriteRecordHeader(h, LimeW);                    assert(err>=0);
     err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0);
     err=limeWriterCloseRecord(LimeW);                       assert(err>=0);
     limeDestroyHeader(h);
+    //    std::cout << " File offset is now"<<ftell(File) << std::endl;
   }
   ////////////////////////////////////////////
   // Write a generic lattice field and csum
@@ -326,6 +345,11 @@ class GridLimeWriter : public BinaryIO {
     uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites;
     createLimeRecordHeader(record_name, 0, 0, PayloadSize);
+    //    std::cout << "W sizeof(sobj)" <<sizeof(sobj)<<std::endl;
+    //    std::cout << "W Gsites " <<field._grid->_gsites<<std::endl;
+    //    std::cout << "W Payload expected " <<PayloadSize<<std::endl;
     ////////////////////////////////////////////////////////////////////
     // NB: FILE and iostream are jointly writing disjoint sequences in the
     // the same file through different file handles (integer units).
@@ -340,6 +364,7 @@ class GridLimeWriter : public BinaryIO {
     // v)  Continue writing scidac record.
     ////////////////////////////////////////////////////////////////////
     off_t offset = ftell(File);
+    //    std::cout << " Writing to offset "<<offset << std::endl;
     std::string format = getFormatString<vobj>();
     BinarySimpleMunger<sobj,sobj> munge;
     BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
@@ -354,7 +379,7 @@ class GridLimeWriter : public BinaryIO {
     checksum.suma= streama.str();
     checksum.sumb= streamb.str();
     std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl;
-    writeLimeObject(0,1,checksum,std::string("scidacChecksum" ),std::string(SCIDAC_CHECKSUM));
+    writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
   }
 };
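The writeLimeObject path above reduces to the usual c-lime write sequence: create a record header, write the header, write the data, close the record. A standalone sketch with a made-up record name and payload:

#include <lime.h>
#include <cassert>
#include <cstdio>
#include <string>

int main(void) {
  FILE *fp = fopen("example.lime", "w");            // hypothetical output file
  LimeWriter *LimeW = limeCreateWriter(fp);
  std::string record_name = "exampleRecord";        // hypothetical record type
  std::string xmlstring   = "<note>hello</note>";   // hypothetical payload
  n_uint64_t nbytes = xmlstring.size();

  // MB=1 / ME=1: this record both begins and ends a message.
  LimeRecordHeader *h = limeCreateHeader(1, 1, const_cast<char *>(record_name.c_str()), nbytes);
  assert(h != NULL);
  int err;
  err = limeWriteRecordHeader(h, LimeW);                            assert(err >= 0);
  err = limeWriteRecordData((void *)&xmlstring[0], &nbytes, LimeW); assert(err >= 0);
  err = limeWriterCloseRecord(LimeW);                               assert(err >= 0);
  limeDestroyHeader(h);

  limeDestroyWriter(LimeW);
  fclose(fp);
  return 0;
}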
@@ -371,11 +396,9 @@ class ScidacWriter : public GridLimeWriter {
   ////////////////////////////////////////////////
   // Write generic lattice field in scidac format
   ////////////////////////////////////////////////
   template <class vobj, class userRecord>
   void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord)
   {
-    typedef typename vobj::scalar_object sobj;
-    uint64_t nbytes;
     GridBase * grid = field._grid;
     ////////////////////////////////////////
@@ -397,6 +420,66 @@ class ScidacWriter : public GridLimeWriter {
   }
 };
+class ScidacReader : public GridLimeReader {
+ public:
+  template<class SerialisableUserFile>
+  void readScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
+  {
+    scidacFile _scidacFile(grid);
+    readLimeObject(_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
+    readLimeObject(_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
+  }
+  ////////////////////////////////////////////////
+  // Write generic lattice field in scidac format
+  ////////////////////////////////////////////////
+  template <class vobj, class userRecord>
+  void readScidacFieldRecord(Lattice<vobj> &field,userRecord &_userRecord)
+  {
+    typedef typename vobj::scalar_object sobj;
+    GridBase * grid = field._grid;
+    ////////////////////////////////////////
+    // fill the Grid header
+    ////////////////////////////////////////
+    FieldMetaData header;
+    scidacRecord  _scidacRecord;
+    scidacFile    _scidacFile;
+    //////////////////////////////////////////////
+    // Fill the Lime file record by record
+    //////////////////////////////////////////////
+    readLimeObject(header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
+    readLimeObject(_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
+    readLimeObject(_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
+    readLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA));
+  }
+  void skipPastBinaryRecord(void) {
+    std::string rec_name(ILDG_BINARY_DATA);
+    while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
+      if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) ) ) {
+        skipPastObjectRecord(std::string(SCIDAC_CHECKSUM));
+        return;
+      }
+    }
+  }
+  void skipPastObjectRecord(std::string rec_name) {
+    while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
+      if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) ) ) {
+        return;
+      }
+    }
+  }
+  void skipScidacFieldRecord() {
+    skipPastObjectRecord(std::string(GRID_FORMAT));
+    skipPastObjectRecord(std::string(SCIDAC_RECORD_XML));
+    skipPastObjectRecord(std::string(SCIDAC_PRIVATE_RECORD_XML));
+    skipPastBinaryRecord();
+  }
+};
 class IldgWriter : public ScidacWriter {
  public:
@@ -425,8 +508,6 @@ class IldgWriter : public ScidacWriter {
     typedef iLorentzColourMatrix<vsimd> vobj;
     typedef typename vobj::scalar_object sobj;
-    uint64_t nbytes;
     ////////////////////////////////////////
     // fill the Grid header
     ////////////////////////////////////////

View File

@@ -64,6 +64,10 @@ namespace Grid {
   // file compatability, so should be correct to assume the undocumented but defacto file structure.
   /////////////////////////////////////////////////////////////////////////////////
 
+  struct emptyUserRecord : Serializable {
+    GRID_SERIALIZABLE_CLASS_MEMBERS(emptyUserRecord,int,dummy);
+  };
+
   ////////////////////////
   // Scidac private file xml
   // <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile>
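emptyUserRecord gives callers a trivial user record when they have no extra metadata to attach. A hedged usage sketch of the writer/reader pair with it (file name and field are illustrative, and it assumes the open()/close() helpers of the Lime writer/reader base classes):

#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;

int main(int argc, char **argv) {
  Grid_init(&argc, &argv);

  GridCartesian *UGrid =
    SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
                                   GridDefaultSimd(Nd, vComplex::Nsimd()),
                                   GridDefaultMpi());
  GridParallelRNG RNG(UGrid);
  RNG.SeedFixedIntegers(std::vector<int>({1, 2, 3, 4}));
  LatticeGaugeField Umu(UGrid);
  SU3::HotConfiguration(RNG, Umu);          // some field to round-trip

  emptyUserRecord record;                   // no per-record metadata
  std::string file("./example.scidac");     // illustrative file name

  ScidacWriter WR;
  WR.open(file);
  WR.writeScidacFieldRecord(Umu, record);
  WR.close();

  ScidacReader RD;
  RD.open(file);
  RD.readScidacFieldRecord(Umu, record);
  RD.close();

  Grid_finalize();
}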

View File

@@ -104,6 +104,7 @@ namespace Grid {
     header.nd = nd;
     header.dimension.resize(nd);
     header.boundary.resize(nd);
+    header.data_start = 0;
     for(int d=0;d<nd;d++) {
       header.dimension[d] = grid->_fdimensions[d];
     }

View File

@@ -60,7 +60,7 @@ GridCartesian *SpaceTimeGrid::makeFiveDimGrid(int Ls,const GridCartesian
     simd5.push_back(FourDimGrid->_simd_layout[d]);
     mpi5.push_back(FourDimGrid->_processors[d]);
   }
-  return new GridCartesian(latt5,simd5,mpi5);
+  return new GridCartesian(latt5,simd5,mpi5,*FourDimGrid);
 }
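With this change the five-dimensional grids returned by SpaceTimeGrid carry the four-dimensional grid as their parent. A hedged sketch of the usual construction path, using Grid's default lattice/MPI helpers (Ls is illustrative):

#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;

int main(int argc, char **argv) {
  Grid_init(&argc, &argv);

  const int Ls = 8;  // illustrative fifth-dimension extent
  GridCartesian *UGrid =
    SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
                                   GridDefaultSimd(Nd, vComplex::Nsimd()),
                                   GridDefaultMpi());
  GridCartesian         *FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
  GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);

  // FGrid and FrbGrid are now constructed with UGrid as their parent grid.
  std::cout << GridLogMessage << "5d grids have " << FGrid->_ndimension
            << " / " << FrbGrid->_ndimension << " dimensions" << std::endl;

  Grid_finalize();
}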
@@ -68,15 +68,8 @@ GridRedBlackCartesian *SpaceTimeGrid::makeFiveDimRedBlackGrid(int Ls,const GridC
 {
   int N4=FourDimGrid->_ndimension;
   int cbd=1;
-  //  std::vector<int> latt5(1,Ls);
-  //  std::vector<int> simd5(1,1);
-  //  std::vector<int> mpi5(1,1);
   std::vector<int> cb5(1,0);
   for(int d=0;d<N4;d++){
-    //    latt5.push_back(FourDimGrid->_fdimensions[d]);
-    //    simd5.push_back(FourDimGrid->_simd_layout[d]);
-    //    mpi5.push_back(FourDimGrid->_processors[d]);
     cb5.push_back( 1);
   }
   GridCartesian *tmp = makeFiveDimGrid(Ls,FourDimGrid);
@@ -100,7 +93,7 @@ GridCartesian *SpaceTimeGrid::makeFiveDimDWFGrid(int Ls,const GridCartes
     simd5.push_back(1);
     mpi5.push_back(FourDimGrid->_processors[d]);
   }
-  return new GridCartesian(latt5,simd5,mpi5);
+  return new GridCartesian(latt5,simd5,mpi5,*FourDimGrid);
 }
 ///////////////////////////////////////////////////
 // Interface is inefficient and forces the deletion
@@ -111,15 +104,7 @@ GridRedBlackCartesian *SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(int Ls,const Gr
   int N4=FourDimGrid->_ndimension;
   int cbd=1;
   std::vector<int> cb5(1,0);
-  //  int nsimd = FourDimGrid->Nsimd();
-  //  std::vector<int> latt5(1,Ls);
-  //  std::vector<int> simd5(1,nsimd);
-  //  std::vector<int> mpi5(1,1);
   for(int d=0;d<N4;d++){
-    //    latt5.push_back(FourDimGrid->_fdimensions[d]);
-    //    simd5.push_back(1);
-    //    mpi5.push_back(FourDimGrid->_processors[d]);
     cb5.push_back(1);
   }
   GridCartesian *tmp = makeFiveDimDWFGrid(Ls,FourDimGrid);