1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 09:15:38 +01:00

Able to run a test job splitting into multiple MPI subdomains.

This commit is contained in:
paboyle 2017-06-22 18:53:11 +01:00
parent 5e4bea8f20
commit e504260f3d
8 changed files with 138 additions and 64 deletions

View File

@ -52,8 +52,8 @@ class ConjugateGradient : public OperatorFunction<Field> {
MaxIterations(maxit),
ErrorOnNoConverge(err_on_no_conv){};
void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
Field &psi) {
void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);

View File

@ -211,9 +211,6 @@ public:
assert(lidx<lSites());
Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
}
void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){
gidx=0;
int mult=1;

View File

@ -67,7 +67,7 @@ public:
GridCartesian(const std::vector<int> &dimensions,
const std::vector<int> &simd_layout,
const std::vector<int> &processor_grid,
GridCartesian &parent) : GridBase(processor_grid,parent)
const GridCartesian &parent) : GridBase(processor_grid,parent)
{
Init(dimensions,simd_layout,processor_grid);
}

View File

@ -56,46 +56,52 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
InitFromMPICommunicator(processors,communicator_world);
std::cout << "Passed communicator world to a new communicator" <<std::endl;
// std::cout << "Passed communicator world to a new communicator" <<communicator<<std::endl;
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
{
_ndimension = processors.size();
assert(_ndimension = parent._ndimension);
//////////////////////////////////////////////////////////////////////////////////////////////////////
// split the communicator
//////////////////////////////////////////////////////////////////////////////////////////////////////
std::vector<int> ratio(_ndimension);
std::vector<int> rcoor(_ndimension);
std::vector<int> scoor(_ndimension);
int Nparent;
MPI_Comm_size(parent.communicator,&Nparent);
int Nsubcomm=1;
int Nsubrank=1;
int childsize=1;
for(int d=0;d<_ndimension;d++) {
ratio[d] = parent._processors[d] / processors[d];
rcoor[d] = parent._processor_coor[d] / processors[d];
scoor[d] = parent._processor_coor[d] % processors[d];
assert(ratio[d] * processors[d] == parent._processors[d]); // must exactly subdivide
Nsubcomm *= ratio[d];
Nsubrank *= processors[d];
childsize *= processors[d];
}
int Nchild = Nparent/childsize;
assert (childsize * Nchild == Nparent);
int rlex, slex;
Lexicographic::IndexFromCoor(rcoor,rlex,ratio);
Lexicographic::IndexFromCoor(scoor,slex,processors);
int prank; MPI_Comm_rank(parent.communicator,&prank);
int crank = prank % childsize;
int ccomm = prank / childsize;
MPI_Comm comm_split;
if ( Nsubcomm > 1 ) {
int ierr= MPI_Comm_split(communicator_world, rlex, slex,&comm_split);
if ( Nchild > 1 ) {
std::cout << GridLogMessage<<"Child communicator of "<< parent.communicator<<std::endl;
std::cout << GridLogMessage<<" parent grid ";
for(int d=0;d<parent._processors.size();d++) std::cout << parent._processors[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" child grid ";
for(int d=0;d<processors.size();d++) std::cout << processors[d] << " ";
std::cout<<std::endl;
int ierr= MPI_Comm_split(parent.communicator, ccomm,crank,&comm_split);
assert(ierr==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Declare victory
//////////////////////////////////////////////////////////////////////////////////////////////////////
std::cout << "Divided communicator "<< parent._Nprocessors<<" into "
<<Nsubcomm <<" communicators with " << Nsubrank << " ranks"<<std::endl;
std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
<<Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
} else {
comm_split = communicator_world;
comm_split=parent.communicator;
// std::cout << "Passed parental communicator to a new communicator" <<std::endl;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
@ -110,9 +116,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
//////////////////////////////////////////////////////////////////////////////////////////////////////
void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
{
if ( communicator_base != communicator_world ) {
std::cout << "Cartesian communicator created with a non-world communicator"<<std::endl;
}
// if ( communicator_base != communicator_world ) {
// std::cout << "Cartesian communicator created with a non-world communicator"<<std::endl;
// }
_ndimension = processors.size();
_processor_coor.resize(_ndimension);

View File

@ -84,10 +84,6 @@ namespace QCD {
stream << "GRID_";
stream << ScidacWordMnemonic<stype>();
// std::cout << " Lorentz N/S/V/M : " << _LorentzN<<" "<<_LorentzScalar<<"/"<<_LorentzVector<<"/"<<_LorentzMatrix<<std::endl;
// std::cout << " Spin N/S/V/M : " << _SpinN <<" "<<_SpinScalar <<"/"<<_SpinVector <<"/"<<_SpinMatrix<<std::endl;
// std::cout << " Colour N/S/V/M : " << _ColourN <<" "<<_ColourScalar <<"/"<<_ColourVector <<"/"<<_ColourMatrix<<std::endl;
if ( _LorentzVector ) stream << "_LorentzVector"<<_LorentzN;
if ( _LorentzMatrix ) stream << "_LorentzMatrix"<<_LorentzN;
@ -210,19 +206,33 @@ class GridLimeReader : public BinaryIO {
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
std::cout << GridLogMessage << limeReaderType(LimeR) <<std::endl;
if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
uint64_t file_bytes =limeReaderBytes(LimeR);
// std::cout << GridLogMessage << limeReaderType(LimeR) << " "<< file_bytes <<" bytes "<<std::endl;
// std::cout << GridLogMessage<< " readLimeObject seeking "<< record_name <<" found record :" <<limeReaderType(LimeR) <<std::endl;
if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
// std::cout << GridLogMessage<< " readLimeLatticeBinaryObject matches ! " <<std::endl;
uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites;
// std::cout << "R sizeof(sobj)= " <<sizeof(sobj)<<std::endl;
// std::cout << "R Gsites " <<field._grid->_gsites<<std::endl;
// std::cout << "R Payload expected " <<PayloadSize<<std::endl;
// std::cout << "R file size " <<file_bytes <<std::endl;
assert(PayloadSize == file_bytes);// Must match or user error
off_t offset= ftell(File);
// std::cout << " ReadLatticeObject from offset "<<offset << std::endl;
BinarySimpleMunger<sobj,sobj> munge;
BinaryIO::readLatticeObject< sobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
/////////////////////////////////////////////
// Insist checksum is next record
/////////////////////////////////////////////
readLimeObject(scidacChecksum_,std::string("scidacChecksum"),record_name);
readLimeObject(scidacChecksum_,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
/////////////////////////////////////////////
// Verify checksums
@ -242,9 +252,14 @@ class GridLimeReader : public BinaryIO {
// should this be a do while; can we miss a first record??
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
// std::cout << GridLogMessage<< " readLimeObject seeking "<< record_name <<" found record :" <<limeReaderType(LimeR) <<std::endl;
uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration)
if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
// std::cout << GridLogMessage<< " readLimeObject matches ! " <<std::endl;
std::vector<char> xmlc(nbytes+1,'\0');
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
XmlReader RD(&xmlc[0],"");
@ -302,14 +317,18 @@ class GridLimeWriter : public BinaryIO {
write(WR,object_name,object);
xmlstring = WR.XmlString();
}
// std::cout << "WriteLimeObject" << record_name <<std::endl;
uint64_t nbytes = xmlstring.size();
// std::cout << " xmlstring "<< nbytes<< " " << xmlstring <<std::endl;
int err;
LimeRecordHeader *h = limeCreateHeader(MB, ME,(char *)record_name.c_str(), nbytes); assert(h!= NULL);
LimeRecordHeader *h = limeCreateHeader(MB, ME,const_cast<char *>(record_name.c_str()), nbytes);
assert(h!= NULL);
err=limeWriteRecordHeader(h, LimeW); assert(err>=0);
err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0);
err=limeWriterCloseRecord(LimeW); assert(err>=0);
limeDestroyHeader(h);
// std::cout << " File offset is now"<<ftell(File) << std::endl;
}
////////////////////////////////////////////
// Write a generic lattice field and csum
@ -326,6 +345,11 @@ class GridLimeWriter : public BinaryIO {
uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites;
createLimeRecordHeader(record_name, 0, 0, PayloadSize);
// std::cout << "W sizeof(sobj)" <<sizeof(sobj)<<std::endl;
// std::cout << "W Gsites " <<field._grid->_gsites<<std::endl;
// std::cout << "W Payload expected " <<PayloadSize<<std::endl;
////////////////////////////////////////////////////////////////////
// NB: FILE and iostream are jointly writing disjoint sequences in the
// the same file through different file handles (integer units).
@ -340,6 +364,7 @@ class GridLimeWriter : public BinaryIO {
// v) Continue writing scidac record.
////////////////////////////////////////////////////////////////////
off_t offset = ftell(File);
// std::cout << " Writing to offset "<<offset << std::endl;
std::string format = getFormatString<vobj>();
BinarySimpleMunger<sobj,sobj> munge;
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
@ -354,7 +379,7 @@ class GridLimeWriter : public BinaryIO {
checksum.suma= streama.str();
checksum.sumb= streamb.str();
std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl;
writeLimeObject(0,1,checksum,std::string("scidacChecksum" ),std::string(SCIDAC_CHECKSUM));
writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
}
};
@ -371,11 +396,9 @@ class ScidacWriter : public GridLimeWriter {
////////////////////////////////////////////////
// Write generic lattice field in scidac format
////////////////////////////////////////////////
template <class vobj, class userRecord>
template <class vobj, class userRecord>
void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord)
{
typedef typename vobj::scalar_object sobj;
uint64_t nbytes;
GridBase * grid = field._grid;
////////////////////////////////////////
@ -397,6 +420,66 @@ class ScidacWriter : public GridLimeWriter {
}
};
class ScidacReader : public GridLimeReader {
public:
template<class SerialisableUserFile>
void readScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
{
scidacFile _scidacFile(grid);
readLimeObject(_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
readLimeObject(_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
}
////////////////////////////////////////////////
// Write generic lattice field in scidac format
////////////////////////////////////////////////
template <class vobj, class userRecord>
void readScidacFieldRecord(Lattice<vobj> &field,userRecord &_userRecord)
{
typedef typename vobj::scalar_object sobj;
GridBase * grid = field._grid;
////////////////////////////////////////
// fill the Grid header
////////////////////////////////////////
FieldMetaData header;
scidacRecord _scidacRecord;
scidacFile _scidacFile;
//////////////////////////////////////////////
// Fill the Lime file record by record
//////////////////////////////////////////////
readLimeObject(header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
readLimeObject(_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
readLimeObject(_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
readLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA));
}
void skipPastBinaryRecord(void) {
std::string rec_name(ILDG_BINARY_DATA);
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) ) ) {
skipPastObjectRecord(std::string(SCIDAC_CHECKSUM));
return;
}
}
}
void skipPastObjectRecord(std::string rec_name) {
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) ) ) {
return;
}
}
}
void skipScidacFieldRecord() {
skipPastObjectRecord(std::string(GRID_FORMAT));
skipPastObjectRecord(std::string(SCIDAC_RECORD_XML));
skipPastObjectRecord(std::string(SCIDAC_PRIVATE_RECORD_XML));
skipPastBinaryRecord();
}
};
class IldgWriter : public ScidacWriter {
public:
@ -425,8 +508,6 @@ class IldgWriter : public ScidacWriter {
typedef iLorentzColourMatrix<vsimd> vobj;
typedef typename vobj::scalar_object sobj;
uint64_t nbytes;
////////////////////////////////////////
// fill the Grid header
////////////////////////////////////////

View File

@ -64,6 +64,10 @@ namespace Grid {
// file compatability, so should be correct to assume the undocumented but defacto file structure.
/////////////////////////////////////////////////////////////////////////////////
struct emptyUserRecord : Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(emptyUserRecord,int,dummy);
};
////////////////////////
// Scidac private file xml
// <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile>

View File

@ -104,6 +104,7 @@ namespace Grid {
header.nd = nd;
header.dimension.resize(nd);
header.boundary.resize(nd);
header.data_start = 0;
for(int d=0;d<nd;d++) {
header.dimension[d] = grid->_fdimensions[d];
}

View File

@ -60,7 +60,7 @@ GridCartesian *SpaceTimeGrid::makeFiveDimGrid(int Ls,const GridCartesian
simd5.push_back(FourDimGrid->_simd_layout[d]);
mpi5.push_back(FourDimGrid->_processors[d]);
}
return new GridCartesian(latt5,simd5,mpi5);
return new GridCartesian(latt5,simd5,mpi5,*FourDimGrid);
}
@ -68,15 +68,8 @@ GridRedBlackCartesian *SpaceTimeGrid::makeFiveDimRedBlackGrid(int Ls,const GridC
{
int N4=FourDimGrid->_ndimension;
int cbd=1;
// std::vector<int> latt5(1,Ls);
// std::vector<int> simd5(1,1);
// std::vector<int> mpi5(1,1);
std::vector<int> cb5(1,0);
for(int d=0;d<N4;d++){
// latt5.push_back(FourDimGrid->_fdimensions[d]);
// simd5.push_back(FourDimGrid->_simd_layout[d]);
// mpi5.push_back(FourDimGrid->_processors[d]);
cb5.push_back( 1);
}
GridCartesian *tmp = makeFiveDimGrid(Ls,FourDimGrid);
@ -100,7 +93,7 @@ GridCartesian *SpaceTimeGrid::makeFiveDimDWFGrid(int Ls,const GridCartes
simd5.push_back(1);
mpi5.push_back(FourDimGrid->_processors[d]);
}
return new GridCartesian(latt5,simd5,mpi5);
return new GridCartesian(latt5,simd5,mpi5,*FourDimGrid);
}
///////////////////////////////////////////////////
// Interface is inefficient and forces the deletion
@ -111,15 +104,7 @@ GridRedBlackCartesian *SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(int Ls,const Gr
int N4=FourDimGrid->_ndimension;
int cbd=1;
std::vector<int> cb5(1,0);
// int nsimd = FourDimGrid->Nsimd();
// std::vector<int> latt5(1,Ls);
// std::vector<int> simd5(1,nsimd);
// std::vector<int> mpi5(1,1);
for(int d=0;d<N4;d++){
// latt5.push_back(FourDimGrid->_fdimensions[d]);
// simd5.push_back(1);
// mpi5.push_back(FourDimGrid->_processors[d]);
cb5.push_back(1);
}
GridCartesian *tmp = makeFiveDimDWFGrid(Ls,FourDimGrid);