diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index e2af0545..154567fc 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -35,37 +35,27 @@ Author: paboyle #endif #include #include -// 64bit endian swap is a portability pain -#ifndef __has_builtin // Optional of course. -#define __has_builtin(x) 0 // Compatibility with non-clang compilers. -#endif -#if HAVE_DECL_BE64TOH -#undef Grid_ntohll -#define Grid_ntohll be64toh -#endif -#if HAVE_DECL_NTOHLL -#undef Grid_ntohll -#define Grid_ntohll ntohll -#endif - -#ifndef Grid_ntohll +inline uint32_t byte_reverse32(uint32_t f) { + f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; + return f; +} +inline uint64_t byte_reverse64(uint64_t f) { + uint64_t g; + g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; + g = g << 32; + f = f >> 32; + g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; + return g; +} #if BYTE_ORDER == BIG_ENDIAN - -#define Grid_ntohll(A) (A) - -#else - -#if __has_builtin(__builtin_bswap64) -#define Grid_ntohll(A) __builtin_bswap64(A) +inline uint64_t Grid_ntohll(uint64_t A) { return A; } #else -#error -#endif - -#endif - +inline uint64_t Grid_ntohll(uint64_t A) { + return byte_reverse64(A); +} #endif namespace Grid { @@ -195,7 +185,7 @@ class BinaryIO { std::vector site({x,y,z,t}); if (grid->IsBoss()) { - fin.read((char *)&file_object, sizeof(file_object)); + fin.read((char *)&file_object, sizeof(file_object));assert( fin.fail()==0); bytes += sizeof(file_object); if (ieee32big) be32toh_v((void *)&file_object, sizeof(file_object)); if (ieee32) le32toh_v((void *)&file_object, sizeof(file_object)); @@ -211,11 +201,13 @@ class BinaryIO { std::cout<Broadcast(0,(void *)&csum,sizeof(csum)); return csum; } template - static inline uint32_t writeObjectSerial(Lattice &Umu,std::string file,munger munge,int offset,const std::string & format) + static inline uint32_t writeObjectSerial(Lattice &Umu,std::string file,munger munge,int offset, + const std::string & format) { typedef typename vobj::scalar_object sobj; @@ -231,7 +223,7 @@ class BinaryIO { ////////////////////////////////////////////////// std::cout<< GridLogMessage<< "Serial write I/O "<< file<IsBoss() ) { fout.open(file,std::ios::binary|std::ios::out|std::ios::in); @@ -255,23 +247,24 @@ class BinaryIO { if ( grid->IsBoss() ) { - if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); - if(ieee32) htole32_v((void *)&file_object,sizeof(file_object)); - if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); - if(ieee64) htole64_v((void *)&file_object,sizeof(file_object)); + if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); + if(ieee32) htole32_v((void *)&file_object,sizeof(file_object)); + if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); + if(ieee64) htole64_v((void *)&file_object,sizeof(file_object)); - // NB could gather an xstrip as an optimisation. - fout.write((char *)&file_object,sizeof(file_object)); - bytes+=sizeof(file_object); + // NB could gather an xstrip as an optimisation. + fout.write((char *)&file_object,sizeof(file_object));assert( fout.fail()==0); + bytes+=sizeof(file_object); } }}}} timer.Stop(); std::cout<Broadcast(0,(void *)&csum,sizeof(csum)); return csum; } - + static inline uint32_t writeRNGSerial(GridSerialRNG &serial,GridParallelRNG ¶llel,std::string file,int offset) { typedef typename GridSerialRNG::RngStateType RngStateType; @@ -305,23 +298,23 @@ class BinaryIO { int l_idx=parallel.generator_idx(o_idx,i_idx); if( rank == grid->ThisRank() ){ - // std::cout << "rank" << rank<<" Getting state for index "<Broadcast(rank,(void *)&saved[0],bytes); if ( grid->IsBoss() ) { - Uint32Checksum((uint32_t *)&saved[0],bytes,csum); - fout.write((char *)&saved[0],bytes); + Uint32Checksum((uint32_t *)&saved[0],bytes,csum); + fout.write((char *)&saved[0],bytes);assert( fout.fail()==0); } - + } if ( grid->IsBoss() ) { serial.GetState(saved,0); Uint32Checksum((uint32_t *)&saved[0],bytes,csum); - fout.write((char *)&saved[0],bytes); + fout.write((char *)&saved[0],bytes);assert( fout.fail()==0); } grid->Broadcast(0,(void *)&csum,sizeof(csum)); return csum; @@ -355,20 +348,20 @@ class BinaryIO { int l_idx=parallel.generator_idx(o_idx,i_idx); if ( grid->IsBoss() ) { - fin.read((char *)&saved[0],bytes); - Uint32Checksum((uint32_t *)&saved[0],bytes,csum); + fin.read((char *)&saved[0],bytes);assert( fin.fail()==0); + Uint32Checksum((uint32_t *)&saved[0],bytes,csum); } grid->Broadcast(0,(void *)&saved[0],bytes); if( rank == grid->ThisRank() ){ - parallel.SetState(saved,l_idx); + parallel.SetState(saved,l_idx); } } if ( grid->IsBoss() ) { - fin.read((char *)&saved[0],bytes); + fin.read((char *)&saved[0],bytes);assert( fin.fail()==0); serial.SetState(saved,0); Uint32Checksum((uint32_t *)&saved[0],bytes,csum); } @@ -380,7 +373,8 @@ class BinaryIO { template - static inline uint32_t readObjectParallel(Lattice &Umu,std::string file,munger munge,int offset,const std::string &format) + static inline uint32_t readObjectParallel(Lattice &Umu,std::string file,munger munge,int offset, + const std::string &format) { typedef typename vobj::scalar_object sobj; @@ -415,15 +409,15 @@ class BinaryIO { if ( d == 0 ) parallel[d] = 0; if (parallel[d]) { - range[d] = grid->_ldimensions[d]; - start[d] = grid->_processor_coor[d]*range[d]; - ioproc[d]= grid->_processor_coor[d]; + range[d] = grid->_ldimensions[d]; + start[d] = grid->_processor_coor[d]*range[d]; + ioproc[d]= grid->_processor_coor[d]; } else { - range[d] = grid->_gdimensions[d]; - start[d] = 0; - ioproc[d]= 0; - - if ( grid->_processor_coor[d] != 0 ) IOnode = 0; + range[d] = grid->_gdimensions[d]; + start[d] = 0; + ioproc[d]= 0; + + if ( grid->_processor_coor[d] != 0 ) IOnode = 0; } slice_vol = slice_vol * range[d]; } @@ -434,9 +428,9 @@ class BinaryIO { std::cout<< std::dec ; std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <_ndimension;d++){ - std::cout<< range[d]; - if( d< grid->_ndimension-1 ) - std::cout<< " x "; + std::cout<< range[d]; + if( d< grid->_ndimension-1 ) + std::cout<< " x "; } std::cout << std::endl; } @@ -472,8 +466,8 @@ class BinaryIO { Lexicographic::CoorFromIndex(tsite,tlex,range); for(int d=0;d_ldimensions[d]; // local site - gsite[d] = tsite[d]+start[d]; // global site + lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site + gsite[d] = tsite[d]+start[d]; // global site } ///////////////////////// @@ -488,28 +482,28 @@ class BinaryIO { //////////////////////////////// if (myrank == iorank) { - fin.seekg(offset+g_idx*sizeof(fileObj)); - fin.read((char *)&fileObj,sizeof(fileObj)); - bytes+=sizeof(fileObj); + fin.seekg(offset+g_idx*sizeof(fileObj)); + fin.read((char *)&fileObj,sizeof(fileObj));assert( fin.fail()==0); + bytes+=sizeof(fileObj); - if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj)); - - munge(fileObj,siteObj,csum); - + if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); + if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj)); + + munge(fileObj,siteObj,csum); + } - + // Possibly do transport through pt2pt if ( rank != iorank ) { - if ( (myrank == rank) || (myrank==iorank) ) { - grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj)); - } + if ( (myrank == rank) || (myrank==iorank) ) { + grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj)); + } } // Poke at destination if ( myrank == rank ) { - pokeLocalSite(siteObj,Umu,lsite); + pokeLocalSite(siteObj,Umu,lsite); } grid->Barrier(); // necessary? } @@ -520,7 +514,7 @@ class BinaryIO { timer.Stop(); std::cout< - static inline uint32_t writeObjectParallel(Lattice &Umu,std::string file,munger munge,int offset,const std::string & format) + static inline uint32_t writeObjectParallel(Lattice &Umu,std::string file,munger munge,int offset, + const std::string & format) { typedef typename vobj::scalar_object sobj; GridBase *grid = Umu._grid; @@ -558,15 +553,15 @@ class BinaryIO { if ( d!= grid->_ndimension-1 ) parallel[d] = 0; if (parallel[d]) { - range[d] = grid->_ldimensions[d]; - start[d] = grid->_processor_coor[d]*range[d]; - ioproc[d]= grid->_processor_coor[d]; + range[d] = grid->_ldimensions[d]; + start[d] = grid->_processor_coor[d]*range[d]; + ioproc[d]= grid->_processor_coor[d]; } else { - range[d] = grid->_gdimensions[d]; - start[d] = 0; - ioproc[d]= 0; + range[d] = grid->_gdimensions[d]; + start[d] = 0; + ioproc[d]= 0; - if ( grid->_processor_coor[d] != 0 ) IOnode = 0; + if ( grid->_processor_coor[d] != 0 ) IOnode = 0; } slice_vol = slice_vol * range[d]; @@ -577,13 +572,13 @@ class BinaryIO { grid->GlobalSum(tmp); std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <_ndimension;d++){ - std::cout<< range[d]; - if( d< grid->_ndimension-1 ) - std::cout<< " x "; + std::cout<< range[d]; + if( d< grid->_ndimension-1 ) + std::cout<< " x "; } std::cout << std::endl; } - + GridStopWatch timer; timer.Start(); uint64_t bytes=0; @@ -619,8 +614,8 @@ class BinaryIO { Lexicographic::CoorFromIndex(tsite,tlex,range); for(int d=0;d_ldimensions[d]; // local site - gsite[d] = tsite[d]+start[d]; // global site + lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site + gsite[d] = tsite[d]+start[d]; // global site } @@ -640,36 +635,36 @@ class BinaryIO { // Pair of nodes may need to do pt2pt send if ( rank != iorank ) { // comms is necessary - if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it - // Send to IOrank - grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj)); - } + if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it + // Send to IOrank + grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj)); + } } grid->Barrier(); // necessary? if (myrank == iorank) { - munge(siteObj,fileObj,csum); - - if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj)); - if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj)); - - fout.seekp(offset+g_idx*sizeof(fileObj)); - fout.write((char *)&fileObj,sizeof(fileObj)); - bytes+=sizeof(fileObj); + munge(siteObj,fileObj,csum); + + if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj)); + if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj)); + + fout.seekp(offset+g_idx*sizeof(fileObj)); + fout.write((char *)&fileObj,sizeof(fileObj));assert( fout.fail()==0); + bytes+=sizeof(fileObj); } } - + grid->GlobalSum(csum); grid->GlobalSum(bytes); - + timer.Stop(); std::cout<0) { std::string key=line.substr(0,eq); @@ -345,24 +347,24 @@ static inline void readConfiguration(Lattice > &Umu, // munger is a function of if ( header.data_type == std::string("4D_SU3_GAUGE") ) { if ( ieee32 || ieee32big ) { - // csum=BinaryIO::readObjectSerial, LorentzColour2x3F> - csum=BinaryIO::readObjectParallel, LorentzColour2x3F> + csum=BinaryIO::readObjectSerial, LorentzColour2x3F> + // csum=BinaryIO::readObjectParallel, LorentzColour2x3F> (Umu,file,Nersc3x2munger(), offset,format); } if ( ieee64 || ieee64big ) { - //csum=BinaryIO::readObjectSerial, LorentzColour2x3D> - csum=BinaryIO::readObjectParallel, LorentzColour2x3D> + csum=BinaryIO::readObjectSerial, LorentzColour2x3D> + // csum=BinaryIO::readObjectParallel, LorentzColour2x3D> (Umu,file,Nersc3x2munger(),offset,format); } } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { if ( ieee32 || ieee32big ) { - //csum=BinaryIO::readObjectSerial,LorentzColourMatrixF> - csum=BinaryIO::readObjectParallel,LorentzColourMatrixF> + csum=BinaryIO::readObjectSerial,LorentzColourMatrixF> + //csum=BinaryIO::readObjectParallel,LorentzColourMatrixF> (Umu,file,NerscSimpleMunger(),offset,format); } if ( ieee64 || ieee64big ) { - // csum=BinaryIO::readObjectSerial,LorentzColourMatrixD> - csum=BinaryIO::readObjectParallel,LorentzColourMatrixD> + csum=BinaryIO::readObjectSerial,LorentzColourMatrixD> + // csum=BinaryIO::readObjectParallel,LorentzColourMatrixD> (Umu,file,NerscSimpleMunger(),offset,format); } } else { @@ -371,12 +373,17 @@ static inline void readConfiguration(Lattice > &Umu, NerscStatistics(Umu,clone); + std::cout< @@ -416,19 +423,8 @@ static inline void writeConfiguration(Lattice > &Umu Nersc3x2unmunger munge; BinaryIO::Uint32Checksum(Umu, munge,header.checksum); offset = writeHeader(header,file); - csum=BinaryIO::writeObjectSerial(Umu,file,munge,offset,header.floating_point); - - std::string file1 = file+"para"; - int offset1 = writeHeader(header,file1); - int csum1=BinaryIO::writeObjectParallel(Umu,file1,munge,offset,header.floating_point); - //int csum1=BinaryIO::writeObjectSerial(Umu,file1,munge,offset,header.floating_point); - - - std::cout << GridLogMessage << " TESTING PARALLEL WRITE offsets " << offset1 << " "<< offset << std::endl; - std::cout << GridLogMessage << " TESTING PARALLEL WRITE csums " << csum1 << " "<(Umu,file,munge,offset,header.floating_point); + csum=BinaryIO::writeObjectParallel(Umu,file,munge,offset,header.floating_point); } else { header.floating_point = std::string("IEEE64BIG");