/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/parallelIO/BinaryIO.h

    Copyright (C) 2015

    Author: Peter Boyle
    Author: paboyle

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/*  END LEGAL */
#ifndef GRID_BINARY_IO_H
#define GRID_BINARY_IO_H

#ifdef HAVE_ENDIAN_H
#include <endian.h>
#endif

#include <arpa/inet.h>
#include <algorithm>

// 64bit endian swap is a portability pain
#ifndef __has_builtin         // Optional of course.
#define __has_builtin(x) 0    // Compatibility with non-clang compilers.
#endif

#if HAVE_DECL_BE64TOH
#undef  Grid_ntohll
#define Grid_ntohll be64toh
#endif

#if HAVE_DECL_NTOHLL
#undef  Grid_ntohll
#define Grid_ntohll ntohll
#endif

#ifndef Grid_ntohll
#if BYTE_ORDER == BIG_ENDIAN
#define Grid_ntohll(A) (A)
#else
#if __has_builtin(__builtin_bswap64)
#define Grid_ntohll(A) __builtin_bswap64(A)
#else
#error "Grid_ntohll: no 64-bit byte swap available on this platform"
#endif
#endif
#endif

namespace Grid {

  // A little helper
  inline void removeWhitespace(std::string &key)
  {
    key.erase(std::remove_if(key.begin(), key.end(), ::isspace), key.end());
  }

  class BinaryIO {

  public:

    // Network byte order is big endian. Byte swapping is an involution, so the
    // host-to-file conversions simply reuse the file-to-host routines.
    static inline void htobe32_v(void *file_object,uint32_t bytes){ be32toh_v(file_object,bytes);}
    static inline void htobe64_v(void *file_object,uint32_t bytes){ be64toh_v(file_object,bytes);}
    static inline void htole32_v(void *file_object,uint32_t bytes){ le32toh_v(file_object,bytes);}
    static inline void htole64_v(void *file_object,uint32_t bytes){ le64toh_v(file_object,bytes);}

    // Big endian file data is already in network order: convert with ntohl
    static inline void be32toh_v(void *file_object,uint32_t bytes)
    {
      uint32_t *f = (uint32_t *)file_object;
      for(int i=0;i*sizeof(uint32_t)<bytes;i++){
        f[i] = ntohl(f[i]);
      }
    }

    // Little endian file data must be byte swapped into network order first
    static inline void le32toh_v(void *file_object,uint32_t bytes)
    {
      uint32_t *fp = (uint32_t *)file_object;
      uint32_t f;
      for(int i=0;i*sizeof(uint32_t)<bytes;i++){
        f = fp[i];
        // swap into network order, then network to host
        f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
        fp[i] = ntohl(f);
      }
    }

    // BE is same as network
    static inline void be64toh_v(void *file_object,uint32_t bytes)
    {
      uint64_t *f = (uint64_t *)file_object;
      for(int i=0;i*sizeof(uint64_t)<bytes;i++){
        f[i] = Grid_ntohll(f[i]);
      }
    }

    // LE must byte reverse each 32-bit half, exchange the halves, then convert to host
    static inline void le64toh_v(void *file_object,uint32_t bytes)
    {
      uint64_t *fp = (uint64_t *)file_object;
      uint64_t f,g;
      for(int i=0;i*sizeof(uint64_t)<bytes;i++){
        f = fp[i];
        // byte reverse the low word
        g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
        g = g << 32;
        f = f >> 32;
        // byte reverse the high word and recombine
        g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
        fp[i] = Grid_ntohll(g);
      }
    }

    // Site-by-site checksum of a lattice object, accumulated through the munger
    template<class vobj,class fobj,class munger>
    static inline void Uint32Checksum(Lattice<vobj> &lat,munger munge,uint32_t &csum)
    {
      typedef typename vobj::scalar_object sobj;

      GridBase *grid = lat._grid ;
      std::cout << GridLogMessage << "Uint32Checksum " << std::endl;

      sobj siteObj;
      fobj fileObj;

      csum = 0;
      std::vector<int> lcoor;
      for(int l=0;l<grid->lSites();l++){
        grid->CoorFromIndex(lcoor,l,grid->_ldimensions);
        peekLocalSite(siteObj,lat,lcoor);
        munge(siteObj,fileObj,csum);
      }
      grid->GlobalSum(csum);
    }

    // Simple 32-bit word sum over a raw buffer
    static inline void Uint32Checksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum)
    {
      for(int i=0;i*sizeof(uint32_t)<buf_size_bytes;i++){
        csum=csum+buf[i];
      }
    }
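    /*
     * Minimal usage sketch for the endian helpers above (illustrative only; the
     * buffer "words" below is an assumption, not part of this class). Sizes are
     * passed in bytes, not element counts:
     *
     *   std::vector<uint64_t> words(16, 0x0102030405060708ULL); // as read from an IEEE64 file
     *   BinaryIO::le64toh_v(words.data(), words.size()*sizeof(uint64_t)); // file -> host order
     *   BinaryIO::htole64_v(words.data(), words.size()*sizeof(uint64_t)); // host -> file order (involution)
     *
     * After the first call each word holds the host-order value of the
     * little-endian on-disk representation; the second call restores it.
     */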
    template<class vobj,class fobj,class munger>
    static inline uint32_t readObjectSerial(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string &format)
    {
      typedef typename vobj::scalar_object sobj;

      GridBase *grid = Umu._grid;

      std::cout<< GridLogMessage<< "Serial read I/O "<< file<<std::endl;

      int ieee32big = (format == std::string("IEEE32BIG"));
      int ieee32    = (format == std::string("IEEE32"));
      int ieee64big = (format == std::string("IEEE64BIG"));
      int ieee64    = (format == std::string("IEEE64"));

      // Find the location of each site and send to primary node.
      // Take loop order from Chroma; defines loop order now that the NERSC doc is
      // no longer available (how short sighted is that?).
      std::ifstream fin(file,std::ios::binary|std::ios::in);
      fin.seekg(offset);

      Umu = zero;
      uint32_t csum=0;
      fobj file_object;
      sobj munged;

      for(int t=0;t<grid->_fdimensions[3];t++){
      for(int z=0;z<grid->_fdimensions[2];z++){
      for(int y=0;y<grid->_fdimensions[1];y++){
      for(int x=0;x<grid->_fdimensions[0];x++){

        std::vector<int> site({x,y,z,t});

        if ( grid->IsBoss() ) {
          fin.read((char *)&file_object,sizeof(file_object));

          if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object));
          if(ieee32)    le32toh_v((void *)&file_object,sizeof(file_object));
          if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object));
          if(ieee64)    le64toh_v((void *)&file_object,sizeof(file_object));

          munge(file_object,munged,csum);
        }
        // The boss who read the file has their value poked
        pokeSite(munged,Umu,site);
      }}}}

      return csum;
    }

    template<class vobj,class fobj,class munger>
    static inline uint32_t writeObjectSerial(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string &format)
    {
      typedef typename vobj::scalar_object sobj;

      GridBase *grid = Umu._grid;

      int ieee32big = (format == std::string("IEEE32BIG"));
      int ieee32    = (format == std::string("IEEE32"));
      int ieee64big = (format == std::string("IEEE64BIG"));
      int ieee64    = (format == std::string("IEEE64"));

      //////////////////////////////////////////////////
      // Serialise through node zero
      //////////////////////////////////////////////////
      std::cout<< GridLogMessage<< "Serial write I/O "<< file<<std::endl;

      std::ofstream fout;
      if ( grid->IsBoss() ) {
        fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
        fout.seekp(offset);
      }

      uint32_t csum=0;
      fobj file_object;
      sobj unmunged;

      for(int t=0;t<grid->_fdimensions[3];t++){
      for(int z=0;z<grid->_fdimensions[2];z++){
      for(int y=0;y<grid->_fdimensions[1];y++){
      for(int x=0;x<grid->_fdimensions[0];x++){

        std::vector<int> site({x,y,z,t});

        // peek & write
        peekSite(unmunged,Umu,site);
        munge(unmunged,file_object,csum);

        if ( grid->IsBoss() ) {
          if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object));
          if(ieee32)    htole32_v((void *)&file_object,sizeof(file_object));
          if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
          if(ieee64)    htole64_v((void *)&file_object,sizeof(file_object));

          fout.write((char *)&file_object,sizeof(file_object));
        }
      }}}}

      return csum;
    }
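    /*
     * Usage sketch for the serial read/write pair above (illustrative only; the
     * functor and the call site below are assumptions, not definitions made in
     * this file). A "munger" is any callable converting between the file word
     * format (fobj) and the site object format (sobj) while accumulating the
     * 32-bit checksum:
     *
     *   struct TrivialMunger {
     *     template<class T1,class T2>
     *     void operator()(T1 &in, T2 &out, uint32_t &csum) {
     *       out = in;                                               // no format conversion
     *       BinaryIO::Uint32Checksum((uint32_t *)&in, sizeof(in), csum);
     *     }
     *   };
     *
     *   // Read through the boss node; Umu, the offset and the format string
     *   // follow whatever the file's header dictates.
     *   uint32_t csum = BinaryIO::readObjectSerial<vobj,fobj>(Umu, "ckpoint.bin",
     *                                                TrivialMunger(), offset, "IEEE64BIG");
     */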
    static inline uint32_t writeRNGSerial(GridSerialRNG &serial,GridParallelRNG &parallel,std::string file,int offset)
    {
      typedef typename GridSerialRNG::RngStateType RngStateType;
      const int RngStateCount = GridSerialRNG::RngStateCount;

      GridBase *grid = parallel._grid;
      int gsites = grid->_gsites;

      //////////////////////////////////////////////////
      // Serialise through node zero
      //////////////////////////////////////////////////
      std::cout<< GridLogMessage<< "Serial RNG write I/O "<< file<<std::endl;

      std::ofstream fout;
      if ( grid->IsBoss() ) {
        fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
        fout.seekp(offset);
      }

      uint32_t csum=0;
      std::vector<RngStateType> saved(RngStateCount);
      int bytes = sizeof(RngStateType)*saved.size();
      std::vector<int> gcoor;

      for(int gidx=0;gidx<gsites;gidx++){

        int rank,o_idx,i_idx;
        grid->GlobalIndexToGlobalCoor(gidx,gcoor);
        grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
        int l_idx=parallel.generator_idx(o_idx,i_idx);

        if( rank == grid->ThisRank() ){
          // std::cout << "rank" << rank << " Getting state for index " << l_idx << std::endl;
          parallel.GetState(saved,l_idx);
        }

        grid->Broadcast(rank,(void *)&saved[0],bytes);

        if ( grid->IsBoss() ) {
          Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
          fout.write((char *)&saved[0],bytes);
        }
      }

      // Append the serial generator's state after the per-site records
      if ( grid->IsBoss() ) {
        serial.GetState(saved,0);
        Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
        fout.write((char *)&saved[0],bytes);
      }

      return csum;
    }

    static inline uint32_t readRNGSerial(GridSerialRNG &serial,GridParallelRNG &parallel,std::string file,int offset)
    {
      typedef typename GridSerialRNG::RngStateType RngStateType;
      const int RngStateCount = GridSerialRNG::RngStateCount;

      GridBase *grid = parallel._grid;
      int gsites = grid->_gsites;

      //////////////////////////////////////////////////
      // Serialise through node zero
      //////////////////////////////////////////////////
      std::cout<< GridLogMessage<< "Serial RNG read I/O "<< file<<std::endl;

      std::ifstream fin(file,std::ios::binary|std::ios::in);
      fin.seekg(offset);

      uint32_t csum=0;
      std::vector<RngStateType> saved(RngStateCount);
      int bytes = sizeof(RngStateType)*saved.size();
      std::vector<int> gcoor;

      for(int gidx=0;gidx<gsites;gidx++){

        int rank,o_idx,i_idx;
        grid->GlobalIndexToGlobalCoor(gidx,gcoor);
        grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
        int l_idx=parallel.generator_idx(o_idx,i_idx);

        if ( grid->IsBoss() ) {
          fin.read((char *)&saved[0],bytes);
          Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
        }

        grid->Broadcast(0,(void *)&saved[0],bytes);

        if( rank == grid->ThisRank() ){
          parallel.SetState(saved,l_idx);
        }
      }

      // The trailing record holds the serial generator's state
      if ( grid->IsBoss() ) {
        fin.read((char *)&saved[0],bytes);
        serial.SetState(saved,0);
        Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
      }

      return csum;
    }
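    /*
     * On-disk layout shared by writeRNGSerial/readRNGSerial: one fixed-size
     * state record per global site, in global lexicographic order, followed by
     * a single trailing record for the serial generator. A sketch of the total
     * size consumed after "offset" (illustrative; "grid" here stands for the
     * GridBase the parallel RNG lives on):
     *
     *   int  record = sizeof(GridSerialRNG::RngStateType) * GridSerialRNG::RngStateCount;
     *   long total  = (long)grid->_gsites * record   // per-site parallel generator states
     *               + record;                        // trailing serial generator state
     */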
    template<class vobj,class fobj,class munger>
    static inline uint32_t readObjectParallel(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string &format)
    {
      typedef typename vobj::scalar_object sobj;

      GridBase *grid = Umu._grid;

      int ieee32big = (format == std::string("IEEE32BIG"));
      int ieee32    = (format == std::string("IEEE32"));
      int ieee64big = (format == std::string("IEEE64BIG"));
      int ieee64    = (format == std::string("IEEE64"));

      // Take into account the block size of parallel file systems: want about
      // 4-16MB chunks.
      // Ideally one reader/writer per xy plane and read these contiguously
      // with comms from nominated I/O nodes.
      std::ifstream fin;

      int nd = grid->_ndimension;
      std::vector<int> parallel(nd,1);
      std::vector<int> ioproc  (nd);
      std::vector<int> start(nd);
      std::vector<int> range(nd);

      for(int d=0;d<nd;d++){
        assert(grid->CheckerBoarded(d) == 0);
      }

      uint64_t slice_vol = 1;

      int IOnode = 1;
      for(int d=0;d<grid->_ndimension;d++) {

        if ( d==0 ) parallel[d] = 0;

        if (parallel[d]) {
          range[d] = grid->_ldimensions[d];
          start[d] = grid->_processor_coor[d]*range[d];
          ioproc[d]= grid->_processor_coor[d];
        } else {
          range[d] = grid->_gdimensions[d];
          start[d] = 0;
          ioproc[d]= 0;

          if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
        }
        slice_vol = slice_vol * range[d];
      }

      {
        uint32_t tmp = IOnode;
        grid->GlobalSum(tmp);
        std::cout<< std::dec ;
        std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " << tmp << " IOnodes for subslice ";
        for(int d=0;d<grid->_ndimension;d++){
          std::cout<< range[d];
          if( d< grid->_ndimension-1 )
            std::cout<< " x ";
        }
        std::cout << std::endl;
      }

      int myrank = grid->ThisRank();
      int iorank = grid->RankFromProcessorCoor(ioproc);

      if ( IOnode ) {
        fin.open(file,std::ios::binary|std::ios::in);
      }

      //////////////////////////////////////////////////////////
      // Find the location of each site and send to primary node.
      // Take loop order from Chroma; defines loop order now that the NERSC doc is
      // no longer available (how short sighted is that?).
      //////////////////////////////////////////////////////////
      Umu = zero;
      uint32_t csum=0;
      fobj fileObj;
      sobj siteObj;

      // need to implement these loops in an Nd independent way with a lexico conversion
      for(int tlex=0;tlex<slice_vol;tlex++){

        std::vector<int> tsite(nd); // temporary mixed up site
        std::vector<int> gsite(nd);
        std::vector<int> lsite(nd);
        std::vector<int> iosite(nd);

        grid->CoorFromIndex(tsite,tlex,range);

        for(int d=0;d<nd;d++){
          lsite[d] = tsite[d]%grid->_ldimensions[d];  // local site
          gsite[d] = tsite[d]+start[d];               // global site
        }

        /////////////////////////
        // Get the rank of the owner of the data
        /////////////////////////
        int rank, o_idx,i_idx, g_idx;
        grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite);
        grid->GlobalCoorToGlobalIndex(gsite,g_idx);

        ////////////////////////////////
        // iorank reads from the seek
        ////////////////////////////////
        if (myrank == iorank) {

          fin.seekg(offset+g_idx*sizeof(fileObj));
          fin.read((char *)&fileObj,sizeof(fileObj));

          if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
          if(ieee32)    le32toh_v((void *)&fileObj,sizeof(fileObj));
          if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
          if(ieee64)    le64toh_v((void *)&fileObj,sizeof(fileObj));

          munge(fileObj,siteObj,csum);

          if ( rank != myrank ) {
            grid->SendTo((void *)&siteObj,rank,sizeof(siteObj));
          } else {
            pokeLocalSite(siteObj,Umu,lsite);
          }

        } else {
          if ( myrank == rank ) {
            grid->RecvFrom((void *)&siteObj,iorank,sizeof(siteObj));
            pokeLocalSite(siteObj,Umu,lsite);
          }
        }
        grid->Barrier(); // necessary?
      }

      grid->GlobalSum(csum);

      return csum;
    }
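    /*
     * Decomposition note for the parallel reader above (and the writer below):
     * dimension 0 is forced serial (parallel[0]=0), so a rank acts as an I/O
     * node only when its processor coordinate is zero in every non-parallel
     * dimension, and each I/O node handles a subslice of
     * slice_vol = gdimensions[0] * ldimensions[1] * ... * ldimensions[nd-1] sites;
     * the remaining ranks exchange their sites over the network.
     * Illustrative count (the processor grid below is an assumption, not taken
     * from any particular run):
     *
     *   // 2x2x2x2 processor grid: ranks with _processor_coor[0]==0 do the I/O,
     *   // i.e. 2*2*2 = 8 I/O nodes out of 16 ranks.
     */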
    //////////////////////////////////////////////////////////
    // Parallel writer
    //////////////////////////////////////////////////////////
    template<class vobj,class fobj,class munger>
    static inline uint32_t writeObjectParallel(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string &format)
    {
      typedef typename vobj::scalar_object sobj;

      GridBase *grid = Umu._grid;

      int ieee32big = (format == std::string("IEEE32BIG"));
      int ieee32    = (format == std::string("IEEE32"));
      int ieee64big = (format == std::string("IEEE64BIG"));
      int ieee64    = (format == std::string("IEEE64"));

      int nd = grid->_ndimension;
      for(int d=0;d<nd;d++){
        assert(grid->CheckerBoarded(d) == 0);
      }

      std::vector<int> parallel(nd,1);
      std::vector<int> ioproc  (nd);
      std::vector<int> start(nd);
      std::vector<int> range(nd);

      uint64_t slice_vol = 1;

      int IOnode = 1;
      for(int d=0;d<grid->_ndimension;d++) {

        if ( d==0 ) parallel[d] = 0;

        if (parallel[d]) {
          range[d] = grid->_ldimensions[d];
          start[d] = grid->_processor_coor[d]*range[d];
          ioproc[d]= grid->_processor_coor[d];
        } else {
          range[d] = grid->_gdimensions[d];
          start[d] = 0;
          ioproc[d]= 0;

          if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
        }
        slice_vol = slice_vol * range[d];
      }

      {
        uint32_t tmp = IOnode;
        grid->GlobalSum(tmp);
        std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " << tmp << " IOnodes for subslice ";
        for(int d=0;d<grid->_ndimension;d++){
          std::cout<< range[d];
          if( d< grid->_ndimension-1 )
            std::cout<< " x ";
        }
        std::cout << std::endl;
      }

      int myrank = grid->ThisRank();
      int iorank = grid->RankFromProcessorCoor(ioproc);

      // Take into account the block size of parallel file systems: want about
      // 4-16MB chunks.
      // Ideally one reader/writer per xy plane and read these contiguously
      // with comms from nominated I/O nodes.
      std::ofstream fout;
      if ( IOnode ) fout.open(file,std::ios::binary|std::ios::in|std::ios::out);

      //////////////////////////////////////////////////////////
      // Find the location of each site and send to primary node.
      // Take loop order from Chroma; defines loop order now that the NERSC doc is
      // no longer available (how short sighted is that?).
      //////////////////////////////////////////////////////////
      uint32_t csum=0;
      fobj fileObj;
      sobj siteObj;

      // need to implement these loops in an Nd independent way with a lexico conversion
      for(int tlex=0;tlex<slice_vol;tlex++){

        std::vector<int> tsite(nd); // temporary mixed up site
        std::vector<int> gsite(nd);
        std::vector<int> lsite(nd);
        std::vector<int> iosite(nd);

        grid->CoorFromIndex(tsite,tlex,range);

        for(int d=0;d<nd;d++){
          lsite[d] = tsite[d]%grid->_ldimensions[d];  // local site
          gsite[d] = tsite[d]+start[d];               // global site
        }

        /////////////////////////
        // Get the rank of the owner of the data
        /////////////////////////
        int rank, o_idx,i_idx, g_idx;
        grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite);
        grid->GlobalCoorToGlobalIndex(gsite,g_idx);

        ////////////////////////////////
        // iorank writes to the seek
        ////////////////////////////////
        if (myrank == iorank) {

          if ( rank != myrank ) {
            grid->RecvFrom((void *)&siteObj,rank,sizeof(siteObj));
          } else {
            peekLocalSite(siteObj,Umu,lsite);
          }

          munge(siteObj,fileObj,csum);

          if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj));
          if(ieee32)    htole32_v((void *)&fileObj,sizeof(fileObj));
          if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj));
          if(ieee64)    htole64_v((void *)&fileObj,sizeof(fileObj));

          fout.seekp(offset+g_idx*sizeof(fileObj));
          fout.write((char *)&fileObj,sizeof(fileObj));

        } else {
          if ( myrank == rank ) {
            peekLocalSite(siteObj,Umu,lsite);
            grid->SendTo((void *)&siteObj,iorank,sizeof(siteObj));
          }
        }
        grid->Barrier(); // necessary? or every 16 packets to rate throttle??
      }

      grid->GlobalSum(csum);

      return csum;
    }

  };

}
#endif