diff --git a/TODO b/TODO index 698ff4ef..97728f95 100644 --- a/TODO +++ b/TODO @@ -1,6 +1,7 @@ TODO: => Clean up HMC +- Reunitarise - Link smearing/boundary conds; Policy class based implementation * Support different boundary conditions (finite temp, chem. potential ... ) * Support different fermion representations? diff --git a/lib/Grid.h b/lib/Grid.h index 052916f2..7ce85e77 100644 --- a/lib/Grid.h +++ b/lib/Grid.h @@ -45,6 +45,7 @@ #include #include #include +#include #include #include diff --git a/lib/Init.cc b/lib/Init.cc index 2eeee6b4..6457b4b2 100644 --- a/lib/Init.cc +++ b/lib/Init.cc @@ -177,7 +177,7 @@ void Grid_init(int *argc,char ***argv) Grid_quiesce_nodes(); } if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-opt") ){ - WilsonFermionStatic::HandOptDslash=1; + QCD::WilsonFermionStatic::HandOptDslash=1; } if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){ LebesgueOrder::UseLebesgueOrder=1; diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index 61e19993..2ebee872 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -87,6 +87,14 @@ class CartesianCommunicator { void *recv, int recv_from_rank, int bytes); + + void RecvFrom(void *recv, + int recv_from_rank, + int bytes); + void SendTo(void *xmit, + int xmit_to_rank, + int bytes); + void SendToRecvFromBegin(std::vector &list, void *xmit, int xmit_to_rank, diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index 5dd34705..764ab669 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -81,13 +81,30 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit, SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); SendToRecvFromComplete(reqs); } +void CartesianCommunicator::RecvFrom(void *recv, + int from, + int bytes) +{ + MPI_Status stat; + int ierr=MPI_Recv(recv, bytes, MPI_CHAR,from,from,communicator,&stat); + assert(ierr==0); +} +void CartesianCommunicator::SendTo(void *xmit, + int dest, + int bytes) +{ + int rank = _processor; // used for tag; must know who it comes from + int ierr = MPI_Send(xmit, bytes, MPI_CHAR,dest,_processor,communicator); + assert(ierr==0); +} + // Basic Halo comms primitive - void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, - void *xmit, - int dest, - void *recv, - int from, - int bytes) +void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, + void *xmit, + int dest, + void *recv, + int from, + int bytes) { MPI_Request xrq; MPI_Request rrq; @@ -100,7 +117,6 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit, list.push_back(xrq); list.push_back(rrq); - } void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) { diff --git a/lib/communicator/Communicator_none.cc b/lib/communicator/Communicator_none.cc index d7eb9453..bf0e2dd6 100644 --- a/lib/communicator/Communicator_none.cc +++ b/lib/communicator/Communicator_none.cc @@ -22,6 +22,20 @@ void CartesianCommunicator::GlobalSum(double &){} void CartesianCommunicator::GlobalSum(uint32_t &){} void CartesianCommunicator::GlobalSumVector(double *,int N){} +void CartesianCommunicator::RecvFrom(void *recv, + int recv_from_rank, + int bytes) +{ + assert(0); +} +void CartesianCommunicator::SendTo(void *xmit, + int xmit_to_rank, + int bytes) +{ + assert(0); +} + + // Basic Halo comms primitive -- should never call in single node void CartesianCommunicator::SendToRecvFrom(void *xmit, int dest, diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h new file mode 100644 index 00000000..18f460c2 --- /dev/null +++ b/lib/parallelIO/BinaryIO.h @@ -0,0 +1,513 @@ +#ifndef GRID_BINARY_IO_H +#define GRID_BINARY_IO_H + +#ifdef HAVE_ENDIAN_H +#include +#endif + + +#include + +// 64bit endian swap is a portability pain +#ifndef __has_builtin // Optional of course. +#define __has_builtin(x) 0 // Compatibility with non-clang compilers. +#endif + +#if HAVE_DECL_BE64TOH +#undef Grid_ntohll +#define Grid_ntohll be64toh +#endif + +#if HAVE_DECL_NTOHLL +#undef Grid_ntohll +#define Grid_ntohll ntohll +#endif + +#ifndef Grid_ntohll + +#if BYTE_ORDER == BIG_ENDIAN + +#define Grid_ntohll(A) (A) + +#else + +#if __has_builtin(__builtin_bswap64) +#define Grid_ntohll(A) __builtin_bswap64(A) +#else +#error +#endif + +#endif + +#endif + +namespace Grid { + + // A little helper + inline void removeWhitespace(std::string &key) + { + key.erase(std::remove_if(key.begin(), key.end(), ::isspace),key.end()); + } + +class BinaryIO { + + public: + + + // Network is big endian + static inline void htobe32_v(void *file_object,uint32_t bytes){ be32toh_v(file_object,bytes);} + static inline void htobe64_v(void *file_object,uint32_t bytes){ be64toh_v(file_object,bytes);} + static inline void htole32_v(void *file_object,uint32_t bytes){ le32toh_v(file_object,bytes);} + static inline void htole64_v(void *file_object,uint32_t bytes){ le64toh_v(file_object,bytes);} + + static inline void be32toh_v(void *file_object,uint32_t bytes) + { + uint32_t * f = (uint32_t *)file_object; + for(int i=0;i*sizeof(uint32_t)>8) | ((f&0xFF000000UL)>>24) ; + fp[i] = ntohl(f); + } + } + + // BE is same as network + static inline void be64toh_v(void *file_object,uint32_t bytes) + { + uint64_t * f = (uint64_t *)file_object; + for(int i=0;i*sizeof(uint64_t)>8) | ((f&0xFF000000UL)>>24) ; + g = g << 32; + f = f >> 32; + g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; + fp[i] = Grid_ntohll(g); + } + } + + template static inline void Uint32Checksum(Lattice &lat,munger munge,uint32_t &csum) + { + typedef typename vobj::scalar_object sobj; + GridBase *grid = lat._grid ; + + sobj siteObj; + fobj fileObj; + + csum = 0; + std::vector lcoor; + for(int l=0;llSites();l++){ + grid->CoorFromIndex(lcoor,l,grid->_ldimensions); + peekLocalSite(siteObj,lat,lcoor); + munge(siteObj,fileObj,csum); + } + grid->GlobalSum(csum); + } + + static inline void Uint32Checksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum) + { + for(int i=0;i*sizeof(uint32_t) + static inline uint32_t readObjectSerial(Lattice &Umu,std::string file,munger munge,int offset,const std::string &format) + { + typedef typename vobj::scalar_object sobj; + + GridBase *grid = Umu._grid; + + std::cout<< GridLogMessage<< "Serial read I/O "<< file<< std::endl; + + int ieee32big = (format == std::string("IEEE32BIG")); + int ieee32 = (format == std::string("IEEE32")); + int ieee64big = (format == std::string("IEEE64BIG")); + int ieee64 = (format == std::string("IEEE64")); + + // Find the location of each site and send to primary node + // Take loop order from Chroma; defines loop order now that NERSC doc no longer + // available (how short sighted is that?) + std::ifstream fin(file,std::ios::binary|std::ios::in); + fin.seekg(offset); + + Umu = zero; + uint32_t csum=0; + fobj file_object; + sobj munged; + + for(int t=0;t_fdimensions[3];t++){ + for(int z=0;z_fdimensions[2];z++){ + for(int y=0;y_fdimensions[1];y++){ + for(int x=0;x_fdimensions[0];x++){ + + std::vector site({x,y,z,t}); + + if ( grid->IsBoss() ) { + fin.read((char *)&file_object,sizeof(file_object)); + + if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object)); + if(ieee32) le32toh_v((void *)&file_object,sizeof(file_object)); + if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object)); + if(ieee64) le64toh_v((void *)&file_object,sizeof(file_object)); + + munge(file_object,munged,csum); + } + // The boss who read the file has their value poked + pokeSite(munged,Umu,site); + }}}} + return csum; + } + + template + static inline uint32_t writeObjectSerial(Lattice &Umu,std::string file,munger munge,int offset,const std::string & format) + { + typedef typename vobj::scalar_object sobj; + + GridBase *grid = Umu._grid; + + int ieee32big = (format == std::string("IEEE32BIG")); + int ieee32 = (format == std::string("IEEE32")); + int ieee64big = (format == std::string("IEEE64BIG")); + int ieee64 = (format == std::string("IEEE64")); + + ////////////////////////////////////////////////// + // Serialise through node zero + ////////////////////////////////////////////////// + std::cout<< GridLogMessage<< "Serial write I/O "<< file<IsBoss() ) { + fout.open(file,std::ios::binary|std::ios::out|std::ios::in); + fout.seekp(offset); + } + + uint32_t csum=0; + fobj file_object; + sobj unmunged; + for(int t=0;t_fdimensions[3];t++){ + for(int z=0;z_fdimensions[2];z++){ + for(int y=0;y_fdimensions[1];y++){ + for(int x=0;x_fdimensions[0];x++){ + + std::vector site({x,y,z,t}); + // peek & write + peekSite(unmunged,Umu,site); + + munge(unmunged,file_object,csum); + + + if ( grid->IsBoss() ) { + + if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); + if(ieee32) htole32_v((void *)&file_object,sizeof(file_object)); + if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); + if(ieee64) htole64_v((void *)&file_object,sizeof(file_object)); + + fout.write((char *)&file_object,sizeof(file_object)); + } + }}}} + + return csum; + } + + template + static inline uint32_t readObjectParallel(Lattice &Umu,std::string file,munger munge,int offset,const std::string &format) + { + typedef typename vobj::scalar_object sobj; + + GridBase *grid = Umu._grid; + + int ieee32big = (format == std::string("IEEE32BIG")); + int ieee32 = (format == std::string("IEEE32")); + int ieee64big = (format == std::string("IEEE64BIG")); + int ieee64 = (format == std::string("IEEE64")); + + + // Take into account block size of parallel file systems want about + // 4-16MB chunks. + // Ideally one reader/writer per xy plane and read these contiguously + // with comms from nominated I/O nodes. + std::ifstream fin; + + int nd = grid->_ndimension; + std::vector parallel(nd,1); + std::vector ioproc (nd); + std::vector start(nd); + std::vector range(nd); + + for(int d=0;dCheckerBoarded(d) == 0); + } + + uint64_t slice_vol = 1; + + int IOnode = 1; + for(int d=0;d_ndimension;d++) { + + if ( d==0 ) parallel[d] = 0; + if (parallel[d]) { + range[d] = grid->_ldimensions[d]; + start[d] = grid->_processor_coor[d]*range[d]; + ioproc[d]= grid->_processor_coor[d]; + } else { + range[d] = grid->_gdimensions[d]; + start[d] = 0; + ioproc[d]= 0; + + if ( grid->_processor_coor[d] != 0 ) IOnode = 0; + } + slice_vol = slice_vol * range[d]; + } + + { + uint32_t tmp = IOnode; + grid->GlobalSum(tmp); + std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <_ndimension;d++){ + std::cout<< range[d]; + if( d< grid->_ndimension-1 ) + std::cout<< " x "; + } + std::cout << std::endl; + } + + int myrank = grid->ThisRank(); + int iorank = grid->RankFromProcessorCoor(ioproc); + + if ( IOnode ) { + fin.open(file,std::ios::binary|std::ios::in); + } + + ////////////////////////////////////////////////////////// + // Find the location of each site and send to primary node + // Take loop order from Chroma; defines loop order now that NERSC doc no longer + // available (how short sighted is that?) + ////////////////////////////////////////////////////////// + Umu = zero; + uint32_t csum=0; + fobj fileObj; + sobj siteObj; + + // need to implement these loops in Nd independent way with a lexico conversion + for(int tlex=0;tlex tsite(nd); // temporary mixed up site + std::vector gsite(nd); + std::vector lsite(nd); + std::vector iosite(nd); + + grid->CoorFromIndex(tsite,tlex,range); + + for(int d=0;d_ldimensions[d]; // local site + gsite[d] = tsite[d]+start[d]; // global site + } + + ///////////////////////// + // Get the rank of owner of data + ///////////////////////// + int rank, o_idx,i_idx, g_idx; + grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite); + grid->GlobalCoorToGlobalIndex(gsite,g_idx); + + //////////////////////////////// + // iorank reads from the seek + //////////////////////////////// + if (myrank == iorank) { + + fin.seekg(offset+g_idx*sizeof(fileObj)); + fin.read((char *)&fileObj,sizeof(fileObj)); + + if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); + if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj)); + + munge(fileObj,siteObj,csum); + + if ( rank != myrank ) { + grid->SendTo((void *)&siteObj,rank,sizeof(siteObj)); + } else { + pokeLocalSite(siteObj,Umu,lsite); + } + + } else { + if ( myrank == rank ) { + grid->RecvFrom((void *)&siteObj,iorank,sizeof(siteObj)); + pokeLocalSite(siteObj,Umu,lsite); + } + } + grid->Barrier(); // necessary? + } + + grid->GlobalSum(csum); + + return csum; + } + + ////////////////////////////////////////////////////////// + // Parallel writer + ////////////////////////////////////////////////////////// + template + static inline uint32_t writeObjectParallel(Lattice &Umu,std::string file,munger munge,int offset,const std::string & format) + { + typedef typename vobj::scalar_object sobj; + GridBase *grid = Umu._grid; + + int ieee32big = (format == std::string("IEEE32BIG")); + int ieee32 = (format == std::string("IEEE32")); + int ieee64big = (format == std::string("IEEE64BIG")); + int ieee64 = (format == std::string("IEEE64")); + + int nd = grid->_ndimension; + for(int d=0;dCheckerBoarded(d) == 0); + } + + std::vector parallel(nd,1); + std::vector ioproc (nd); + std::vector start(nd); + std::vector range(nd); + + uint64_t slice_vol = 1; + + int IOnode = 1; + + for(int d=0;d_ndimension;d++) { + + if ( d==0 ) parallel[d] = 0; + + if (parallel[d]) { + range[d] = grid->_ldimensions[d]; + start[d] = grid->_processor_coor[d]*range[d]; + ioproc[d]= grid->_processor_coor[d]; + } else { + range[d] = grid->_gdimensions[d]; + start[d] = 0; + ioproc[d]= 0; + + if ( grid->_processor_coor[d] != 0 ) IOnode = 0; + } + + slice_vol = slice_vol * range[d]; + } + + { + uint32_t tmp = IOnode; + grid->GlobalSum(tmp); + std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <_ndimension;d++){ + std::cout<< range[d]; + if( d< grid->_ndimension-1 ) + std::cout<< " x "; + } + std::cout << std::endl; + } + + int myrank = grid->ThisRank(); + int iorank = grid->RankFromProcessorCoor(ioproc); + + // Take into account block size of parallel file systems want about + // 4-16MB chunks. + // Ideally one reader/writer per xy plane and read these contiguously + // with comms from nominated I/O nodes. + std::ofstream fout; + if ( IOnode ) fout.open(file,std::ios::binary|std::ios::in|std::ios::out); + + ////////////////////////////////////////////////////////// + // Find the location of each site and send to primary node + // Take loop order from Chroma; defines loop order now that NERSC doc no longer + // available (how short sighted is that?) + ////////////////////////////////////////////////////////// + + uint32_t csum=0; + fobj fileObj; + sobj siteObj; + + + // need to implement these loops in Nd independent way with a lexico conversion + for(int tlex=0;tlex tsite(nd); // temporary mixed up site + std::vector gsite(nd); + std::vector lsite(nd); + std::vector iosite(nd); + + grid->CoorFromIndex(tsite,tlex,range); + + for(int d=0;d_ldimensions[d]; // local site + gsite[d] = tsite[d]+start[d]; // global site + } + + + ///////////////////////// + // Get the rank of owner of data + ///////////////////////// + int rank, o_idx,i_idx, g_idx; + grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite); + grid->GlobalCoorToGlobalIndex(gsite,g_idx); + + //////////////////////////////// + // iorank writes from the seek + //////////////////////////////// + if (myrank == iorank) { + + if ( rank != myrank ) { + grid->RecvFrom((void *)&siteObj,rank,sizeof(siteObj)); + } else { + peekLocalSite(siteObj,Umu,lsite); + } + + munge(siteObj,fileObj,csum); + + if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj)); + if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj)); + + fout.seekp(offset+g_idx*sizeof(fileObj)); + fout.write((char *)&fileObj,sizeof(fileObj)); + + } else { + if ( myrank == rank ) { + peekLocalSite(siteObj,Umu,lsite); + grid->SendTo((void *)&siteObj,iorank,sizeof(siteObj)); + } + } + grid->Barrier(); // necessary// or every 16 packets to rate throttle?? + } + + grid->GlobalSum(csum); + + return csum; + } + +}; + +} + +#endif diff --git a/lib/parallelIO/NerscIO.h b/lib/parallelIO/NerscIO.h index 5339e5c8..ac17ede2 100644 --- a/lib/parallelIO/NerscIO.h +++ b/lib/parallelIO/NerscIO.h @@ -7,57 +7,23 @@ #include #include -#ifdef HAVE_ENDIAN_H -#include -#endif - - -#include - -// 64bit endian swap is a portability pain -#ifndef __has_builtin // Optional of course. -#define __has_builtin(x) 0 // Compatibility with non-clang compilers. -#endif - -#if HAVE_DECL_BE64TOH -#undef Grid_ntohll -#define Grid_ntohll be64toh -#endif - -#if HAVE_DECL_NTOHLL -#undef Grid_ntohll -#define Grid_ntohll ntohll -#endif - -#ifndef Grid_ntohll - -#if BYTE_ORDER == BIG_ENDIAN - -#define Grid_ntohll(A) (A) - -#else - -#if __has_builtin(__builtin_bswap64) -#define Grid_ntohll(A) __builtin_bswap64(A) -#else -#error -#endif - -#endif - -#endif +#include +#include +#include namespace Grid { +namespace QCD { - using namespace QCD; +using namespace Grid; //////////////////////////////////////////////////////////////////////////////// // Some data types for intermediate storage //////////////////////////////////////////////////////////////////////////////// template using iLorentzColour2x3 = iVector, 2>, 4 >; -typedef iLorentzColour2x3 LorentzColour2x3; -typedef iLorentzColour2x3 LorentzColour2x3F; -typedef iLorentzColour2x3 LorentzColour2x3D; + + typedef iLorentzColour2x3 LorentzColour2x3; + typedef iLorentzColour2x3 LorentzColour2x3F; + typedef iLorentzColour2x3 LorentzColour2x3D; //////////////////////////////////////////////////////////////////////////////// // header specification/interpretation @@ -86,50 +52,173 @@ class NerscField { }; +////////////////////////////////////////////////////////////////////// +// Bit and Physical Checksumming and QA of data +////////////////////////////////////////////////////////////////////// + +inline void NerscGrid(GridBase *grid,NerscField &header) +{ + assert(grid->_ndimension==4); + for(int d=0;d<4;d++) { + header.dimension[d] = grid->_fdimensions[d]; + } + for(int d=0;d<4;d++) { + header.boundary[d] = std::string("PERIODIC"); + } +} +template +inline void NerscStatistics(GaugeField & data,NerscField &header) +{ + header.link_trace=Grid::QCD::WilsonLoops::linkTrace(data); + header.plaquette =Grid::QCD::WilsonLoops::avgPlaquette(data); +} + +inline void NerscMachineCharacteristics(NerscField &header) +{ + // Who + struct passwd *pw = getpwuid (getuid()); + if (pw) header.creator = std::string(pw->pw_name); + + // When + std::time_t t = std::time(nullptr); + std::tm tm = *std::localtime(&t); + std::ostringstream oss; + oss << std::put_time(&tm, "%c %Z"); + header.creation_date = oss.str(); + header.archive_date = header.creation_date; + + // What + struct utsname name; uname(&name); + header.creator_hardware = std::string(name.nodename)+"-"; + header.creator_hardware+= std::string(name.machine)+"-"; + header.creator_hardware+= std::string(name.sysname)+"-"; + header.creator_hardware+= std::string(name.release); + +} +////////////////////////////////////////////////////////////////////// +// Utilities ; these are QCD aware +////////////////////////////////////////////////////////////////////// + inline void NerscChecksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum) + { + BinaryIO::Uint32Checksum(buf,buf_size_bytes,csum); + } + inline void reconstruct3(LorentzColourMatrix & cm) + { + const int x=0; + const int y=1; + const int z=2; + for(int mu=0;mu<4;mu++){ + cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy + cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz + cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx + } + } + + template + struct NerscSimpleMunger{ + + void operator() (fobj &in,sobj &out,uint32_t &csum){ + + for(int mu=0;mu<4;mu++){ + for(int i=0;i<3;i++){ + for(int j=0;j<3;j++){ + out(mu)()(i,j) = in(mu)()(i,j); + }}} + NerscChecksum((uint32_t *)&in,sizeof(in),csum); + }; + }; + + template + struct NerscSimpleUnmunger{ + void operator() (sobj &in,fobj &out,uint32_t &csum){ + for(int mu=0;mu + struct Nersc3x2munger{ + void operator() (fobj &in,sobj &out,uint32_t &csum){ + + NerscChecksum((uint32_t *)&in,sizeof(in),csum); + + for(int mu=0;mu<4;mu++){ + for(int i=0;i<2;i++){ + for(int j=0;j<3;j++){ + out(mu)()(i,j) = in(mu)(i)(j); + }} + } + reconstruct3(out); + } + }; + + template + struct Nersc3x2unmunger{ + + void operator() (sobj &in,fobj &out,uint32_t &csum){ + + + for(int mu=0;mu<4;mu++){ + for(int i=0;i<2;i++){ + for(int j=0;j<3;j++){ + out(mu)(i)(j) = in(mu)()(i,j); + }} + } + + NerscChecksum((uint32_t *)&out,sizeof(out),csum); + + } + }; + + //////////////////////////////////////////////////////////////////////////////// // Write and read from fstream; comput header offset for payload //////////////////////////////////////////////////////////////////////////////// -inline unsigned int writeNerscHeader(NerscField &field,std::string file) -{ - std::ofstream fout(file,std::ios::out); +class NerscIO : public BinaryIO { + public: + + static inline unsigned int writeHeader(NerscField &field,std::string file) + { + std::ofstream fout(file,std::ios::out); - fout.seekp(0,std::ios::beg); - fout << "BEGIN_HEADER" << std::endl; - fout << "HDR_VERSION = " << field.hdr_version << std::endl; - fout << "DATATYPE = " << field.data_type << std::endl; - fout << "STORAGE_FORMAT = " << field.storage_format << std::endl; + fout.seekp(0,std::ios::beg); + fout << "BEGIN_HEADER" << std::endl; + fout << "HDR_VERSION = " << field.hdr_version << std::endl; + fout << "DATATYPE = " << field.data_type << std::endl; + fout << "STORAGE_FORMAT = " << field.storage_format << std::endl; - for(int i=0;i<4;i++){ - fout << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; - } - // just to keep the space and write it later - fout << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; - fout << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl; - for(int i=0;i<4;i++){ - fout << "BOUNDARY_"< header; @@ -163,7 +252,6 @@ inline int readNerscHeader(std::string file,GridBase *grid, NerscField &field) ////////////////////////////////////////////////// // chomp the values ////////////////////////////////////////////////// - field.hdr_version = header["HDR_VERSION"]; field.data_type = header["DATATYPE"]; field.storage_format = header["STORAGE_FORMAT"]; @@ -200,314 +288,22 @@ inline int readNerscHeader(std::string file,GridBase *grid, NerscField &field) } -////////////////////////////////////////////////////////////////////// -// Utilities -////////////////////////////////////////////////////////////////////// -inline void reconstruct3(LorentzColourMatrix & cm) -{ - const int x=0; - const int y=1; - const int z=2; - for(int mu=0;mu<4;mu++){ - cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy - cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz - cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx - } -} - void inline be32toh_v(void *file_object,uint32_t bytes) - { - uint32_t * f = (uint32_t *)file_object; - for(int i=0;i*sizeof(uint32_t)>8) | ((f&0xFF000000UL)>>24) ; - fp[i] = ntohl(f); - } - } - void inline be64toh_v(void *file_object,uint32_t bytes) - { - uint64_t * f = (uint64_t *)file_object; - for(int i=0;i*sizeof(uint64_t)>8) | ((f&0xFF000000UL)>>24) ; - g = g << 32; - f = f >> 32; - g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; - fp[i] = ntohl(g); - } - } - -inline void NerscChecksum(uint32_t *buf,uint32_t buf_size,uint32_t &csum) -{ - for(int i=0;i*sizeof(uint32_t) - struct NerscSimpleMunger{ - void operator() (fobj &in,sobj &out,uint32_t &csum){ - - for(int mu=0;mu<4;mu++){ - for(int i=0;i<3;i++){ - for(int j=0;j<3;j++){ - out(mu)()(i,j) = in(mu)()(i,j); - }}} - - NerscChecksum((uint32_t *)&in,sizeof(in),csum); - }; - }; - template - struct NerscSimpleUnmunger{ - void operator() (sobj &in,fobj &out,uint32_t &csum){ - for(int mu=0;mu<4;mu++){ - for(int i=0;i<3;i++){ - for(int j=0;j<3;j++){ - out(mu)()(i,j) = in(mu)()(i,j); - }}} - NerscChecksum((uint32_t *)&out,sizeof(out),csum); - }; - }; - - template - struct Nersc3x2munger{ - void operator() (fobj &in,sobj &out,uint32_t &csum){ - - NerscChecksum((uint32_t *)&in,sizeof(in),csum); - - for(int mu=0;mu<4;mu++){ - for(int i=0;i<2;i++){ - for(int j=0;j<3;j++){ - out(mu)()(i,j) = in(mu)(i)(j); - }} - } - reconstruct3(out); - } - }; - - template - struct Nersc3x2unmunger{ - - void operator() (sobj &in,fobj &out,uint32_t &csum){ - - NerscChecksum((uint32_t *)&out,sizeof(out),csum); - - for(int mu=0;mu<4;mu++){ - for(int i=0;i<2;i++){ - for(int j=0;j<3;j++){ - out(mu)(i)(j) = in(mu)()(i,j); - }} - } - } -}; - -//////////////////////////////////////////////////////////////////////////// -// Template wizardry to map types to strings for NERSC in an extensible way -//////////////////////////////////////////////////////////////////////////// - template struct NerscDataType { - static void DataType (std::string &str) { str = std::string("4D_BINARY_UNKNOWN"); }; - static void FloatingPoint(std::string &str) { str = std::string("IEEE64BIG"); }; - }; - - template<> struct NerscDataType > { - static void DataType (std::string &str) { str = std::string("4D_SU3_GAUGE_3X3"); }; - static void FloatingPoint(std::string &str) { str = std::string("IEEE64BIG");}; - }; - - template<> struct NerscDataType > { - static void DataType (std::string &str) { str = std::string("4D_SU3_GAUGE_3X3"); }; - static void FloatingPoint(std::string &str) { str = std::string("IEEE32BIG");}; - }; - -////////////////////////////////////////////////////////////////////// -// Bit and Physical Checksumming and QA of data -////////////////////////////////////////////////////////////////////// -/* -template inline uint32_t NerscChecksum(Lattice & data) -{ - uint32_t sum; - for(int ss=0;ssOsites();ss++){ - uint32_t *iptr = (uint32_t *)& data._odata[0] ; - for(int i=0;iglobalSum(sum); - return sum; -} -*/ -template inline void NerscPhysicalCharacteristics(Lattice & data,NerscField &header) -{ - header.data_type = NerscDataType::DataType; - header.floating_point = NerscDataType::FloatingPoint; - return; -} - - template<> inline void NerscPhysicalCharacteristics(LatticeGaugeField & data,NerscField &header) -{ - NerscDataType::DataType(header.data_type); - NerscDataType::FloatingPoint(header.floating_point); - header.link_trace=1.0; - header.plaquette =1.0; -} - -template inline void NerscStatisics(Lattice & data,NerscField &header) -{ - assert(data._grid->_ndimension==4); - - for(int d=0;d<4;d++) - header.dimension[d] = data._grid->_fdimensions[d]; - - // compute checksum and any physical properties contained for this type - // header.checksum = NerscChecksum(data); - - NerscPhysicalCharacteristics(data,header); -} - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Now the meat: the object readers ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -template -inline void readNerscObject(Lattice &Umu,std::string file,munger munge,int offset,std::string &format) + +template +static inline void readConfiguration(Lattice > &Umu,NerscField& header,std::string file) { + typedef Lattice > GaugeField; + typedef Lattice > GaugeMatrix; + GridBase *grid = Umu._grid; + int offset = readHeader(file,Umu._grid,header); - int ieee32big = (format == std::string("IEEE32BIG")); - int ieee32 = (format == std::string("IEEE32")); - int ieee64big = (format == std::string("IEEE64BIG")); - int ieee64 = (format == std::string("IEEE64")); - - // Find the location of each site and send to primary node - // for(int site=0; site < Layout::vol(); ++site){ - // multi1d coord = crtesn(site, Layout::lattSize()); - // for(int dd=0; dd_fdimensions[3];t++){ - for(int z=0;z_fdimensions[2];z++){ - for(int y=0;y_fdimensions[1];y++){ - for(int x=0;x_fdimensions[0];x++){ - - std::vector site({x,y,z,t}); - - if ( grid->IsBoss() ) { - fin.read((char *)&file_object,sizeof(file_object)); - - if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object)); - if(ieee32) le32toh_v((void *)&file_object,sizeof(file_object)); - if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object)); - if(ieee64) le64toh_v((void *)&file_object,sizeof(file_object)); - - munge(file_object,munged,csum); - } - // The boss who read the file has their value poked - pokeSite(munged,Umu,site); - }}}} - } -} - -template -inline void writeNerscObject(Lattice &Umu,std::string file,munger munge,int offset, - int sequence,double lt,double pl) -{ - GridBase *grid = Umu._grid; - NerscField header; - - ////////////////////////////////////////////////// - // First write the header; this is in wrong place - ////////////////////////////////////////////////// - assert(grid->_ndimension == 4); - for(int d=0;d<4;d++){ - header.dimension[d]=grid->_fdimensions[d]; - header.boundary [d]=std::string("PERIODIC");; - } - header.hdr_version=std::string("WHATDAHECK"); - // header.storage_format=storage_format::string; // use template specialisation - // header.data_type=data_type::string; - header.storage_format=std::string("debug"); - header.data_type =std::string("debug"); - - //FIXME; use template specialisation to fill these out - header.link_trace =lt; - header.plaquette =pl; - header.checksum =0; - - // - header.sequence_number =sequence; - header.ensemble_id =std::string("UKQCD"); - header.ensemble_label =std::string("UKQCD"); - header.creator =std::string("Tadahito"); - header.creator_hardware=std::string("BlueGene/Q"); - header.creation_date =std::string("AnnoDomini"); - header.archive_date =std::string("AnnoDomini"); - header.floating_point =std::string("IEEE64BIG"); - // header.data_start=; - // unsigned int checksum; - - ////////////////////////////////////////////////// - // Now write the body - ////////////////////////////////////////////////// - { - std::ofstream fout(file,std::ios::binary|std::ios::out); - fout.seekp(offset); - - Umu = zero; - uint32_t csum=0; - fobj file_object; - sobj unmunged; - for(int t=0;t_fdimensions[3];t++){ - for(int z=0;z_fdimensions[2];z++){ - for(int y=0;y_fdimensions[1];y++){ - for(int x=0;x_fdimensions[0];x++){ - std::vector site({x,y,z,t}); - peekSite(unmunged,Umu,site); - munge(unmunged,file_object,csum); - // broadcast & insert - fout.write((char *)&file_object,sizeof(file_object)); - }}}} - } -} - - - -inline void readNerscConfiguration(LatticeGaugeField &Umu,NerscField& header,std::string file) -{ - GridBase *grid = Umu._grid; - - int offset = readNerscHeader(file,Umu._grid,header); + NerscField clone(header); std::string format(header.floating_point); @@ -516,48 +312,106 @@ inline void readNerscConfiguration(LatticeGaugeField &Umu,NerscField& header,std int ieee64big = (format == std::string("IEEE64BIG")); int ieee64 = (format == std::string("IEEE64")); + uint32_t csum; // depending on datatype, set up munger; // munger is a function of if ( header.data_type == std::string("4D_SU3_GAUGE") ) { if ( ieee32 || ieee32big ) { - readNerscObject - (Umu,file, - Nersc3x2munger(), - offset,format); + // csum=BinaryIO::readObjectSerial, LorentzColour2x3F> + csum=BinaryIO::readObjectParallel, LorentzColour2x3F> + (Umu,file,Nersc3x2munger(), offset,format); } if ( ieee64 || ieee64big ) { - readNerscObject - (Umu,file, - Nersc3x2munger(), - offset,format); + // csum=BinaryIO::readObjectSerial, LorentzColour2x3D> + csum=BinaryIO::readObjectParallel, LorentzColour2x3D> + (Umu,file,Nersc3x2munger(),offset,format); } } else if ( header.data_type == std::string("4D_SU3_GAUGE_3X3") ) { if ( ieee32 || ieee32big ) { - readNerscObject + // csum=BinaryIO::readObjectSerial,LorentzColourMatrixF> + csum=BinaryIO::readObjectParallel,LorentzColourMatrixF> (Umu,file,NerscSimpleMunger(),offset,format); } if ( ieee64 || ieee64big ) { - readNerscObject + // csum=BinaryIO::readObjectSerial,LorentzColourMatrixD> + csum=BinaryIO::readObjectParallel,LorentzColourMatrixD> (Umu,file,NerscSimpleMunger(),offset,format); } } else { assert(0); } + NerscStatistics(Umu,clone); + + assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 ); + assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 ); + assert(csum == header.checksum ); + + std::cout< -inline void writeNerscConfiguration(Lattice &Umu,NerscField &header,std::string file) +template +static inline void writeConfiguration(Lattice > &Umu,std::string file, int two_row,int bits32) { - GridBase &grid = Umu._grid; + typedef Lattice > GaugeField; + typedef Lattice > GaugeMatrix; + typedef iLorentzColourMatrix vobj; + typedef typename vobj::scalar_object sobj; + + // Following should become arguments + NerscField header; + header.sequence_number = 1; + header.ensemble_id = "UKQCD"; + header.ensemble_label = "DWF"; + + typedef LorentzColourMatrixD fobj3D; + typedef LorentzColour2x3D fobj2D; + typedef LorentzColourMatrixF fobj3f; + typedef LorentzColour2x3F fobj2f; + + GridBase *grid = Umu._grid; + + NerscGrid(grid,header); + NerscStatistics(Umu,header); + NerscMachineCharacteristics(header); + + uint32_t csum; + int offset; - NerscStatisics(Umu,header); + if ( two_row ) { - int offset = writeNerscHeader(header,file); + header.floating_point = std::string("IEEE64BIG"); + header.data_type = std::string("4D_SU3_GAUGE"); + Nersc3x2unmunger munge; + BinaryIO::Uint32Checksum(Umu, munge,header.checksum); + offset = writeHeader(header,file); + csum=BinaryIO::writeObjectSerial(Umu,file,munge,offset,header.floating_point); - writeNerscObject(Umu,NerscSimpleMunger(),offset); -} + std::string file1 = file+"para"; + int offset1 = writeHeader(header,file1); + int csum1=BinaryIO::writeObjectParallel(Umu,file1,munge,offset,header.floating_point); + + std::cout << GridLogMessage << " TESTING PARALLEL WRITE offsets " << offset1 << " "<< offset << std::endl; + std::cout << GridLogMessage < munge; + BinaryIO::Uint32Checksum(Umu, munge,header.checksum); + offset = writeHeader(header,file); + csum=BinaryIO::writeObjectSerial(Umu,file,munge,offset,header.floating_point); + } + + std::cout< U(4,Umu._grid); + + LatticeComplex Tr(Umu._grid); Tr=zero; + for(int mu=0;mu(Umu,mu); + Tr = Tr+trace(U[mu]); + } + + TComplex Tp = sum(Tr); + Complex p = TensorRemove(Tp); + + double vol = Umu._grid->gSites(); + + return p.real()/vol/4.0/3.0; + }; ////////////////////////////////////////////////// // the sum over all staples on each site ////////////////////////////////////////////////// diff --git a/scripts/substitute b/scripts/substitute index 7607a115..d5a29c60 100755 --- a/scripts/substitute +++ b/scripts/substitute @@ -10,7 +10,7 @@ shift while (( "$#" )); do echo $1 -sed -e "s:${WAS}:${IS}:g" -i .bak $1 +sed -e "s@${WAS}@${IS}@g" -i .bak $1 shift diff --git a/tests/Test_GaugeAction.cc b/tests/Test_GaugeAction.cc index d779cd20..4533ffe6 100644 --- a/tests/Test_GaugeAction.cc +++ b/tests/Test_GaugeAction.cc @@ -50,7 +50,7 @@ int main (int argc, char ** argv) NerscField header; std::string file("./ckpoint_lat.4000"); - readNerscConfiguration(Umu,header,file); + NerscIO::readConfiguration(Umu,header,file); for(int mu=0;mu(Umu,mu); diff --git a/tests/Test_cayley_ldop_cr.cc b/tests/Test_cayley_ldop_cr.cc index 2414d6f1..83ec1126 100644 --- a/tests/Test_cayley_ldop_cr.cc +++ b/tests/Test_cayley_ldop_cr.cc @@ -42,7 +42,7 @@ int main (int argc, char ** argv) NerscField header; std::string file("./ckpoint_lat.400"); - readNerscConfiguration(Umu,header,file); + NerscIO::readConfiguration(Umu,header,file); // SU3::ColdConfiguration(RNG4,Umu); // SU3::TepidConfiguration(RNG4,Umu); diff --git a/tests/Test_dwf_hdcr.cc b/tests/Test_dwf_hdcr.cc index 45647936..94b8481a 100644 --- a/tests/Test_dwf_hdcr.cc +++ b/tests/Test_dwf_hdcr.cc @@ -388,7 +388,7 @@ int main (int argc, char ** argv) NerscField header; std::string file("./ckpoint_lat.4000"); - readNerscConfiguration(Umu,header,file); + NerscIO::readConfiguration(Umu,header,file); // SU3::ColdConfiguration(RNG4,Umu); // SU3::TepidConfiguration(RNG4,Umu); diff --git a/tests/Test_nersc_io.cc b/tests/Test_nersc_io.cc index 34821e97..36f17c76 100644 --- a/tests/Test_nersc_io.cc +++ b/tests/Test_nersc_io.cc @@ -28,7 +28,7 @@ int main (int argc, char ** argv) NerscField header; std::string file("./ckpoint_lat.4000"); - readNerscConfiguration(Umu,header,file); + NerscIO::readConfiguration(Umu,header,file); for(int mu=0;mu(Umu,mu); @@ -89,6 +89,13 @@ int main (int argc, char ** argv) TComplex TcP = sum(cPlaq); Complex ll= TensorRemove(TcP); std::cout<