mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-31 20:14:32 +00:00 
			
		
		
		
	Use a global-X x Local-Y chunksize for parallel binary I/O.
Gives O(32 x 8 x 18*8*8) chunk size on configuration I/O. At 150KB should be getting close to packet sizes and 4MB filesystem block sizes that are reasonably (!?) performant. We shall see once I move this off my laptop and over to BNL and time it.
This commit is contained in:
		| @@ -217,32 +217,34 @@ class BinaryIO { | |||||||
|     Umu = zero; |     Umu = zero; | ||||||
|     uint32_t csum=0; |     uint32_t csum=0; | ||||||
|     uint64_t bytes=0; |     uint64_t bytes=0; | ||||||
|     fobj file_object; |  | ||||||
|     sobj munged; |     int lx = grid->_fdimensions[0]; | ||||||
|      |     std::vector<fobj> file_object(lx); | ||||||
|  |     std::vector<sobj> munged(lx); | ||||||
|     for(int t=0;t<grid->_fdimensions[3];t++){ |     for(int t=0;t<grid->_fdimensions[3];t++){ | ||||||
|     for(int z=0;z<grid->_fdimensions[2];z++){ |     for(int z=0;z<grid->_fdimensions[2];z++){ | ||||||
|     for(int y=0;y<grid->_fdimensions[1];y++){ |     for(int y=0;y<grid->_fdimensions[1];y++){ | ||||||
|     for(int x=0;x<grid->_fdimensions[0];x++){ |     { | ||||||
|  |       bytes += sizeof(fobj)*lx; | ||||||
|       std::vector<int> site({x,y,z,t}); |  | ||||||
|  |  | ||||||
|       if (grid->IsBoss()) { |       if (grid->IsBoss()) { | ||||||
|         fin.read((char *)&file_object, sizeof(file_object));assert( fin.fail()==0); |         fin.read((char *)&file_object[0], sizeof(fobj)*lx); assert( fin.fail()==0); | ||||||
|         bytes += sizeof(file_object); | 	for(int x=0;x<lx;x++){ | ||||||
|         if (ieee32big) be32toh_v((void *)&file_object, sizeof(file_object)); | 	  if (ieee32big) be32toh_v((void *)&file_object[x], sizeof(fobj)); | ||||||
|         if (ieee32)    le32toh_v((void *)&file_object, sizeof(file_object)); | 	  if (ieee32)    le32toh_v((void *)&file_object[x], sizeof(fobj)); | ||||||
|         if (ieee64big) be64toh_v((void *)&file_object, sizeof(file_object)); | 	  if (ieee64big) be64toh_v((void *)&file_object[x], sizeof(fobj)); | ||||||
|         if (ieee64)    le64toh_v((void *)&file_object, sizeof(file_object)); | 	  if (ieee64)    le64toh_v((void *)&file_object[x], sizeof(fobj)); | ||||||
|  | 	  munge(file_object[x], munged[x], csum); | ||||||
|         munge(file_object, munged, csum); | 	} | ||||||
|  |       } | ||||||
|  |       for(int x=0;x<lx;x++){ | ||||||
|  | 	std::vector<int> site({x,y,z,t}); | ||||||
|  | 	// The boss who read the file has their value poked | ||||||
|  | 	pokeSite(munged[x],Umu,site); | ||||||
|       } |       } | ||||||
|       // The boss who read the file has their value poked |  | ||||||
|       pokeSite(munged,Umu,site); |  | ||||||
|     }}}} |     }}}} | ||||||
|     timer.Stop(); |     timer.Stop(); | ||||||
|     std::cout<<GridLogPerformance<<"readObjectSerial: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" " |     std::cout<<GridLogPerformance<<"readObjectSerial: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" " | ||||||
|        << (double)bytes/ (double)timer.useconds() <<" MB/s "  <<std::endl; | 	     << (double)bytes/ (double)timer.useconds() <<" MB/s "  <<std::endl; | ||||||
|  |  | ||||||
|     grid->Broadcast(0,(void *)&csum,sizeof(csum)); |     grid->Broadcast(0,(void *)&csum,sizeof(csum)); | ||||||
|     return csum; |     return csum; | ||||||
| @@ -274,31 +276,34 @@ class BinaryIO { | |||||||
|     } |     } | ||||||
|     uint64_t bytes=0; |     uint64_t bytes=0; | ||||||
|     uint32_t csum=0; |     uint32_t csum=0; | ||||||
|     fobj file_object; |     int lx = grid->_fdimensions[0]; | ||||||
|     sobj unmunged; |     std::vector<fobj> file_object(lx); | ||||||
|  |     std::vector<sobj> unmunged(lx); | ||||||
|     for(int t=0;t<grid->_fdimensions[3];t++){ |     for(int t=0;t<grid->_fdimensions[3];t++){ | ||||||
|     for(int z=0;z<grid->_fdimensions[2];z++){ |     for(int z=0;z<grid->_fdimensions[2];z++){ | ||||||
|     for(int y=0;y<grid->_fdimensions[1];y++){ |     for(int y=0;y<grid->_fdimensions[1];y++){ | ||||||
|     for(int x=0;x<grid->_fdimensions[0];x++){ |     { | ||||||
|  |  | ||||||
|       std::vector<int> site({x,y,z,t}); |       std::vector<int> site({0,y,z,t}); | ||||||
|       // peek & write |       // peek & write | ||||||
|       peekSite(unmunged,Umu,site); |       for(int x=0;x<lx;x++){ | ||||||
|  | 	site[0]=x; | ||||||
|       munge(unmunged,file_object,csum); | 	peekSite(unmunged[x],Umu,site); | ||||||
|  |       } | ||||||
|        |        | ||||||
|       if ( grid->IsBoss() ) { |       if ( grid->IsBoss() ) { | ||||||
| 	if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); | 	for(int x=0;x<lx;x++){ | ||||||
| 	if(ieee32)    htole32_v((void *)&file_object,sizeof(file_object)); | 	  munge(unmunged[x],file_object[x],csum); | ||||||
| 	if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); | 	  if(ieee32big) htobe32_v((void *)&file_object[x],sizeof(fobj)); | ||||||
| 	if(ieee64)    htole64_v((void *)&file_object,sizeof(file_object)); | 	  if(ieee32)    htole32_v((void *)&file_object[x],sizeof(fobj)); | ||||||
|  | 	  if(ieee64big) htobe64_v((void *)&file_object[x],sizeof(fobj)); | ||||||
| 	// NB could gather an xstrip as an optimisation. | 	  if(ieee64)    htole64_v((void *)&file_object[x],sizeof(fobj)); | ||||||
| 	fout.write((char *)&file_object,sizeof(file_object));assert( fout.fail()==0); | 	} | ||||||
| 	bytes+=sizeof(file_object); | 	fout.write((char *)&file_object[0],sizeof(fobj)*lx);assert( fout.fail()==0); | ||||||
|  | 	bytes+=sizeof(fobj)*lx; | ||||||
|       } |       } | ||||||
|     }}}} |     }}}} | ||||||
|  |  | ||||||
|     timer.Stop(); |     timer.Stop(); | ||||||
|     std::cout<<GridLogPerformance<<"writeObjectSerial: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" " |     std::cout<<GridLogPerformance<<"writeObjectSerial: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" " | ||||||
| 	     << (double)bytes/timer.useconds() <<" MB/s "  <<std::endl; | 	     << (double)bytes/timer.useconds() <<" MB/s "  <<std::endl; | ||||||
| @@ -370,7 +375,7 @@ class BinaryIO { | |||||||
|  |  | ||||||
|     timer.Stop(); |     timer.Stop(); | ||||||
|  |  | ||||||
|     std::cout << GridLogMessage   << "RNG file checksum " << std::hex << csum << std::dec << std::endl; |     std::cout << GridLogMessage << "RNG file checksum " << std::hex << csum << std::dec << std::endl; | ||||||
|     std::cout << GridLogMessage << "RNG state saved in " << timer.Elapsed() << std::endl; |     std::cout << GridLogMessage << "RNG state saved in " << timer.Elapsed() << std::endl; | ||||||
|     return csum; |     return csum; | ||||||
|   } |   } | ||||||
| @@ -414,8 +419,6 @@ class BinaryIO { | |||||||
|       grid->GlobalIndexToGlobalCoor(gidx,gcoor); |       grid->GlobalIndexToGlobalCoor(gidx,gcoor); | ||||||
|       grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); |       grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); | ||||||
|       int l_idx=parallel.generator_idx(o_idx,i_idx); |       int l_idx=parallel.generator_idx(o_idx,i_idx); | ||||||
|       //std::cout << GridLogDebug << "l_idx " << l_idx << " o_idx " << o_idx |  | ||||||
|       //          << " i_idx " << i_idx << " rank " << rank << std::endl; |  | ||||||
|  |  | ||||||
|       if ( grid->IsBoss() ) { |       if ( grid->IsBoss() ) { | ||||||
| 	fin.read((char *)&saved[0],bytes);assert( fin.fail()==0); | 	fin.read((char *)&saved[0],bytes);assert( fin.fail()==0); | ||||||
| @@ -460,14 +463,12 @@ class BinaryIO { | |||||||
|     int ieee64    = (format == std::string("IEEE64")); |     int ieee64    = (format == std::string("IEEE64")); | ||||||
|  |  | ||||||
|  |  | ||||||
|     // Take into account block size of parallel file systems want about |  | ||||||
|     // 4-16MB chunks. |  | ||||||
|     // Ideally one reader/writer per xy plane and read these contiguously |     // Ideally one reader/writer per xy plane and read these contiguously | ||||||
|     // with comms from nominated I/O nodes. |     // with comms from nominated I/O nodes. | ||||||
|     std::ifstream fin; |     std::ifstream fin; | ||||||
|  |  | ||||||
|     int nd = grid->_ndimension; |     int nd = grid->_ndimension; | ||||||
|     std::vector<int> parallel(nd,1); |     std::vector<int> parallel(nd,1); parallel[0] = 0; | ||||||
|     std::vector<int> ioproc  (nd); |     std::vector<int> ioproc  (nd); | ||||||
|     std::vector<int> start(nd); |     std::vector<int> start(nd); | ||||||
|     std::vector<int> range(nd); |     std::vector<int> range(nd); | ||||||
| @@ -479,9 +480,15 @@ class BinaryIO { | |||||||
|     uint64_t slice_vol = 1; |     uint64_t slice_vol = 1; | ||||||
|  |  | ||||||
|     int IOnode = 1; |     int IOnode = 1; | ||||||
|     for(int d=0;d<grid->_ndimension;d++) { |     int gstrip = grid->_gdimensions[0]; | ||||||
|  |     int lstrip = grid->_ldimensions[0]; | ||||||
|  |  | ||||||
|       if ( d == 0 ) parallel[d] = 0; |     int chunk ; | ||||||
|  |     if ( nd==1) chunk = gstrip; | ||||||
|  |     else        chunk = gstrip*grid->_ldimensions[1]; | ||||||
|  |  | ||||||
|  |     for(int d=0;d<grid->_ndimension;d++) { | ||||||
|  |        | ||||||
|       if (parallel[d]) { |       if (parallel[d]) { | ||||||
| 	range[d] = grid->_ldimensions[d]; | 	range[d] = grid->_ldimensions[d]; | ||||||
| 	start[d] = grid->_processor_coor[d]*range[d]; | 	start[d] = grid->_processor_coor[d]*range[d]; | ||||||
| @@ -500,13 +507,16 @@ class BinaryIO { | |||||||
|       uint32_t tmp = IOnode; |       uint32_t tmp = IOnode; | ||||||
|       grid->GlobalSum(tmp); |       grid->GlobalSum(tmp); | ||||||
|       std::cout<< std::dec ; |       std::cout<< std::dec ; | ||||||
|       std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice "; |       std::cout<< GridLogMessage<< "Parallel read I/O from "<< file << " with " <<tmp<< " IOnodes for subslice "; | ||||||
|       for(int d=0;d<grid->_ndimension;d++){ |       for(int d=0;d<grid->_ndimension;d++){ | ||||||
| 	std::cout<< range[d]; | 	std::cout<< range[d]; | ||||||
| 	if( d< grid->_ndimension-1 )  | 	if( d< grid->_ndimension-1 )  | ||||||
| 	  std::cout<< " x "; | 	  std::cout<< " x "; | ||||||
|       } |       } | ||||||
|       std::cout << std::endl; |       std::cout << std::endl; | ||||||
|  |       std::cout<< GridLogMessage<< "Parallel I/O local  strip size is "<< lstrip <<std::endl; | ||||||
|  |       std::cout<< GridLogMessage<< "Parallel I/O global strip size is "<< gstrip <<std::endl; | ||||||
|  |       std::cout<< GridLogMessage<< "Parallel I/O chunk size is "<< chunk  <<std::endl; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     GridStopWatch timer; timer.Start(); |     GridStopWatch timer; timer.Start(); | ||||||
| @@ -515,10 +525,11 @@ class BinaryIO { | |||||||
|     int myrank = grid->ThisRank(); |     int myrank = grid->ThisRank(); | ||||||
|     int iorank = grid->RankFromProcessorCoor(ioproc); |     int iorank = grid->RankFromProcessorCoor(ioproc); | ||||||
|  |  | ||||||
|     if (!ILDG.is_ILDG) |     if (!ILDG.is_ILDG) { | ||||||
|     	if ( IOnode ) {  |       if ( IOnode ) {  | ||||||
|     		fin.open(file,std::ios::binary|std::ios::in); | 	fin.open(file,std::ios::binary|std::ios::in); | ||||||
|     	} |       } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////////////////// |     ////////////////////////////////////////////////////////// | ||||||
|     // Find the location of each site and send to primary node |     // Find the location of each site and send to primary node | ||||||
| @@ -528,16 +539,15 @@ class BinaryIO { | |||||||
|     Umu = zero; |     Umu = zero; | ||||||
|     static uint32_t csum; csum=0;//static for SHMEM |     static uint32_t csum; csum=0;//static for SHMEM | ||||||
|  |  | ||||||
|     fobj fileObj; |     std::vector<fobj> fileObj(chunk); // FIXME | ||||||
|     static sobj siteObj; // Static to place in symmetric region for SHMEM |     std::vector<sobj> siteObj(chunk); // Use comm allocator to place in symmetric region for SHMEM | ||||||
|  |  | ||||||
|       // need to implement these loops in Nd independent way with a lexico conversion |     // need to implement these loops in Nd independent way with a lexico conversion | ||||||
|     for(int tlex=0;tlex<slice_vol;tlex++){ |     for(int tlex=0;tlex<slice_vol;tlex+=chunk){ | ||||||
|  |  | ||||||
|       std::vector<int> tsite(nd); // temporary mixed up site |       std::vector<int> tsite(nd); // temporary mixed up site | ||||||
|       std::vector<int> gsite(nd); |       std::vector<int> gsite(nd); | ||||||
|       std::vector<int> lsite(nd); |       std::vector<int> lsite(nd); | ||||||
|       std::vector<int> iosite(nd); |  | ||||||
|  |  | ||||||
|       Lexicographic::CoorFromIndex(tsite,tlex,range); |       Lexicographic::CoorFromIndex(tsite,tlex,range); | ||||||
|  |  | ||||||
| @@ -546,53 +556,68 @@ class BinaryIO { | |||||||
| 	gsite[d] = tsite[d]+start[d];               // global site | 	gsite[d] = tsite[d]+start[d];               // global site | ||||||
|       } |       } | ||||||
|  |  | ||||||
|  |       /////////////////////////////////////////// | ||||||
|       ///////////////////////// |       // Get the global lexico base of the chunk | ||||||
|       // Get the rank of owner of data |       /////////////////////////////////////////// | ||||||
|       ///////////////////////// |  | ||||||
|       int rank, o_idx,i_idx, g_idx; |       int rank, o_idx,i_idx, g_idx; | ||||||
|       grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite); |       grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite); | ||||||
|       grid->GlobalCoorToGlobalIndex(gsite,g_idx); |       grid->GlobalCoorToGlobalIndex(gsite,g_idx); | ||||||
|        |  | ||||||
|       //////////////////////////////// |       //////////////////////////////// | ||||||
|       // iorank reads from the seek |       // iorank reads from the seek | ||||||
|       //////////////////////////////// |       //////////////////////////////// | ||||||
|       if (myrank == iorank) { |       if (myrank == iorank) { | ||||||
|  |  | ||||||
|  |  | ||||||
|       	if (ILDG.is_ILDG){ |       	if (ILDG.is_ILDG){ | ||||||
|       		// use C-LIME to populate the record | #ifdef HAVE_LIME | ||||||
|           #ifdef HAVE_LIME | 	  // use C-LIME to populate the record | ||||||
|           uint64_t sizeFO = sizeof(fileObj); |           uint64_t sizeFO = sizeof(fobj)*chunk; | ||||||
|           limeReaderSeek(ILDG.LR, g_idx*sizeFO, SEEK_SET); |           limeReaderSeek(ILDG.LR, g_idx*sizeFO, SEEK_SET); | ||||||
|           int status = limeReaderReadData((void *)&fileObj, &sizeFO, ILDG.LR); |           int status = limeReaderReadData((void *)&fileObj[0], &sizeFO, ILDG.LR); | ||||||
|           #endif | #endif | ||||||
|         } else{ |         } else{ | ||||||
|           fin.seekg(offset+g_idx*sizeof(fileObj)); |           fin.seekg(offset+g_idx*sizeof(fobj)); | ||||||
|           fin.read((char *)&fileObj,sizeof(fileObj)); |           fin.read((char *)&fileObj[0],sizeof(fobj)*chunk); | ||||||
|         } |         } | ||||||
|         bytes+=sizeof(fileObj); |         bytes+=sizeof(fobj)*chunk; | ||||||
|  |  | ||||||
|         if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); |         if(ieee32big) be32toh_v((void *)&fileObj[0],sizeof(fobj)*chunk); | ||||||
|         if(ieee32)    le32toh_v((void *)&fileObj,sizeof(fileObj)); |         if(ieee32)    le32toh_v((void *)&fileObj[0],sizeof(fobj)*chunk); | ||||||
|         if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); |         if(ieee64big) be64toh_v((void *)&fileObj[0],sizeof(fobj)*chunk); | ||||||
|         if(ieee64)    le64toh_v((void *)&fileObj,sizeof(fileObj)); |         if(ieee64)    le64toh_v((void *)&fileObj[0],sizeof(fobj)*chunk); | ||||||
|  |  | ||||||
|         munge(fileObj,siteObj,csum); | 	for(int c=0;c<chunk;c++) munge(fileObj[c],siteObj[c],csum); | ||||||
|  |  | ||||||
|       }  |       }  | ||||||
|        |       | ||||||
|       // Possibly do transport through pt2pt  |       // Possibly do transport through pt2pt  | ||||||
|       if ( rank != iorank ) {  |       for(int cc=0;cc<chunk;cc+=lstrip){ | ||||||
| 	if ( (myrank == rank) || (myrank==iorank) ) { |  | ||||||
| 	  grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj)); | 	///////////////////////////////// | ||||||
|  | 	// Get the rank of owner of strip | ||||||
|  | 	///////////////////////////////// | ||||||
|  | 	Lexicographic::CoorFromIndex(tsite,tlex+cc,range); | ||||||
|  |  | ||||||
|  | 	for(int d=0;d<nd;d++){ | ||||||
|  | 	  lsite[d] = tsite[d]%grid->_ldimensions[d];  // local site | ||||||
|  | 	  gsite[d] = tsite[d]+start[d];               // global site | ||||||
| 	} | 	} | ||||||
|  | 	grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite); | ||||||
|  |  | ||||||
|  | 	if ( rank != iorank ) {  | ||||||
|  | 	  if ( (myrank == rank) || (myrank==iorank) ) { | ||||||
|  | 	    grid->SendRecvPacket((void *)&siteObj[cc],(void *)&siteObj[cc],iorank,rank,sizeof(sobj)*lstrip); | ||||||
|  | 	  } | ||||||
|  | 	} | ||||||
|  | 	// Poke at destination | ||||||
|  | 	if ( myrank == rank ) { | ||||||
|  | 	  for(int x=0;x<lstrip;x++){ | ||||||
|  | 	    lsite[0]=x; | ||||||
|  | 	    pokeLocalSite(siteObj[cc+x],Umu,lsite); | ||||||
|  | 	  } | ||||||
|  | 	} | ||||||
|  | 	grid->Barrier(); // necessary? | ||||||
|       } |       } | ||||||
|       // Poke at destination |  | ||||||
|       if ( myrank == rank ) { |  | ||||||
| 	pokeLocalSite(siteObj,Umu,lsite); |  | ||||||
|       } |  | ||||||
|       grid->Barrier(); // necessary? |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     grid->GlobalSum(csum); |     grid->GlobalSum(csum); | ||||||
| @@ -601,7 +626,7 @@ class BinaryIO { | |||||||
|  |  | ||||||
|     timer.Stop(); |     timer.Stop(); | ||||||
|     std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" " |     std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" " | ||||||
| 	     << (double)bytes/timer.useconds() <<" MB/s "  <<std::endl; | 	     <<(double)bytes/timer.useconds() <<" MB/s "  <<std::endl; | ||||||
|     return csum; |     return csum; | ||||||
|   } |   } | ||||||
|  |  | ||||||
| @@ -623,11 +648,8 @@ class BinaryIO { | |||||||
|     int ieee64 = (format == std::string("IEEE64")); |     int ieee64 = (format == std::string("IEEE64")); | ||||||
|  |  | ||||||
|     if (!(ieee32big || ieee32 || ieee64big || ieee64)) { |     if (!(ieee32big || ieee32 || ieee64big || ieee64)) { | ||||||
|       std::cout << GridLogError << "Unrecognized file format " << format |       std::cout << GridLogError << "Unrecognized file format " << format << std::endl; | ||||||
|                 << std::endl; |       std::cout << GridLogError << "Allowed: IEEE32BIG | IEEE32 | IEEE64BIG | IEEE64" << std::endl; | ||||||
|       std::cout << GridLogError |  | ||||||
|                 << "Allowed: IEEE32BIG | IEEE32 | IEEE64BIG | IEEE64" |  | ||||||
|                 << std::endl; |  | ||||||
|       exit(0); |       exit(0); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -715,10 +737,10 @@ class BinaryIO { | |||||||
|     // need to implement these loops in Nd independent way with a lexico |     // need to implement these loops in Nd independent way with a lexico | ||||||
|     // conversion |     // conversion | ||||||
|     for (int tlex = 0; tlex < slice_vol; tlex++) { |     for (int tlex = 0; tlex < slice_vol; tlex++) { | ||||||
|  |  | ||||||
|       std::vector<int> tsite(nd);  // temporary mixed up site |       std::vector<int> tsite(nd);  // temporary mixed up site | ||||||
|       std::vector<int> gsite(nd); |       std::vector<int> gsite(nd); | ||||||
|       std::vector<int> lsite(nd); |       std::vector<int> lsite(nd); | ||||||
|       std::vector<int> iosite(nd); |  | ||||||
|  |  | ||||||
|       Lexicographic::CoorFromIndex(tsite, tlex, range); |       Lexicographic::CoorFromIndex(tsite, tlex, range); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -30,6 +30,9 @@ | |||||||
| #ifndef GRID_NERSC_IO_H | #ifndef GRID_NERSC_IO_H | ||||||
| #define GRID_NERSC_IO_H | #define GRID_NERSC_IO_H | ||||||
|  |  | ||||||
|  | #define PARALLEL_READ | ||||||
|  | #undef PARALLEL_WRITE | ||||||
|  |  | ||||||
| #include <algorithm> | #include <algorithm> | ||||||
| #include <iostream> | #include <iostream> | ||||||
| #include <iomanip> | #include <iomanip> | ||||||
| @@ -326,8 +329,6 @@ namespace Grid { | |||||||
|       ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |       ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|       // Now the meat: the object readers |       // Now the meat: the object readers | ||||||
|       ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |       ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| #define PARALLEL_READ |  | ||||||
| #define PARALLEL_WRITE |  | ||||||
|  |  | ||||||
|       template<class vsimd> |       template<class vsimd> | ||||||
|       static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,NerscField& header,std::string file) |       static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,NerscField& header,std::string file) | ||||||
| @@ -399,6 +400,7 @@ namespace Grid { | |||||||
| 	       <<" header    "<<header.plaquette<<std::endl; | 	       <<" header    "<<header.plaquette<<std::endl; | ||||||
|       std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" link_trace "<<clone.link_trace |       std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" link_trace "<<clone.link_trace | ||||||
| 	       <<" header    "<<header.link_trace<<std::endl; | 	       <<" header    "<<header.link_trace<<std::endl; | ||||||
|  |  | ||||||
|       assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 ); |       assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 ); | ||||||
|       assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 ); |       assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 ); | ||||||
|       assert(csum == header.checksum ); |       assert(csum == header.checksum ); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user