1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-14 22:07:05 +01:00

Use a global-X x local-Y chunk size for parallel binary I/O.

Gives O(32 x 8 x 18*8*8) chunk size on configuration I/O.

At 150KB this should be getting close to the packet sizes and 4MB filesystem
block sizes that are reasonably (!?) performant. We shall see once I move
this off my laptop and over to BNL and time it.
This commit is contained in:
paboyle
2017-05-25 11:43:33 +01:00
parent 15e801af3f
commit a8c10b1933
2 changed files with 113 additions and 89 deletions

View File

@ -30,6 +30,9 @@
#ifndef GRID_NERSC_IO_H
#define GRID_NERSC_IO_H
#define PARALLEL_READ
#undef PARALLEL_WRITE
#include <algorithm>
#include <iostream>
#include <iomanip>
@ -326,8 +329,6 @@ namespace Grid {
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Now the meat: the object readers
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define PARALLEL_READ
#define PARALLEL_WRITE
template<class vsimd>
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,NerscField& header,std::string file)
@ -399,6 +400,7 @@ namespace Grid {
<<" header "<<header.plaquette<<std::endl;
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" link_trace "<<clone.link_trace
<<" header "<<header.link_trace<<std::endl;
assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
assert(csum == header.checksum );