From 989af658071f5d9fc92adc0d6e0ab9775b3e0e51 Mon Sep 17 00:00:00 2001 From: Daniel Richtmann Date: Mon, 23 Mar 2020 17:33:18 +0100 Subject: [PATCH] Check in parallel reader for openqcd configs --- Grid/parallelIO/MetaData.h | 24 ++ Grid/parallelIO/OpenQcdIO.h | 165 ++++++++---- Grid/parallelIO/OpenQcdIOChromaReference.h | 281 +++++++++++++++++++++ Grid/qcd/hmc/HMC_aggregate.h | 3 + tests/IO/Test_openqcd_io.cc | 51 +++- 5 files changed, 466 insertions(+), 58 deletions(-) create mode 100644 Grid/parallelIO/OpenQcdIOChromaReference.h diff --git a/Grid/parallelIO/MetaData.h b/Grid/parallelIO/MetaData.h index 2e211838..4c1cfbdb 100644 --- a/Grid/parallelIO/MetaData.h +++ b/Grid/parallelIO/MetaData.h @@ -301,6 +301,30 @@ struct GaugeSimpleUnmunger { }; }; +template +struct GaugeDoubleStoredMunger{ + void operator()(fobj &in, sobj &out) { + for (int mu = 0; mu < Nds; mu++) { + for (int i = 0; i < Nc; i++) { + for (int j = 0; j < Nc; j++) { + out(mu)()(i, j) = in(mu)()(i, j); + }} + } + }; +}; + +template +struct GaugeDoubleStoredUnmunger { + void operator()(sobj &in, fobj &out) { + for (int mu = 0; mu < Nds; mu++) { + for (int i = 0; i < Nc; i++) { + for (int j = 0; j < Nc; j++) { + out(mu)()(i, j) = in(mu)()(i, j); + }} + } + }; +}; + template struct Gauge3x2munger{ void operator() (fobj &in,sobj &out){ diff --git a/Grid/parallelIO/OpenQcdIO.h b/Grid/parallelIO/OpenQcdIO.h index f340e8fc..00911595 100644 --- a/Grid/parallelIO/OpenQcdIO.h +++ b/Grid/parallelIO/OpenQcdIO.h @@ -67,6 +67,10 @@ public: field.dimension[2] = header.Nz; field.dimension[3] = header.Nt; + std::cout << GridLogDebug << "header: " << header << std::endl; + std::cout << GridLogDebug << "grid dimensions: " << grid->_fdimensions << std::endl; + std::cout << GridLogDebug << "file dimensions: " << field.dimension << std::endl; + assert(grid->_ndimension == Nd); for(int d = 0; d < Nd; d++) assert(grid->_fdimensions[d] == field.dimension[d]); @@ -80,74 +84,141 @@ public: static inline void readConfiguration(Lattice>& Umu, FieldMetaData& header, std::string file) { + typedef Lattice> DoubleStoredGaugeField; + + assert(Ns == 4 and Nd == 4 and Nc == 3); + auto grid = dynamic_cast(Umu.Grid()); - assert(grid != nullptr); - assert((grid->_ndimension == Nd) && (Nd == 4)); + assert(grid != nullptr); assert(grid->_ndimension == Nd); uint64_t offset = readHeader(file, Umu.Grid(), header); + FieldMetaData clone(header); - // NOTE: This version is suboptimal because we read in the full file on every rank - std::vector data(grid->gSites() * 4); - { - auto fin = std::fstream(file, std::ios::in | std::ios::binary); - fin.seekg(offset); - fin.read((char *)data.data(), data.size() * sizeof(ColourMatrix)); - fin.close(); - } + std::string format("IEEE64"); // they always store little endian double precsision + uint32_t nersc_csum, scidac_csuma, scidac_csumb; - // global lattice size - Coordinate fdim = grid->FullDimensions(); + GridCartesian* grid_openqcd = createOpenQcdGrid(grid); + GridRedBlackCartesian* grid_rb = SpaceTimeGrid::makeFourDimRedBlackGrid(grid); - // coordinate of this process - Coordinate pcoor; - grid->ProcessorCoorFromRank(CartesianCommunicator::RankWorld(), pcoor); + typedef DoubleStoredColourMatrixD fobj; + typedef typename DoubleStoredGaugeField::vector_object::scalar_object sobj; + typedef typename DoubleStoredGaugeField::vector_object::Realified::scalar_type word; - // loop over local indices - thread_for(idx, grid->lSites(), { - // convert local index to global coordinate - Coordinate lcoor, gcoor; - grid->LocalIndexToLocalCoor(idx, lcoor); - grid->ProcessorCoorLocalCoorToGlobalCoor(pcoor, lcoor, gcoor); + word w = 0; - // openQCD stores links attached to odd sites - bool neg = (gcoor[Xdir] + gcoor[Ydir] + gcoor[Zdir] + gcoor[Tdir]) % 2 != 1; + std::vector iodata(grid_openqcd->lSites()); // Munge, checksum, byte order in here + std::vector scalardata(grid->lSites()); - LorentzColourMatrix site_data; - for (int mu = 0; mu < 4; ++mu) { - // determine the site at which it is stored - Coordinate c = gcoor; - if (neg) - c[mu] = (c[mu] + 1) % grid->FullDimensions()[mu]; + IOobject(w, grid_openqcd, iodata, file, offset, format, BINARYIO_READ | BINARYIO_LEXICOGRAPHIC, + nersc_csum, scidac_csuma, scidac_csumb); - // site-index in the OpenQCD format (which uses t,x,y,z order) - int openqcd_idx = (c[Tdir] * fdim[Xdir] * fdim[Ydir] * fdim[Zdir] - + c[Xdir] * fdim[Ydir] * fdim[Zdir] - + c[Ydir] * fdim[Zdir] - + c[Zdir])/2; - int openqcd_mu = (mu + 1) % 4; + GridStopWatch timer; + timer.Start(); - // pick the colour-matrix out - site_data(mu) = - data[8 * openqcd_idx + 2 * openqcd_mu + (neg ? 1 : 0)](); - } + DoubleStoredGaugeField Umu_ds(grid); - pokeLocalSite(site_data, Umu, lcoor); + auto munge = GaugeDoubleStoredMunger(); + + Coordinate ldim = grid->LocalDimensions(); + thread_for(idx_g, grid->lSites(), { + Coordinate coor; + grid->LocalIndexToLocalCoor(idx_g, coor); + + bool isOdd = grid_rb->CheckerBoard(coor) == Odd; + + if(!isOdd) continue; + + int idx_o = (coor[Tdir] * ldim[Xdir] * ldim[Ydir] * ldim[Zdir] + + coor[Xdir] * ldim[Ydir] * ldim[Zdir] + + coor[Ydir] * ldim[Zdir] + + coor[Zdir])/2; + + munge(iodata[idx_o], scalardata[idx_g]); }); + grid->Barrier(); timer.Stop(); + std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: munge overhead " << timer.Elapsed() << std::endl; + + timer.Reset(); timer.Start(); + + vectorizeFromLexOrdArray(scalardata, Umu_ds); + + grid->Barrier(); timer.Stop(); + std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: vectorize overhead " << timer.Elapsed() << std::endl; + + timer.Reset(); timer.Start(); + + undoDoubleStore(Umu, Umu_ds); + + grid->Barrier(); timer.Stop(); + std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: redistribute overhead " << timer.Elapsed() << std::endl; + GaugeStatistics(Umu, clone); - std::cout << GridLogMessage << "OpenQcd Configuration " << file << " plaquette " - << std::setprecision(15) - << clone.plaquette << " header " << header.plaquette - << " difference " << fabs(clone.plaquette - header.plaquette) - << std::endl; + RealD plaq_diff = fabs(clone.plaquette - header.plaquette); - if(fabs(clone.plaquette - header.plaquette) >= 1.0e-5) std::cout << " Plaquette mismatch " << std::endl; - assert(fabs(clone.plaquette - header.plaquette) < 1.0e-5); + // clang-format off + std::cout << GridLogMessage << "OpenQcd Configuration " << file + << " plaquette " << clone.plaquette + << " header " << header.plaquette + << " difference " << plaq_diff + << std::endl; + // clang-format on + + RealD precTol = (getPrecision::value == 1) ? 2e-7 : 2e-15; + RealD tol = precTol * std::sqrt(grid->_Nprocessors); // taken from RQCD chroma code + + if(plaq_diff >= tol) + std::cout << " Plaquette mismatch (diff = " << plaq_diff << ", tol = " << tol << ")" << std::endl; + assert(plaq_diff < tol); std::cout << GridLogMessage << "OpenQcd Configuration " << file << " and plaquette agree" << std::endl; } + + template + static inline void writeConfiguration(Lattice>& Umu, + std::string file) { + std::cout << GridLogError << "Writing to openQCD file format is not implemented" << std::endl; + exit(EXIT_FAILURE); + } + +private: + static inline GridCartesian* createOpenQcdGrid(GridCartesian* grid) { + // exploit GridCartesian to be able to still use IOobject + Coordinate gdim = grid->GlobalDimensions(); + Coordinate ldim = grid->LocalDimensions(); + Coordinate pcoor = grid->ThisProcessorCoor(); + + // openqcd does rb on the z direction + gdim[Zdir] /= 2; + ldim[Zdir] /= 2; + + // and has the order T X Y Z (from slowest to fastest) + std::swap(gdim[Xdir], gdim[Zdir]); + std::swap(ldim[Xdir], ldim[Zdir]); + std::swap(pcoor[Xdir], pcoor[Zdir]); + + GridCartesian* ret = SpaceTimeGrid::makeFourDimGrid(gdim, grid->_simd_layout, grid->ProcessorGrid()); + ret->_ldimensions = ldim; + ret->_processor_coor = pcoor; + return ret; + } + + template + static inline void undoDoubleStore(Lattice>& Umu, + Lattice> const& Umu_ds) { + conformable(Umu.Grid(), Umu_ds.Grid()); + Lattice> U(Umu.Grid()); + + // they store T+, T-, X+, X-, Y+, Y-, Z+, Z- + for(int mu_g = 0; mu_g < Nd; ++mu_g) { + int mu_o = (mu_g + 1) % Nd; + U = PeekIndex(Umu_ds, 2 * mu_o) + + Cshift(PeekIndex(Umu_ds, 2 * mu_o + 1), mu_g, +1); + PokeIndex(Umu, U, mu_g); + } + } }; NAMESPACE_END(Grid); diff --git a/Grid/parallelIO/OpenQcdIOChromaReference.h b/Grid/parallelIO/OpenQcdIOChromaReference.h new file mode 100644 index 00000000..bab54fe8 --- /dev/null +++ b/Grid/parallelIO/OpenQcdIOChromaReference.h @@ -0,0 +1,281 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/parallelIO/OpenQcdIOChromaReference.h + +Copyright (C) 2015 - 2020 + +Author: Daniel Richtmann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#define CHECK {std::cerr << __FILE__ << " @l " << __LINE__ << ": CHECK" << grid->ThisRank() << std::endl;} +#define CHECK_VAR(a) { std::cerr << __FILE__ << "@l" << __LINE__ << " on "<< grid->ThisRank() << ": " << __func__ << " " << #a << "=" << (a) << std::endl; } +// #undef CHECK +// #define CHECK + +NAMESPACE_BEGIN(Grid); + +class ParRdr { +private: + bool const swap; + + MPI_Status status; + MPI_File fp; + + int err; + + MPI_Datatype oddSiteType; + MPI_Datatype fileViewType; + + GridBase* grid; + +public: + ParRdr(MPI_Comm comm, std::string const& filename, GridBase* gridPtr) + : swap(false) + , grid(gridPtr) { + err = MPI_File_open(comm, const_cast(filename.c_str()), MPI_MODE_RDONLY, MPI_INFO_NULL, &fp); + assert(err == MPI_SUCCESS); + } + + virtual ~ParRdr() { MPI_File_close(&fp); } + + inline void errInfo(int const err, std::string const& func) { + static char estring[MPI_MAX_ERROR_STRING]; + int eclass = -1, len = 0; + MPI_Error_class(err, &eclass); + MPI_Error_string(err, estring, &len); + std::cerr << func << " - Error " << eclass << ": " << estring << std::endl; + } + + int readHeader(FieldMetaData& field) { + assert((grid->_ndimension == Nd) && (Nd == 4)); + assert(Nc == 3); + + OpenQcdHeader header; + + readBlock(reinterpret_cast(&header), 0, sizeof(OpenQcdHeader), MPI_CHAR); + + header.plaq /= 3.; // TODO change this into normalizationfactor + + // sanity check (should trigger on endian issues) TODO remove? + assert(0 < header.Nt && header.Nt <= 1024); + assert(0 < header.Nx && header.Nx <= 1024); + assert(0 < header.Ny && header.Ny <= 1024); + assert(0 < header.Nz && header.Nz <= 1024); + + field.dimension[0] = header.Nx; + field.dimension[1] = header.Ny; + field.dimension[2] = header.Nz; + field.dimension[3] = header.Nt; + + for(int d = 0; d < Nd; d++) + assert(grid->FullDimensions()[d] == field.dimension[d]); + + field.plaquette = header.plaq; + + field.data_start = sizeof(OpenQcdHeader); + + return field.data_start; + } + + void readBlock(void* const dest, uint64_t const pos, uint64_t const nbytes, MPI_Datatype const datatype) { + err = MPI_File_read_at_all(fp, pos, dest, nbytes, datatype, &status); + errInfo(err, "MPI_File_read_at_all"); + // CHECK_VAR(err) + + int read = -1; + MPI_Get_count(&status, datatype, &read); + // CHECK_VAR(read) + assert(nbytes == (uint64_t)read); + assert(err == MPI_SUCCESS); + } + + void createTypes() { + constexpr int elem_size = Nd * 2 * 2 * Nc * Nc * sizeof(double); // 2_complex 2_fwdbwd + + err = MPI_Type_contiguous(elem_size, MPI_BYTE, &oddSiteType); assert(err == MPI_SUCCESS); + err = MPI_Type_commit(&oddSiteType); assert(err == MPI_SUCCESS); + + Coordinate const L = grid->GlobalDimensions(); + Coordinate const l = grid->LocalDimensions(); + Coordinate const i = grid->ThisProcessorCoor(); + + Coordinate sizes({L[2] / 2, L[1], L[0], L[3]}); + Coordinate subsizes({l[2] / 2, l[1], l[0], l[3]}); + Coordinate starts({i[2] * l[2] / 2, i[1] * l[1], i[0] * l[0], i[3] * l[3]}); + + err = MPI_Type_create_subarray(grid->_ndimension, &sizes[0], &subsizes[0], &starts[0], MPI_ORDER_FORTRAN, oddSiteType, &fileViewType); assert(err == MPI_SUCCESS); + err = MPI_Type_commit(&fileViewType); assert(err == MPI_SUCCESS); + } + + void freeTypes() { + err = MPI_Type_free(&fileViewType); assert(err == MPI_SUCCESS); + err = MPI_Type_free(&oddSiteType); assert(err == MPI_SUCCESS); + } + + bool readGauge(std::vector& domain_buff, FieldMetaData& meta) { + auto hdr_offset = readHeader(meta); + CHECK + createTypes(); + err = MPI_File_set_view(fp, hdr_offset, oddSiteType, fileViewType, "native", MPI_INFO_NULL); errInfo(err, "MPI_File_set_view0"); assert(err == MPI_SUCCESS); + CHECK + int const domainSites = grid->lSites(); + domain_buff.resize(Nd * domainSites); // 2_fwdbwd * 4_Nd * domainSites / 2_onlyodd + + // the actual READ + constexpr uint64_t cm_size = 2 * Nc * Nc * sizeof(double); // 2_complex + constexpr uint64_t os_size = Nd * 2 * cm_size; // 2_fwdbwd + constexpr uint64_t max_elems = std::numeric_limits::max(); // int adressable elems: floor is fine + uint64_t const n_os = domainSites / 2; + + for(uint64_t os_idx = 0; os_idx < n_os;) { + uint64_t const read_os = os_idx + max_elems <= n_os ? max_elems : n_os - os_idx; + uint64_t const cm = os_idx * Nd * 2; + readBlock(&(domain_buff[cm]), os_idx, read_os, oddSiteType); + os_idx += read_os; + } + + CHECK + err = MPI_File_set_view(fp, 0, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL); + errInfo(err, "MPI_File_set_view1"); + assert(err == MPI_SUCCESS); + freeTypes(); + + std::cout << GridLogMessage << "read sum: " << n_os * os_size << " bytes" << std::endl; + return true; + } +}; + +class OpenQcdIOChromaReference : public BinaryIO { +public: + template + static inline void readConfiguration(Lattice>& Umu, + Grid::FieldMetaData& header, + std::string file) { + typedef Lattice> DoubledGaugeField; + + assert(Ns == 4 and Nd == 4 and Nc == 3); + + auto grid = Umu.Grid(); + + typedef ColourMatrixD fobj; + + std::vector iodata( + Nd * grid->lSites()); // actual size = 2*Nd*lsites but have only lsites/2 sites in file + + { + ParRdr rdr(MPI_COMM_WORLD, file, grid); + rdr.readGauge(iodata, header); + } // equivalent to using binaryio + + std::vector> Umu_ds_scalar(grid->lSites()); + + copyToLatticeObject(Umu_ds_scalar, iodata, grid); // equivalent to munging + + DoubledGaugeField Umu_ds(grid); + + vectorizeFromLexOrdArray(Umu_ds_scalar, Umu_ds); + + redistribute(Umu, Umu_ds); // equivalent to undoDoublestore + + FieldMetaData clone(header); + + GaugeStatistics(Umu, clone); + + RealD plaq_diff = fabs(clone.plaquette - header.plaquette); + + // clang-format off + std::cout << GridLogMessage << "OpenQcd Configuration " << file + << " plaquette " << clone.plaquette + << " header " << header.plaquette + << " difference " << plaq_diff + << std::endl; + // clang-format on + + RealD precTol = (getPrecision::value == 1) ? 2e-7 : 2e-15; + RealD tol = precTol * std::sqrt(grid->_Nprocessors); // taken from RQCD chroma code + + if(plaq_diff >= tol) + std::cout << " Plaquette mismatch (diff = " << plaq_diff << ", tol = " << tol << ")" << std::endl; + assert(plaq_diff < tol); + + std::cout << GridLogMessage << "OpenQcd Configuration " << file << " and plaquette agree" << std::endl; + } + +private: + template + static inline void redistribute(Lattice>& Umu, + Lattice> const& Umu_ds) { + Grid::conformable(Umu.Grid(), Umu_ds.Grid()); + Lattice> U(Umu.Grid()); + + U = PeekIndex(Umu_ds, 2) + Cshift(PeekIndex(Umu_ds, 3), 0, +1); PokeIndex(Umu, U, 0); + U = PeekIndex(Umu_ds, 4) + Cshift(PeekIndex(Umu_ds, 5), 1, +1); PokeIndex(Umu, U, 1); + U = PeekIndex(Umu_ds, 6) + Cshift(PeekIndex(Umu_ds, 7), 2, +1); PokeIndex(Umu, U, 2); + U = PeekIndex(Umu_ds, 0) + Cshift(PeekIndex(Umu_ds, 1), 3, +1); PokeIndex(Umu, U, 3); + } + + static inline void copyToLatticeObject(std::vector& u_fb, + std::vector const& node_buff, + GridBase* grid) { + assert(node_buff.size() == Nd * grid->lSites()); + + Coordinate const& l = grid->LocalDimensions(); + + Coordinate coord(Nd); + int& x = coord[0]; + int& y = coord[1]; + int& z = coord[2]; + int& t = coord[3]; + + int buff_idx = 0; + for(t = 0; t < l[3]; ++t) // IMPORTANT: openQCD file ordering + for(x = 0; x < l[0]; ++x) + for(y = 0; y < l[1]; ++y) + for(z = 0; z < l[2]; ++z) { + if((t + z + y + x) % 2 == 0) continue; + + int local_idx; + Lexicographic::IndexFromCoor(coord, local_idx, grid->LocalDimensions()); + for(int mu = 0; mu < 2 * Nd; ++mu) + for(int c1 = 0; c1 < Nc; ++c1) { + for(int c2 = 0; c2 < Nc; ++c2) { + u_fb[local_idx](mu)()(c1,c2) = node_buff[mu+buff_idx]()()(c1,c2); + } + } + buff_idx += 2 * Nd; + } + + assert(node_buff.size() == buff_idx); + } +}; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/hmc/HMC_aggregate.h b/Grid/qcd/hmc/HMC_aggregate.h index 94c745e1..cb510953 100644 --- a/Grid/qcd/hmc/HMC_aggregate.h +++ b/Grid/qcd/hmc/HMC_aggregate.h @@ -40,6 +40,9 @@ directory #include #include #include +#if !defined(GRID_COMMS_NONE) +#include +#endif NAMESPACE_CHECK(Ildg); #include diff --git a/tests/IO/Test_openqcd_io.cc b/tests/IO/Test_openqcd_io.cc index 2a5769bd..83b498c2 100644 --- a/tests/IO/Test_openqcd_io.cc +++ b/tests/IO/Test_openqcd_io.cc @@ -28,28 +28,57 @@ See the full license in the file "LICENSE" in the top level distribution directo #include +#if defined(GRID_COMMS_NONE) +#error This test requires Grid compiled with MPI +#endif + using namespace Grid; int main(int argc, char** argv) { Grid_init(&argc, &argv); - GridCartesian* grid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), - GridDefaultSimd(Nd, vComplexD::Nsimd()), - GridDefaultMpi()); + auto simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd()); + auto mpi_layout = GridDefaultMpi(); + auto latt_size = GridDefaultLatt(); - LatticeGaugeField Umu(grid); + GridCartesian grid(latt_size, simd_layout, mpi_layout); - FieldMetaData header; + GridParallelRNG pRNG(&grid); - if(!Grid::GridCmdOptionExists(argv, argv + argc, "--config")) { - std::cout << GridLogError << "You need to use --config /path/to/openqcd_config" << std::endl; - abort(); + pRNG.SeedFixedIntegers(std::vector({45, 12, 81, 9})); + + LatticeGaugeField Umu_ref(&grid); + LatticeGaugeField Umu_me(&grid); + LatticeGaugeField Umu_diff(&grid); + + FieldMetaData header_ref; + FieldMetaData header_me; + + Umu_ref = Zero(); + Umu_me = Zero(); + + std::string file("/home/daniel/configs/openqcd/test_16x8_pbcn6"); + + if(GridCmdOptionExists(argv, argv + argc, "--config")) { + file = GridCmdOptionPayload(argv, argv + argc, "--config"); + std::cout << "file: " << file << std::endl; + assert(!file.empty()); } - std::string file = Grid::GridCmdOptionPayload(argv, argv + argc, "--config"); - assert(!file.empty()); + OpenQcdIOChromaReference::readConfiguration(Umu_ref, header_ref, file); + OpenQcdIO::readConfiguration(Umu_me, header_me, file); - OpenQcdIO::readConfiguration(Umu, header, file); + std::cout << GridLogMessage << header_ref << std::endl; + std::cout << GridLogMessage << header_me << std::endl; + + Umu_diff = Umu_ref - Umu_me; + + // clang-format off + std::cout << GridLogMessage + << "norm2(Umu_ref) = " << norm2(Umu_ref) + << " norm2(Umu_me) = " << norm2(Umu_me) + << " norm2(Umu_diff) = " << norm2(Umu_diff) << std::endl; + // clang-format on Grid_finalize(); }