diff --git a/Grid/parallelIO/MetaData.h b/Grid/parallelIO/MetaData.h index 2e211838..4c1cfbdb 100644 --- a/Grid/parallelIO/MetaData.h +++ b/Grid/parallelIO/MetaData.h @@ -301,6 +301,30 @@ struct GaugeSimpleUnmunger { }; }; +template +struct GaugeDoubleStoredMunger{ + void operator()(fobj &in, sobj &out) { + for (int mu = 0; mu < Nds; mu++) { + for (int i = 0; i < Nc; i++) { + for (int j = 0; j < Nc; j++) { + out(mu)()(i, j) = in(mu)()(i, j); + }} + } + }; +}; + +template +struct GaugeDoubleStoredUnmunger { + void operator()(sobj &in, fobj &out) { + for (int mu = 0; mu < Nds; mu++) { + for (int i = 0; i < Nc; i++) { + for (int j = 0; j < Nc; j++) { + out(mu)()(i, j) = in(mu)()(i, j); + }} + } + }; +}; + template struct Gauge3x2munger{ void operator() (fobj &in,sobj &out){ diff --git a/Grid/parallelIO/OpenQcdIO.h b/Grid/parallelIO/OpenQcdIO.h new file mode 100644 index 00000000..00911595 --- /dev/null +++ b/Grid/parallelIO/OpenQcdIO.h @@ -0,0 +1,224 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/parallelIO/OpenQcdIO.h + +Copyright (C) 2015 - 2020 + +Author: Daniel Richtmann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + +struct OpenQcdHeader : Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(OpenQcdHeader, + int, Nt, + int, Nx, + int, Ny, + int, Nz, + double, plaq); +}; + +class OpenQcdIO : public BinaryIO { +public: + static constexpr double normalisationFactor = Nc; // normalisation difference: grid 18, openqcd 6 + + static inline int readHeader(std::string file, GridBase* grid, FieldMetaData& field) { + OpenQcdHeader header; + + { + std::ifstream fin(file, std::ios::in | std::ios::binary); + fin.read(reinterpret_cast(&header), sizeof(OpenQcdHeader)); + assert(!fin.fail()); + field.data_start = fin.tellg(); + fin.close(); + } + + header.plaq /= normalisationFactor; + + // sanity check (should trigger on endian issues) + assert(0 < header.Nt && header.Nt <= 1024); + assert(0 < header.Nx && header.Nx <= 1024); + assert(0 < header.Ny && header.Ny <= 1024); + assert(0 < header.Nz && header.Nz <= 1024); + + field.dimension[0] = header.Nx; + field.dimension[1] = header.Ny; + field.dimension[2] = header.Nz; + field.dimension[3] = header.Nt; + + std::cout << GridLogDebug << "header: " << header << std::endl; + std::cout << GridLogDebug << "grid dimensions: " << grid->_fdimensions << std::endl; + std::cout << GridLogDebug << "file dimensions: " << field.dimension << std::endl; + + assert(grid->_ndimension == Nd); + for(int d = 0; d < Nd; d++) + assert(grid->_fdimensions[d] == field.dimension[d]); + + field.plaquette = header.plaq; + + return field.data_start; + } + + template + static inline void readConfiguration(Lattice>& Umu, + FieldMetaData& header, + std::string file) { + typedef Lattice> DoubleStoredGaugeField; + + assert(Ns == 4 and Nd == 4 and Nc == 3); + + auto grid = dynamic_cast(Umu.Grid()); + assert(grid != nullptr); assert(grid->_ndimension == Nd); + + uint64_t offset = readHeader(file, Umu.Grid(), header); + + FieldMetaData clone(header); + + std::string format("IEEE64"); // they always store little endian double precsision + uint32_t nersc_csum, scidac_csuma, scidac_csumb; + + GridCartesian* grid_openqcd = createOpenQcdGrid(grid); + GridRedBlackCartesian* grid_rb = SpaceTimeGrid::makeFourDimRedBlackGrid(grid); + + typedef DoubleStoredColourMatrixD fobj; + typedef typename DoubleStoredGaugeField::vector_object::scalar_object sobj; + typedef typename DoubleStoredGaugeField::vector_object::Realified::scalar_type word; + + word w = 0; + + std::vector iodata(grid_openqcd->lSites()); // Munge, checksum, byte order in here + std::vector scalardata(grid->lSites()); + + IOobject(w, grid_openqcd, iodata, file, offset, format, BINARYIO_READ | BINARYIO_LEXICOGRAPHIC, + nersc_csum, scidac_csuma, scidac_csumb); + + GridStopWatch timer; + timer.Start(); + + DoubleStoredGaugeField Umu_ds(grid); + + auto munge = GaugeDoubleStoredMunger(); + + Coordinate ldim = grid->LocalDimensions(); + thread_for(idx_g, grid->lSites(), { + Coordinate coor; + grid->LocalIndexToLocalCoor(idx_g, coor); + + bool isOdd = grid_rb->CheckerBoard(coor) == Odd; + + if(!isOdd) continue; + + int idx_o = (coor[Tdir] * ldim[Xdir] * ldim[Ydir] * ldim[Zdir] + + coor[Xdir] * ldim[Ydir] * ldim[Zdir] + + coor[Ydir] * ldim[Zdir] + + coor[Zdir])/2; + + munge(iodata[idx_o], scalardata[idx_g]); + }); + + grid->Barrier(); timer.Stop(); + std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: munge overhead " << timer.Elapsed() << std::endl; + + timer.Reset(); timer.Start(); + + vectorizeFromLexOrdArray(scalardata, Umu_ds); + + grid->Barrier(); timer.Stop(); + std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: vectorize overhead " << timer.Elapsed() << std::endl; + + timer.Reset(); timer.Start(); + + undoDoubleStore(Umu, Umu_ds); + + grid->Barrier(); timer.Stop(); + std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: redistribute overhead " << timer.Elapsed() << std::endl; + + GaugeStatistics(Umu, clone); + + RealD plaq_diff = fabs(clone.plaquette - header.plaquette); + + // clang-format off + std::cout << GridLogMessage << "OpenQcd Configuration " << file + << " plaquette " << clone.plaquette + << " header " << header.plaquette + << " difference " << plaq_diff + << std::endl; + // clang-format on + + RealD precTol = (getPrecision::value == 1) ? 2e-7 : 2e-15; + RealD tol = precTol * std::sqrt(grid->_Nprocessors); // taken from RQCD chroma code + + if(plaq_diff >= tol) + std::cout << " Plaquette mismatch (diff = " << plaq_diff << ", tol = " << tol << ")" << std::endl; + assert(plaq_diff < tol); + + std::cout << GridLogMessage << "OpenQcd Configuration " << file << " and plaquette agree" << std::endl; + } + + template + static inline void writeConfiguration(Lattice>& Umu, + std::string file) { + std::cout << GridLogError << "Writing to openQCD file format is not implemented" << std::endl; + exit(EXIT_FAILURE); + } + +private: + static inline GridCartesian* createOpenQcdGrid(GridCartesian* grid) { + // exploit GridCartesian to be able to still use IOobject + Coordinate gdim = grid->GlobalDimensions(); + Coordinate ldim = grid->LocalDimensions(); + Coordinate pcoor = grid->ThisProcessorCoor(); + + // openqcd does rb on the z direction + gdim[Zdir] /= 2; + ldim[Zdir] /= 2; + + // and has the order T X Y Z (from slowest to fastest) + std::swap(gdim[Xdir], gdim[Zdir]); + std::swap(ldim[Xdir], ldim[Zdir]); + std::swap(pcoor[Xdir], pcoor[Zdir]); + + GridCartesian* ret = SpaceTimeGrid::makeFourDimGrid(gdim, grid->_simd_layout, grid->ProcessorGrid()); + ret->_ldimensions = ldim; + ret->_processor_coor = pcoor; + return ret; + } + + template + static inline void undoDoubleStore(Lattice>& Umu, + Lattice> const& Umu_ds) { + conformable(Umu.Grid(), Umu_ds.Grid()); + Lattice> U(Umu.Grid()); + + // they store T+, T-, X+, X-, Y+, Y-, Z+, Z- + for(int mu_g = 0; mu_g < Nd; ++mu_g) { + int mu_o = (mu_g + 1) % Nd; + U = PeekIndex(Umu_ds, 2 * mu_o) + + Cshift(PeekIndex(Umu_ds, 2 * mu_o + 1), mu_g, +1); + PokeIndex(Umu, U, mu_g); + } + } +}; + +NAMESPACE_END(Grid); diff --git a/Grid/parallelIO/OpenQcdIOChromaReference.h b/Grid/parallelIO/OpenQcdIOChromaReference.h new file mode 100644 index 00000000..bab54fe8 --- /dev/null +++ b/Grid/parallelIO/OpenQcdIOChromaReference.h @@ -0,0 +1,281 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/parallelIO/OpenQcdIOChromaReference.h + +Copyright (C) 2015 - 2020 + +Author: Daniel Richtmann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#define CHECK {std::cerr << __FILE__ << " @l " << __LINE__ << ": CHECK" << grid->ThisRank() << std::endl;} +#define CHECK_VAR(a) { std::cerr << __FILE__ << "@l" << __LINE__ << " on "<< grid->ThisRank() << ": " << __func__ << " " << #a << "=" << (a) << std::endl; } +// #undef CHECK +// #define CHECK + +NAMESPACE_BEGIN(Grid); + +class ParRdr { +private: + bool const swap; + + MPI_Status status; + MPI_File fp; + + int err; + + MPI_Datatype oddSiteType; + MPI_Datatype fileViewType; + + GridBase* grid; + +public: + ParRdr(MPI_Comm comm, std::string const& filename, GridBase* gridPtr) + : swap(false) + , grid(gridPtr) { + err = MPI_File_open(comm, const_cast(filename.c_str()), MPI_MODE_RDONLY, MPI_INFO_NULL, &fp); + assert(err == MPI_SUCCESS); + } + + virtual ~ParRdr() { MPI_File_close(&fp); } + + inline void errInfo(int const err, std::string const& func) { + static char estring[MPI_MAX_ERROR_STRING]; + int eclass = -1, len = 0; + MPI_Error_class(err, &eclass); + MPI_Error_string(err, estring, &len); + std::cerr << func << " - Error " << eclass << ": " << estring << std::endl; + } + + int readHeader(FieldMetaData& field) { + assert((grid->_ndimension == Nd) && (Nd == 4)); + assert(Nc == 3); + + OpenQcdHeader header; + + readBlock(reinterpret_cast(&header), 0, sizeof(OpenQcdHeader), MPI_CHAR); + + header.plaq /= 3.; // TODO change this into normalizationfactor + + // sanity check (should trigger on endian issues) TODO remove? + assert(0 < header.Nt && header.Nt <= 1024); + assert(0 < header.Nx && header.Nx <= 1024); + assert(0 < header.Ny && header.Ny <= 1024); + assert(0 < header.Nz && header.Nz <= 1024); + + field.dimension[0] = header.Nx; + field.dimension[1] = header.Ny; + field.dimension[2] = header.Nz; + field.dimension[3] = header.Nt; + + for(int d = 0; d < Nd; d++) + assert(grid->FullDimensions()[d] == field.dimension[d]); + + field.plaquette = header.plaq; + + field.data_start = sizeof(OpenQcdHeader); + + return field.data_start; + } + + void readBlock(void* const dest, uint64_t const pos, uint64_t const nbytes, MPI_Datatype const datatype) { + err = MPI_File_read_at_all(fp, pos, dest, nbytes, datatype, &status); + errInfo(err, "MPI_File_read_at_all"); + // CHECK_VAR(err) + + int read = -1; + MPI_Get_count(&status, datatype, &read); + // CHECK_VAR(read) + assert(nbytes == (uint64_t)read); + assert(err == MPI_SUCCESS); + } + + void createTypes() { + constexpr int elem_size = Nd * 2 * 2 * Nc * Nc * sizeof(double); // 2_complex 2_fwdbwd + + err = MPI_Type_contiguous(elem_size, MPI_BYTE, &oddSiteType); assert(err == MPI_SUCCESS); + err = MPI_Type_commit(&oddSiteType); assert(err == MPI_SUCCESS); + + Coordinate const L = grid->GlobalDimensions(); + Coordinate const l = grid->LocalDimensions(); + Coordinate const i = grid->ThisProcessorCoor(); + + Coordinate sizes({L[2] / 2, L[1], L[0], L[3]}); + Coordinate subsizes({l[2] / 2, l[1], l[0], l[3]}); + Coordinate starts({i[2] * l[2] / 2, i[1] * l[1], i[0] * l[0], i[3] * l[3]}); + + err = MPI_Type_create_subarray(grid->_ndimension, &sizes[0], &subsizes[0], &starts[0], MPI_ORDER_FORTRAN, oddSiteType, &fileViewType); assert(err == MPI_SUCCESS); + err = MPI_Type_commit(&fileViewType); assert(err == MPI_SUCCESS); + } + + void freeTypes() { + err = MPI_Type_free(&fileViewType); assert(err == MPI_SUCCESS); + err = MPI_Type_free(&oddSiteType); assert(err == MPI_SUCCESS); + } + + bool readGauge(std::vector& domain_buff, FieldMetaData& meta) { + auto hdr_offset = readHeader(meta); + CHECK + createTypes(); + err = MPI_File_set_view(fp, hdr_offset, oddSiteType, fileViewType, "native", MPI_INFO_NULL); errInfo(err, "MPI_File_set_view0"); assert(err == MPI_SUCCESS); + CHECK + int const domainSites = grid->lSites(); + domain_buff.resize(Nd * domainSites); // 2_fwdbwd * 4_Nd * domainSites / 2_onlyodd + + // the actual READ + constexpr uint64_t cm_size = 2 * Nc * Nc * sizeof(double); // 2_complex + constexpr uint64_t os_size = Nd * 2 * cm_size; // 2_fwdbwd + constexpr uint64_t max_elems = std::numeric_limits::max(); // int adressable elems: floor is fine + uint64_t const n_os = domainSites / 2; + + for(uint64_t os_idx = 0; os_idx < n_os;) { + uint64_t const read_os = os_idx + max_elems <= n_os ? max_elems : n_os - os_idx; + uint64_t const cm = os_idx * Nd * 2; + readBlock(&(domain_buff[cm]), os_idx, read_os, oddSiteType); + os_idx += read_os; + } + + CHECK + err = MPI_File_set_view(fp, 0, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL); + errInfo(err, "MPI_File_set_view1"); + assert(err == MPI_SUCCESS); + freeTypes(); + + std::cout << GridLogMessage << "read sum: " << n_os * os_size << " bytes" << std::endl; + return true; + } +}; + +class OpenQcdIOChromaReference : public BinaryIO { +public: + template + static inline void readConfiguration(Lattice>& Umu, + Grid::FieldMetaData& header, + std::string file) { + typedef Lattice> DoubledGaugeField; + + assert(Ns == 4 and Nd == 4 and Nc == 3); + + auto grid = Umu.Grid(); + + typedef ColourMatrixD fobj; + + std::vector iodata( + Nd * grid->lSites()); // actual size = 2*Nd*lsites but have only lsites/2 sites in file + + { + ParRdr rdr(MPI_COMM_WORLD, file, grid); + rdr.readGauge(iodata, header); + } // equivalent to using binaryio + + std::vector> Umu_ds_scalar(grid->lSites()); + + copyToLatticeObject(Umu_ds_scalar, iodata, grid); // equivalent to munging + + DoubledGaugeField Umu_ds(grid); + + vectorizeFromLexOrdArray(Umu_ds_scalar, Umu_ds); + + redistribute(Umu, Umu_ds); // equivalent to undoDoublestore + + FieldMetaData clone(header); + + GaugeStatistics(Umu, clone); + + RealD plaq_diff = fabs(clone.plaquette - header.plaquette); + + // clang-format off + std::cout << GridLogMessage << "OpenQcd Configuration " << file + << " plaquette " << clone.plaquette + << " header " << header.plaquette + << " difference " << plaq_diff + << std::endl; + // clang-format on + + RealD precTol = (getPrecision::value == 1) ? 2e-7 : 2e-15; + RealD tol = precTol * std::sqrt(grid->_Nprocessors); // taken from RQCD chroma code + + if(plaq_diff >= tol) + std::cout << " Plaquette mismatch (diff = " << plaq_diff << ", tol = " << tol << ")" << std::endl; + assert(plaq_diff < tol); + + std::cout << GridLogMessage << "OpenQcd Configuration " << file << " and plaquette agree" << std::endl; + } + +private: + template + static inline void redistribute(Lattice>& Umu, + Lattice> const& Umu_ds) { + Grid::conformable(Umu.Grid(), Umu_ds.Grid()); + Lattice> U(Umu.Grid()); + + U = PeekIndex(Umu_ds, 2) + Cshift(PeekIndex(Umu_ds, 3), 0, +1); PokeIndex(Umu, U, 0); + U = PeekIndex(Umu_ds, 4) + Cshift(PeekIndex(Umu_ds, 5), 1, +1); PokeIndex(Umu, U, 1); + U = PeekIndex(Umu_ds, 6) + Cshift(PeekIndex(Umu_ds, 7), 2, +1); PokeIndex(Umu, U, 2); + U = PeekIndex(Umu_ds, 0) + Cshift(PeekIndex(Umu_ds, 1), 3, +1); PokeIndex(Umu, U, 3); + } + + static inline void copyToLatticeObject(std::vector& u_fb, + std::vector const& node_buff, + GridBase* grid) { + assert(node_buff.size() == Nd * grid->lSites()); + + Coordinate const& l = grid->LocalDimensions(); + + Coordinate coord(Nd); + int& x = coord[0]; + int& y = coord[1]; + int& z = coord[2]; + int& t = coord[3]; + + int buff_idx = 0; + for(t = 0; t < l[3]; ++t) // IMPORTANT: openQCD file ordering + for(x = 0; x < l[0]; ++x) + for(y = 0; y < l[1]; ++y) + for(z = 0; z < l[2]; ++z) { + if((t + z + y + x) % 2 == 0) continue; + + int local_idx; + Lexicographic::IndexFromCoor(coord, local_idx, grid->LocalDimensions()); + for(int mu = 0; mu < 2 * Nd; ++mu) + for(int c1 = 0; c1 < Nc; ++c1) { + for(int c2 = 0; c2 < Nc; ++c2) { + u_fb[local_idx](mu)()(c1,c2) = node_buff[mu+buff_idx]()()(c1,c2); + } + } + buff_idx += 2 * Nd; + } + + assert(node_buff.size() == buff_idx); + } +}; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/hmc/HMC_aggregate.h b/Grid/qcd/hmc/HMC_aggregate.h index e4d2ce83..cb510953 100644 --- a/Grid/qcd/hmc/HMC_aggregate.h +++ b/Grid/qcd/hmc/HMC_aggregate.h @@ -39,6 +39,10 @@ directory #include #include #include +#include +#if !defined(GRID_COMMS_NONE) +#include +#endif NAMESPACE_CHECK(Ildg); #include diff --git a/Grid/serialisation/BaseIO.h b/Grid/serialisation/BaseIO.h index a9147c49..bf424fc7 100644 --- a/Grid/serialisation/BaseIO.h +++ b/Grid/serialisation/BaseIO.h @@ -97,6 +97,23 @@ namespace Grid { template struct is_tensor_variable : public std::false_type {}; template struct is_tensor_variable::value && !is_tensor_fixed::value>::type> : public std::true_type {}; + + // Helper functions to get the ultimate scalar inside a tensor, and corresponding size + template + inline typename std::enable_if::value, const typename ET::Index>::type + getScalarCount(const ET &eigenTensor) { return eigenTensor.size() * Traits::count; } + template + inline typename std::enable_if::value, const typename ET::Scalar *>::type + getFirstScalar(const ET &eigenTensor) { return eigenTensor.data(); } + template + inline typename std::enable_if::value, typename ET::Scalar *>::type + getFirstScalar(ET &eigenTensor) { return eigenTensor.data(); } + template + inline typename std::enable_if::value, const typename Traits::scalar_type *>::type + getFirstScalar(const ET &eigenTensor) { return eigenTensor.data()->begin(); } + template + inline typename std::enable_if::value, typename Traits::scalar_type *>::type + getFirstScalar(ET &eigenTensor) { return eigenTensor.data()->begin(); } } // Abstract writer/reader classes //////////////////////////////////////////// @@ -128,23 +145,6 @@ namespace Grid { typename std::enable_if::value>::type write(const std::string &s, const ETensor &output); - // Helper functions for Scalar vs Container specialisations - template - inline typename std::enable_if::value, - const typename ETensor::Scalar *>::type - getFirstScalar(const ETensor &output) - { - return output.data(); - } - - template - inline typename std::enable_if::value, - const typename EigenIO::Traits::scalar_type *>::type - getFirstScalar(const ETensor &output) - { - return output.data()->begin(); - } - template inline typename std::enable_if::value, void>::type copyScalars(S * &pCopy, const S &Source) @@ -318,12 +318,12 @@ namespace Grid { TotalDims[TensorRank + i] = Traits::Dimension(i); // If the Tensor isn't in Row-Major order, then we'll need to copy it's data - const bool CopyData{NumElements > 1 && ETensor::Layout != Eigen::StorageOptions::RowMajor}; + const bool CopyData{NumElements > 1 && static_cast( ETensor::Layout ) != static_cast( Eigen::StorageOptions::RowMajor )}; const Scalar * pWriteBuffer; std::vector CopyBuffer; const Index TotalNumElements = NumElements * Traits::count; if( !CopyData ) { - pWriteBuffer = getFirstScalar( output ); + pWriteBuffer = EigenIO::getFirstScalar( output ); } else { // Regardless of the Eigen::Tensor storage order, the copy will be Row Major CopyBuffer.resize( TotalNumElements ); diff --git a/Hadrons/Modules/MContraction/A2AMesonField.hpp b/Hadrons/Modules/MContraction/A2AMesonField.hpp index d3f90959..cf1b9415 100644 --- a/Hadrons/Modules/MContraction/A2AMesonField.hpp +++ b/Hadrons/Modules/MContraction/A2AMesonField.hpp @@ -231,6 +231,11 @@ void TA2AMesonField::execute(void) int block = par().block; int cacheBlock = par().cacheBlock; + if (N_i < block || N_j < block) + { + HADRONS_ERROR(Range, "blockSize must not exceed size of input vector."); + } + LOG(Message) << "Computing all-to-all meson fields" << std::endl; LOG(Message) << "Left: '" << par().left << "' Right: '" << par().right << "'" << std::endl; LOG(Message) << "Momenta:" << std::endl; diff --git a/Hadrons/Modules/MDistil/LapEvec.hpp b/Hadrons/Modules/MDistil/LapEvec.hpp index 9b5197c2..3c1122ca 100644 --- a/Hadrons/Modules/MDistil/LapEvec.hpp +++ b/Hadrons/Modules/MDistil/LapEvec.hpp @@ -263,6 +263,11 @@ void TLapEvec::execute(void) const int Ntlocal{gridHD->LocalDimensions()[Tdir]}; const int Ntfirst{gridHD->LocalStarts()[Tdir]}; uint32_t ConvergenceErrors{0}; + const int NtFull{env().getDim(Tdir)}; + TimesliceEvals Evals{ NtFull, LPar.Nvec }; + for (int t = 0; t < NtFull; t++) + for (int v = 0; v < LPar.Nvec; v++) + Evals.tensor( t, v ) = 0; for (int t = 0; t < Ntlocal; t++ ) { LOG(Message) << "------------------------------------------------------------" << std::endl; @@ -302,6 +307,8 @@ void TLapEvec::execute(void) InsertSliceLocal(eig[t].evec[i],eig4d.evec[i],0,t,Tdir); if(t==0 && Ntfirst==0) eig4d.eval[i] = eig[t].eval[i]; // TODO: Discuss: is this needed? Is there a better way? + if(gridLD->IsBoss()) // Only do this on one node per timeslice, so a global sum will work + Evals.tensor(t + Ntfirst,i) = eig[t].eval[i]; } } GridLogIRL.Active( PreviousIRLLogState ); @@ -322,9 +329,21 @@ void TLapEvec::execute(void) sOperatorXml.append( b->parString() ); sOperatorXml.append( "" ); eig4d.record.operatorXml = sOperatorXml; - sEigenPackName.append("."); - sEigenPackName.append(std::to_string(vm().getTrajectory())); + sEigenPackName.append(1, '.'); + std::size_t NameLen{ sEigenPackName.length() }; + const std::string sTrajNum{std::to_string(vm().getTrajectory())}; + sEigenPackName.append(sTrajNum); eig4d.write(sEigenPackName,false); + // Communicate eig[t].evec to boss-node, save into new object evecs + gridHD->GlobalSumVector(EigenIO::getFirstScalar(Evals.tensor), + static_cast(EigenIO::getScalarCount(Evals.tensor))); + if(gridHD->IsBoss()) + { + sEigenPackName.resize(NameLen); + sEigenPackName.append("evals."); + sEigenPackName.append(sTrajNum); + Evals.write( sEigenPackName ); + } } } diff --git a/Hadrons/Modules/MDistil/Perambulator.cc b/Hadrons/Modules/MDistil/Perambulator.cc index e710698e..0cd8e5e0 100644 --- a/Hadrons/Modules/MDistil/Perambulator.cc +++ b/Hadrons/Modules/MDistil/Perambulator.cc @@ -34,6 +34,7 @@ using namespace Hadrons; using namespace MDistil; template class Grid::Hadrons::MDistil::TPerambulator; +template class Grid::Hadrons::MDistil::TPerambulator; BEGIN_HADRONS_NAMESPACE @@ -53,5 +54,8 @@ const std::array NoiseTensor::DefaultIndexNames__{"nNoise", "nT" const std::string PerambTensor::Name__{"Perambulator"}; const std::array PerambTensor::DefaultIndexNames__{"nT", "nVec", "LI", "nNoise", "nT_inv", "SI"}; +const std::string TimesliceEvals::Name__{"TimesliceEigenValues"}; +const std::array TimesliceEvals::DefaultIndexNames__{"nT", "nVec"}; + END_MODULE_NAMESPACE END_HADRONS_NAMESPACE diff --git a/Hadrons/Modules/MDistil/Perambulator.hpp b/Hadrons/Modules/MDistil/Perambulator.hpp index fb9d16dc..b59d6c51 100644 --- a/Hadrons/Modules/MDistil/Perambulator.hpp +++ b/Hadrons/Modules/MDistil/Perambulator.hpp @@ -74,6 +74,7 @@ protected: }; MODULE_REGISTER_TMP(Perambulator, TPerambulator, MDistil); +MODULE_REGISTER_TMP(ZPerambulator, TPerambulator, MDistil); /****************************************************************************** * TPerambulator implementation * @@ -89,10 +90,22 @@ std::vector TPerambulator::getInput(void) return {par().lapevec, par().solver, par().noise, par().DistilParams}; } +static const std::string UnsmearedSink{ "_unsmeared_sink" }; + template std::vector TPerambulator::getOutput(void) { - return {getName(), getName() + "_unsmeared_sink"}; + // Always return perambulator with name of module + std::string objName{ getName() }; + std::vector output{ objName }; + // If unsmeared sink is specified, then output that as well + const std::string UnsmearedSinkFileName{ par().UnsmearedSinkFileName }; + if( !UnsmearedSinkFileName.empty() ) + { + objName.append( UnsmearedSink ); + output.push_back( objName ); + } + return output; } // setup /////////////////////////////////////////////////////////////////////// @@ -105,9 +118,15 @@ void TPerambulator::setup(void) const bool full_tdil{ dp.TI == Nt }; const int Nt_inv{ full_tdil ? 1 : dp.TI }; - envCreate(PerambTensor, getName(), 1, Nt, dp.nvec, dp.LI, dp.nnoise, Nt_inv, dp.SI); - envCreate(std::vector, getName() + "_unsmeared_sink", 1, - dp.nnoise*dp.LI*Ns*Nt_inv, envGetGrid(FermionField)); + std::string objName{ getName() }; + envCreate(PerambTensor, objName, 1, Nt, dp.nvec, dp.LI, dp.nnoise, Nt_inv, dp.SI); + const std::string UnsmearedSinkFileName{ par().UnsmearedSinkFileName }; + if( !UnsmearedSinkFileName.empty() ) + { + objName.append( UnsmearedSink ); + envCreate(std::vector, objName, 1, dp.nnoise*dp.LI*Ns*Nt_inv, + envGetGrid(FermionField)); + } envTmpLat(LatticeSpinColourVector, "dist_source"); envTmpLat(LatticeSpinColourVector, "source4d"); @@ -139,9 +158,12 @@ void TPerambulator::execute(void) envGetTmp(FermionField, v5dtmp); envGetTmp(FermionField, v5dtmp_sol); auto &noise = envGet(NoiseTensor, par().noise); - auto &perambulator = envGet(PerambTensor, getName()); + std::string objName{ getName() }; + auto &perambulator = envGet(PerambTensor, objName); auto &epack = envGet(LapEvecs, par().lapevec); - auto &unsmeared_sink = envGet(std::vector, getName() + "_unsmeared_sink"); + objName.append( UnsmearedSink ); + const std::string UnsmearedSinkFileName{ par().UnsmearedSinkFileName }; + const bool bSaveUnsmearedSink( !UnsmearedSinkFileName.empty() ); envGetTmp(LatticeSpinColourVector, dist_source); envGetTmp(LatticeSpinColourVector, source4d); envGetTmp(LatticeSpinColourVector, source3d); @@ -153,7 +175,6 @@ void TPerambulator::execute(void) GridCartesian * const grid4d{ env().getGrid() }; // Owned by environment (so I won't delete it) const int Ntlocal{grid4d->LocalDimensions()[3]}; const int Ntfirst{grid4d->LocalStarts()[3]}; - const std::string UnsmearedSinkFileName{ par().UnsmearedSinkFileName }; for (int inoise = 0; inoise < dp.nnoise; inoise++) { @@ -197,8 +218,11 @@ void TPerambulator::execute(void) mat.ExportPhysicalFermionSolution(v5dtmp_sol, v4dtmp); result4d = v4dtmp; } - if (!UnsmearedSinkFileName.empty()) + if( bSaveUnsmearedSink ) + { + auto &unsmeared_sink = envGet(std::vector, objName); unsmeared_sink[inoise+dp.nnoise*(dk+dp.LI*(dt+Nt_inv*ds))] = result4d; + } for (int is = 0; is < Ns; is++) { result4d_nospin = peekSpin(result4d,is); @@ -250,9 +274,10 @@ void TPerambulator::execute(void) } //Save the unsmeared sinks if filename specified - if (!UnsmearedSinkFileName.empty()) + if (bSaveUnsmearedSink) { LOG(Message) << "Writing unsmeared sink to " << UnsmearedSinkFileName << std::endl; + auto &unsmeared_sink = envGet(std::vector, objName); A2AVectorsIo::write(UnsmearedSinkFileName, unsmeared_sink, false, vm().getTrajectory()); } } diff --git a/Hadrons/Modules/MSource/Wall.hpp b/Hadrons/Modules/MSource/Wall.hpp index ea26d6c9..d6a223cf 100644 --- a/Hadrons/Modules/MSource/Wall.hpp +++ b/Hadrons/Modules/MSource/Wall.hpp @@ -143,7 +143,7 @@ void TWall::execute(void) envGetTmp(LatticeComplex, coor); p = strToVec(par().mom); ph = Zero(); - for(unsigned int mu = 0; mu < env().getNd(); mu++) + for(unsigned int mu = 0; mu < p.size(); mu++) { LatticeCoordinate(coor, mu); ph = ph + (p[mu]/env().getDim(mu))*coor; diff --git a/Hadrons/NamedTensor.hpp b/Hadrons/NamedTensor.hpp index 954de2d8..53e10e04 100644 --- a/Hadrons/NamedTensor.hpp +++ b/Hadrons/NamedTensor.hpp @@ -159,9 +159,9 @@ using LapEvecs = Grid::Hadrons::EigenPack; class NoiseTensor : public NamedTensor { + public: static const std::string Name__; static const std::array DefaultIndexNames__; - public: // Default constructor (assumes tensor will be loaded from file) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NoiseTensor() : NamedTensor{Name__, DefaultIndexNames__} {} @@ -173,9 +173,9 @@ class NoiseTensor : public NamedTensor class PerambTensor : public NamedTensor { + public: static const std::string Name__; static const std::array DefaultIndexNames__; - public: // Default constructor (assumes tensor will be loaded from file) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PerambTensor() : NamedTensor{Name__, DefaultIndexNames__} {} @@ -185,6 +185,20 @@ class PerambTensor : public NamedTensor : NamedTensor{Name__, DefaultIndexNames__, nT, nVec, LI, nNoise, nT_inv, SI} {} }; +class TimesliceEvals : public NamedTensor +{ + public: + static const std::string Name__; + static const std::array DefaultIndexNames__; + // Default constructor (assumes tensor will be loaded from file) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TimesliceEvals() : NamedTensor{Name__, DefaultIndexNames__} {} + + // Construct a named tensor explicitly specifying size of each dimension + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TimesliceEvals(Eigen::Index nT, Eigen::Index nVec) + : NamedTensor{Name__, DefaultIndexNames__, nT, nVec} {} +}; + END_MODULE_NAMESPACE END_HADRONS_NAMESPACE #endif // Hadrons_NamedTensor_hpp_ diff --git a/documentation/GridXcode/readme.md b/documentation/GridXcode/readme.md index 8cbf9112..11deb6fa 100644 --- a/documentation/GridXcode/readme.md +++ b/documentation/GridXcode/readme.md @@ -39,8 +39,8 @@ These are the environment variables we will define for Grid: Variable | Typical Value | Use --- | --- | --- -`Grid` | `/Users/user_id/src/Grid` | Path to grid source -`GridPre` | `/Users/user_id/bin` | Path to install directory containing grid pre-requisites built from source +`Grid` | `$HOME/src/Grid` | Path to grid source +`GridPre` | `$HOME/.local` | Path to install directory containing grid pre-requisites built from source `GridPkg` | **MacPorts**=`/opt/local`, **Homebrew**=`/usr/local` | Path to package manager install directory Choose either of the following ways to do this, and when you're done, log out and in again. To check these have been set: @@ -54,7 +54,7 @@ Choose either of the following ways to do this, and when you're done, log out an ```apple script do shell script "launchctl setenv Grid $HOME/src/Grid -launchctl setenv GridPre $HOME/bin +launchctl setenv GridPre $HOME/.local launchctl setenv GridPkg /opt/local" ``` @@ -82,7 +82,7 @@ Make the file `environment.plist` in `~/Library/LaunchAgents` with the following sh -c launchctl setenv Grid $HOME/src/Grid -launchctl setenv GridPre $HOME/bin +launchctl setenv GridPre $HOME/.local launchctl setenv GridPkg /opt/local @@ -94,16 +94,18 @@ launchctl setenv GridPkg /opt/local ## 3. Install and build Open MPI -- ***optional*** -Download the latest version of [Open MPI][OMPI] version 3.1 (I used 3.1.3). - -NB: Grid does not have any dependencies on fortran, however many standard scientific packages do, so you may as well download GNU fortran (e.g. MacPorts ``gfortran`` package) and build Open MPI like so: +Download the latest version of [Open MPI][OMPI] version 3.1 (I used 3.1.5) and build it like so: [OMPI]: https://www.open-mpi.org/software/ompi/v3.1/ - ../configure CC=clang CXX=clang++ F77=gfortran FC=gfortran CXXFLAGS=-g --prefix=$GridPre/openmpi + ../configure CC=clang CXX=clang++ CXXFLAGS=-g --prefix=$GridPre/bin make -j 4 all install -(If you don't want to bother with fortran bindings, just don't include the F77 and FC flags) +***Note the `/bin` at the end of the prefix - this is required. As a quirk of the OpenMPI installer, `--prefix` must point to the `bin` subdirectory, with other files installed in `$GridPre/include`, `$GridPre/lib`, `$GridPre/share`, etc.*** + +Grid does not have any dependencies on fortran, however many standard scientific packages do, so you may wish to download GNU fortran (e.g. MacPorts ``gfortran`` package) and add the following to your configure invocation: + + F77=gfortran FC=gfortran ## 4. Install and build Grid pre-requisites @@ -121,17 +123,17 @@ These are the `portname`s for mandatory Grid libraries: * git-flow-avh * gmp +* hdf5 * mpfr and these are the `portname`s for optional Grid libraries: * fftw-3-single -* hdf5 * lapack * doxygen * OpenBLAS -***Please update this list with any packages I've missed! ... and double-check whether OpenBLAS is really for Grid*** +***Please update this list with any packages I've missed! ... and double-check whether OpenBLAS is really for Grid. NB: lapack doesn't seem to work. Should it be scalapack?*** ### 2. [Homebrew][Homebrew] @@ -149,7 +151,7 @@ There isn't currently a port for [C-LIME][C-LIME], so download the source and th [C-LIME]: https://usqcd-software.github.io/c-lime/ "C-language API for Lattice QCD Interchange Message Encapsulation / Large Internet Message Encapsulation" - ../configure --prefix=$GridPre/lime CC=clang + ../configure CC=clang --prefix=$GridPre make -j 4 all install ## 5. Install, Configure and Build Grid @@ -182,19 +184,19 @@ Below are shown the `configure` script invocations for three recommended configu This is the build for every day developing and debugging with Xcode. It uses the Xcode clang c++ compiler, without MPI, and defaults to double-precision. Xcode builds the `Debug` configuration with debug symbols for full debugging: - ../configure --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre/lime --enable-simd=GEN --enable-precision=double CXX=clang++ --prefix=$GridPre/GridDebug --enable-comms=none --enable-doxygen-doc + ../configure CXX=clang++ --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-precision=double --prefix=$GridPre/GridDebug --enable-comms=none #### 2. `Release` Since Grid itself doesn't really have debug configurations, the release build is recommended to be the same as `Debug`, except using single-precision (handy for validation): - ../configure --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre/lime --enable-simd=GEN --enable-precision=single CXX=clang++ --prefix=$GridPre/GridRelease --enable-comms=none --enable-doxygen-doc + ../configure CXX=clang++ --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-precision=single --prefix=$GridPre/GridRelease --enable-comms=none #### 3. `MPIDebug` Debug configuration with MPI: - ../configure --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre/lime --enable-simd=GEN --enable-precision=double CXX=clang++ --prefix=$GridPre/GridMPIDebug --enable-comms=mpi-auto MPICXX=$GridPre/openmpi/bin/mpicxx --enable-doxygen-doc + ../configure CXX=clang++ --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-precision=double --prefix=$GridPre/GridMPIDebug --enable-comms=mpi-auto MPICXX=$GridPre/bin/mpicxx ### 5.3 Build Grid @@ -250,9 +252,9 @@ Obtain a list of header locations required by Grid by running the following from ./grid-config --cxxflags -Output should look similar to: +Output should look similar to (but will likely include duplicates): - -I$GridPre/openmpi/include -I$GridPkg/include -I$GridPre/lime/include -I$GridPkg/include -I$GridPkg/include -I$GridPkg/include -O3 -g -std=c++11 + -I$GridPre/include -I$GridPkg/include -O3 -g -std=c++11 The header locations follow the `-I` switches. You can ignore the other switches, and you can ignore duplicate entries, which just mean that your package manager has installed multiple packages in the same location. @@ -265,13 +267,12 @@ Set HEADER_SEARCH_PATHS to: followed by (***the order is important***) the locations reported by `grid-config --cxxflags`, ignoring duplicates, e.g.: - $GridPre/openmpi/include + $GridPre/include $GridPkg/include - $GridPre/lime/include - + **Note: the easiest way to set this value is to put it all on one line, space separated, and edit the text to the right of `HEADER_SEARCH_PATHS`**, i.e.: - $Grid/build$(CONFIGURATION)/Grid $Grid $GridPre/openmpi/include $GridPkg/include $GridPre/lime/include + $Grid/build$(CONFIGURATION)/Grid $Grid $GridPre/include $GridPkg/include #### LIBRARY_SEARCH_PATHS @@ -279,13 +280,13 @@ Obtain a list of library locations required by Grid by running the following fro ./grid-config --ldflags -Output should look similar to: +Output should look similar to (but will likely include duplicates): - -L$GridPre/openmpi/lib -L$GridPkg/lib -L$GridPre/lime/lib -L$GridPkg/lib -L$GridPkg/lib -L$GridPkg/lib + -L$GridPre/lib -L$GridPkg/lib Paste the output ***with `$Grid/build$(CONFIGURATION)/Grid $Grid/build$(CONFIGURATION)/Hadrons ` prepended*** into `LIBRARY_SEARCH_PATHS`: - $Grid/build$(CONFIGURATION)/Grid $Grid/build$(CONFIGURATION)/Hadrons $GridPre/openmpi/lib $GridPkg/lib $GridPre/lime/lib + $Grid/build$(CONFIGURATION)/Grid $Grid/build$(CONFIGURATION)/Hadrons $GridPre/lib $GridPkg/lib ### 2. Linking @@ -369,7 +370,7 @@ Instead: 3. From a terminal session, locate and run your executable using `mpirun` (*the mangled name of the project build products will not be exactly the same as this example*): - `$GridPre/openmpi/bin/mpirun -np 2 ~/Library/Developer/Xcode/DerivedData/HelloGrid-fiyyuveptaqelbbvllomcgjyvghr/Build/Products/Debug/HelloGrid --grid 4.4.4.8 --mpi 1.1.1.2` + `$GridPre/bin/mpirun -np 2 ~/Library/Developer/Xcode/DerivedData/HelloGrid-fiyyuveptaqelbbvllomcgjyvghr/Build/Products/Debug/HelloGrid --grid 4.4.4.8 --mpi 1.1.1.2` The Xcode debugger will attach to the first process. diff --git a/tests/IO/Test_openqcd_io.cc b/tests/IO/Test_openqcd_io.cc new file mode 100644 index 00000000..83b498c2 --- /dev/null +++ b/tests/IO/Test_openqcd_io.cc @@ -0,0 +1,84 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/io/Test_openqcd_io.cc + +Copyright (C) 2015 - 2020 + +Author: Daniel Richtmann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +#if defined(GRID_COMMS_NONE) +#error This test requires Grid compiled with MPI +#endif + +using namespace Grid; + +int main(int argc, char** argv) { + Grid_init(&argc, &argv); + + auto simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd()); + auto mpi_layout = GridDefaultMpi(); + auto latt_size = GridDefaultLatt(); + + GridCartesian grid(latt_size, simd_layout, mpi_layout); + + GridParallelRNG pRNG(&grid); + + pRNG.SeedFixedIntegers(std::vector({45, 12, 81, 9})); + + LatticeGaugeField Umu_ref(&grid); + LatticeGaugeField Umu_me(&grid); + LatticeGaugeField Umu_diff(&grid); + + FieldMetaData header_ref; + FieldMetaData header_me; + + Umu_ref = Zero(); + Umu_me = Zero(); + + std::string file("/home/daniel/configs/openqcd/test_16x8_pbcn6"); + + if(GridCmdOptionExists(argv, argv + argc, "--config")) { + file = GridCmdOptionPayload(argv, argv + argc, "--config"); + std::cout << "file: " << file << std::endl; + assert(!file.empty()); + } + + OpenQcdIOChromaReference::readConfiguration(Umu_ref, header_ref, file); + OpenQcdIO::readConfiguration(Umu_me, header_me, file); + + std::cout << GridLogMessage << header_ref << std::endl; + std::cout << GridLogMessage << header_me << std::endl; + + Umu_diff = Umu_ref - Umu_me; + + // clang-format off + std::cout << GridLogMessage + << "norm2(Umu_ref) = " << norm2(Umu_ref) + << " norm2(Umu_me) = " << norm2(Umu_me) + << " norm2(Umu_diff) = " << norm2(Umu_diff) << std::endl; + // clang-format on + + Grid_finalize(); +}