diff --git a/.gitignore b/.gitignore index dc59879f..49295fc6 100644 --- a/.gitignore +++ b/.gitignore @@ -123,6 +123,7 @@ make-bin-BUCK.sh ##################### lib/qcd/spin/gamma-gen/*.h lib/qcd/spin/gamma-gen/*.cc +lib/version.h # vs code editor files # ######################## diff --git a/Makefile.am b/Makefile.am index 3a65cf1b..d507bf08 100644 --- a/Makefile.am +++ b/Makefile.am @@ -5,6 +5,10 @@ include $(top_srcdir)/doxygen.inc bin_SCRIPTS=grid-config +BUILT_SOURCES = version.h + +version.h: + echo "`git log -n 1 --format=format:"#define GITHASH \\"%H:%d\\"%n" HEAD`" > $(srcdir)/lib/version.h .PHONY: bench check tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) diff --git a/configure.ac b/configure.ac index 3a6a2960..aced6a9c 100644 --- a/configure.ac +++ b/configure.ac @@ -340,7 +340,7 @@ case ${ac_PRECISION} in esac ###################### Shared memory allocation technique under MPI3 -AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|hugetlbfs], +AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|hugetlbfs|shmnone], [Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=shmopen]) case ${ac_SHM} in @@ -349,6 +349,10 @@ case ${ac_SHM} in AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] ) ;; + shmnone) + AC_DEFINE([GRID_MPI3_SHM_NONE],[1],[GRID_MPI3_SHM_NONE] ) + ;; + hugetlbfs) AC_DEFINE([GRID_MPI3_SHMMMAP],[1],[GRID_MPI3_SHMMMAP] ) ;; diff --git a/lib/algorithms/Algorithms.h b/lib/algorithms/Algorithms.h index b541a3be..72fe9fe0 100644 --- a/lib/algorithms/Algorithms.h +++ b/lib/algorithms/Algorithms.h @@ -39,6 +39,7 @@ Author: Peter Boyle #include #include +#include #include #include #include diff --git a/lib/algorithms/iterative/Deflation.h b/lib/algorithms/iterative/Deflation.h new file mode 100644 index 00000000..b2239c55 --- /dev/null +++ b/lib/algorithms/iterative/Deflation.h @@ -0,0 +1,101 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/algorithms/iterative/Deflation.h + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_DEFLATION_H +#define GRID_DEFLATION_H + +namespace Grid { + +struct ZeroGuesser { +public: + template + void operator()(const Field &src,Field &guess) { guess = Zero(); }; +}; +struct SourceGuesser { +public: + template + void operator()(const Field &src,Field &guess) { guess = src; }; +}; + +//////////////////////////////// +// Fine grid deflation +//////////////////////////////// +template +struct DeflatedGuesser { +private: + const std::vector &evec; + const std::vector &eval; + +public: + + DeflatedGuesser(const std::vector & _evec,const std::vector & _eval) : evec(_evec), eval(_eval) {}; + + void operator()(const Field &src,Field &guess) { + guess = zero; + assert(evec.size()==eval.size()); + auto N = evec.size(); + for (int i=0;i +class LocalCoherenceDeflatedGuesser { +private: + const std::vector &subspace; + const std::vector &evec_coarse; + const std::vector &eval_coarse; +public: + + LocalCoherenceDeflatedGuesser(const std::vector &_subspace, + const std::vector &_evec_coarse, + const std::vector &_eval_coarse) + : subspace(_subspace), + evec_coarse(_evec_coarse), + eval_coarse(_eval_coarse) + { + } + + void operator()(const FineField &src,FineField &guess) { + int N = (int)evec_coarse.size(); + CoarseField src_coarse(evec_coarse[0]._grid); + CoarseField guess_coarse(evec_coarse[0]._grid); guess_coarse = zero; + blockProject(src_coarse,src,subspace); + for (int i=0;i & _v,std::vector& sort_vals, boo basisReorderInPlace(_v,sort_vals,idx); } -// PAB: faster to compute the inner products first then fuse loops. -// If performance critical can improve. 
-template -void basisDeflate(const std::vector &_v,const std::vector& eval,const Field& src_orig,Field& result) { - result = zero; - assert(_v.size()==eval.size()); - int N = (int)_v.size(); - for (int i=0;i class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester { public: + LinearFunction &_HermOp; ImplicitlyRestartedLanczosHermOpTester(LinearFunction &HermOp) : _HermOp(HermOp) { }; int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox) @@ -243,6 +231,7 @@ class ImplicitlyRestartedLanczos { ///////////////////////// public: + ////////////////////////////////////////////////////////////////// // PAB: ////////////////////////////////////////////////////////////////// diff --git a/lib/algorithms/iterative/LocalCoherenceLanczos.h b/lib/algorithms/iterative/LocalCoherenceLanczos.h index d5d1bbc2..b8348c0c 100644 --- a/lib/algorithms/iterative/LocalCoherenceLanczos.h +++ b/lib/algorithms/iterative/LocalCoherenceLanczos.h @@ -28,7 +28,10 @@ Author: paboyle /* END LEGAL */ #ifndef GRID_LOCAL_COHERENCE_IRL_H #define GRID_LOCAL_COHERENCE_IRL_H + namespace Grid { + + struct LanczosParams : Serializable { public: GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams, @@ -70,21 +73,24 @@ public: typedef Lattice FineField; LinearOperatorBase &_Linop; - Aggregation &_Aggregate; + std::vector &subspace; - ProjectedHermOp(LinearOperatorBase& linop, Aggregation &aggregate) : - _Linop(linop), - _Aggregate(aggregate) { }; + ProjectedHermOp(LinearOperatorBase& linop, std::vector & _subspace) : + _Linop(linop), subspace(_subspace) + { + assert(subspace.size() >0); + }; void operator()(const CoarseField& in, CoarseField& out) { + GridBase *FineGrid = subspace[0]._grid; + int checkerboard = subspace[0].checkerboard; + + FineField fin (FineGrid); fin.checkerboard= checkerboard; + FineField fout(FineGrid); fout.checkerboard = checkerboard; - GridBase *FineGrid = _Aggregate.FineGrid; - FineField fin(FineGrid); - FineField fout(FineGrid); - - _Aggregate.PromoteFromSubspace(in,fin); std::cout< & _poly; LinearOperatorBase &_Linop; - Aggregation &_Aggregate; + std::vector &subspace; - ProjectedFunctionHermOp(OperatorFunction & poly,LinearOperatorBase& linop, - Aggregation &aggregate) : + ProjectedFunctionHermOp(OperatorFunction & poly, + LinearOperatorBase& linop, + std::vector & _subspace) : _poly(poly), _Linop(linop), - _Aggregate(aggregate) { }; + subspace(_subspace) + { }; void operator()(const CoarseField& in, CoarseField& out) { - - GridBase *FineGrid = _Aggregate.FineGrid; - - FineField fin(FineGrid) ;fin.checkerboard =_Aggregate.checkerboard; - FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard; - _Aggregate.PromoteFromSubspace(in,fin); std::cout< & _Poly; OperatorFunction & _smoother; LinearOperatorBase &_Linop; - Aggregation &_Aggregate; - RealD _coarse_relax_tol; + RealD _coarse_relax_tol; + std::vector &_subspace; + ImplicitlyRestartedLanczosSmoothedTester(LinearFunction &Poly, OperatorFunction &smoother, LinearOperatorBase &Linop, - Aggregation &Aggregate, + std::vector &subspace, RealD coarse_relax_tol=5.0e3) - : _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly), _coarse_relax_tol(coarse_relax_tol) { }; + : _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace), + _coarse_relax_tol(coarse_relax_tol) + { }; int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) { CoarseField v(B); RealD eval_poly = eval; + // Apply operator _Poly(B,v); @@ -168,14 +181,13 @@ class 
ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanc } int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) { - GridBase *FineGrid = _Aggregate.FineGrid; - - int checkerboard = _Aggregate.checkerboard; - + GridBase *FineGrid = _subspace[0]._grid; + int checkerboard = _subspace[0].checkerboard; FineField fB(FineGrid);fB.checkerboard =checkerboard; FineField fv(FineGrid);fv.checkerboard =checkerboard; - _Aggregate.PromoteFromSubspace(B,fv); + blockPromote(B,fv,_subspace); + _smoother(_Linop,fv,fB); RealD eval_poly = eval; @@ -217,27 +229,65 @@ protected: int _checkerboard; LinearOperatorBase & _FineOp; - // FIXME replace Aggregation with vector of fine; the code reuse is too small for - // the hassle and complexity of cross coupling. - Aggregation _Aggregate; - std::vector evals_fine; - std::vector evals_coarse; - std::vector evec_coarse; + std::vector &evals_fine; + std::vector &evals_coarse; + std::vector &subspace; + std::vector &evec_coarse; + +private: + std::vector _evals_fine; + std::vector _evals_coarse; + std::vector _subspace; + std::vector _evec_coarse; + public: + LocalCoherenceLanczos(GridBase *FineGrid, - GridBase *CoarseGrid, - LinearOperatorBase &FineOp, - int checkerboard) : + GridBase *CoarseGrid, + LinearOperatorBase &FineOp, + int checkerboard) : _CoarseGrid(CoarseGrid), _FineGrid(FineGrid), - _Aggregate(CoarseGrid,FineGrid,checkerboard), _FineOp(FineOp), - _checkerboard(checkerboard) + _checkerboard(checkerboard), + evals_fine (_evals_fine), + evals_coarse(_evals_coarse), + subspace (_subspace), + evec_coarse(_evec_coarse) { evals_fine.resize(0); evals_coarse.resize(0); }; - void Orthogonalise(void ) { _Aggregate.Orthogonalise(); } + ////////////////////////////////////////////////////////////////////////// + // Alternate constructor, external storage for use by Hadrons module + ////////////////////////////////////////////////////////////////////////// + LocalCoherenceLanczos(GridBase *FineGrid, + GridBase *CoarseGrid, + LinearOperatorBase &FineOp, + int checkerboard, + std::vector &ext_subspace, + std::vector &ext_coarse, + std::vector &ext_eval_fine, + std::vector &ext_eval_coarse + ) : + _CoarseGrid(CoarseGrid), + _FineGrid(FineGrid), + _FineOp(FineOp), + _checkerboard(checkerboard), + evals_fine (ext_eval_fine), + evals_coarse(ext_eval_coarse), + subspace (ext_subspace), + evec_coarse (ext_coarse) + { + evals_fine.resize(0); + evals_coarse.resize(0); + }; + + void Orthogonalise(void ) { + CoarseScalar InnerProd(_CoarseGrid); + blockOrthogonalise(InnerProd,subspace);std::cout << GridLogMessage <<" Gram-Schmidt pass 1"< static RealD normalise(T& v) { @@ -246,43 +296,44 @@ public: v = v * (1.0/nn); return nn; } - + /* void fakeFine(void) { int Nk = nbasis; - _Aggregate.subspace.resize(Nk,_FineGrid); - _Aggregate.subspace[0]=1.0; - _Aggregate.subspace[0].checkerboard=_checkerboard; - normalise(_Aggregate.subspace[0]); + subspace.resize(Nk,_FineGrid); + subspace[0]=1.0; + subspace[0].checkerboard=_checkerboard; + normalise(subspace[0]); PlainHermOp Op(_FineOp); for(int k=1;k Op(_FineOp); ImplicitlyRestartedLanczosHermOpTester SimpleTester(Op); for(int k=0;k ChebySmooth(cheby_smooth); - ProjectedFunctionHermOp ChebyOp (ChebySmooth,_FineOp,_Aggregate); - ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); + ProjectedFunctionHermOp ChebyOp (ChebySmooth,_FineOp,_subspace); + ImplicitlyRestartedLanczosSmoothedTester 
ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax); for(int k=0;k Op(_FineOp); evals_fine.resize(Nm); - _Aggregate.subspace.resize(Nm,_FineGrid); + subspace.resize(Nm,_FineGrid); ImplicitlyRestartedLanczos IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard; int Nconv; - IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false); + IRL.calc(evals_fine,subspace,src,Nconv,false); // Shrink down to number saved assert(Nstop>=nbasis); assert(Nconv>=nbasis); evals_fine.resize(nbasis); - _Aggregate.subspace.resize(nbasis,_FineGrid); + subspace.resize(nbasis,_FineGrid); } void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax, int Nstop, int Nk, int Nm,RealD resid, RealD MaxIt, RealD betastp, int MinRes) { Chebyshev Cheby(cheby_op); - ProjectedHermOp Op(_FineOp,_Aggregate); - ProjectedFunctionHermOp ChebyOp (Cheby,_FineOp,_Aggregate); + ProjectedHermOp Op(_FineOp,_subspace); + ProjectedFunctionHermOp ChebyOp (Cheby,_FineOp,_subspace); ////////////////////////////////////////////////////////////////////////////////////////////////// // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL ////////////////////////////////////////////////////////////////////////////////////////////////// Chebyshev ChebySmooth(cheby_smooth); - ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); + ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_subspace,relax); evals_coarse.resize(Nm); evec_coarse.resize(Nm,_CoarseGrid); diff --git a/lib/algorithms/iterative/SchurRedBlack.h b/lib/algorithms/iterative/SchurRedBlack.h index 5f5a8b66..fac2030f 100644 --- a/lib/algorithms/iterative/SchurRedBlack.h +++ b/lib/algorithms/iterative/SchurRedBlack.h @@ -107,7 +107,12 @@ namespace Grid { }; template - void operator() (Matrix & _Matrix,const Field &in, Field &out){ + void operator() (Matrix & _Matrix,const Field &in, Field &out){ + ZeroGuesser guess; + (*this)(_Matrix,in,out,guess); + } + template + void operator() (Matrix & _Matrix,const Field &in, Field &out, Guesser &guess){ // FIXME CGdiagonalMee not implemented virtual function // FIXME use CBfactorise to control schur decomp @@ -129,7 +134,6 @@ namespace Grid { pickCheckerboard(Odd ,src_o,in); pickCheckerboard(Even,sol_e,out); pickCheckerboard(Odd ,sol_o,out); - std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve checkerboards picked" < - void operator() (Matrix & _Matrix,const Field &in, Field &out){ + void operator() (Matrix & _Matrix,const Field &in, Field &out){ + ZeroGuesser guess; + (*this)(_Matrix,in,out,guess); + } + template + void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){ // FIXME CGdiagonalMee not implemented virtual function // FIXME use CBfactorise to control schur decomp @@ -225,6 +235,7 @@ namespace Grid { // Call the red-black solver ////////////////////////////////////////////////////////////// std::cout< - void operator() (Matrix & _Matrix,const Field &in, Field &out){ + void operator() (Matrix & _Matrix,const Field &in, Field &out){ + ZeroGuesser guess; + (*this)(_Matrix,in,out,guess); + } + template + void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){ // FIXME CGdiagonalMee not implemented virtual function // FIXME use CBfactorise to control schur decomp @@ -305,6 +321,7 @@ namespace Grid { ////////////////////////////////////////////////////////////// 
std::cout< - void operator() (Matrix & _Matrix,const Field &in, Field &out){ + void operator() (Matrix & _Matrix,const Field &in, Field &out){ + ZeroGuesser guess; + (*this)(_Matrix,in,out,guess); + } + template + void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){ // FIXME CGdiagonalMee not implemented virtual function // FIXME use CBfactorise to control schur decomp @@ -385,6 +407,7 @@ namespace Grid { std::cout< 1 && provided != MPI_THREAD_MULTIPLE) ) + assert(0); } Grid_quiesce_nodes(); + // Never clean up as done once. MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); GlobalSharedMemory::Init(communicator_world); @@ -85,9 +89,17 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &c CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { MPI_Comm optimal_comm; - GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm); // Remap using the shared memory optimising routine + //////////////////////////////////////////////////// + // Remap using the shared memory optimising routine + // The remap creates a comm which must be freed + //////////////////////////////////////////////////// + GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm); InitFromMPICommunicator(processors,optimal_comm); SetCommunicator(optimal_comm); + /////////////////////////////////////////////////// + // Free the temp communicator + /////////////////////////////////////////////////// + MPI_Comm_free(&optimal_comm); } ////////////////////////////////// @@ -183,8 +195,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors, } else { srank = 0; - comm_split = parent.communicator; - // std::cout << " Inherited communicator " < &processors, // Take the right SHM buffers ////////////////////////////////////////////////////////////////////////////////////////////////////// SetCommunicator(comm_split); + + /////////////////////////////////////////////// + // Free the temp communicator + /////////////////////////////////////////////// + MPI_Comm_free(&comm_split); if(0){ std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl; @@ -210,6 +227,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors, void CartesianCommunicator::InitFromMPICommunicator(const std::vector &processors, MPI_Comm communicator_base) { + //////////////////////////////////////////////////// + // Creates communicator, and the communicator_halo + //////////////////////////////////////////////////// _ndimension = processors.size(); _processor_coor.resize(_ndimension); diff --git a/lib/communicator/SharedMemory.h b/lib/communicator/SharedMemory.h index 0f647dc6..9f6b1a25 100644 --- a/lib/communicator/SharedMemory.h +++ b/lib/communicator/SharedMemory.h @@ -133,6 +133,7 @@ class SharedMemory public: SharedMemory() {}; + ~SharedMemory(); /////////////////////////////////////////////////////////////////////////////////////// // set the buffers & sizes /////////////////////////////////////////////////////////////////////////////////////// diff --git a/lib/communicator/SharedMemoryMPI.cc b/lib/communicator/SharedMemoryMPI.cc index 2a62b7ac..d534a6d9 100644 --- a/lib/communicator/SharedMemoryMPI.cc +++ b/lib/communicator/SharedMemoryMPI.cc @@ -27,6 +27,7 @@ Author: Peter Boyle /* END LEGAL */ #include +#include namespace Grid { @@ -226,6 +227,48 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) }; #endif // MMAP +#ifdef GRID_MPI3_SHM_NONE +void 
GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) +{ + std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<pw_name,WorldNode,r); shm_unlink(shm_name); int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666); @@ -281,7 +325,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) size_t size = bytes ; - sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldNode,r); + struct passwd *pw = getpwuid (getuid()); + sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r); int fd=shm_open(shm_name,O_RDWR,0666); if ( fd<0 ) { perror("failed shm_open"); assert(0); } @@ -399,5 +444,12 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p) return (void *) remote; } } +SharedMemory::~SharedMemory() +{ + int MPI_is_finalised; MPI_Finalized(&MPI_is_finalised); + if ( !MPI_is_finalised ) { + MPI_Comm_free(&ShmComm); + } +}; } diff --git a/lib/communicator/SharedMemoryNone.cc b/lib/communicator/SharedMemoryNone.cc index 7feed7e4..a23e3c1c 100644 --- a/lib/communicator/SharedMemoryNone.cc +++ b/lib/communicator/SharedMemoryNone.cc @@ -122,5 +122,7 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p) { return NULL; } +SharedMemory::~SharedMemory() +{}; } diff --git a/lib/lattice/Lattice_comparison_utils.h b/lib/lattice/Lattice_comparison_utils.h index 14a19383..9580d4d2 100644 --- a/lib/lattice/Lattice_comparison_utils.h +++ b/lib/lattice/Lattice_comparison_utils.h @@ -198,7 +198,7 @@ namespace Grid { typedef typename vsimd::scalar_type scalar;\ return Comparison(functor(),lhs,rhs);\ }\ - template\ + template = 0>\ inline vInteger operator op(const iScalar &lhs,const iScalar &rhs)\ { \ return lhs._internal op rhs._internal; \ diff --git a/lib/lattice/Lattice_coordinate.h b/lib/lattice/Lattice_coordinate.h index 2e20ba17..19eceba8 100644 --- a/lib/lattice/Lattice_coordinate.h +++ b/lib/lattice/Lattice_coordinate.h @@ -52,23 +52,5 @@ namespace Grid { } }; - // LatticeCoordinate(); - // FIXME for debug; deprecate this; made obscelete by - template void lex_sites(Lattice &l){ - Real *v_ptr = (Real *)&l._odata[0]; - size_t o_len = l._grid->oSites(); - size_t v_len = sizeof(vobj)/sizeof(vRealF); - size_t vec_len = vRealF::Nsimd(); - - for(int i=0;i &in, Lattice &out) template void precisionChange(Lattice &out, const Lattice &in){ assert(out._grid->Nd() == in._grid->Nd()); + assert(out._grid->FullDimensions() == in._grid->FullDimensions()); out.checkerboard = in.checkerboard; GridBase *in_grid=in._grid; GridBase *out_grid = out._grid; diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index b40a75af..39acf0e0 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -91,7 +91,7 @@ class BinaryIO { typedef typename vobj::scalar_object sobj; GridBase *grid = lat._grid; - int lsites = grid->lSites(); + uint64_t lsites = grid->lSites(); std::vector scalardata(lsites); unvectorizeToLexOrdArray(scalardata,lat); @@ -160,7 +160,9 @@ class BinaryIO { /* * Scidac csum is rather more heavyweight + * FIXME -- 128^3 x 256 x 16 will overflow. 
*/ + int global_site; Lexicographic::CoorFromIndex(coor,local_site,local_vol); @@ -261,7 +263,7 @@ class BinaryIO { GridBase *grid, std::vector &iodata, std::string file, - Integer offset, + uint64_t offset, const std::string &format, int control, uint32_t &nersc_csum, uint32_t &scidac_csuma, @@ -523,7 +525,7 @@ class BinaryIO { static inline void readLatticeObject(Lattice &Umu, std::string file, munger munge, - Integer offset, + uint64_t offset, const std::string &format, uint32_t &nersc_csum, uint32_t &scidac_csuma, @@ -533,7 +535,7 @@ class BinaryIO { typedef typename vobj::Realified::scalar_type word; word w=0; GridBase *grid = Umu._grid; - int lsites = grid->lSites(); + uint64_t lsites = grid->lSites(); std::vector scalardata(lsites); std::vector iodata(lsites); // Munge, checksum, byte order in here @@ -544,7 +546,7 @@ class BinaryIO { GridStopWatch timer; timer.Start(); - parallel_for(int x=0;xBarrier(); @@ -560,7 +562,7 @@ class BinaryIO { static inline void writeLatticeObject(Lattice &Umu, std::string file, munger munge, - Integer offset, + uint64_t offset, const std::string &format, uint32_t &nersc_csum, uint32_t &scidac_csuma, @@ -569,7 +571,7 @@ class BinaryIO { typedef typename vobj::scalar_object sobj; typedef typename vobj::Realified::scalar_type word; word w=0; GridBase *grid = Umu._grid; - int lsites = grid->lSites(); + uint64_t lsites = grid->lSites(); std::vector scalardata(lsites); std::vector iodata(lsites); // Munge, checksum, byte order in here @@ -580,7 +582,7 @@ class BinaryIO { GridStopWatch timer; timer.Start(); unvectorizeToLexOrdArray(scalardata,Umu); - parallel_for(int x=0;xBarrier(); timer.Stop(); @@ -597,7 +599,7 @@ class BinaryIO { static inline void readRNG(GridSerialRNG &serial, GridParallelRNG ¶llel, std::string file, - Integer offset, + uint64_t offset, uint32_t &nersc_csum, uint32_t &scidac_csuma, uint32_t &scidac_csumb) @@ -610,8 +612,8 @@ class BinaryIO { std::string format = "IEEE32BIG"; GridBase *grid = parallel._grid; - int gsites = grid->gSites(); - int lsites = grid->lSites(); + uint64_t gsites = grid->gSites(); + uint64_t lsites = grid->lSites(); uint32_t nersc_csum_tmp = 0; uint32_t scidac_csuma_tmp = 0; @@ -626,7 +628,7 @@ class BinaryIO { nersc_csum,scidac_csuma,scidac_csumb); timer.Start(); - parallel_for(int lidx=0;lidx tmp(RngStateCount); std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin()); parallel.SetState(tmp,lidx); @@ -659,7 +661,7 @@ class BinaryIO { static inline void writeRNG(GridSerialRNG &serial, GridParallelRNG ¶llel, std::string file, - Integer offset, + uint64_t offset, uint32_t &nersc_csum, uint32_t &scidac_csuma, uint32_t &scidac_csumb) @@ -670,8 +672,8 @@ class BinaryIO { typedef std::array RNGstate; GridBase *grid = parallel._grid; - int gsites = grid->gSites(); - int lsites = grid->lSites(); + uint64_t gsites = grid->gSites(); + uint64_t lsites = grid->lSites(); uint32_t nersc_csum_tmp; uint32_t scidac_csuma_tmp; @@ -684,7 +686,7 @@ class BinaryIO { timer.Start(); std::vector iodata(lsites); - parallel_for(int lidx=0;lidx tmp(RngStateCount); parallel.GetState(tmp,lidx); std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin()); diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index b86e250f..b81d1e43 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -337,6 +337,20 @@ class GridLimeWriter : public BinaryIO { template void writeLimeLatticeBinaryObject(Lattice &field,std::string record_name) { + //////////////////////////////////////////////////////////////////// + // NB: FILE and 
iostream are jointly writing disjoint sequences in the + // same file through different file handles (integer units). + // + // These are both buffered, so why I think this code is right is as follows. + // + // i) write record header to FILE *File, telegraphing the size; flush + // ii) ftello reads the offset from FILE *File . + // iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk. + // Closes iostream and flushes. + // iv) fseek on FILE * to end of this disjoint section. + // v) Continue writing scidac record. + //////////////////////////////////////////////////////////////////// + //////////////////////////////////////////// // Create record header //////////////////////////////////////////// @@ -350,25 +364,24 @@ class GridLimeWriter : public BinaryIO { // std::cout << "W Gsites " <_gsites<(); BinarySimpleMunger munge; - BinaryIO::writeLatticeObject(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); - // fseek(File,0,SEEK_END); offset = ftello(File);std::cout << " offset now "<(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb); + + /////////////////////////////////////////// + // Wind forward and close the record + /////////////////////////////////////////// + fseek(File,0,SEEK_END); + uint64_t offset2 = ftello(File); // std::cout << " now at offset "<=0); //////////////////////////////////////// @@ -568,7 +581,6 @@ class IldgWriter : public ScidacWriter { writeLimeIldgLFN(header.ildg_lfn); // rec writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA)); // Closes message with checksum // limeDestroyWriter(LimeW); - fclose(File); } }; diff --git a/lib/parallelIO/NerscIO.h b/lib/parallelIO/NerscIO.h index 786839f2..e2c2bc39 100644 --- a/lib/parallelIO/NerscIO.h +++ b/lib/parallelIO/NerscIO.h @@ -57,7 +57,7 @@ namespace Grid { // for the header-reader static inline int readHeader(std::string file,GridBase *grid, FieldMetaData &field) { - int offset=0; + uint64_t offset=0; std::map header; std::string line; @@ -139,7 +139,7 @@ namespace Grid { typedef Lattice > GaugeField; GridBase *grid = Umu._grid; - int offset = readHeader(file,Umu._grid,header); + uint64_t offset = readHeader(file,Umu._grid,header); FieldMetaData clone(header); @@ -236,7 +236,7 @@ namespace Grid { GaugeStatistics(Umu,header); MachineCharacteristics(header); - int offset; + uint64_t offset; truncate(file); @@ -278,7 +278,7 @@ namespace Grid { header.plaquette=0.0; MachineCharacteristics(header); - int offset; + uint64_t offset; #ifdef RNG_RANLUX header.floating_point = std::string("UINT64"); @@ -313,7 +313,7 @@ namespace Grid { GridBase *grid = parallel._grid; - int offset = readHeader(file,grid,header); + uint64_t offset = readHeader(file,grid,header); FieldMetaData clone(header); diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc index eace6484..e053b98c 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.cc +++ b/lib/qcd/action/fermion/CayleyFermion5D.cc @@ -73,7 +73,7 @@ void CayleyFermion5D::DminusDag(const FermionField &psi, FermionField &chi this->DW(psi,tmp_f,DaggerYes); for(int s=0;s { RealD factor = 0.5 * beta / RealD(Nc); - //GaugeLinkField Umu(U._grid); + GaugeLinkField Umu(U._grid); GaugeLinkField dSdU_mu(U._grid); for (int mu = 0; mu < Nd; mu++) { - //Umu = PeekIndex(U, mu); + Umu = PeekIndex(U, mu); // Staple in direction mu - //WilsonLoops::Staple(dSdU_mu, U, mu); - //dSdU_mu = Ta(Umu * dSdU_mu) * factor; - - - WilsonLoops::StapleMult(dSdU_mu, U, mu); - 
dSdU_mu = Ta(dSdU_mu) * factor; + WilsonLoops::Staple(dSdU_mu, U, mu); + dSdU_mu = Ta(Umu * dSdU_mu) * factor; PokeIndex(dSdU, dSdU_mu, mu); } diff --git a/lib/qcd/utils/WilsonLoops.h b/lib/qcd/utils/WilsonLoops.h index cdd76ecc..6cf34e0c 100644 --- a/lib/qcd/utils/WilsonLoops.h +++ b/lib/qcd/utils/WilsonLoops.h @@ -212,6 +212,7 @@ public: // For the force term +/* static void StapleMult(GaugeMat &staple, const GaugeLorentz &Umu, int mu) { GridBase *grid = Umu._grid; std::vector U(Nd, grid); @@ -225,7 +226,7 @@ static void StapleMult(GaugeMat &staple, const GaugeLorentz &Umu, int mu) { for (int nu = 0; nu < Nd; nu++) { if (nu != mu) { - // this is ~10% faster than the Staple + // this is ~10% faster than the Staple -- PAB: so what it gives the WRONG answers for other BC's! tmp1 = Cshift(U[nu], mu, 1); tmp2 = Cshift(U[mu], nu, 1); staple += tmp1* adj(U[nu]*tmp2); @@ -235,7 +236,7 @@ static void StapleMult(GaugeMat &staple, const GaugeLorentz &Umu, int mu) { } staple = U[mu]*staple; } - +*/ ////////////////////////////////////////////////// // the sum over all staples on each site ////////////////////////////////////////////////// diff --git a/lib/serialisation/BaseIO.h b/lib/serialisation/BaseIO.h index 24e1cec7..c9b3fb9e 100644 --- a/lib/serialisation/BaseIO.h +++ b/lib/serialisation/BaseIO.h @@ -31,113 +31,10 @@ Author: Guido Cossu #define GRID_SERIALISATION_ABSTRACT_READER_H #include +#include +#include namespace Grid { - // Vector IO utilities /////////////////////////////////////////////////////// - // helper function to read space-separated values - template - std::vector strToVec(const std::string s) - { - std::istringstream sstr(s); - T buf; - std::vector v; - - while(!sstr.eof()) - { - sstr >> buf; - v.push_back(buf); - } - - return v; - } - - // output to streams for vectors - template < class T > - inline std::ostream & operator<<(std::ostream &os, const std::vector &v) - { - os << "["; - for (auto &x: v) - { - os << x << " "; - } - if (v.size() > 0) - { - os << "\b"; - } - os << "]"; - - return os; - } - - // Vector element trait ////////////////////////////////////////////////////// - template - struct element - { - typedef T type; - static constexpr bool is_number = false; - }; - - template - struct element> - { - typedef typename element::type type; - static constexpr bool is_number = std::is_arithmetic::value - or is_complex::value - or element::is_number; - }; - - // Vector flattening utility class //////////////////////////////////////////// - // Class to flatten a multidimensional std::vector - template - class Flatten - { - public: - typedef typename element::type Element; - public: - explicit Flatten(const V &vector); - const V & getVector(void); - const std::vector & getFlatVector(void); - const std::vector & getDim(void); - private: - void accumulate(const Element &e); - template - void accumulate(const W &v); - void accumulateDim(const Element &e); - template - void accumulateDim(const W &v); - private: - const V &vector_; - std::vector flatVector_; - std::vector dim_; - }; - - // Class to reconstruct a multidimensional std::vector - template - class Reconstruct - { - public: - typedef typename element::type Element; - public: - Reconstruct(const std::vector &flatVector, - const std::vector &dim); - const V & getVector(void); - const std::vector & getFlatVector(void); - const std::vector & getDim(void); - private: - void fill(std::vector &v); - template - void fill(W &v); - void resize(std::vector &v, const unsigned int dim); - template - void resize(W &v, const 
unsigned int dim); - private: - V vector_; - const std::vector &flatVector_; - std::vector dim_; - size_t ind_{0}; - unsigned int dimInd_{0}; - }; - // Pair IO utilities ///////////////////////////////////////////////////////// // helper function to parse input in the format "" template @@ -151,15 +48,15 @@ namespace Grid { do { is.get(c); - } while (c != '<' && !is.eof()); - if (c == '<') + } while (c != '(' && !is.eof()); + if (c == '(') { int start = is.tellg(); do { is.get(c); - } while (c != '>' && !is.eof()); - if (c == '>') + } while (c != ')' && !is.eof()); + if (c == ')') { int end = is.tellg(); int psize = end - start - 1; @@ -182,7 +79,7 @@ namespace Grid { template inline std::ostream & operator<<(std::ostream &os, const std::pair &p) { - os << "<" << p.first << " " << p.second << ">"; + os << "(" << p.first << " " << p.second << ")"; return os; } @@ -205,6 +102,12 @@ namespace Grid { template typename std::enable_if::value, void>::type write(const std::string& s, const U &output); + template + void write(const std::string &s, const iScalar &output); + template + void write(const std::string &s, const iVector &output); + template + void write(const std::string &s, const iMatrix &output); private: T *upcast; }; @@ -224,6 +127,12 @@ namespace Grid { template typename std::enable_if::value, void>::type read(const std::string& s, U &output); + template + void read(const std::string &s, iScalar &output); + template + void read(const std::string &s, iVector &output); + template + void read(const std::string &s, iMatrix &output); protected: template void fromString(U &output, const std::string &s); @@ -237,203 +146,9 @@ namespace Grid { }; template struct isWriter { static const bool value = false; - }; - - - - // Generic writer interface - // serializable base class - class Serializable - { - public: - template - static inline void write(Writer &WR,const std::string &s, - const Serializable &obj) - {} - - template - static inline void read(Reader &RD,const std::string &s, - Serializable &obj) - {} - - friend inline std::ostream & operator<<(std::ostream &os, - const Serializable &obj) - { - return os; - } }; - - // Flatten class template implementation ///////////////////////////////////// - template - void Flatten::accumulate(const Element &e) - { - flatVector_.push_back(e); - } - - template - template - void Flatten::accumulate(const W &v) - { - for (auto &e: v) - { - accumulate(e); - } - } - - template - void Flatten::accumulateDim(const Element &e) {}; - - template - template - void Flatten::accumulateDim(const W &v) - { - dim_.push_back(v.size()); - accumulateDim(v[0]); - } - - template - Flatten::Flatten(const V &vector) - : vector_(vector) - { - accumulate(vector_); - accumulateDim(vector_); - } - - template - const V & Flatten::getVector(void) - { - return vector_; - } - - template - const std::vector::Element> & - Flatten::getFlatVector(void) - { - return flatVector_; - } - - template - const std::vector & Flatten::getDim(void) - { - return dim_; - } - - // Reconstruct class template implementation ///////////////////////////////// - template - void Reconstruct::fill(std::vector &v) - { - for (auto &e: v) - { - e = flatVector_[ind_++]; - } - } - - template - template - void Reconstruct::fill(W &v) - { - for (auto &e: v) - { - fill(e); - } - } - - template - void Reconstruct::resize(std::vector &v, const unsigned int dim) - { - v.resize(dim_[dim]); - } - - template - template - void Reconstruct::resize(W &v, const unsigned int dim) - { - v.resize(dim_[dim]); - for (auto &e: 
v) - { - resize(e, dim + 1); - } - } - - template - Reconstruct::Reconstruct(const std::vector &flatVector, - const std::vector &dim) - : flatVector_(flatVector) - , dim_(dim) - { - resize(vector_, 0); - fill(vector_); - } - - template - const V & Reconstruct::getVector(void) - { - return vector_; - } - - template - const std::vector::Element> & - Reconstruct::getFlatVector(void) - { - return flatVector_; - } - - template - const std::vector & Reconstruct::getDim(void) - { - return dim_; - } - - // Generic writer interface ////////////////////////////////////////////////// - template - inline void push(Writer &w, const std::string &s) { - w.push(s); - } - - template - inline void push(Writer &w, const char *s) - { - w.push(std::string(s)); - } - - template - inline void pop(Writer &w) - { - w.pop(); - } - - template - inline void write(Writer &w, const std::string& s, const U &output) - { - w.write(s, output); - } - - // Generic reader interface - template - inline bool push(Reader &r, const std::string &s) - { - return r.push(s); - } - - template - inline bool push(Reader &r, const char *s) - { - return r.push(std::string(s)); - } - - template - inline void pop(Reader &r) - { - r.pop(); - } - - template - inline void read(Reader &r, const std::string &s, U &output) - { - r.read(s, output); - } - - // Writer template implementation //////////////////////////////////////////// + + // Writer template implementation template Writer::Writer(void) { @@ -467,6 +182,27 @@ namespace Grid { { upcast->writeDefault(s, output); } + + template + template + void Writer::write(const std::string &s, const iScalar &output) + { + upcast->writeDefault(s, tensorToVec(output)); + } + + template + template + void Writer::write(const std::string &s, const iVector &output) + { + upcast->writeDefault(s, tensorToVec(output)); + } + + template + template + void Writer::write(const std::string &s, const iMatrix &output) + { + upcast->writeDefault(s, tensorToVec(output)); + } // Reader template implementation template @@ -502,7 +238,37 @@ namespace Grid { { upcast->readDefault(s, output); } + + template + template + void Reader::read(const std::string &s, iScalar &output) + { + typename TensorToVec>::type v; + + upcast->readDefault(s, v); + vecToTensor(output, v); + } + + template + template + void Reader::read(const std::string &s, iVector &output) + { + typename TensorToVec>::type v; + + upcast->readDefault(s, v); + vecToTensor(output, v); + } + template + template + void Reader::read(const std::string &s, iMatrix &output) + { + typename TensorToVec>::type v; + + upcast->readDefault(s, v); + vecToTensor(output, v); + } + template template void Reader::fromString(U &output, const std::string &s) @@ -521,6 +287,76 @@ namespace Grid { abort(); } } + + // serializable base class /////////////////////////////////////////////////// + class Serializable + { + public: + template + static inline void write(Writer &WR,const std::string &s, + const Serializable &obj) + {} + + template + static inline void read(Reader &RD,const std::string &s, + Serializable &obj) + {} + + friend inline std::ostream & operator<<(std::ostream &os, + const Serializable &obj) + { + return os; + } + }; + + // Generic writer interface ////////////////////////////////////////////////// + template + inline void push(Writer &w, const std::string &s) { + w.push(s); + } + + template + inline void push(Writer &w, const char *s) + { + w.push(std::string(s)); + } + + template + inline void pop(Writer &w) + { + w.pop(); + } + + template + inline void 
write(Writer &w, const std::string& s, const U &output) + { + w.write(s, output); + } + + // Generic reader interface ////////////////////////////////////////////////// + template + inline bool push(Reader &r, const std::string &s) + { + return r.push(s); + } + + template + inline bool push(Reader &r, const char *s) + { + return r.push(std::string(s)); + } + + template + inline void pop(Reader &r) + { + r.pop(); + } + + template + inline void read(Reader &r, const std::string &s, U &output) + { + r.read(s, output); + } } #endif diff --git a/lib/serialisation/Hdf5IO.h b/lib/serialisation/Hdf5IO.h index 94ad9736..12625ab8 100644 --- a/lib/serialisation/Hdf5IO.h +++ b/lib/serialisation/Hdf5IO.h @@ -5,6 +5,7 @@ #include #include #include +#include #include "Hdf5Type.h" #ifndef H5_NO_NAMESPACE diff --git a/lib/serialisation/VectorUtils.h b/lib/serialisation/VectorUtils.h new file mode 100644 index 00000000..f5c76b84 --- /dev/null +++ b/lib/serialisation/VectorUtils.h @@ -0,0 +1,336 @@ +#ifndef GRID_SERIALISATION_VECTORUTILS_H +#define GRID_SERIALISATION_VECTORUTILS_H + +#include +#include + +namespace Grid { + // Grid scalar tensors to nested std::vectors ////////////////////////////////// + template + struct TensorToVec + { + typedef T type; + }; + + template + struct TensorToVec> + { + typedef typename TensorToVec::type type; + }; + + template + struct TensorToVec> + { + typedef typename std::vector::type> type; + }; + + template + struct TensorToVec> + { + typedef typename std::vector::type>> type; + }; + + template + typename TensorToVec::type tensorToVec(const T &t) + { + return t; + } + + template + typename TensorToVec>::type tensorToVec(const iScalar& t) + { + return tensorToVec(t._internal); + } + + template + typename TensorToVec>::type tensorToVec(const iVector& t) + { + typename TensorToVec>::type v; + + v.resize(N); + for (unsigned int i = 0; i < N; i++) + { + v[i] = tensorToVec(t._internal[i]); + } + + return v; + } + + template + typename TensorToVec>::type tensorToVec(const iMatrix& t) + { + typename TensorToVec>::type v; + + v.resize(N); + for (unsigned int i = 0; i < N; i++) + { + v[i].resize(N); + for (unsigned int j = 0; j < N; j++) + { + v[i][j] = tensorToVec(t._internal[i][j]); + } + } + + return v; + } + + template + void vecToTensor(T &t, const typename TensorToVec::type &v) + { + t = v; + } + + + template + void vecToTensor(iScalar &t, const typename TensorToVec>::type &v) + { + vecToTensor(t._internal, v); + } + + template + void vecToTensor(iVector &t, const typename TensorToVec>::type &v) + { + for (unsigned int i = 0; i < N; i++) + { + vecToTensor(t._internal[i], v[i]); + } + } + + template + void vecToTensor(iMatrix &t, const typename TensorToVec>::type &v) + { + for (unsigned int i = 0; i < N; i++) + for (unsigned int j = 0; j < N; j++) + { + vecToTensor(t._internal[i][j], v[i][j]); + } + } + + // Vector element trait ////////////////////////////////////////////////////// + template + struct element + { + typedef T type; + static constexpr bool is_number = false; + }; + + template + struct element> + { + typedef typename element::type type; + static constexpr bool is_number = std::is_arithmetic::value + or is_complex::value + or element::is_number; + }; + + // Vector flattening utility class //////////////////////////////////////////// + // Class to flatten a multidimensional std::vector + template + class Flatten + { + public: + typedef typename element::type Element; + public: + explicit Flatten(const V &vector); + const V & getVector(void); + const std::vector 
& getFlatVector(void); + const std::vector & getDim(void); + private: + void accumulate(const Element &e); + template + void accumulate(const W &v); + void accumulateDim(const Element &e); + template + void accumulateDim(const W &v); + private: + const V &vector_; + std::vector flatVector_; + std::vector dim_; + }; + + // Class to reconstruct a multidimensional std::vector + template + class Reconstruct + { + public: + typedef typename element::type Element; + public: + Reconstruct(const std::vector &flatVector, + const std::vector &dim); + const V & getVector(void); + const std::vector & getFlatVector(void); + const std::vector & getDim(void); + private: + void fill(std::vector &v); + template + void fill(W &v); + void resize(std::vector &v, const unsigned int dim); + template + void resize(W &v, const unsigned int dim); + private: + V vector_; + const std::vector &flatVector_; + std::vector dim_; + size_t ind_{0}; + unsigned int dimInd_{0}; + }; + + // Flatten class template implementation + template + void Flatten::accumulate(const Element &e) + { + flatVector_.push_back(e); + } + + template + template + void Flatten::accumulate(const W &v) + { + for (auto &e: v) + { + accumulate(e); + } + } + + template + void Flatten::accumulateDim(const Element &e) {}; + + template + template + void Flatten::accumulateDim(const W &v) + { + dim_.push_back(v.size()); + accumulateDim(v[0]); + } + + template + Flatten::Flatten(const V &vector) + : vector_(vector) + { + accumulate(vector_); + accumulateDim(vector_); + } + + template + const V & Flatten::getVector(void) + { + return vector_; + } + + template + const std::vector::Element> & + Flatten::getFlatVector(void) + { + return flatVector_; + } + + template + const std::vector & Flatten::getDim(void) + { + return dim_; + } + + // Reconstruct class template implementation + template + void Reconstruct::fill(std::vector &v) + { + for (auto &e: v) + { + e = flatVector_[ind_++]; + } + } + + template + template + void Reconstruct::fill(W &v) + { + for (auto &e: v) + { + fill(e); + } + } + + template + void Reconstruct::resize(std::vector &v, const unsigned int dim) + { + v.resize(dim_[dim]); + } + + template + template + void Reconstruct::resize(W &v, const unsigned int dim) + { + v.resize(dim_[dim]); + for (auto &e: v) + { + resize(e, dim + 1); + } + } + + template + Reconstruct::Reconstruct(const std::vector &flatVector, + const std::vector &dim) + : flatVector_(flatVector) + , dim_(dim) + { + resize(vector_, 0); + fill(vector_); + } + + template + const V & Reconstruct::getVector(void) + { + return vector_; + } + + template + const std::vector::Element> & + Reconstruct::getFlatVector(void) + { + return flatVector_; + } + + template + const std::vector & Reconstruct::getDim(void) + { + return dim_; + } + + // Vector IO utilities /////////////////////////////////////////////////////// + // helper function to read space-separated values + template + std::vector strToVec(const std::string s) + { + std::istringstream sstr(s); + T buf; + std::vector v; + + while(!sstr.eof()) + { + sstr >> buf; + v.push_back(buf); + } + + return v; + } + + // output to streams for vectors + template < class T > + inline std::ostream & operator<<(std::ostream &os, const std::vector &v) + { + os << "["; + for (auto &x: v) + { + os << x << " "; + } + if (v.size() > 0) + { + os << "\b"; + } + os << "]"; + + return os; + } +} + +#endif \ No newline at end of file diff --git a/lib/tensors/Tensor_logical.h b/lib/tensors/Tensor_logical.h index 7ab3668b..58b2b03b 100644 --- 
a/lib/tensors/Tensor_logical.h +++ b/lib/tensors/Tensor_logical.h @@ -55,5 +55,38 @@ LOGICAL_BINOP(&); LOGICAL_BINOP(||); LOGICAL_BINOP(&&); +template +strong_inline bool operator==(const iScalar &t1, const iScalar &t2) +{ + return (t1._internal == t2._internal); +} + +template +strong_inline bool operator==(const iVector &t1, const iVector &t2) +{ + bool res = true; + + for (unsigned int i = 0; i < N; ++i) + { + res = (res && (t1._internal[i] == t2._internal[i])); + } + + return res; +} + +template +strong_inline bool operator==(const iMatrix &t1, const iMatrix &t2) +{ + bool res = true; + + for (unsigned int i = 0; i < N; ++i) + for (unsigned int j = 0; j < N; ++j) + { + res = (res && (t1._internal[i][j] == t2._internal[i][j])); + } + + return res; +} + } #endif diff --git a/lib/util/Init.cc b/lib/util/Init.cc index fb3d7a1e..45a37a02 100644 --- a/lib/util/Init.cc +++ b/lib/util/Init.cc @@ -49,6 +49,7 @@ Author: paboyle #include #include +#include #include @@ -288,6 +289,7 @@ void Grid_init(int *argc,char ***argv) std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"< &simd, std::vector &mpi); + void printHash(void); }; #endif diff --git a/lib/util/version.cc b/lib/util/version.cc new file mode 100644 index 00000000..19759274 --- /dev/null +++ b/lib/util/version.cc @@ -0,0 +1,12 @@ +#include +#include +namespace Grid { + void printHash(){ +#ifdef GITHASH + std::cout << "Current Grid git commit hash=" << GITHASH << std::endl; +#else + std::cout << "Current Grid git commit hash is undefined. Check makefile." << std::endl; +#endif +#undef GITHASH +} +} diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc index 82638ad9..bca4d01c 100644 --- a/tests/IO/Test_serialisation.cc +++ b/tests/IO/Test_serialisation.cc @@ -45,7 +45,8 @@ public: bool , b, std::vector, array, std::vector >, twodimarray, - std::vector > >, cmplx3darray + std::vector > >, cmplx3darray, + SpinColourMatrix, scm ); myclass() {} myclass(int i) @@ -59,6 +60,12 @@ public: y=2*i; b=true; name="bother said pooh"; + scm()(0, 1)(2, 1) = 2.356; + scm()(3, 0)(1, 1) = 1.323; + scm()(2, 1)(0, 1) = 5.3336; + scm()(0, 2)(1, 1) = 6.336; + scm()(2, 1)(2, 2) = 7.344; + scm()(1, 1)(2, 0) = 8.3534; } }; @@ -93,8 +100,30 @@ void ioTest(const std::string &filename, const O &object, const std::string &nam if (!good) exit(EXIT_FAILURE); } +template +void tensorConvTestFn(GridSerialRNG &rng, const std::string label) +{ + T t, ft; + Real n; + bool good; + + random(rng, t); + auto tv = tensorToVec(t); + vecToTensor(ft, tv); + n = norm2(t - ft); + good = (n == 0); + std::cout << label << " norm 2 diff: " << n << " -- " + << (good ? "success" : "failure") << std::endl; +} + +#define tensorConvTest(rng, type) tensorConvTestFn(rng, #type) + int main(int argc,char **argv) { + GridSerialRNG rng; + + rng.SeedFixedIntegers(std::vector({42,10,81,9})); + std::cout << "==== basic IO" << std::endl; XmlWriter WR("bother.xml"); @@ -120,7 +149,7 @@ int main(int argc,char **argv) std::cout << "-- serialisable class writing to 'bother.xml'..." << std::endl; write(WR,"obj",obj); WR.write("obj2", obj); - vec.push_back(myclass(1234)); + vec.push_back(obj); vec.push_back(myclass(5678)); vec.push_back(myclass(3838)); pair = std::make_pair(myenum::red, myenum::blue); @@ -131,8 +160,6 @@ int main(int argc,char **argv) std::cout << "-- serialisable class comparison:" << std::endl; std::cout << "vec[0] == obj: " << ((vec[0] == obj) ? "true" : "false") << std::endl; std::cout << "vec[1] == obj: " << ((vec[1] == obj) ? 
"true" : "false") << std::endl; - - write(WR, "objpair", pair); std::cout << "-- pair writing to std::cout:" << std::endl; std::cout << pair << std::endl; @@ -141,26 +168,20 @@ int main(int argc,char **argv) //// XML ioTest("iotest.xml", obj, "XML (object) "); ioTest("iotest.xml", vec, "XML (vector of objects)"); - ioTest("iotest.xml", pair, "XML (pair of objects)"); //// binary ioTest("iotest.bin", obj, "binary (object) "); ioTest("iotest.bin", vec, "binary (vector of objects)"); - ioTest("iotest.bin", pair, "binary (pair of objects)"); //// text ioTest("iotest.dat", obj, "text (object) "); ioTest("iotest.dat", vec, "text (vector of objects)"); - ioTest("iotest.dat", pair, "text (pair of objects)"); //// text ioTest("iotest.json", obj, "JSON (object) "); ioTest("iotest.json", vec, "JSON (vector of objects)"); - ioTest("iotest.json", pair, "JSON (pair of objects)"); //// HDF5 -#undef HAVE_HDF5 #ifdef HAVE_HDF5 ioTest("iotest.h5", obj, "HDF5 (object) "); ioTest("iotest.h5", vec, "HDF5 (vector of objects)"); - ioTest("iotest.h5", pair, "HDF5 (pair of objects)"); #endif std::cout << "\n==== vector flattening/reconstruction" << std::endl; @@ -197,68 +218,11 @@ int main(int argc,char **argv) std::cout << flatdv.getVector() << std::endl; std::cout << std::endl; - - std::cout << ".:::::: Testing JSON classes "<< std::endl; - - - { - JSONWriter JW("bother.json"); - - // test basic type writing - myenum a = myenum::red; - push(JW,"BasicTypes"); - write(JW,std::string("i16"),i16); - write(JW,"myenum",a); - write(JW,"u16",u16); - write(JW,"i32",i32); - write(JW,"u32",u32); - write(JW,"i64",i64); - write(JW,"u64",u64); - write(JW,"f",f); - write(JW,"d",d); - write(JW,"b",b); - pop(JW); - - - // test serializable class writing - myclass obj(1234); // non-trivial constructor - std::cout << obj << std::endl; - std::cout << "-- serialisable class writing to 'bother.json'..." 
<< std::endl; - write(JW,"obj",obj); - JW.write("obj2", obj); - - - std::vector vec; - vec.push_back(myclass(1234)); - vec.push_back(myclass(5678)); - vec.push_back(myclass(3838)); - write(JW, "objvec", vec); - - } - - - { - JSONReader RD("bother.json"); - myclass jcopy1; - std::vector jveccopy1; - read(RD,"obj",jcopy1); - read(RD,"objvec", jveccopy1); - std::cout << "Loaded (JSON) -----------------" << std::endl; - std::cout << jcopy1 << std::endl << jveccopy1 << std::endl; - } - - -/* - // This is still work in progress - { - // Testing the next element function - JSONReader RD("test.json"); - RD.push("grid"); - RD.push("Observable"); - std::string name; - read(RD,"name", name); - } -*/ - - + std::cout << "==== Grid tensor to vector test" << std::endl; + tensorConvTest(rng, SpinColourMatrix); + tensorConvTest(rng, SpinColourVector); + tensorConvTest(rng, ColourMatrix); + tensorConvTest(rng, ColourVector); + tensorConvTest(rng, SpinMatrix); + tensorConvTest(rng, SpinVector); } diff --git a/tests/Test_dwf_mixedcg_prec.cc b/tests/Test_dwf_mixedcg_prec.cc index 2601b76c..a53d8921 100644 --- a/tests/Test_dwf_mixedcg_prec.cc +++ b/tests/Test_dwf_mixedcg_prec.cc @@ -103,6 +103,33 @@ int main (int argc, char ** argv) std::cout << "Diff between mixed and regular CG: " << diff << std::endl; + #ifdef HAVE_LIME + if( GridCmdOptionExists(argv,argv+argc,"--checksums") ){ + std::string file1("./Propagator1"); + std::string file2("./Propagator2"); + emptyUserRecord record; + uint32_t nersc_csum; + uint32_t scidac_csuma; + uint32_t scidac_csumb; + typedef SpinColourVectorD FermionD; + typedef vSpinColourVectorD vFermionD; + + BinarySimpleMunger munge; + std::string format = getFormatString(); + + BinaryIO::writeLatticeObject(result_o,file1,munge, 0, format, + nersc_csum,scidac_csuma,scidac_csumb); + + std::cout << " Mixed checksums "<(result_o_2,file1,munge, 0, format, + nersc_csum,scidac_csuma,scidac_csumb); + + std::cout << " CG checksums "< subspace(nbasis,FGrid); @@ -119,7 +120,7 @@ int main (int argc, char ** argv) MdagMLinearOperator HermDefOp(Ddwf); typedef Aggregation Subspace; - Subspace Aggregates(Coarse5d,FGrid); + Subspace Aggregates(Coarse5d,FGrid,cb); Aggregates.CreateSubspaceRandom(RNG5); subspace=Aggregates.subspace; diff --git a/tests/debug/Test_cayley_ldop_cr.cc b/tests/debug/Test_cayley_ldop_cr.cc index cbefdd46..c6005fd0 100644 --- a/tests/debug/Test_cayley_ldop_cr.cc +++ b/tests/debug/Test_cayley_ldop_cr.cc @@ -78,6 +78,7 @@ int main (int argc, char ** argv) RealD mass=0.1; RealD M5=1.5; + int cb=0; std::cout< HermDefOp(Ddwf); - Subspace Aggregates(Coarse5d,FGrid); + Subspace Aggregates(Coarse5d,FGrid,cb); Aggregates.CreateSubspace(RNG5,HermDefOp); diff --git a/tests/forces/Test_gp_plaq_force.cc b/tests/forces/Test_gp_plaq_force.cc new file mode 100644 index 00000000..e121f21b --- /dev/null +++ b/tests/forces/Test_gp_plaq_force.cc @@ -0,0 +1,123 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_gp_plaq_force.cc + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); + + int threads = GridThread::GetThreads(); + std::cout< seeds({1,2,3,4}); + + GridParallelRNG pRNG(&Grid); + pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); + + LatticeGaugeField U(&Grid); + + SU3::HotConfiguration(pRNG,U); + + double beta = 1.0; + double c1 = 0.331; + + //ConjugatePlaqPlusRectangleActionR Action(beta,c1); + ConjugateWilsonGaugeActionR Action(beta); + //WilsonGaugeActionR Action(beta); + + ComplexD S = Action.S(U); + + // get the deriv of phidag MdagM phi with respect to "U" + LatticeGaugeField UdSdU(&Grid); + + Action.deriv(U,UdSdU); + + //////////////////////////////////// + // Modify the gauge field a little + //////////////////////////////////// + RealD dt = 0.0001; + + LatticeColourMatrix mommu(&Grid); + LatticeColourMatrix forcemu(&Grid); + LatticeGaugeField mom(&Grid); + LatticeGaugeField Uprime(&Grid); + + for(int mu=0;mu(mom,mommu,mu); + + // fourth order exponential approx + parallel_for(auto i=mom.begin();i(UdSdU,mu); + mommu = PeekIndex(mom,mu); + + // Update gauge action density + // U = exp(p dt) U + // dU/dt = p U + // so dSdt = trace( dUdt dSdU) = trace( p UdSdUmu ) + + dS = dS - trace(mommu*UdSdUmu)*dt*2.0; + + } + ComplexD dSpred = sum(dS); + + std::cout << GridLogMessage << " S "<_Aggregate.subspace.size()==nbasis); + assert(this->subspace.size()==nbasis); emptyUserRecord record; Grid::QCD::ScidacWriter WR; WR.open(evecs_file); for(int k=0;k_Aggregate.subspace[k],record); + WR.writeScidacFieldRecord(this->subspace[k],record); } WR.close(); @@ -72,7 +72,7 @@ public: void checkpointFineRestore(std::string evecs_file,std::string evals_file) { this->evals_fine.resize(nbasis); - this->_Aggregate.subspace.resize(nbasis,this->_FineGrid); + this->subspace.resize(nbasis,this->_FineGrid); std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<_Aggregate.subspace[k].checkerboard=this->_checkerboard; - RD.readScidacFieldRecord(this->_Aggregate.subspace[k],record); + this->subspace[k].checkerboard=this->_checkerboard; + RD.readScidacFieldRecord(this->subspace[k],record); } RD.close(); @@ -180,7 +180,6 @@ int main (int argc, char ** argv) { GridCartesian * CoarseGrid4 = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); GridRedBlackCartesian * CoarseGrid4rb = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4); GridCartesian * CoarseGrid5 = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4); - GridRedBlackCartesian * CoarseGrid5rb = 
SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid5); // Gauge field LatticeGaugeField Umu(UGrid); @@ -206,7 +205,7 @@ int main (int argc, char ** argv) { const int nbasis= 60; assert(nbasis==Ns1); - LocalCoherenceLanczosScidac _LocalCoherenceLanczos(FrbGrid,CoarseGrid5rb,HermOp,Odd); + LocalCoherenceLanczosScidac _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,HermOp,Odd); std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl; assert( (Params.doFine)||(Params.doFineRead)); @@ -221,7 +220,9 @@ int main (int argc, char ** argv) { std::cout << GridLogIRL<<"Checkpointing Fine evecs"<
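
The guesser hook introduced above is the one piece of new API most callers will touch: each SchurRedBlack solver gains an overload of operator() that takes a guess functor, while the old two-argument form now builds a ZeroGuesser and delegates to it, so existing call sites compile unchanged. A minimal usage sketch follows; Ddwf, src, sol, evecs and evals are illustrative names rather than code from this patch, and the eigenpairs are assumed to be odd-checkerboard eigenvectors of the red-black preconditioned operator from an earlier Lanczos run:

  ConjugateGradient<LatticeFermion>           CG(1.0e-8,10000);
  SchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CG);

  // Old path, unchanged: an implicit ZeroGuesser seeds the odd-cb solve.
  SchurSolver(Ddwf,src,sol);

  // New path: deflated starting guess, guess = sum_i evec_i (evec_i,src)/eval_i
  DeflatedGuesser<LatticeFermion> guesser(evecs,evals);
  SchurSolver(Ddwf,src,sol,guesser);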
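In the same spirit, LocalCoherenceLanczos now holds its eigenpacks behind references, so the storage can either live inside the object (the original constructor) or be owned by the caller, which is what the new Hadrons-facing constructor provides. A sketch under the same caveats, with FrbGrid, CoarseGrid5, HermOp and Odd assumed to be set up as in Test_dwf_compressed_lanczos_reorg above, and the LCL typedef and vector names invented here for illustration:

  const int nbasis = 60;
  typedef LocalCoherenceLanczos<vSpinColourVector,vTComplex,nbasis> LCL;

  // (a) Self-contained: evecs and evals are owned by the object.
  LCL internal(FrbGrid,CoarseGrid5,HermOp,Odd);

  // (b) External storage: the caller owns the fine subspace, the coarse
  //     evecs and both eigenvalue vectors, and can keep them alive after
  //     the Lanczos object goes away.
  std::vector<LatticeFermion>   subspace;
  std::vector<LCL::CoarseField> evec_coarse;
  std::vector<RealD>            eval_fine, eval_coarse;
  LCL external(FrbGrid,CoarseGrid5,HermOp,Odd,
               subspace,evec_coarse,eval_fine,eval_coarse);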