diff --git a/.gitignore b/.gitignore index 399f2f6b..45a3ea53 100644 --- a/.gitignore +++ b/.gitignore @@ -83,6 +83,7 @@ ltmain.sh .Trashes ehthumbs.db Thumbs.db +.dirstamp # build directory # ################### @@ -97,11 +98,8 @@ build.sh # Eigen source # ################ -lib/Eigen/* - -# FFTW source # -################ -lib/fftw/* +Grid/Eigen +Eigen/* # libtool macros # ################## @@ -112,14 +110,7 @@ m4/libtool.m4 ################ gh-pages/ -# Buck files # -############## -.buck* -buck-out -BUCK -make-bin-BUCK.sh - # generated sources # ##################### -lib/qcd/spin/gamma-gen/*.h -lib/qcd/spin/gamma-gen/*.cc +Grid/qcd/spin/gamma-gen/*.h +Grid/qcd/spin/gamma-gen/*.cc diff --git a/.travis.yml b/.travis.yml index 7d8203ce..129fd582 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,11 @@ matrix: - os: osx osx_image: xcode8.3 compiler: clang + env: PREC=single + - os: osx + osx_image: xcode8.3 + compiler: clang + env: PREC=double before_install: - export GRIDDIR=`pwd` @@ -16,9 +21,11 @@ before_install: - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi - - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc openssl; fi install: + - export CWD=`pwd` + - echo $CWD - export CC=$CC$VERSION - export CXX=$CXX$VERSION - echo $PATH @@ -31,16 +38,24 @@ install: - which $CXX - $CXX --version - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export EXTRACONF='--with-openssl=/usr/local/opt/openssl'; fi script: - ./bootstrap.sh - mkdir build - cd build - - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none + - mkdir lime + - cd lime + - mkdir build + - cd build + - wget http://usqcd-software.github.io/downloads/c-lime/lime-1.3.2.tar.gz + - tar xf lime-1.3.2.tar.gz + - cd lime-1.3.2 + - ./configure --prefix=$CWD/build/lime/install + - make -j4 + - make install + - cd $CWD/build + - ../configure --enable-precision=$PREC --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF} - make -j4 - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals - - echo make clean - - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none - - make -j4 - - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals - make check diff --git a/lib/DisableWarnings.h b/Grid/DisableWarnings.h similarity index 100% rename from lib/DisableWarnings.h rename to Grid/DisableWarnings.h diff --git a/lib/Grid.h b/Grid/Grid.h similarity index 96% rename from lib/Grid.h rename to Grid/Grid.h index 475c00b6..0fdd5268 100644 --- a/lib/Grid.h +++ b/Grid/Grid.h @@ -42,7 +42,9 @@ Author: paboyle #include #include #include +NAMESPACE_CHECK(GaugeFix); #include +NAMESPACE_CHECK(Smearing); #include #include diff --git a/lib/GridCore.h b/Grid/GridCore.h similarity index 97% rename from lib/GridCore.h rename to Grid/GridCore.h index cc1811af..ba0499f6 100644 --- a/lib/GridCore.h +++ b/Grid/GridCore.h @@ -38,18 +38,21 @@ Author: paboyle #ifndef GRID_BASE_H #define GRID_BASE_H + + #include #include #include #include #include #include +#include #include #include #include #include #include -#include +#include #include #include #include @@ -58,5 +61,6 @@ Author: paboyle #include #include #include +NAMESPACE_CHECK(GridCore) #endif diff --git a/lib/GridQCDcore.h b/Grid/GridQCDcore.h similarity index 98% rename from lib/GridQCDcore.h rename to Grid/GridQCDcore.h index 7f50761f..cae6f43f 100644 --- a/lib/GridQCDcore.h +++ b/Grid/GridQCDcore.h @@ -38,5 +38,6 @@ Author: paboyle #include #include #include +NAMESPACE_CHECK(GridQCDCore); #endif diff --git a/lib/GridStd.h b/Grid/GridStd.h similarity index 100% rename from lib/GridStd.h rename to Grid/GridStd.h diff --git a/lib/Grid_Eigen_Dense.h b/Grid/Grid_Eigen_Dense.h similarity index 84% rename from lib/Grid_Eigen_Dense.h rename to Grid/Grid_Eigen_Dense.h index c57148ba..cbe0a389 100644 --- a/lib/Grid_Eigen_Dense.h +++ b/Grid/Grid_Eigen_Dense.h @@ -1,5 +1,10 @@ #include #pragma once +// Force Eigen to use MKL if Grid has been configured with --enable-mkl +#ifdef USE_MKL +#define EIGEN_USE_MKL_ALL +#endif + #if defined __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" diff --git a/lib/Makefile.am b/Grid/Makefile.am similarity index 51% rename from lib/Makefile.am rename to Grid/Makefile.am index dc33e7cf..b88ea4f2 100644 --- a/lib/Makefile.am +++ b/Grid/Makefile.am @@ -21,6 +21,32 @@ if BUILD_HDF5 extra_headers+=serialisation/Hdf5Type.h endif +all: version-cache + +version-cache: + @if [ `git status --porcelain | grep -v '??' | wc -l` -gt 0 ]; then\ + a="uncommited changes";\ + else\ + a="clean";\ + fi;\ + echo "`git log -n 1 --format=format:"#define GITHASH \\"%H:%d $$a\\"%n" HEAD`" > vertmp;\ + if [ -e version-cache ]; then\ + d=`diff vertmp version-cache`;\ + if [ "$${d}" != "" ]; then\ + mv vertmp version-cache;\ + rm -f Version.h;\ + fi;\ + else\ + mv vertmp version-cache;\ + rm -f Version.h;\ + fi;\ + rm -f vertmp + +Version.h: + cp version-cache Version.h + +.PHONY: version-cache + # # Libraries # @@ -30,8 +56,8 @@ include Eigen.inc lib_LIBRARIES = libGrid.a CCFILES += $(extra_sources) -HFILES += $(extra_headers) +HFILES += $(extra_headers) Config.h Version.h libGrid_a_SOURCES = $(CCFILES) -libGrid_adir = $(pkgincludedir) -nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h +libGrid_adir = $(includedir)/Grid +nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) $(eigen_unsupp_files) diff --git a/lib/Namespace.h b/Grid/Namespace.h similarity index 87% rename from lib/Namespace.h rename to Grid/Namespace.h index e405bc30..29b229fa 100644 --- a/lib/Namespace.h +++ b/Grid/Namespace.h @@ -28,7 +28,11 @@ directory /* END LEGAL */ #pragma once +#include +#include + #define NAMESPACE_BEGIN(A) namespace A { #define NAMESPACE_END(A) } #define GRID_NAMESPACE_BEGIN NAMESPACE_BEGIN(Grid) #define GRID_NAMESPACE_END NAMESPACE_END(Grid) +#define NAMESPACE_CHECK(x) struct namespaceTEST##x {}; static_assert(std::is_same::value,"Not in :: at" ); diff --git a/lib/algorithms/Algorithms.h b/Grid/algorithms/Algorithms.h similarity index 97% rename from lib/algorithms/Algorithms.h rename to Grid/algorithms/Algorithms.h index 070a1019..ef147c53 100644 --- a/lib/algorithms/Algorithms.h +++ b/Grid/algorithms/Algorithms.h @@ -39,6 +39,7 @@ Author: Peter Boyle #include #include +#include #include #include #include diff --git a/lib/algorithms/CoarsenedMatrix.h b/Grid/algorithms/CoarsenedMatrix.h similarity index 100% rename from lib/algorithms/CoarsenedMatrix.h rename to Grid/algorithms/CoarsenedMatrix.h diff --git a/lib/algorithms/FFT.h b/Grid/algorithms/FFT.h similarity index 100% rename from lib/algorithms/FFT.h rename to Grid/algorithms/FFT.h diff --git a/lib/algorithms/LinearOperator.h b/Grid/algorithms/LinearOperator.h similarity index 88% rename from lib/algorithms/LinearOperator.h rename to Grid/algorithms/LinearOperator.h index 8b2ecf57..ced8d987 100644 --- a/lib/algorithms/LinearOperator.h +++ b/Grid/algorithms/LinearOperator.h @@ -182,12 +182,15 @@ class SchurOperatorBase : public LinearOperatorBase { public: virtual RealD Mpc (const Field &in, Field &out) =0; virtual RealD MpcDag (const Field &in, Field &out) =0; - virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) { + virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) + { Field tmp(in.Grid()); + tmp.Checkerboard() = in.Checkerboard(); ni=Mpc(in,tmp); no=MpcDag(tmp,out); } virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + out.Checkerboard() = in.Checkerboard(); MpcDagMpc(in,out,n1,n2); } virtual void HermOp(const Field &in, Field &out){ @@ -217,11 +220,13 @@ public: virtual RealD Mpc (const Field &in, Field &out) { Field tmp(in.Grid()); // std::cout <<"grid pointers: in.Grid()="<< in.Grid() << " out.Grid()=" << out.Grid() << " _Mat.Grid=" << _Mat.Grid() << " _Mat.RedBlackGrid=" << _Mat.RedBlackGrid() << std::endl; + tmp.Checkerboard() = !in.Checkerboard(); _Mat.Meooe(in,tmp); _Mat.MooeeInv(tmp,out); _Mat.Meooe(out,tmp); + //std::cout << "cb in " << in.Checkerboard() << " cb out " << out.Checkerboard() << std::endl; _Mat.Mooee(in,out); return axpy_norm(out,-1.0,tmp,out); } @@ -305,36 +310,69 @@ template class SchurStaggeredOperator : public SchurOperatorBase { protected: Matrix &_Mat; + Field tmp; + RealD mass; + double tMpc; + double tIP; + double tMeo; + double taxpby_norm; + uint64_t ncall; public: - SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){}; + void Report(void) + { + std::cout << GridLogMessage << " HermOpAndNorm.Mpc "<< tMpc/ncall<<" usec "< using SchurStagOperator = SchurStaggeredOpera template class OperatorFunction { public: virtual void operator() (LinearOperatorBase &Linop, const Field &in, Field &out) = 0; + virtual void operator() (LinearOperatorBase &Linop, const std::vector &in,std::vector &out) { + assert(in.size()==out.size()); + for(int k=0;k class LinearFunction { diff --git a/lib/algorithms/Preconditioner.h b/Grid/algorithms/Preconditioner.h similarity index 100% rename from lib/algorithms/Preconditioner.h rename to Grid/algorithms/Preconditioner.h diff --git a/lib/algorithms/SparseMatrix.h b/Grid/algorithms/SparseMatrix.h similarity index 86% rename from lib/algorithms/SparseMatrix.h rename to Grid/algorithms/SparseMatrix.h index 63b30e95..c1473e56 100644 --- a/lib/algorithms/SparseMatrix.h +++ b/Grid/algorithms/SparseMatrix.h @@ -55,6 +55,14 @@ public: template class CheckerBoardedSparseMatrixBase : public SparseMatrixBase { public: virtual GridBase *RedBlackGrid(void)=0; + + ////////////////////////////////////////////////////////////////////// + // Query the even even properties to make algorithmic decisions + ////////////////////////////////////////////////////////////////////// + virtual RealD Mass(void) { return 0.0; }; + virtual int ConstEE(void) { return 0; }; // Disable assumptions unless overridden + virtual int isTrivialEE(void) { return 0; }; // by a derived class that knows better + // half checkerboard operaions virtual void Meooe (const Field &in, Field &out)=0; virtual void Mooee (const Field &in, Field &out)=0; diff --git a/lib/algorithms/approx/Chebyshev.h b/Grid/algorithms/approx/Chebyshev.h similarity index 100% rename from lib/algorithms/approx/Chebyshev.h rename to Grid/algorithms/approx/Chebyshev.h diff --git a/lib/algorithms/approx/Forecast.h b/Grid/algorithms/approx/Forecast.h similarity index 100% rename from lib/algorithms/approx/Forecast.h rename to Grid/algorithms/approx/Forecast.h diff --git a/lib/algorithms/approx/LICENSE b/Grid/algorithms/approx/LICENSE similarity index 100% rename from lib/algorithms/approx/LICENSE rename to Grid/algorithms/approx/LICENSE diff --git a/lib/algorithms/approx/MultiShiftFunction.cc b/Grid/algorithms/approx/MultiShiftFunction.cc similarity index 100% rename from lib/algorithms/approx/MultiShiftFunction.cc rename to Grid/algorithms/approx/MultiShiftFunction.cc diff --git a/lib/algorithms/approx/MultiShiftFunction.h b/Grid/algorithms/approx/MultiShiftFunction.h similarity index 100% rename from lib/algorithms/approx/MultiShiftFunction.h rename to Grid/algorithms/approx/MultiShiftFunction.h diff --git a/lib/algorithms/approx/README b/Grid/algorithms/approx/README similarity index 100% rename from lib/algorithms/approx/README rename to Grid/algorithms/approx/README diff --git a/lib/algorithms/approx/Remez.cc b/Grid/algorithms/approx/Remez.cc similarity index 100% rename from lib/algorithms/approx/Remez.cc rename to Grid/algorithms/approx/Remez.cc diff --git a/lib/algorithms/approx/Remez.h b/Grid/algorithms/approx/Remez.h similarity index 100% rename from lib/algorithms/approx/Remez.h rename to Grid/algorithms/approx/Remez.h diff --git a/lib/algorithms/approx/Zolotarev.cc b/Grid/algorithms/approx/Zolotarev.cc similarity index 100% rename from lib/algorithms/approx/Zolotarev.cc rename to Grid/algorithms/approx/Zolotarev.cc diff --git a/lib/algorithms/approx/Zolotarev.h b/Grid/algorithms/approx/Zolotarev.h similarity index 100% rename from lib/algorithms/approx/Zolotarev.h rename to Grid/algorithms/approx/Zolotarev.h diff --git a/lib/algorithms/approx/bigfloat.h b/Grid/algorithms/approx/bigfloat.h similarity index 100% rename from lib/algorithms/approx/bigfloat.h rename to Grid/algorithms/approx/bigfloat.h diff --git a/lib/algorithms/approx/bigfloat_double.h b/Grid/algorithms/approx/bigfloat_double.h similarity index 100% rename from lib/algorithms/approx/bigfloat_double.h rename to Grid/algorithms/approx/bigfloat_double.h diff --git a/lib/algorithms/iterative/AdefGeneric.h b/Grid/algorithms/iterative/AdefGeneric.h similarity index 100% rename from lib/algorithms/iterative/AdefGeneric.h rename to Grid/algorithms/iterative/AdefGeneric.h diff --git a/Grid/algorithms/iterative/BlockConjugateGradient.h b/Grid/algorithms/iterative/BlockConjugateGradient.h new file mode 100644 index 00000000..cfce4aa5 --- /dev/null +++ b/Grid/algorithms/iterative/BlockConjugateGradient.h @@ -0,0 +1,694 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h + +Copyright (C) 2017 + +Author: Azusa Yamaguchi +Author: Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + +enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS, BlockCGVec, BlockCGrQVec }; + +////////////////////////////////////////////////////////////////////////// +// Block conjugate gradient. Dimension zero should be the block direction +////////////////////////////////////////////////////////////////////////// +template +class BlockConjugateGradient : public OperatorFunction { + public: + + typedef typename Field::scalar_type scomplex; + + int blockDim ; + int Nblock; + + BlockCGtype CGtype; + bool ErrorOnNoConverge; // throw an assert when the CG fails to converge. + // Defaults true. + RealD Tolerance; + Integer MaxIterations; + Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion + Integer PrintInterval; //GridLogMessages or Iterative + + BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) + : Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv),PrintInterval(100) + {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Thin QR factorisation (google it) +//////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////// + //Dimensions + // R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock + // + // Rdag R = m_rr = Herm = L L^dag <-- Cholesky decomposition (LLT routine in Eigen) + // + // Q C = R => Q = R C^{-1} + // + // Want Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock} + // + // Set C = L^{dag}, and then Q^dag Q = ident + // + // Checks: + // Cdag C = Rdag R ; passes. + // QdagQ = 1 ; passes + //////////////////////////////////////////////////////////////////////////////////////////////////// +void ThinQRfact (Eigen::MatrixXcd &m_rr, + Eigen::MatrixXcd &C, + Eigen::MatrixXcd &Cinv, + Field & Q, + const Field & R) +{ + int Orthog = blockDim; // First dimension is block dim; this is an assumption + sliceInnerProductMatrix(m_rr,R,R,Orthog); + + // Force manifest hermitian to avoid rounding related + m_rr = 0.5*(m_rr+m_rr.adjoint()); + + Eigen::MatrixXcd L = m_rr.llt().matrixL(); + + C = L.adjoint(); + Cinv = C.inverse(); + //////////////////////////////////////////////////////////////////////////////////////////////////// + // Q = R C^{-1} + // + // Q_j = R_i Cinv(i,j) + // + // NB maddMatrix conventions are Right multiplication X[j] a[j,i] already + //////////////////////////////////////////////////////////////////////////////////////////////////// + sliceMulMatrix(Q,Cinv,R,Orthog); +} +// see comments above +void ThinQRfact (Eigen::MatrixXcd &m_rr, + Eigen::MatrixXcd &C, + Eigen::MatrixXcd &Cinv, + std::vector & Q, + const std::vector & R) +{ + InnerProductMatrix(m_rr,R,R); + + m_rr = 0.5*(m_rr+m_rr.adjoint()); + + Eigen::MatrixXcd L = m_rr.llt().matrixL(); + + C = L.adjoint(); + Cinv = C.inverse(); + + MulMatrix(Q,Cinv,R); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Call one of several implementations +//////////////////////////////////////////////////////////////////////////////////////////////////// +void operator()(LinearOperatorBase &Linop, const Field &Src, Field &Psi) +{ + if ( CGtype == BlockCGrQ ) { + BlockCGrQsolve(Linop,Src,Psi); + } else if (CGtype == CGmultiRHS ) { + CGmultiRHSsolve(Linop,Src,Psi); + } else { + assert(0); + } +} +virtual void operator()(LinearOperatorBase &Linop, const std::vector &Src, std::vector &Psi) +{ + if ( CGtype == BlockCGrQVec ) { + BlockCGrQsolveVec(Linop,Src,Psi); + } else { + assert(0); + } +} + +//////////////////////////////////////////////////////////////////////////// +// BlockCGrQ implementation: +//-------------------------- +// X is guess/Solution +// B is RHS +// Solve A X_i = B_i ; i refers to Nblock index +//////////////////////////////////////////////////////////////////////////// +void BlockCGrQsolve(LinearOperatorBase &Linop, const Field &B, Field &X) +{ + int Orthog = blockDim; // First dimension is block dim; this is an assumption + Nblock = B.Grid()->_fdimensions[Orthog]; +/* FAKE */ + Nblock=8; + std::cout< residuals(Nblock); + std::vector ssq(Nblock); + + sliceNorm(ssq,B,Orthog); + RealD sssum=0; + for(int b=0;b Thin QR factorisation (google it) + * for k: + * Z = AD + * M = [D^dag Z]^{-1} + * X = X + D MC + * QS = Q - ZM + * D = Q + D S^dag + * C = S C + */ + /////////////////////////////////////// + // Initial block: initial search dir is guess + /////////////////////////////////////// + std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " < Thin QR factorisation (google it) + Linop.HermOp(X, AD); + tmp = B - AD; + + ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp); + D=Q; + + std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " < max_resid ) max_resid = rr; + } + + std::cout << GridLogIterative << "\titeration "< &Linop, const Field &Src, Field &Psi) +{ + int Orthog = blockDim; // First dimension is block dim + Nblock = Src.Grid()->_fdimensions[Orthog]; + + std::cout< v_pAp(Nblock); + std::vector v_rr (Nblock); + std::vector v_rr_inv(Nblock); + std::vector v_alpha(Nblock); + std::vector v_beta(Nblock); + + // Initial residual computation & set up + std::vector residuals(Nblock); + std::vector ssq(Nblock); + + sliceNorm(ssq,Src,Orthog); + RealD sssum=0; + for(int b=0;b max_resid ) max_resid = rr; + } + + if ( max_resid < Tolerance*Tolerance ) { + + SolverTimer.Stop(); + + std::cout << GridLogMessage<<"MultiRHS solver converged in " < &X, const std::vector &Y){ + for(int b=0;b &AP, Eigen::MatrixXcd &m , const std::vector &X,const std::vector &Y,RealD scale=1.0){ + // Should make this cache friendly with site outermost, parallel_for + // Deal with case AP aliases with either Y or X + std::vector tmp(Nblock,X[0]); + for(int b=0;b &AP, Eigen::MatrixXcd &m , const std::vector &X){ + // Should make this cache friendly with site outermost, parallel_for + for(int b=0;b &P){ + double nn = 0.0; + for(int b=0;b &Linop, const std::vector &B, std::vector &X) +{ + Nblock = B.size(); + assert(Nblock == X.size()); + + std::cout< tmp(Nblock,Fake); + std::vector Q(Nblock,Fake); + std::vector D(Nblock,Fake); + std::vector Z(Nblock,Fake); + std::vector AD(Nblock,Fake); + + Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock); + Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock); + Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock); + + Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock); + Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock); + Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock); + Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock); + + Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock); + Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock); + + // Initial residual computation & set up + std::vector residuals(Nblock); + std::vector ssq(Nblock); + + RealD sssum=0; + for(int b=0;b Thin QR factorisation (google it) + * for k: + * Z = AD + * M = [D^dag Z]^{-1} + * X = X + D MC + * QS = Q - ZM + * D = Q + D S^dag + * C = S C + */ + /////////////////////////////////////// + // Initial block: initial search dir is guess + /////////////////////////////////////// + std::cout << GridLogMessage<<"BlockCGrQvec algorithm initialisation " < Thin QR factorisation (google it) + for(int b=0;b max_resid ) max_resid = rr; + } + + std::cout << GridLogIterative << "\t Block Iteration "< &Linop, const Field &src, Field &psi) { psi.Checkerboard() = src.Checkerboard(); + conformable(psi, src); RealD cp, c, a, d, b, ssq, qq, b_pred; @@ -70,7 +71,6 @@ public: Linop.HermOpAndNorm(psi, mmp, d, b); - r = src - mmp; p = r; @@ -96,38 +96,47 @@ public: << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl; GridStopWatch LinalgTimer; + GridStopWatch InnerTimer; + GridStopWatch AxpyNormTimer; + GridStopWatch LinearCombTimer; GridStopWatch MatrixTimer; GridStopWatch SolverTimer; SolverTimer.Start(); int k; - for (k = 1; k <= MaxIterations; k++) { + for (k = 1; k <= MaxIterations*1000; k++) { c = cp; MatrixTimer.Start(); - Linop.HermOpAndNorm(p, mmp, d, qq); + Linop.HermOp(p, mmp); MatrixTimer.Stop(); LinalgTimer.Start(); - // RealD qqck = norm2(mmp); - // ComplexD dck = innerProduct(p,mmp); + InnerTimer.Start(); + ComplexD dc = innerProduct(p,mmp); + InnerTimer.Stop(); + d = dc.real(); a = c / d; - b_pred = a * (a * qq - d) / c; + AxpyNormTimer.Start(); cp = axpy_norm(r, -a, mmp, r); + AxpyNormTimer.Stop(); b = cp / c; - // Fuse these loops ; should be really easy - psi = a * p + psi; - p = p * b + r; - + LinearCombTimer.Start(); + auto psi_v = psi.View(); + auto p_v = p.View(); + auto r_v = r.View(); + parallel_for(int ss=0;ssoSites();ss++){ + vstream(psi_v[ss], a * p_v[ss] + psi_v[ss]); + vstream(p_v [ss], b * p_v[ss] + r_v[ss]); + } + LinearCombTimer.Stop(); LinalgTimer.Stop(); std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k - << " residual " << cp << " target " << rsq << std::endl; - std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl; - std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl; + << " residual^2 " << sqrt(cp/ssq) << " target " << Tolerance << std::endl; // Stopping condition if (cp <= rsq) { @@ -148,6 +157,9 @@ public: std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <, public: RealD Tolerance; Integer MaxIterations; + Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion int verbose; MultiShiftFunction shifts; @@ -164,6 +165,15 @@ public: axpby(psi[s],0.,-bs[s]*alpha[s],src,src); } + /////////////////////////////////////// + // Timers + /////////////////////////////////////// + GridStopWatch AXPYTimer; + GridStopWatch ShiftTimer; + GridStopWatch QRTimer; + GridStopWatch MatrixTimer; + GridStopWatch SolverTimer; + SolverTimer.Start(); // Iteration loop int k; @@ -171,7 +181,9 @@ public: for (k=1;k<=MaxIterations;k++){ a = c /cp; + AXPYTimer.Start(); axpy(p,a,p,r); + AXPYTimer.Stop(); // Note to self - direction ps is iterated seperately // for each shift. Does not appear to have any scope @@ -180,6 +192,7 @@ public: // However SAME r is used. Could load "r" and update // ALL ps[s]. 2/3 Bandwidth saving // New Kernel: Load r, vector of coeffs, vector of pointers ps + AXPYTimer.Start(); for(int s=0;s + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_DEFLATION_H +#define GRID_DEFLATION_H + +namespace Grid { + +template +class ZeroGuesser: public LinearFunction { +public: + virtual void operator()(const Field &src, Field &guess) { guess = Zero(); }; +}; + +template +class SourceGuesser: public LinearFunction { +public: + virtual void operator()(const Field &src, Field &guess) { guess = src; }; +}; + +//////////////////////////////// +// Fine grid deflation +//////////////////////////////// +template +class DeflatedGuesser: public LinearFunction { +private: + const std::vector &evec; + const std::vector &eval; + +public: + + DeflatedGuesser(const std::vector & _evec,const std::vector & _eval) : evec(_evec), eval(_eval) {}; + + virtual void operator()(const Field &src,Field &guess) { + guess = Zero(); + assert(evec.size()==eval.size()); + auto N = evec.size(); + for (int i=0;i +class LocalCoherenceDeflatedGuesser: public LinearFunction { +private: + const std::vector &subspace; + const std::vector &evec_coarse; + const std::vector &eval_coarse; +public: + + LocalCoherenceDeflatedGuesser(const std::vector &_subspace, + const std::vector &_evec_coarse, + const std::vector &_eval_coarse) + : subspace(_subspace), + evec_coarse(_evec_coarse), + eval_coarse(_eval_coarse) + { + } + + void operator()(const FineField &src,FineField &guess) { + int N = (int)evec_coarse.size(); + CoarseField src_coarse(evec_coarse[0].Grid()); + CoarseField guess_coarse(evec_coarse[0].Grid()); guess_coarse = Zero(); + blockProject(src_coarse,src,subspace); + for (int i=0;i void basisRotate(std::vector &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) { typedef decltype(basis[0].View()) View; - std::vector basis_v(basis.size()); + auto tmp_v = basis[0].View(); + std::vector basis_v(basis.size(),tmp_v); typedef typename Field::vector_object vobj; GridBase* grid = basis[0].Grid(); @@ -63,7 +64,7 @@ void basisRotate(std::vector &basis,Eigen::MatrixXd& Qt,int j0, int j1, i thread_region { - std::vector < vobj > B(Nm); // Thread private + std::vector < vobj , commAllocator > B(Nm); // Thread private thread_loop_in_region( (int ss=0;ss < grid->oSites();ss++),{ for(int j=j0; j class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester { public: + LinearFunction &_HermOp; ImplicitlyRestartedLanczosHermOpTester(LinearFunction &HermOp) : _HermOp(HermOp) { }; int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox) @@ -250,6 +252,7 @@ class ImplicitlyRestartedLanczos { ///////////////////////// public: + ////////////////////////////////////////////////////////////////// // PAB: ////////////////////////////////////////////////////////////////// @@ -497,15 +500,13 @@ until convergence Field B(grid); B.Checkerboard() = evec[0].Checkerboard(); // power of two search pattern; not every evalue in eval2 is assessed. + int allconv =1; for(int jj = 1; jj<=Nstop; jj*=2){ int j = Nstop-jj; RealD e = eval2_copy[j]; // Discard the evalue basisRotateJ(B,evec,Qt,j,0,Nk,Nm); - if( _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) { - if ( j > Nconv ) { - Nconv=j+1; - jj=Nstop; // Terminate the scan - } + if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) { + allconv=0; } } // Do evec[0] for good measure @@ -513,8 +514,10 @@ until convergence int j=0; RealD e = eval2_copy[0]; basisRotateJ(B,evec,Qt,j,0,Nk,Nm); - _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox); + if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) allconv=0; } + if ( allconv ) Nconv = Nstop; + // test if we converged, if so, terminate std::cout<= "<=Nstop || beta_k < betastp){ diff --git a/lib/algorithms/iterative/LocalCoherenceLanczos.h b/Grid/algorithms/iterative/LocalCoherenceLanczos.h similarity index 71% rename from lib/algorithms/iterative/LocalCoherenceLanczos.h rename to Grid/algorithms/iterative/LocalCoherenceLanczos.h index ca5193df..40b4b347 100644 --- a/lib/algorithms/iterative/LocalCoherenceLanczos.h +++ b/Grid/algorithms/iterative/LocalCoherenceLanczos.h @@ -47,6 +47,7 @@ public: struct LocalCoherenceLanczosParams : Serializable { public: GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams, + bool, saveEvecs, bool, doFine, bool, doFineRead, bool, doCoarse, @@ -72,21 +73,24 @@ public: typedef Lattice FineField; LinearOperatorBase &_Linop; - Aggregation &_Aggregate; + std::vector &subspace; - ProjectedHermOp(LinearOperatorBase& linop, Aggregation &aggregate) : - _Linop(linop), - _Aggregate(aggregate) { }; + ProjectedHermOp(LinearOperatorBase& linop, std::vector & _subspace) : + _Linop(linop), subspace(_subspace) + { + assert(subspace.size() >0); + }; void operator()(const CoarseField& in, CoarseField& out) { + GridBase *FineGrid = subspace[0].Grid(); + int checkerboard = subspace[0].Checkerboard(); - GridBase *FineGrid = _Aggregate.FineGrid; - FineField fin(FineGrid); - FineField fout(FineGrid); + FineField fin (FineGrid); fin.Checkerboard()= checkerboard; + FineField fout(FineGrid); fout.Checkerboard() = checkerboard; - _Aggregate.PromoteFromSubspace(in,fin); std::cout< & _poly; LinearOperatorBase &_Linop; - Aggregation &_Aggregate; + std::vector &subspace; - ProjectedFunctionHermOp(OperatorFunction & poly,LinearOperatorBase& linop, - Aggregation &aggregate) : + ProjectedFunctionHermOp(OperatorFunction & poly, + LinearOperatorBase& linop, + std::vector & _subspace) : _poly(poly), _Linop(linop), - _Aggregate(aggregate) { }; + subspace(_subspace) + { }; void operator()(const CoarseField& in, CoarseField& out) { - GridBase *FineGrid = _Aggregate.FineGrid; + GridBase *FineGrid = subspace[0].Grid(); + int checkerboard = subspace[0].Checkerboard(); - FineField fin(FineGrid) ;fin.Checkerboard() =_Aggregate.Checkerboard(); - FineField fout(FineGrid);fout.Checkerboard() =_Aggregate.Checkerboard(); + FineField fin (FineGrid); fin.Checkerboard() =checkerboard; + FineField fout(FineGrid);fout.Checkerboard() =checkerboard; - _Aggregate.PromoteFromSubspace(in,fin); std::cout< & _Poly; OperatorFunction & _smoother; LinearOperatorBase &_Linop; - Aggregation &_Aggregate; RealD _coarse_relax_tol; + std::vector &_subspace; + ImplicitlyRestartedLanczosSmoothedTester(LinearFunction &Poly, OperatorFunction &smoother, LinearOperatorBase &Linop, - Aggregation &Aggregate, + std::vector &subspace, RealD coarse_relax_tol=5.0e3) - : _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly), _coarse_relax_tol(coarse_relax_tol) { }; + : _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace), + _coarse_relax_tol(coarse_relax_tol) + { }; int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) { CoarseField v(B); RealD eval_poly = eval; + // Apply operator _Poly(B,v); @@ -170,14 +181,13 @@ public: } int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) { - GridBase *FineGrid = _Aggregate.FineGrid; - - int checkerboard = _Aggregate.Checkerboard(); - + GridBase *FineGrid = _subspace[0].Grid(); + int checkerboard = _subspace[0].Checkerboard(); FineField fB(FineGrid);fB.Checkerboard() =checkerboard; FineField fv(FineGrid);fv.Checkerboard() =checkerboard; - _Aggregate.PromoteFromSubspace(B,fv); + blockPromote(B,fv,_subspace); + _smoother(_Linop,fv,fB); RealD eval_poly = eval; @@ -219,27 +229,67 @@ protected: int _checkerboard; LinearOperatorBase & _FineOp; - // FIXME replace Aggregation with vector of fine; the code reuse is too small for - // the hassle and complexity of cross coupling. - Aggregation _Aggregate; - std::vector evals_fine; - std::vector evals_coarse; - std::vector evec_coarse; + std::vector &evals_fine; + std::vector &evals_coarse; + std::vector &subspace; + std::vector &evec_coarse; + +private: + std::vector _evals_fine; + std::vector _evals_coarse; + std::vector _subspace; + std::vector _evec_coarse; + public: + LocalCoherenceLanczos(GridBase *FineGrid, GridBase *CoarseGrid, LinearOperatorBase &FineOp, int checkerboard) : _CoarseGrid(CoarseGrid), _FineGrid(FineGrid), - _Aggregate(CoarseGrid,FineGrid,checkerboard), _FineOp(FineOp), - _checkerboard(checkerboard) + _checkerboard(checkerboard), + evals_fine (_evals_fine), + evals_coarse(_evals_coarse), + subspace (_subspace), + evec_coarse(_evec_coarse) { evals_fine.resize(0); evals_coarse.resize(0); }; - void Orthogonalise(void ) { _Aggregate.Orthogonalise(); } + ////////////////////////////////////////////////////////////////////////// + // Alternate constructore, external storage for use by Hadrons module + ////////////////////////////////////////////////////////////////////////// + LocalCoherenceLanczos(GridBase *FineGrid, + GridBase *CoarseGrid, + LinearOperatorBase &FineOp, + int checkerboard, + std::vector &ext_subspace, + std::vector &ext_coarse, + std::vector &ext_eval_fine, + std::vector &ext_eval_coarse + ) : + _CoarseGrid(CoarseGrid), + _FineGrid(FineGrid), + _FineOp(FineOp), + _checkerboard(checkerboard), + evals_fine (ext_eval_fine), + evals_coarse(ext_eval_coarse), + subspace (ext_subspace), + evec_coarse (ext_coarse) + { + evals_fine.resize(0); + evals_coarse.resize(0); + }; + + void Orthogonalise(void ) { + CoarseScalar InnerProd(_CoarseGrid); + std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"< static RealD normalise(T& v) { @@ -248,43 +298,44 @@ public: v = v * (1.0/nn); return nn; } - + /* void fakeFine(void) { int Nk = nbasis; - _Aggregate.subspace.resize(Nk,_FineGrid); - _Aggregate.subspace[0]=1.0; - _Aggregate.subspace[0].Checkerboard()=_checkerboard; - normalise(_Aggregate.subspace[0]); + subspace.resize(Nk,_FineGrid); + subspace[0]=1.0; + subspace[0].Checkerboard()=_checkerboard; + normalise(subspace[0]); PlainHermOp Op(_FineOp); for(int k=1;k Op(_FineOp); ImplicitlyRestartedLanczosHermOpTester SimpleTester(Op); for(int k=0;k ChebySmooth(cheby_smooth); - ProjectedFunctionHermOp ChebyOp (ChebySmooth,_FineOp,_Aggregate); - ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); + ProjectedFunctionHermOp ChebyOp (ChebySmooth,_FineOp,subspace); + ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax); for(int k=0;k Op(_FineOp); evals_fine.resize(Nm); - _Aggregate.subspace.resize(Nm,_FineGrid); + subspace.resize(Nm,_FineGrid); ImplicitlyRestartedLanczos IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); FineField src(_FineGrid); src=1.0; src.Checkerboard() = _checkerboard; int Nconv; - IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false); + IRL.calc(evals_fine,subspace,src,Nconv,false); // Shrink down to number saved assert(Nstop>=nbasis); assert(Nconv>=nbasis); evals_fine.resize(nbasis); - _Aggregate.subspace.resize(nbasis,_FineGrid); + subspace.resize(nbasis,_FineGrid); } void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax, int Nstop, int Nk, int Nm,RealD resid, RealD MaxIt, RealD betastp, int MinRes) { Chebyshev Cheby(cheby_op); - ProjectedHermOp Op(_FineOp,_Aggregate); - ProjectedFunctionHermOp ChebyOp (Cheby,_FineOp,_Aggregate); + ProjectedHermOp Op(_FineOp,subspace); + ProjectedFunctionHermOp ChebyOp (Cheby,_FineOp,subspace); ////////////////////////////////////////////////////////////////////////////////////////////////// // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL ////////////////////////////////////////////////////////////////////////////////////////////////// Chebyshev ChebySmooth(cheby_smooth); - ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); + ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax); evals_coarse.resize(Nm); evec_coarse.resize(Nm,_CoarseGrid); diff --git a/lib/algorithms/iterative/NormalEquations.h b/Grid/algorithms/iterative/NormalEquations.h similarity index 100% rename from lib/algorithms/iterative/NormalEquations.h rename to Grid/algorithms/iterative/NormalEquations.h diff --git a/lib/algorithms/iterative/PrecConjugateResidual.h b/Grid/algorithms/iterative/PrecConjugateResidual.h similarity index 100% rename from lib/algorithms/iterative/PrecConjugateResidual.h rename to Grid/algorithms/iterative/PrecConjugateResidual.h diff --git a/lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h b/Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h similarity index 100% rename from lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h rename to Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h diff --git a/Grid/algorithms/iterative/SchurRedBlack.h b/Grid/algorithms/iterative/SchurRedBlack.h new file mode 100644 index 00000000..4b9a2ed8 --- /dev/null +++ b/Grid/algorithms/iterative/SchurRedBlack.h @@ -0,0 +1,473 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/algorithms/iterative/SchurRedBlack.h + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_SCHUR_RED_BLACK_H +#define GRID_SCHUR_RED_BLACK_H + + + /* + * Red black Schur decomposition + * + * M = (Mee Meo) = (1 0 ) (Mee 0 ) (1 Mee^{-1} Meo) + * (Moe Moo) (Moe Mee^-1 1 ) (0 Moo-Moe Mee^-1 Meo) (0 1 ) + * = L D U + * + * L^-1 = (1 0 ) + * (-MoeMee^{-1} 1 ) + * L^{dag} = ( 1 Mee^{-dag} Moe^{dag} ) + * ( 0 1 ) + * L^{-d} = ( 1 -Mee^{-dag} Moe^{dag} ) + * ( 0 1 ) + * + * U^-1 = (1 -Mee^{-1} Meo) + * (0 1 ) + * U^{dag} = ( 1 0) + * (Meo^dag Mee^{-dag} 1) + * U^{-dag} = ( 1 0) + * (-Meo^dag Mee^{-dag} 1) + *********************** + * M psi = eta + *********************** + *Odd + * i) D_oo psi_o = L^{-1} eta_o + * eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e) + * + * Wilson: + * (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o + * Stag: + * D_oo psi_o = L^{-1} eta = (eta_o - Moe Mee^{-1} eta_e) + * + * L^-1 eta_o= (1 0 ) (e + * (-MoeMee^{-1} 1 ) + * + *Even + * ii) Mee psi_e + Meo psi_o = src_e + * + * => sol_e = M_ee^-1 * ( src_e - Meo sol_o )... + * + * + * TODO: Other options: + * + * a) change checkerboards for Schur e<->o + * + * Left precon by Moo^-1 + * b) Doo^{dag} M_oo^-dag Moo^-1 Doo psi_0 = (D_oo)^dag M_oo^-dag Moo^-1 L^{-1} eta_o + * eta_o' = (D_oo)^dag M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e) + * + * Right precon by Moo^-1 + * c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1} eta_o + * eta_o' = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e) + * psi_o = M_oo^-1 phi_o + * TODO: Deflation + */ +namespace Grid { + + /////////////////////////////////////////////////////////////////////////////////////////////////////// + // Use base class to share code + /////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////// + // Take a matrix and form a Red Black solver calling a Herm solver + // Use of RB info prevents making SchurRedBlackSolve conform to standard interface + /////////////////////////////////////////////////////////////////////////////////////////////////////// + template class SchurRedBlackBase { + protected: + typedef CheckerBoardedSparseMatrixBase Matrix; + OperatorFunction & _HermitianRBSolver; + int CBfactorise; + bool subGuess; + public: + + SchurRedBlackBase(OperatorFunction &HermitianRBSolver, const bool initSubGuess = false) : + _HermitianRBSolver(HermitianRBSolver) + { + CBfactorise = 0; + subtractGuess(initSubGuess); + }; + void subtractGuess(const bool initSubGuess) + { + subGuess = initSubGuess; + } + bool isSubtractGuess(void) + { + return subGuess; + } + + ///////////////////////////////////////////////////////////// + // Shared code + ///////////////////////////////////////////////////////////// + void operator() (Matrix & _Matrix,const Field &in, Field &out){ + ZeroGuesser guess; + (*this)(_Matrix,in,out,guess); + } + void operator()(Matrix &_Matrix, const std::vector &in, std::vector &out) + { + ZeroGuesser guess; + (*this)(_Matrix,in,out,guess); + } + + template + void operator()(Matrix &_Matrix, const std::vector &in, std::vector &out,Guesser &guess) + { + GridBase *grid = _Matrix.RedBlackGrid(); + GridBase *fgrid= _Matrix.Grid(); + int nblock = in.size(); + + std::vector src_o(nblock,grid); + std::vector sol_o(nblock,grid); + + std::vector guess_save; + + Field resid(fgrid); + Field tmp(grid); + + //////////////////////////////////////////////// + // Prepare RedBlack source + //////////////////////////////////////////////// + for(int b=0;b + void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){ + + // FIXME CGdiagonalMee not implemented virtual function + // FIXME use CBfactorise to control schur decomp + GridBase *grid = _Matrix.RedBlackGrid(); + GridBase *fgrid= _Matrix.Grid(); + + Field resid(fgrid); + Field src_o(grid); + Field src_e(grid); + Field sol_o(grid); + + //////////////////////////////////////////////// + // RedBlack source + //////////////////////////////////////////////// + RedBlackSource(_Matrix,in,src_e,src_o); + + //////////////////////////////// + // Construct the guess + //////////////////////////////// + Field tmp(grid); + guess(src_o,sol_o); + + Field guess_save(grid); + guess_save = sol_o; + + ////////////////////////////////////////////////////////////// + // Call the red-black solver + ////////////////////////////////////////////////////////////// + RedBlackSolve(_Matrix,src_o,sol_o); + + //////////////////////////////////////////////// + // Fionn A2A boolean behavioural control + //////////////////////////////////////////////// + if (subGuess) sol_o= sol_o-guess_save; + + /////////////////////////////////////////////////// + // RedBlack solution needs the even source + /////////////////////////////////////////////////// + RedBlackSolution(_Matrix,sol_o,src_e,out); + + // Verify the unprec residual + if ( ! subGuess ) { + _Matrix.M(out,resid); + resid = resid-in; + RealD ns = norm2(in); + RealD nr = norm2(resid); + + std::cout< &src_o, std::vector &sol_o)=0; + + }; + + template class SchurRedBlackStaggeredSolve : public SchurRedBlackBase { + public: + typedef CheckerBoardedSparseMatrixBase Matrix; + + SchurRedBlackStaggeredSolve(OperatorFunction &HermitianRBSolver, const bool initSubGuess = false) + : SchurRedBlackBase (HermitianRBSolver,initSubGuess) + { + } + + ////////////////////////////////////////////////////// + // Override RedBlack specialisation + ////////////////////////////////////////////////////// + virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) + { + GridBase *grid = _Matrix.RedBlackGrid(); + GridBase *fgrid= _Matrix.Grid(); + + Field tmp(grid); + Field Mtmp(grid); + + pickCheckerboard(Even,src_e,src); + pickCheckerboard(Odd ,src_o,src); + + ///////////////////////////////////////////////////// + // src_o = (source_o - Moe MeeInv source_e) + ///////////////////////////////////////////////////// + _Matrix.MooeeInv(src_e,tmp); assert( tmp.Checkerboard() ==Even); + _Matrix.Meooe (tmp,Mtmp); assert( Mtmp.Checkerboard() ==Odd); + tmp=src_o-Mtmp; assert( tmp.Checkerboard() ==Odd); + + _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm. + } + virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e_c,Field &sol) + { + GridBase *grid = _Matrix.RedBlackGrid(); + GridBase *fgrid= _Matrix.Grid(); + + Field tmp(grid); + Field sol_e(grid); + Field src_e(grid); + + src_e = src_e_c; // Const correctness + + /////////////////////////////////////////////////// + // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... + /////////////////////////////////////////////////// + _Matrix.Meooe(sol_o,tmp); assert( tmp.Checkerboard() ==Even); + src_e = src_e-tmp; assert( src_e.Checkerboard() ==Even); + _Matrix.MooeeInv(src_e,sol_e); assert( sol_e.Checkerboard() ==Even); + + setCheckerboard(sol,sol_e); assert( sol_e.Checkerboard() ==Even); + setCheckerboard(sol,sol_o); assert( sol_o.Checkerboard() ==Odd ); + } + virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o) + { + SchurStaggeredOperator _HermOpEO(_Matrix); + this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.Checkerboard()==Odd); + }; + virtual void RedBlackSolve (Matrix & _Matrix,const std::vector &src_o, std::vector &sol_o) + { + SchurStaggeredOperator _HermOpEO(_Matrix); + this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); + } + }; + template using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve; + + /////////////////////////////////////////////////////////////////////////////////////////////////////// + // Site diagonal has Mooee on it. + /////////////////////////////////////////////////////////////////////////////////////////////////////// + template class SchurRedBlackDiagMooeeSolve : public SchurRedBlackBase { + public: + typedef CheckerBoardedSparseMatrixBase Matrix; + + SchurRedBlackDiagMooeeSolve(OperatorFunction &HermitianRBSolver, const bool initSubGuess = false) + : SchurRedBlackBase (HermitianRBSolver,initSubGuess) {}; + + + ////////////////////////////////////////////////////// + // Override RedBlack specialisation + ////////////////////////////////////////////////////// + virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) + { + GridBase *grid = _Matrix.RedBlackGrid(); + GridBase *fgrid= _Matrix.Grid(); + + Field tmp(grid); + Field Mtmp(grid); + + pickCheckerboard(Even,src_e,src); + pickCheckerboard(Odd ,src_o,src); + + ///////////////////////////////////////////////////// + // src_o = Mdag * (source_o - Moe MeeInv source_e) + ///////////////////////////////////////////////////// + _Matrix.MooeeInv(src_e,tmp); assert( tmp.Checkerboard() ==Even); + _Matrix.Meooe (tmp,Mtmp); assert( Mtmp.Checkerboard() ==Odd); + tmp=src_o-Mtmp; assert( tmp.Checkerboard() ==Odd); + + // get the right MpcDag + SchurDiagMooeeOperator _HermOpEO(_Matrix); + _HermOpEO.MpcDag(tmp,src_o); assert(src_o.Checkerboard() ==Odd); + + } + virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol) + { + GridBase *grid = _Matrix.RedBlackGrid(); + GridBase *fgrid= _Matrix.Grid(); + + Field tmp(grid); + Field sol_e(grid); + Field src_e_i(grid); + /////////////////////////////////////////////////// + // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... + /////////////////////////////////////////////////// + _Matrix.Meooe(sol_o,tmp); assert( tmp.Checkerboard() ==Even); + src_e_i = src_e-tmp; assert( src_e_i.Checkerboard() ==Even); + _Matrix.MooeeInv(src_e_i,sol_e); assert( sol_e.Checkerboard() ==Even); + + setCheckerboard(sol,sol_e); assert( sol_e.Checkerboard() ==Even); + setCheckerboard(sol,sol_o); assert( sol_o.Checkerboard() ==Odd ); + } + virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o) + { + SchurDiagMooeeOperator _HermOpEO(_Matrix); + this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.Checkerboard()==Odd); + }; + virtual void RedBlackSolve (Matrix & _Matrix,const std::vector &src_o, std::vector &sol_o) + { + SchurDiagMooeeOperator _HermOpEO(_Matrix); + this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); + } + }; + + /////////////////////////////////////////////////////////////////////////////////////////////////////// + // Site diagonal is identity, right preconditioned by Mee^inv + // ( 1 - Meo Moo^inv Moe Mee^inv ) phi =( 1 - Meo Moo^inv Moe Mee^inv ) Mee psi = = eta = eta + //=> psi = MeeInv phi + /////////////////////////////////////////////////////////////////////////////////////////////////////// + template class SchurRedBlackDiagTwoSolve : public SchurRedBlackBase { + public: + typedef CheckerBoardedSparseMatrixBase Matrix; + + ///////////////////////////////////////////////////// + // Wrap the usual normal equations Schur trick + ///////////////////////////////////////////////////// + SchurRedBlackDiagTwoSolve(OperatorFunction &HermitianRBSolver, const bool initSubGuess = false) + : SchurRedBlackBase(HermitianRBSolver,initSubGuess) {}; + + virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) + { + GridBase *grid = _Matrix.RedBlackGrid(); + GridBase *fgrid= _Matrix.Grid(); + + SchurDiagTwoOperator _HermOpEO(_Matrix); + + Field tmp(grid); + Field Mtmp(grid); + + pickCheckerboard(Even,src_e,src); + pickCheckerboard(Odd ,src_o,src); + + ///////////////////////////////////////////////////// + // src_o = Mdag * (source_o - Moe MeeInv source_e) + ///////////////////////////////////////////////////// + _Matrix.MooeeInv(src_e,tmp); assert( tmp.Checkerboard() ==Even); + _Matrix.Meooe (tmp,Mtmp); assert( Mtmp.Checkerboard() ==Odd); + tmp=src_o-Mtmp; assert( tmp.Checkerboard() ==Odd); + + // get the right MpcDag + _HermOpEO.MpcDag(tmp,src_o); assert(src_o.Checkerboard() ==Odd); + } + + virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol) + { + GridBase *grid = _Matrix.RedBlackGrid(); + GridBase *fgrid= _Matrix.Grid(); + + Field sol_o_i(grid); + Field tmp(grid); + Field sol_e(grid); + + //////////////////////////////////////////////// + // MooeeInv due to pecond + //////////////////////////////////////////////// + _Matrix.MooeeInv(sol_o,tmp); + sol_o_i = tmp; + + /////////////////////////////////////////////////// + // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... + /////////////////////////////////////////////////// + _Matrix.Meooe(sol_o_i,tmp); assert( tmp.Checkerboard() ==Even); + tmp = src_e-tmp; assert( src_e.Checkerboard() ==Even); + _Matrix.MooeeInv(tmp,sol_e); assert( sol_e.Checkerboard() ==Even); + + setCheckerboard(sol,sol_e); assert( sol_e.Checkerboard() ==Even); + setCheckerboard(sol,sol_o_i); assert( sol_o_i.Checkerboard() ==Odd ); + }; + + virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o) + { + SchurDiagTwoOperator _HermOpEO(_Matrix); + this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); + }; + virtual void RedBlackSolve (Matrix & _Matrix,const std::vector &src_o, std::vector &sol_o) + { + SchurDiagTwoOperator _HermOpEO(_Matrix); + this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); + } + }; +} +#endif diff --git a/lib/allocator/AlignedAllocator.cc b/Grid/allocator/AlignedAllocator.cc similarity index 100% rename from lib/allocator/AlignedAllocator.cc rename to Grid/allocator/AlignedAllocator.cc diff --git a/lib/allocator/AlignedAllocator.h b/Grid/allocator/AlignedAllocator.h similarity index 99% rename from lib/allocator/AlignedAllocator.h rename to Grid/allocator/AlignedAllocator.h index f55fe71b..ed1fbec2 100644 --- a/lib/allocator/AlignedAllocator.h +++ b/Grid/allocator/AlignedAllocator.h @@ -219,7 +219,6 @@ public: if ( __freeme ) free((void *)__freeme); #endif #endif - } void construct(pointer __p, const _Tp& __val) { }; void construct(pointer __p) { }; diff --git a/lib/cartesian/Cartesian.h b/Grid/cartesian/Cartesian.h similarity index 100% rename from lib/cartesian/Cartesian.h rename to Grid/cartesian/Cartesian.h diff --git a/lib/cartesian/Cartesian_base.h b/Grid/cartesian/Cartesian_base.h similarity index 99% rename from lib/cartesian/Cartesian_base.h rename to Grid/cartesian/Cartesian_base.h index 813ea548..76abe0ee 100644 --- a/lib/cartesian/Cartesian_base.h +++ b/Grid/cartesian/Cartesian_base.h @@ -60,6 +60,7 @@ public: virtual ~GridBase() = default; + // Physics Grid information. Coordinate _simd_layout;// Which dimensions get relayed out over simd lanes. Coordinate _fdimensions;// (full) Global dimensions of array prior to cb removal @@ -79,6 +80,8 @@ public: Coordinate _lstart; // local start of array in gcoors _processor_coor[d]*_ldimensions[d] Coordinate _lend ; // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 + bool _isCheckerBoarded; + public: //////////////////////////////////////////////////////////////// diff --git a/lib/cartesian/Cartesian_full.h b/Grid/cartesian/Cartesian_full.h similarity index 97% rename from lib/cartesian/Cartesian_full.h rename to Grid/cartesian/Cartesian_full.h index 5aa0747b..c083817b 100644 --- a/lib/cartesian/Cartesian_full.h +++ b/Grid/cartesian/Cartesian_full.h @@ -96,6 +96,7 @@ public: /////////////////////// // Grid information /////////////////////// + _isCheckerBoarded = false; _ndimension = dimensions.size(); _fdimensions.resize(_ndimension); @@ -121,6 +122,7 @@ public: // Use a reduced simd grid _ldimensions[d] = _gdimensions[d] / _processors[d]; //local dimensions + //std::cout << _ldimensions[d] << " " << _gdimensions[d] << " " << _processors[d] << std::endl; assert(_ldimensions[d] * _processors[d] == _gdimensions[d]); _rdimensions[d] = _ldimensions[d] / _simd_layout[d]; //overdecomposition @@ -165,6 +167,7 @@ public: block = block * _rdimensions[d]; } }; + }; NAMESPACE_END(Grid); diff --git a/lib/cartesian/Cartesian_red_black.h b/Grid/cartesian/Cartesian_red_black.h similarity index 99% rename from lib/cartesian/Cartesian_red_black.h rename to Grid/cartesian/Cartesian_red_black.h index 3cde6b7f..34f763d2 100644 --- a/lib/cartesian/Cartesian_red_black.h +++ b/Grid/cartesian/Cartesian_red_black.h @@ -140,9 +140,8 @@ public: const Coordinate &checker_dim_mask, int checker_dim) { - /////////////////////// - // Grid information - /////////////////////// + + _isCheckerBoarded = true; _checker_dim = checker_dim; assert(checker_dim_mask[checker_dim] == 1); _ndimension = dimensions.size(); diff --git a/lib/communicator/Communicator.h b/Grid/communicator/Communicator.h similarity index 100% rename from lib/communicator/Communicator.h rename to Grid/communicator/Communicator.h diff --git a/lib/communicator/Communicator_base.cc b/Grid/communicator/Communicator_base.cc similarity index 100% rename from lib/communicator/Communicator_base.cc rename to Grid/communicator/Communicator_base.cc diff --git a/lib/communicator/Communicator_base.h b/Grid/communicator/Communicator_base.h similarity index 99% rename from lib/communicator/Communicator_base.h rename to Grid/communicator/Communicator_base.h index 0eebd305..11dbfcbb 100644 --- a/lib/communicator/Communicator_base.h +++ b/Grid/communicator/Communicator_base.h @@ -83,6 +83,7 @@ private: public: + //////////////////////////////////////////////////////////////////////////////////////// // Wraps MPI_Cart routines, or implements equivalent on other impls //////////////////////////////////////////////////////////////////////////////////////// diff --git a/lib/communicator/Communicator_mpi3.cc b/Grid/communicator/Communicator_mpi3.cc similarity index 92% rename from lib/communicator/Communicator_mpi3.cc rename to Grid/communicator/Communicator_mpi3.cc index c13bc2e4..4d02c7b9 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/Grid/communicator/Communicator_mpi3.cc @@ -44,11 +44,15 @@ void CartesianCommunicator::Init(int *argc, char ***argv) MPI_Initialized(&flag); // needed to coexist with other libs apparently if ( !flag ) { MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided); - //assert (provided == MPI_THREAD_MULTIPLE); + // assert (provided == MPI_THREAD_MULTIPLE); + //If only 1 comms thread we require any threading mode other than SINGLE, but for multiple comms threads we need MULTIPLE + if( (nCommThreads == 1 && provided == MPI_THREAD_SINGLE) || + (nCommThreads > 1 && provided != MPI_THREAD_MULTIPLE) ) { + assert(0); + } } - Grid_quiesce_nodes(); - + // Never clean up as done once. MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); GlobalSharedMemory::Init(communicator_world); @@ -84,9 +88,17 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, Coordinate &coor) CartesianCommunicator::CartesianCommunicator(const Coordinate &processors) { MPI_Comm optimal_comm; - GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm); // Remap using the shared memory optimising routine + //////////////////////////////////////////////////// + // Remap using the shared memory optimising routine + // The remap creates a comm which must be freed + //////////////////////////////////////////////////// + GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm); InitFromMPICommunicator(processors,optimal_comm); SetCommunicator(optimal_comm); + /////////////////////////////////////////////////// + // Free the temp communicator + /////////////////////////////////////////////////// + MPI_Comm_free(&optimal_comm); } ////////////////////////////////// @@ -111,10 +123,8 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const // split the communicator ////////////////////////////////////////////////////////////////////////////////////////////////////// // int Nparent = parent._processors ; - // std::cout << " splitting from communicator "< /* END LEGAL */ #include +#include #ifdef GRID_NVCC #include @@ -120,19 +121,150 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm) assert(WorldNode!=-1); _ShmSetup=1; } - -void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_MPI_Comm & optimal_comm) +// Gray encode support +int BinaryToGray (int binary) { + int gray = (binary>>1)^binary; + return gray; +} +int Log2Size(int TwoToPower,int MAXLOG2) { - //////////////////////////////////////////////////////////////// - // Assert power of two shm_size. - //////////////////////////////////////////////////////////////// int log2size = -1; - for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){ - if ( (0x1< HyperCubeCoords(maxhdim,0); + std::vector RootHyperCubeCoords(maxhdim,0); + int R; + int I; + int N; + const int namelen = _POSIX_HOST_NAME_MAX; + char name[namelen]; + + // Parse ICE-XA hostname to get hypercube location + gethostname(name,namelen); + int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ; + assert(nscan==3); + + int nlo = N%9; + int nhi = N/9; + uint32_t hypercoor = (R<<8)|(I<<5)|(nhi<<3)|nlo ; + uint32_t rootcoor = hypercoor; + + ////////////////////////////////////////////////////////////////// + // Print debug info + ////////////////////////////////////////////////////////////////// + for(int d=0;d>d)&0x1; + } + + std::string hname(name); + std::cout << "hostname "<=0); + + ////////////////////////////////////// + // Printing + ////////////////////////////////////// + for(int d=0;d>d)&0x1; + } + + //////////////////////////////////////////////////////////////// + // Identify subblock of ranks on node spreading across dims + // in a maximally symmetrical way + //////////////////////////////////////////////////////////////// + int ndimension = processors.size(); + std::vector processor_coor(ndimension); + std::vector WorldDims = processors; std::vector ShmDims (ndimension,1); std::vector NodeDims (ndimension); + std::vector ShmCoor (ndimension); std::vector NodeCoor (ndimension); std::vector WorldCoor(ndimension); + std::vector HyperCoor(ndimension); + int dim = 0; + for(int l2=0;l2> bits; + } + //////////////////////////////////////////////////////////////// + // Check processor counts match + //////////////////////////////////////////////////////////////// + int Nprocessors=1; + for(int i=0;i shmids(WorldShmSize); + + if ( WorldShmRank == 0 ) { + for(int r=0;rpw_name,WorldNode,r); shm_unlink(shm_name); int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666); @@ -349,7 +589,11 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) #endif void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0); - if ( ptr == (void * )MAP_FAILED ) { perror("failed mmap"); assert(0); } + // std::cout << "Set WorldShmCommBufs["<pw_name,WorldNode,r); int fd=shm_open(shm_name,O_RDWR,0666); if ( fd<0 ) { perror("failed shm_open"); assert(0); } @@ -430,7 +675,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm) uint32_t wsr = (r==ShmRank) ? GlobalSharedMemory::WorldShmRank : 0 ; - MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,comm); + MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,ShmComm); ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[wsr]; } @@ -504,6 +749,13 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p) return (void *) remote; } } +SharedMemory::~SharedMemory() +{ + int MPI_is_finalised; MPI_Finalized(&MPI_is_finalised); + if ( !MPI_is_finalised ) { + MPI_Comm_free(&ShmComm); + } +}; NAMESPACE_END(Grid); diff --git a/lib/communicator/SharedMemoryNone.cc b/Grid/communicator/SharedMemoryNone.cc similarity index 99% rename from lib/communicator/SharedMemoryNone.cc rename to Grid/communicator/SharedMemoryNone.cc index ba705112..ed37ab47 100644 --- a/lib/communicator/SharedMemoryNone.cc +++ b/Grid/communicator/SharedMemoryNone.cc @@ -122,6 +122,8 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p) { return NULL; } +SharedMemory::~SharedMemory() +{}; NAMESPACE_END(Grid); diff --git a/lib/cshift/Cshift.h b/Grid/cshift/Cshift.h similarity index 100% rename from lib/cshift/Cshift.h rename to Grid/cshift/Cshift.h diff --git a/lib/cshift/Cshift_common.h b/Grid/cshift/Cshift_common.h similarity index 83% rename from lib/cshift/Cshift_common.h rename to Grid/cshift/Cshift_common.h index 8c429723..df606657 100644 --- a/lib/cshift/Cshift_common.h +++ b/Grid/cshift/Cshift_common.h @@ -23,10 +23,9 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#ifndef _GRID_CSHIFT_COMMON_H_ -#define _GRID_CSHIFT_COMMON_H_ + *************************************************************************************/ + /* END LEGAL */ +#pragma once NAMESPACE_BEGIN(Grid); @@ -45,40 +44,44 @@ Gather_plane_simple (const Lattice &rhs,commVector &buffer,int dimen int so=plane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane int e1=rhs.Grid()->_slice_nblock[dimension]; int e2=rhs.Grid()->_slice_block[dimension]; + int ent = 0; + static Vector > table; table.resize(e1*e2); int stride=rhs.Grid()->_slice_stride[dimension]; + auto rhs_v = rhs.View(); if ( cbmask == 0x3 ) { - thread_loop_collapse2( (int n=0;n > table; for(int n=0;nCheckerBoardFromOindex(o+b); - if ( ocb &cbmask ) { - table.push_back(std::pair (bo++,o+b)); - } + int bo = n*e2; + table[ent++] = std::pair(off+bo+b,so+o+b); } } - thread_loop( (int i=0;iCheckerBoardFromOindex(o+b); + if ( ocb &cbmask ) { + table[ent++]=std::pair (off+bo++,so+o+b); + } + } + } } + thread_loop( (int i=0;i void -Gather_plane_extract(const Lattice &rhs,ExtractPointerArray pointers,int dimension,int plane,int cbmask) +Gather_plane_extract(const Lattice &rhs, + ExtractPointerArray pointers, + int dimension,int plane,int cbmask) { int rd = rhs.Grid()->_rdimensions[dimension]; @@ -102,7 +105,6 @@ Gather_plane_extract(const Lattice &rhs,ExtractPointerArray(temp,pointers,offset); - } }); } else { @@ -142,31 +144,37 @@ template void Scatter_plane_simple (Lattice &rhs,commVector_slice_nblock[dimension]; int e2=rhs.Grid()->_slice_block[dimension]; int stride=rhs.Grid()->_slice_stride[dimension]; - auto rhs_v = rhs.View(); + + static std::vector > table; table.resize(e1*e2); + int ent =0; + if ( cbmask ==0x3 ) { - thread_loop_collapse2( (int n=0;n_slice_stride[dimension]; int bo =n*rhs.Grid()->_slice_block[dimension]; - rhs_v[so+o+b]=buffer[bo+b]; + table[ent++] = std::pair(so+o+b,bo+b); } - }); + } + } else { - std::vector > table; int bo=0; for(int n=0;n_slice_stride[dimension]; int ocb=1<CheckerBoardFromOindex(o+b);// Could easily be a table lookup if ( ocb & cbmask ) { - table.push_back(std::pair (so+o+b,bo++)); + table[ent++]=std::pair (so+o+b,bo++); } } } - thread_loop( (int i=0;i void Scatter_plane_merge(Lattice &rhs,ExtractPointerA int e1=rhs.Grid()->_slice_nblock[dimension]; int e2=rhs.Grid()->_slice_block[dimension]; - auto rhs_v = rhs.View(); if(cbmask ==0x3 ) { + auto rhs_v = rhs.View(); thread_loop_collapse2( (int n=0;n_slice_stride[dimension]; @@ -198,9 +206,9 @@ template void Scatter_plane_merge(Lattice &rhs,ExtractPointerA // Case of SIMD split AND checker dim cannot currently be hit, except in // Test_cshift_red_black code. - // std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl; - // think this is buggy FIXME + // std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;// think this is buggy FIXME std::cout<<" Unthreaded warning -- buffer is not densely packed ??"<_slice_stride[dimension]; @@ -225,40 +233,44 @@ template void Copy_plane(Lattice& lhs,const Lattice &rhs cbmask=0x3; } - auto lhs_v = lhs.View(); - auto rhs_v = rhs.View(); - int ro = rplane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane int lo = lplane*lhs.Grid()->_ostride[dimension]; // base offset for start of plane int e1=rhs.Grid()->_slice_nblock[dimension]; // clearly loop invariant for icpc int e2=rhs.Grid()->_slice_block[dimension]; int stride = rhs.Grid()->_slice_stride[dimension]; + static std::vector > table; table.resize(e1*e2); + int ent=0; + if(cbmask == 0x3 ){ - thread_loop_collapse2((int n=0;n(lo+o,ro+o); } - }); + } } else { - thread_loop_collapse2( (int n=0;nCheckerBoardFromOindex(o); if ( ocb&cbmask ) { - vstream(lhs_v[lo+o],rhs_v[ro+o]); + table[ent++] = std::pair(lo+o,ro+o); } } - }); + } } - + + auto rhs_v = rhs.View(); + auto lhs_v = lhs.View(); + thread_loop( (int i=0;i void Copy_plane_permute(Lattice& lhs,const Lattice &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type) { - auto lhs_v = lhs.View(); - auto rhs_v = rhs.View(); int rd = rhs.Grid()->_rdimensions[dimension]; @@ -273,15 +285,29 @@ template void Copy_plane_permute(Lattice& lhs,const Lattice_slice_block [dimension]; int stride = rhs.Grid()->_slice_stride[dimension]; - thread_loop_collapse2( (int n=0;n > table; table.resize(e1*e2); + int ent=0; + double t_tab,t_perm; + if ( cbmask == 0x3 ) { + for(int n=0;n(lo+o+b,ro+o+b); + }} + } else { + for(int n=0;nCheckerBoardFromOindex(o+b); - if ( ocb&cbmask ) { - permute(lhs_v[lo+o+b],rhs_v[ro+o+b],permute_type); - } - } + if ( ocb&cbmask ) table[ent++] = std::pair(lo+o+b,ro+o+b); + }} + } + + auto rhs_v = rhs.View(); + auto lhs_v = lhs.View(); + thread_loop( (int i=0;i void Cshift_local(Lattice& ret,const Lattice &r sshift[0] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Even); sshift[1] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Odd); + double t_local; + if ( sshift[0] == sshift[1] ) { Cshift_local(ret,rhs,dimension,shift,0x3); } else { @@ -323,17 +351,13 @@ template void Cshift_local(Lattice &ret,const Lattice &r for(int x=0;x_ostride[dimension]; int cb= (cbmask==0x2)? Odd : Even; int sshift = grid->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb); int sx = (x+sshift)%rd; - - // FIXME : This must change where we have a - // Rotate slice. - // Document how this works ; why didn't I do this when I first wrote it... // wrap is whether sshift > rd. // num is sshift mod rd. // @@ -369,11 +393,8 @@ template void Cshift_local(Lattice &ret,const Lattice &r if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist); else Copy_plane(ret,rhs,dimension,x,sx,cbmask); - } } - NAMESPACE_END(Grid); -#endif diff --git a/lib/cshift/Cshift_mpi.h b/Grid/cshift/Cshift_mpi.h similarity index 92% rename from lib/cshift/Cshift_mpi.h rename to Grid/cshift/Cshift_mpi.h index 1ee2233c..0f0e80b1 100644 --- a/lib/cshift/Cshift_mpi.h +++ b/Grid/cshift/Cshift_mpi.h @@ -54,13 +54,13 @@ template Lattice Cshift(const Lattice &rhs,int dimension if ( !comm_dim ) { - // std::cout << "Cshift_local" < void Cshift_comms_simd(Lattice& ret,const LatticeCheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Even); sshift[1] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Odd); + //std::cout << "Cshift_comms_simd dim "< void Cshift_comms_simd(Lattice &ret,const Lattice_simd_layout[dimension]; int comm_dim = grid->_processors[dimension] >1 ; + //std::cout << "Cshift_comms_simd dim "<< dimension << " fd "<=0); diff --git a/lib/cshift/Cshift_none.h b/Grid/cshift/Cshift_none.h similarity index 100% rename from lib/cshift/Cshift_none.h rename to Grid/cshift/Cshift_none.h diff --git a/lib/json/json.hpp b/Grid/json/json.hpp similarity index 62% rename from lib/json/json.hpp rename to Grid/json/json.hpp index 3f5c2b19..c8b0cc9e 100644 --- a/lib/json/json.hpp +++ b/Grid/json/json.hpp @@ -1,11 +1,12 @@ /* __ _____ _____ _____ __| | __| | | | JSON for Modern C++ -| | |__ | | | | | | version 2.1.1 +| | |__ | | | | | | version 3.2.0 |_____|_____|_____|_|___| https://github.com/nlohmann/json Licensed under the MIT License . -Copyright (c) 2013-2017 Niels Lohmann . +SPDX-License-Identifier: MIT +Copyright (c) 2013-2018 Niels Lohmann . Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -29,42 +30,104 @@ SOFTWARE. #ifndef NLOHMANN_JSON_HPP #define NLOHMANN_JSON_HPP -#include // all_of, copy, fill, find, for_each, generate_n, none_of, remove, reverse, transform -#include // array +#define NLOHMANN_JSON_VERSION_MAJOR 3 +#define NLOHMANN_JSON_VERSION_MINOR 2 +#define NLOHMANN_JSON_VERSION_PATCH 0 + +#include // all_of, find, for_each #include // assert #include // and, not, or -#include // lconv, localeconv -#include // isfinite, labs, ldexp, signbit #include // nullptr_t, ptrdiff_t, size_t -#include // int64_t, uint64_t -#include // abort, strtod, strtof, strtold, strtoul, strtoll, strtoull -#include // memcpy, strlen -#include // forward_list -#include // function, hash, less +#include // hash, less #include // initializer_list -#include // hex -#include // istream, ostream -#include // advance, begin, back_inserter, bidirectional_iterator_tag, distance, end, inserter, iterator, iterator_traits, next, random_access_iterator_tag, reverse_iterator -#include // numeric_limits -#include // locale -#include // map -#include // addressof, allocator, allocator_traits, unique_ptr +#include // istream, ostream +#include // iterator_traits, random_access_iterator_tag #include // accumulate -#include // stringstream -#include // getline, stoi, string, to_string -#include // add_pointer, conditional, decay, enable_if, false_type, integral_constant, is_arithmetic, is_base_of, is_const, is_constructible, is_convertible, is_default_constructible, is_enum, is_floating_point, is_integral, is_nothrow_move_assignable, is_nothrow_move_constructible, is_pointer, is_reference, is_same, is_scalar, is_signed, remove_const, remove_cv, remove_pointer, remove_reference, true_type, underlying_type -#include // declval, forward, make_pair, move, pair, swap -#include // valarray +#include // string, stoi, to_string +#include // declval, forward, move, pair, swap + +// #include +#ifndef NLOHMANN_JSON_FWD_HPP +#define NLOHMANN_JSON_FWD_HPP + +#include // int64_t, uint64_t +#include // map +#include // allocator +#include // string #include // vector +/*! +@brief namespace for Niels Lohmann +@see https://github.com/nlohmann +@since version 1.0.0 +*/ +namespace nlohmann +{ +/*! +@brief default JSONSerializer template argument + +This serializer ignores the template arguments and uses ADL +([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl)) +for serialization. +*/ +template +struct adl_serializer; + +template class ObjectType = + std::map, + template class ArrayType = std::vector, + class StringType = std::string, class BooleanType = bool, + class NumberIntegerType = std::int64_t, + class NumberUnsignedType = std::uint64_t, + class NumberFloatType = double, + template class AllocatorType = std::allocator, + template class JSONSerializer = + adl_serializer> +class basic_json; + +/*! +@brief JSON Pointer + +A JSON pointer defines a string syntax for identifying a specific value +within a JSON document. It can be used with functions `at` and +`operator[]`. Furthermore, JSON pointers are the base for JSON patches. + +@sa [RFC 6901](https://tools.ietf.org/html/rfc6901) + +@since version 2.0.0 +*/ +template +class json_pointer; + +/*! +@brief default JSON class + +This type is the default specialization of the @ref basic_json class which +uses the standard template types. + +@since version 1.0.0 +*/ +using json = basic_json<>; +} + +#endif + +// #include + + +// This file contains all internal macro definitions +// You MUST include macro_unscope.hpp at the end of json.hpp to undef all of them + // exclude unsupported compilers -#if defined(__clang__) - #if (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) < 30400 - #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers" - #endif -#elif defined(__GNUC__) - #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40805 - #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers" +#if !defined(JSON_SKIP_UNSUPPORTED_COMPILER_CHECK) + #if defined(__clang__) + #if (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) < 30400 + #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers" + #endif + #elif defined(__GNUC__) && !(defined(__ICC) || defined(__INTEL_COMPILER)) + #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40800 + #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers" + #endif #endif #endif @@ -90,14 +153,36 @@ SOFTWARE. #endif // allow to disable exceptions -#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)) && not defined(JSON_NOEXCEPTION) +#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)) && !defined(JSON_NOEXCEPTION) #define JSON_THROW(exception) throw exception #define JSON_TRY try #define JSON_CATCH(exception) catch(exception) + #define JSON_INTERNAL_CATCH(exception) catch(exception) #else #define JSON_THROW(exception) std::abort() #define JSON_TRY if(true) #define JSON_CATCH(exception) if(false) + #define JSON_INTERNAL_CATCH(exception) if(false) +#endif + +// override exception macros +#if defined(JSON_THROW_USER) + #undef JSON_THROW + #define JSON_THROW JSON_THROW_USER +#endif +#if defined(JSON_TRY_USER) + #undef JSON_TRY + #define JSON_TRY JSON_TRY_USER +#endif +#if defined(JSON_CATCH_USER) + #undef JSON_CATCH + #define JSON_CATCH JSON_CATCH_USER + #undef JSON_INTERNAL_CATCH + #define JSON_INTERNAL_CATCH JSON_CATCH_USER +#endif +#if defined(JSON_INTERNAL_CATCH_USER) + #undef JSON_INTERNAL_CATCH + #define JSON_INTERNAL_CATCH JSON_INTERNAL_CATCH_USER #endif // manual branch prediction @@ -109,31 +194,16 @@ SOFTWARE. #define JSON_UNLIKELY(x) x #endif -/*! -@brief namespace for Niels Lohmann -@see https://github.com/nlohmann -@since version 1.0.0 -*/ -namespace nlohmann -{ -template -struct adl_serializer; +// C++ language standard detection +#if (defined(__cplusplus) && __cplusplus >= 201703L) || (defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464 + #define JSON_HAS_CPP_17 + #define JSON_HAS_CPP_14 +#elif (defined(__cplusplus) && __cplusplus >= 201402L) || (defined(_HAS_CXX14) && _HAS_CXX14 == 1) + #define JSON_HAS_CPP_14 +#endif -// forward declaration of basic_json (required to split the class) -template class ObjectType = - std::map, - template class ArrayType = std::vector, - class StringType = std::string, class BooleanType = bool, - class NumberIntegerType = std::int64_t, - class NumberUnsignedType = std::uint64_t, - class NumberFloatType = double, - template class AllocatorType = std::allocator, - template class JSONSerializer = - adl_serializer> -class basic_json; - -// Ugly macros to avoid uglier copy-paste when specializing basic_json -// This is only temporary and will be removed in 3.0 +// Ugly macros to avoid uglier copy-paste when specializing basic_json. They +// may be removed in the future once the class is split. #define NLOHMANN_BASIC_JSON_TPL_DECLARATION \ template class ObjectType, \ @@ -148,17 +218,409 @@ class basic_json; NumberIntegerType, NumberUnsignedType, NumberFloatType, \ AllocatorType, JSONSerializer> +// #include + +#include // not +#include // size_t +#include // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type + +namespace nlohmann +{ +namespace detail +{ +// alias templates to reduce boilerplate +template +using enable_if_t = typename std::enable_if::type; + +template +using uncvref_t = typename std::remove_cv::type>::type; + +// implementation of C++14 index_sequence and affiliates +// source: https://stackoverflow.com/a/32223343 +template +struct index_sequence +{ + using type = index_sequence; + using value_type = std::size_t; + static constexpr std::size_t size() noexcept + { + return sizeof...(Ints); + } +}; + +template +struct merge_and_renumber; + +template +struct merge_and_renumber, index_sequence> + : index_sequence < I1..., (sizeof...(I1) + I2)... > {}; + +template +struct make_index_sequence + : merge_and_renumber < typename make_index_sequence < N / 2 >::type, + typename make_index_sequence < N - N / 2 >::type > {}; + +template<> struct make_index_sequence<0> : index_sequence<> {}; +template<> struct make_index_sequence<1> : index_sequence<0> {}; + +template +using index_sequence_for = make_index_sequence; + +// dispatch utility (taken from ranges-v3) +template struct priority_tag : priority_tag < N - 1 > {}; +template<> struct priority_tag<0> {}; + +// taken from ranges-v3 +template +struct static_const +{ + static constexpr T value{}; +}; + +template +constexpr T static_const::value; +} +} + +// #include + + +#include // not +#include // numeric_limits +#include // false_type, is_constructible, is_integral, is_same, true_type +#include // declval + +// #include + +// #include + +// #include + + +#include + +// #include + + +namespace nlohmann +{ +namespace detail +{ +template struct make_void +{ + using type = void; +}; +template using void_t = typename make_void::type; +} +} + + +// http://en.cppreference.com/w/cpp/experimental/is_detected +namespace nlohmann +{ +namespace detail +{ +struct nonesuch +{ + nonesuch() = delete; + ~nonesuch() = delete; + nonesuch(nonesuch const&) = delete; + void operator=(nonesuch const&) = delete; +}; + +template class Op, + class... Args> +struct detector +{ + using value_t = std::false_type; + using type = Default; +}; + +template class Op, class... Args> +struct detector>, Op, Args...> +{ + using value_t = std::true_type; + using type = Op; +}; + +template