diff --git a/lib/Algorithms.h b/lib/Algorithms.h index 0252606f..2007cf60 100644 --- a/lib/Algorithms.h +++ b/lib/Algorithms.h @@ -5,14 +5,17 @@ #include #include +#include +#include +#include +#include + #include #include #include #include -#include -#include -#include +#include // Eigen/lanczos // EigCg diff --git a/lib/Make.inc b/lib/Make.inc index 069e818d..c38606e2 100644 --- a/lib/Make.inc +++ b/lib/Make.inc @@ -1,4 +1,4 @@ -HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Comparison.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./Tensors.h ./Threads.h +HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Comparison.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h -CCFILES=./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc +CCFILES=./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h index ee0b2ec1..1cf78501 100644 --- a/lib/algorithms/LinearOperator.h +++ b/lib/algorithms/LinearOperator.h @@ -207,6 +207,14 @@ namespace Grid { virtual void operator() (LinearOperatorBase &Linop, const Field &in, Field &out) = 0; }; + ///////////////////////////////////////////////////////////// + // Base classes for Multishift solvers for operators + ///////////////////////////////////////////////////////////// + template class OperatorMultiFunction { + public: + virtual void operator() (LinearOperatorBase &Linop, const Field &in, std::vector &out) = 0; + }; + // FIXME : To think about // Chroma functionality list defining LinearOperator diff --git a/lib/algorithms/approx/MultiShiftFunction.cc b/lib/algorithms/approx/MultiShiftFunction.cc new file mode 100644 index 00000000..8f62dffd --- /dev/null +++ b/lib/algorithms/approx/MultiShiftFunction.cc @@ -0,0 +1,29 @@ +#include + +namespace Grid { +double MultiShiftFunction::approx(double x) +{ + double a = norm; + for(int n=0;n poles; + std::vector residues; + std::vector tolerances; + RealD norm; + RealD lo,hi; + MultiShiftFunction(int n,RealD _lo,RealD _hi): poles(n), residues(n), lo(_lo), hi(_hi) {;}; + RealD approx(RealD x); + void csv(std::ostream &out); + void gnuplot(std::ostream &out); + MultiShiftFunction(AlgRemez & remez,double tol,bool inverse) : + order(remez.getDegree()), + tolerances(remez.getDegree(),tol), + poles(remez.getDegree()), + residues(remez.getDegree()) + { + remez.getBounds(lo,hi); + if ( inverse ) remez.getIPFE (&residues[0],&poles[0],&norm); + else remez.getPFE (&residues[0],&poles[0],&norm); + } +}; +} +#endif diff --git a/lib/algorithms/approx/Remez.h b/lib/algorithms/approx/Remez.h index 906811be..336382d2 100644 --- a/lib/algorithms/approx/Remez.h +++ b/lib/algorithms/approx/Remez.h @@ -125,8 +125,17 @@ class AlgRemez // Destructor virtual ~AlgRemez(); + int getDegree(void){ + assert(n==d); + return n; + } // Reset the bounds of the approximation void setBounds(double lower, double upper); + // Reset the bounds of the approximation + void getBounds(double &lower, double &upper) { + lower=(double)apstrt; + upper=(double)apend; + } // Generate the rational approximation x^(pnum/pden) double generateApprox(int num_degree, int den_degree, diff --git a/lib/algorithms/iterative/ConjugateGradientMultiShift.h b/lib/algorithms/iterative/ConjugateGradientMultiShift.h new file mode 100644 index 00000000..580ab838 --- /dev/null +++ b/lib/algorithms/iterative/ConjugateGradientMultiShift.h @@ -0,0 +1,250 @@ +#ifndef GRID_CONJUGATE_MULTI_SHIFT_GRADIENT_H +#define GRID_CONJUGATE_MULTI_SHIFT_GRADIENT_H + +namespace Grid { + + ///////////////////////////////////////////////////////////// + // Base classes for iterative processes based on operators + // single input vec, single output vec. + ///////////////////////////////////////////////////////////// + + template + class ConjugateGradientMultiShift : public OperatorMultiFunction, + public OperatorFunction + { +public: + RealD Tolerance; + Integer MaxIterations; + int verbose; + MultiShiftFunction shifts; + + ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) : + MaxIterations(maxit), + shifts(_shifts) + { + verbose=1; + } + +void operator() (LinearOperatorBase &Linop, const Field &src, Field &psi) +{ + + GridBase *grid = src._grid; + int nshift = shifts.order; + std::vector results(nshift,grid); + + (*this)(Linop,src,results); + + psi = shifts.norm*src; + for(int i=0;i &Linop, const Field &src, std::vector &psi) +{ + + GridBase *grid = src._grid; + + //////////////////////////////////////////////////////////////////////// + // Convenience references to the info stored in "MultiShiftFunction" + //////////////////////////////////////////////////////////////////////// + int nshift = shifts.order; + + std::vector &mass(shifts.poles); // Make references to array in "shifts" + std::vector &mresidual(shifts.tolerances); + std::vector alpha(nshift,1.0); + std::vector ps(nshift,grid);// Search directions + + assert(psi.size()==nshift); + assert(mass.size()==nshift); + assert(mresidual.size()==nshift); + + // dynamic sized arrays on stack; 2d is a pain with vector + RealD bs[nshift]; + RealD rsq[nshift]; + RealD z[nshift][2]; + int converged[nshift]; + + const int primary =0; + + //Primary shift fields CG iteration + RealD a,b,c,d; + RealD cp,bp,qq; //prev + + // Matrix mult fields + Field r(grid); + Field p(grid); + Field tmp(grid); + Field mmp(grid); + + // Check lightest mass + for(int s=0;s= mass[primary] ); + converged[s]=0; + } + + // Wire guess to zero + // Residuals "r" are src + // First search direction "p" is also src + cp = norm2(src); + for(int s=0;s 2+Ls, so ~ 3x saving + // Pipelined CG gain: + // + // New Kernel: Load r, vector of coeffs, vector of pointers ps + // New Kernel: Load psi[0], vector of coeffs, vector of pointers ps + // If can predict the coefficient bs then we can fuse these and avoid write reread cyce + // on ps[s]. + // Before: 3 x npole + 3 x npole + // After : 2 x npole (ps[s]) => 3x speed up of multishift CG. + + if( (!converged[s]) ) { + axpy(psi[ss],-bs[s]*alpha[s],ps[s],psi[ss]); + } + } + + // Convergence checks + int all_converged = 1; + for(int s=0;s + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template class DumbOperator : public LinearOperatorBase { +public: + LatticeComplex scale; + LatticeComplex sqrtscale; + DumbOperator(GridBase *grid) + : scale(grid), + sqrtscale(grid) + { + GridParallelRNG pRNG(grid); + std::vector seeds({5,6,7,8}); + pRNG.SeedFixedIntegers(seeds); + + random(pRNG,sqrtscale); + sqrtscale = sqrtscale * adj(sqrtscale);// force real pos def + scale = sqrtscale * sqrtscale; + } + void Op (const Field &in, Field &out){ + out = scale * in; + } + void AdjOp (const Field &in, Field &out){ + out = scale * in; + } + void HermOpAndNorm(const Field &in, Field &out,double &n1,double &n2){ + ComplexD dot; + + out = scale * in; + + dot= innerProduct(in,out); + n1=real(dot); + + dot = innerProduct(out,out); + n2=real(dot); + } + void ApplySqrt(const Field &in, Field &out){ + out = sqrtscale * in; + } +}; + + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + GridCartesian *grid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplexF::Nsimd()), + GridDefaultMpi()); + + double lo=0.001; + double hi=1.0; + int precision=64; + int degree=10; + AlgRemez remez(lo,hi,precision); + + //////////////////////////////////////// + // sqrt and inverse sqrt + //////////////////////////////////////// + std::cout << "Generating degree "< seeds({1,2,3,4}); + pRNG.SeedFixedIntegers(seeds); + + LatticeFermion src(grid); random(pRNG,src); + LatticeFermion combined(grid); + LatticeFermion reference(grid); + LatticeFermion error(grid); + std::vector result(degree,grid); + LatticeFermion summed(grid); + + ConjugateGradientMultiShift MSCG(10000,Sqrt); + + DumbOperator Diagonal(grid); + + MSCG(Diagonal,src,result); + + + // double a = norm; + // for(int n=0;n poles; - std::vector residues; - double norm; - double lo,hi; - MultiShiftFunction(int n,double _lo,double _hi): poles(n), residues(n), lo(_lo), hi(_hi) {;}; - double approx(double x); - void csv(std::ostream &out); - void gnuplot(std::ostream &out); -}; -double MultiShiftFunction::approx(double x) -{ - double a = norm; - for(int n=0;n latt_size = GridDefaultLatt(); std::vector simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd()); std::vector mpi_layout = GridDefaultMpi();