From 15f22425c8dbd3cb54efb46f27b31b199892dc88 Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Wed, 6 Jul 2016 14:50:01 -0400 Subject: [PATCH 1/7] Added option to prevent CG from exiting when it fails to converge --- lib/algorithms/iterative/ConjugateGradient.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/algorithms/iterative/ConjugateGradient.h b/lib/algorithms/iterative/ConjugateGradient.h index e0431a53..f5102019 100644 --- a/lib/algorithms/iterative/ConjugateGradient.h +++ b/lib/algorithms/iterative/ConjugateGradient.h @@ -40,9 +40,10 @@ namespace Grid { template class ConjugateGradient : public OperatorFunction { public: + bool ErrorOnNoConverge; //throw an assert when the CG fails to converge. Defaults true. RealD Tolerance; Integer MaxIterations; - ConjugateGradient(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) { + ConjugateGradient(RealD tol,Integer maxit, bool err_on_no_conv = true) : Tolerance(tol), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv){ }; @@ -137,13 +138,15 @@ public: std::cout< Date: Wed, 6 Jul 2016 15:57:04 -0400 Subject: [PATCH 2/7] Implemented mixed precision CG. Fixed filelist to exclude lib/Old directory and include Config.h. 
--- lib/Algorithms.h | 1 + lib/Make.inc | 4 +- .../iterative/ConjugateGradientMixedPrec.h | 141 ++++++++++++++++++ lib/lattice/Lattice_ET.h | 6 + lib/lattice/Lattice_transfer.h | 90 +++++++++++ lib/tensors/Tensor_extract_merge.h | 28 ++++ lib/tensors/Tensor_traits.h | 30 ++++ scripts/filelist | 3 +- tests/Make.inc | 22 +-- 9 files changed, 311 insertions(+), 14 deletions(-) create mode 100644 lib/algorithms/iterative/ConjugateGradientMixedPrec.h diff --git a/lib/Algorithms.h b/lib/Algorithms.h index 0a3d34ce..a0c37b36 100644 --- a/lib/Algorithms.h +++ b/lib/Algorithms.h @@ -44,6 +44,7 @@ Author: Peter Boyle #include #include +#include // Lanczos support #include diff --git a/lib/Make.inc b/lib/Make.inc index 8763692a..90fe6b7f 100644 --- a/lib/Make.inc +++ b/lib/Make.inc @@ -1,4 +1,4 @@ -HFILES=./Algorithms.h ./AlignedAllocator.h ./Cartesian.h ./Communicator.h ./Cshift.h ./Grid.h ./Init.h ./Lattice.h ./Lexicographic.h ./Log.h ./Old/Tensor_peek.h ./Old/Tensor_poke.h ./PerfCount.h ./Simd.h ./Stencil.h ./Tensors.h ./Threads.h ./Timer.h ./algorithms/CoarsenedMatrix.h ./algorithms/LinearOperator.h ./algorithms/Preconditioner.h ./algorithms/SparseMatrix.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/iterative/AdefGeneric.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/DenseMatrix.h ./algorithms/iterative/EigenSort.h ./algorithms/iterative/Francis.h ./algorithms/iterative/Householder.h ./algorithms/iterative/ImplicitlyRestartedLanczos.h ./algorithms/iterative/Matrix.h ./algorithms/iterative/MatrixUtils.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/PrecConjugateResidual.h ./algorithms/iterative/PrecGeneralisedConjugateResidual.h ./algorithms/iterative/SchurRedBlack.h 
./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./communicator/Communicator_base.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./lattice/Lattice_ET.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./parallelIO/BinaryIO.h ./parallelIO/NerscIO.h ./pugixml/pugixml.h ./qcd/QCD.h ./qcd/action/ActionBase.h ./qcd/action/ActionParams.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/FermionOperatorImpl.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/fermion/WilsonTMFermion.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/WilsonKernelsAsmBody.h ./qcd/action/gauge/GaugeImpl.h 
./qcd/action/gauge/PlaqPlusRectangleAction.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/action/pseudofermion/EvenOddSchurDifferentiable.h ./qcd/action/pseudofermion/OneFlavourEvenOddRational.h ./qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h ./qcd/action/pseudofermion/OneFlavourRational.h ./qcd/action/pseudofermion/OneFlavourRationalRatio.h ./qcd/action/pseudofermion/TwoFlavour.h ./qcd/action/pseudofermion/TwoFlavourEvenOdd.h ./qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h ./qcd/action/pseudofermion/TwoFlavourRatio.h ./qcd/hmc/HMC.h ./qcd/hmc/HmcRunner.h ./qcd/hmc/NerscCheckpointer.h ./qcd/hmc/integrators/Integrator.h ./qcd/hmc/integrators/Integrator_algorithm.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SUn.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/WilsonLoops.h ./serialisation/BaseIO.h ./serialisation/BinaryIO.h ./serialisation/MacroMagic.h ./serialisation/Serialisation.h ./serialisation/TextIO.h ./serialisation/XmlIO.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_empty.h ./simd/Grid_imci.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./simd/Intel512avx.h ./simd/Intel512wilson.h ./simd/Intel512common.h ./simd/Intel512double.h ./simd/Intel512imci.h ./simd/Intel512single.h ./stencil/Lebesgue.h ./tensors/Tensor_Ta.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_determinant.h ./tensors/Tensor_exp.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_index.h ./tensors/Tensor_inner.h ./tensors/Tensor_logical.h ./tensors/Tensor_outer.h ./tensors/Tensor_reality.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h +HFILES=./cshift/Cshift_none.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./Tensors.h 
./qcd/utils/SUn.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/CovariantCshift.h ./qcd/utils/WilsonLoops.h ./qcd/utils/LinalgUtils.h ./qcd/QCD.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/FermionOperatorImpl.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonTMFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonKernelsAsmBody.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/gauge/GaugeImpl.h ./qcd/action/gauge/PlaqPlusRectangleAction.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/action/pseudofermion/TwoFlavour.h ./qcd/action/pseudofermion/OneFlavourRationalRatio.h ./qcd/action/pseudofermion/OneFlavourEvenOddRational.h ./qcd/action/pseudofermion/OneFlavourRational.h ./qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h ./qcd/action/pseudofermion/TwoFlavourEvenOdd.h ./qcd/action/pseudofermion/EvenOddSchurDifferentiable.h ./qcd/action/pseudofermion/TwoFlavourRatio.h ./qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h ./qcd/action/ActionBase.h ./qcd/action/ActionParams.h ./qcd/action/Actions.h ./qcd/hmc/HmcRunner.h ./qcd/hmc/integrators/Integrator.h ./qcd/hmc/integrators/Integrator_algorithm.h ./qcd/hmc/HMC.h 
./qcd/hmc/NerscCheckpointer.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./simd/Intel512wilson.h ./simd/Intel512common.h ./simd/Grid_sse4.h ./simd/Grid_qpx.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./simd/Intel512imci.h ./simd/Intel512avx.h ./simd/Grid_neon.h ./simd/Grid_imci.h ./simd/Intel512single.h ./simd/Grid_empty.h ./simd/Intel512double.h ./simd/Grid_avx512.h ./simd/Grid_avx.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_unary.h ./tensors/Tensor_trace.h ./tensors/Tensor_determinant.h ./tensors/Tensor_reality.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_class.h ./tensors/Tensor_arith.h ./tensors/Tensor_index.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_logical.h ./tensors/Tensor_Ta.h ./tensors/Tensor_exp.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_outer.h ./tensors/Tensor_traits.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_inner.h ./tensors/Tensor_transpose.h ./Log.h ./Communicator.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./cartesian/Cartesian_base.h ./Timer.h ./Init.h ./Algorithms.h ./Lexicographic.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/bigfloat.h ./algorithms/Preconditioner.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/Francis.h ./algorithms/iterative/PrecConjugateResidual.h ./algorithms/iterative/Householder.h ./algorithms/iterative/ImplicitlyRestartedLanczos.h ./algorithms/iterative/Matrix.h ./algorithms/iterative/EigenSort.h ./algorithms/iterative/ConjugateGradientMixedPrec.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/PrecGeneralisedConjugateResidual.h ./algorithms/iterative/DenseMatrix.h ./algorithms/iterative/AdefGeneric.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/SchurRedBlack.h 
./algorithms/iterative/MatrixUtils.h ./algorithms/CoarsenedMatrix.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./serialisation/XmlIO.h ./serialisation/TextIO.h ./serialisation/BinaryIO.h ./serialisation/MacroMagic.h ./serialisation/BaseIO.h ./serialisation/Serialisation.h ./Stencil.h ./lattice/Lattice_base.h ./lattice/Lattice_arith.h ./lattice/Lattice_rng.h ./lattice/Lattice_unary.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_local.h ./lattice/Lattice_reality.h ./lattice/Lattice_comparison.h ./lattice/Lattice_trace.h ./lattice/Lattice_reduction.h ./lattice/Lattice_transpose.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_transfer.h ./lattice/Lattice_where.h ./lattice/Lattice_ET.h ./lattice/Lattice_conformable.h ./lattice/Lattice_overload.h ./Lattice.h ./communicator/Communicator_base.h ./Cshift.h ./AlignedAllocator.h ./PerfCount.h ./Simd.h ./stencil/Lebesgue.h ./Threads.h ./parallelIO/NerscIO.h ./parallelIO/BinaryIO.h ./pugixml/pugixml.h ./Grid.h ./Cartesian.h Config.h -CCFILES=./Init.cc ./Log.cc ./PerfCount.cc ./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./pugixml/pugixml.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsAsm.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/action/fermion/WilsonTMFermion.cc ./qcd/hmc/HMC.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./serialisation/BinaryIO.cc ./serialisation/TextIO.cc ./serialisation/XmlIO.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc +CCFILES=./Log.cc ./qcd/utils/SpaceTimeGrid.cc ./qcd/action/fermion/WilsonKernelsAsm.cc ./qcd/action/fermion/WilsonTMFermion.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonFermion5D.cc 
./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/hmc/HMC.cc ./qcd/spin/Dirac.cc ./algorithms/approx/Zolotarev.cc ./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./serialisation/XmlIO.cc ./serialisation/TextIO.cc ./serialisation/BinaryIO.cc ./Init.cc ./stencil/Stencil_common.cc ./stencil/Lebesgue.cc ./PerfCount.cc ./pugixml/pugixml.cc diff --git a/lib/algorithms/iterative/ConjugateGradientMixedPrec.h b/lib/algorithms/iterative/ConjugateGradientMixedPrec.h new file mode 100644 index 00000000..7931bbed --- /dev/null +++ b/lib/algorithms/iterative/ConjugateGradientMixedPrec.h @@ -0,0 +1,141 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/algorithms/iterative/ConjugateGradientMixedPrec.h + + Copyright (C) 2015 + +Author: Christopher Kelly + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_CONJUGATE_GRADIENT_MIXED_PREC_H +#define GRID_CONJUGATE_GRADIENT_MIXED_PREC_H + +namespace Grid { + + template::value == 2, int>::type = 0,typename std::enable_if< getPrecision::value == 1, int>::type = 0> + class MixedPrecisionConjugateGradient : public LinearFunction { + public: + RealD Tolerance; + Integer MaxInnerIterations; + Integer MaxOuterIterations; + GridBase* SinglePrecGrid; //Grid for single-precision fields + RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance + LinearOperatorBase &Linop_f; + LinearOperatorBase &Linop_d; + + //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess + LinearFunction *guesser; + + MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase &_Linop_f, LinearOperatorBase &_Linop_d) : + Linop_f(_Linop_f), Linop_d(_Linop_d), + Tolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), + OuterLoopNormMult(100.), guesser(NULL){ }; + + void useGuesser(LinearFunction &g){ + guesser = g; + } + + void operator() (const FieldD &src_d_in, FieldD &sol_d){ + GridStopWatch TotalTimer; + TotalTimer.Start(); + + int cb = src_d_in.checkerboard; + sol_d.checkerboard = cb; + + RealD src_norm = norm2(src_d_in); + RealD stop = src_norm * Tolerance*Tolerance; + + GridBase* DoublePrecGrid = src_d_in._grid; + FieldD tmp_d(DoublePrecGrid); + tmp_d.checkerboard = cb; + + FieldD tmp2_d(DoublePrecGrid); + tmp2_d.checkerboard = cb; + + FieldD src_d(DoublePrecGrid); + src_d = src_d_in; //source for next inner iteration, computed from residual during operation + + RealD inner_tol = Tolerance; + + FieldF 
src_f(SinglePrecGrid); + src_f.checkerboard = cb; + + FieldF sol_f(SinglePrecGrid); + sol_f.checkerboard = cb; + + ConjugateGradient CG_f(inner_tol, MaxInnerIterations); + CG_f.ErrorOnNoConverge = false; + + GridStopWatch InnerCGtimer; + + GridStopWatch PrecChangeTimer; + + for(Integer outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){ + //Compute double precision rsd and also new RHS vector. + Linop_d.HermOp(sol_d, tmp_d); + RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector + + std::cout< CG_d(Tolerance, MaxInnerIterations); + CG_d(Linop_d, src_d_in, sol_d); + + TotalTimer.Stop(); + std::cout< using is_lattice = std::is_base_of; template using is_lattice_expr = std::is_base_of; +//Specialization of getVectorType for lattices +template +struct getVectorType >{ + typedef typename Lattice::vector_object type; +}; + template inline sobj eval(const unsigned int ss, const sobj &arg) { diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 638563a9..4a6e6112 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -482,6 +482,96 @@ void Replicate(Lattice &coarse,Lattice & fine) } +//Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order +template +typename std::enable_if::value && !isSIMDvectorized::value, void>::type unvectorizeToLexOrdArray(std::vector &out, const Lattice &in){ + typedef typename vobj::vector_type vtype; + + GridBase* in_grid = in._grid; + out.resize(in_grid->lSites()); + + int ndim = in_grid->Nd(); + int in_nsimd = vtype::Nsimd(); + std::vector in_icoor[in_nsimd]; + + for(int lane=0; lane < in_nsimd; lane++){ + in_icoor[lane].resize(ndim); + in_grid->iCoorFromIindex(in_icoor[lane], lane); + } + +PARALLEL_FOR_LOOP + for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index + //Assemble vector of pointers to output elements + std::vector out_ptrs(in_nsimd); + + std::vector in_ocoor(ndim); + 
in_grid->oCoorFromOindex(in_ocoor, in_oidx); + + std::vector lcoor(in_grid->Nd()); + + for(int lane=0; lane < in_nsimd; lane++){ + for(int mu=0;mu_rdimensions[mu]*in_icoor[lane][mu]; + + int lex; + Lexicographic::IndexFromCoor(lcoor, lex, in_grid->_ldimensions); + out_ptrs[lane] = &out[lex]; + } + + //Unpack into those ptrs + const vobj & in_vobj = in._odata[in_oidx]; + extract1(in_vobj, out_ptrs, 0); + } +} + +//Convert a Lattice from one precision to another +template +void precisionChange(Lattice &out, const Lattice &in){ + assert(out._grid->Nd() == in._grid->Nd()); + out.checkerboard = in.checkerboard; + GridBase *in_grid=in._grid; + GridBase *out_grid = out._grid; + + typedef typename VobjOut::scalar_object SobjOut; + typedef typename VobjIn::scalar_object SobjIn; + + int ndim = out._grid->Nd(); + int out_nsimd = out_grid->Nsimd(); + + std::vector out_icoor[out_nsimd]; + + for(int lane=0; lane < out_nsimd; lane++){ + out_icoor[lane].resize(ndim); + out_grid->iCoorFromIindex(out_icoor[lane], lane); + } + + std::vector in_slex_conv(in_grid->lSites()); + unvectorizeToLexOrdArray(in_slex_conv, in); + + PARALLEL_FOR_LOOP + for(int out_oidx=0;out_oidxoSites();out_oidx++){ + std::vector out_ocoor(ndim); + out_grid->oCoorFromOindex(out_ocoor, out_oidx); + + std::vector ptrs(out_nsimd); + + std::vector lcoor(out_grid->Nd()); + + for(int lane=0; lane < out_nsimd; lane++){ + for(int mu=0;mu_rdimensions[mu]*out_icoor[lane][mu]; + + int llex; Lexicographic::IndexFromCoor(lcoor, llex, out_grid->_ldimensions); + ptrs[lane] = &in_slex_conv[llex]; + } + merge(out._odata[out_oidx], ptrs, 0); + } +} + + + + + } #endif diff --git a/lib/tensors/Tensor_extract_merge.h b/lib/tensors/Tensor_extract_merge.h index ad98213d..41a431ad 100644 --- a/lib/tensors/Tensor_extract_merge.h +++ b/lib/tensors/Tensor_extract_merge.h @@ -10,6 +10,7 @@ Author: Azusa Yamaguchi Author: Peter Boyle Author: neo Author: paboyle +Author: Christopher Kelly This program is free software; you can redistribute 
it and/or modify it under the terms of the GNU General Public License as published by @@ -167,6 +168,33 @@ void extract(const vobj &vec,std::vector &extrac } } +//////////////////////////////////////////////////////////////////////// +// Extract to a bunch of scalar object pointers of different scalar type, with offset. Useful for precision change +//////////////////////////////////////////////////////////////////////// +template inline +void extract1(const vobj &vec,std::vector &extracted, int offset) +{ + typedef typename vobj::scalar_type vobj_scalar_type ; + typedef typename vobj::vector_type vobj_vector_type ; + + typedef typename sobj::scalar_type sobj_scalar_type ; + + static const int words=sizeof(vobj)/sizeof(vobj_vector_type); + static const int Nsimd=vobj_vector_type::Nsimd(); + + int Nextr=extracted.size(); + int s = Nsimd/Nextr; + vobj_scalar_type * vp = (vobj_scalar_type *)&vec; + + for(int w=0;w Author: Peter Boyle +Author: Christopher Kelly This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -230,6 +231,35 @@ namespace Grid { static const bool value = true; }; + //Get the SIMD vector type from a Grid tensor or Lattice + template + struct getVectorType{ + typedef T type; + }; + + //Query if a tensor or Lattice is SIMD vector or scalar + template + class isSIMDvectorized{ + template + static typename std::enable_if< !std::is_same< typename GridTypeMapper::type>::scalar_type, typename GridTypeMapper::type>::vector_type>::value, char>::type test(void *); + + template + static double test(...); + + public: + enum {value = sizeof(test(0)) == sizeof(char) }; + }; + + //Get the precision of a Lattice, tensor or scalar type in units of sizeof(float) + template + class getPrecision{ + typedef typename getVectorType::type vector_obj; //get the vector_obj (i.e. a grid Tensor) if its a Lattice, do nothing otherwise (i.e. 
if fundamental or grid Tensor) + + typedef typename GridTypeMapper::scalar_type scalar_type; //get the associated scalar type. Works on fundamental and tensor types + typedef typename GridTypeMapper::Realified real_scalar_type; //remove any std::complex wrapper, should get us to the fundamental type + public: + enum { value = sizeof(real_scalar_type)/sizeof(float) }; + }; } #endif diff --git a/scripts/filelist b/scripts/filelist index dcc5bfef..b5843cae 100755 --- a/scripts/filelist +++ b/scripts/filelist @@ -2,7 +2,8 @@ cd lib -HFILES=`find . -type f -name '*.h'` +HFILES=`find . -type f -name '*.h' -not -path '*/Old/*'` +HFILES="$HFILES Config.h" CCFILES=`find . -type f -name '*.cc' -not -name '*ommunicator*.cc'` echo> Make.inc echo HFILES=$HFILES >> Make.inc diff --git a/tests/Make.inc b/tests/Make.inc index f1e2cd9d..fad82247 100644 --- a/tests/Make.inc +++ b/tests/Make.inc @@ -1,13 +1,5 @@ -bin_PROGRAMS += Test_GaugeAction Test_RectPlaq Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_cheby Test_contfrac_cg Test_contfrac_even_odd Test_contfrac_force Test_cshift Test_cshift_red_black Test_cshift_red_black_rotate Test_cshift_rotate Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_dwf_force Test_dwf_fpgcr Test_dwf_gpforce Test_dwf_hdcr Test_dwf_lanczos Test_dwf_rb5d Test_gamma Test_gp_rect_force Test_gparity Test_gpdwf_force Test_gpwilson_even_odd Test_hmc_EODWFRatio Test_hmc_EODWFRatio_Gparity Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_GparityIwasakiGauge Test_hmc_GparityWilsonGauge Test_hmc_IwasakiGauge Test_hmc_RectGauge Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_partfrac_force Test_quenched_update Test_rect_force Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio Test_rng 
Test_rng_fixed Test_serialisation Test_simd Test_stencil Test_synthetic_lanczos Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi Test_wilson_tm_even_odd - - -Test_GaugeAction_SOURCES=Test_GaugeAction.cc -Test_GaugeAction_LDADD=-lGrid - - -Test_RectPlaq_SOURCES=Test_RectPlaq.cc -Test_RectPlaq_LDADD=-lGrid +bin_PROGRAMS += Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_cheby Test_contfrac_cg Test_contfrac_even_odd Test_contfrac_force Test_cshift Test_cshift_red_black Test_cshift_red_black_rotate Test_cshift_rotate Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_dwf_force Test_dwf_fpgcr Test_dwf_gpforce Test_dwf_hdcr Test_dwf_lanczos Test_dwf_rb5d Test_gamma Test_GaugeAction Test_gparity Test_gpdwf_force Test_gp_rect_force Test_gpwilson_even_odd Test_hmc_EODWFRatio Test_hmc_EODWFRatio_Gparity Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_GparityIwasakiGauge Test_hmc_GparityWilsonGauge Test_hmc_IwasakiGauge Test_hmc_RectGauge Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_partfrac_force Test_quenched_update Test_rect_force Test_RectPlaq Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio Test_rng Test_rng_fixed Test_serialisation Test_simd Test_stencil Test_synthetic_lanczos Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi Test_wilson_tm_even_odd Test_cayley_cg_SOURCES=Test_cayley_cg.cc @@ -114,8 +106,8 @@ Test_gamma_SOURCES=Test_gamma.cc Test_gamma_LDADD=-lGrid -Test_gp_rect_force_SOURCES=Test_gp_rect_force.cc 
-Test_gp_rect_force_LDADD=-lGrid +Test_GaugeAction_SOURCES=Test_GaugeAction.cc +Test_GaugeAction_LDADD=-lGrid Test_gparity_SOURCES=Test_gparity.cc @@ -126,6 +118,10 @@ Test_gpdwf_force_SOURCES=Test_gpdwf_force.cc Test_gpdwf_force_LDADD=-lGrid +Test_gp_rect_force_SOURCES=Test_gp_rect_force.cc +Test_gp_rect_force_LDADD=-lGrid + + Test_gpwilson_even_odd_SOURCES=Test_gpwilson_even_odd.cc Test_gpwilson_even_odd_LDADD=-lGrid @@ -202,6 +198,10 @@ Test_rect_force_SOURCES=Test_rect_force.cc Test_rect_force_LDADD=-lGrid +Test_RectPlaq_SOURCES=Test_RectPlaq.cc +Test_RectPlaq_LDADD=-lGrid + + Test_remez_SOURCES=Test_remez.cc Test_remez_LDADD=-lGrid From 713520d3d23cd8be716c624953ce5b3d8f81d6fa Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Wed, 6 Jul 2016 16:18:19 -0400 Subject: [PATCH 3/7] Added tester for mixed CG --- tests/Make.inc | 6 +- tests/Test_dwf_mixedcg_prec.cc | 108 +++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 tests/Test_dwf_mixedcg_prec.cc diff --git a/tests/Make.inc b/tests/Make.inc index fad82247..73a176e9 100644 --- a/tests/Make.inc +++ b/tests/Make.inc @@ -1,5 +1,5 @@ -bin_PROGRAMS += Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_cheby Test_contfrac_cg Test_contfrac_even_odd Test_contfrac_force Test_cshift Test_cshift_red_black Test_cshift_red_black_rotate Test_cshift_rotate Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_dwf_force Test_dwf_fpgcr Test_dwf_gpforce Test_dwf_hdcr Test_dwf_lanczos Test_dwf_rb5d Test_gamma Test_GaugeAction Test_gparity Test_gpdwf_force Test_gp_rect_force Test_gpwilson_even_odd Test_hmc_EODWFRatio Test_hmc_EODWFRatio_Gparity Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_GparityIwasakiGauge Test_hmc_GparityWilsonGauge Test_hmc_IwasakiGauge Test_hmc_RectGauge Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge 
Test_hmc_WilsonRatio Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_partfrac_force Test_quenched_update Test_rect_force Test_RectPlaq Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio Test_rng Test_rng_fixed Test_serialisation Test_simd Test_stencil Test_synthetic_lanczos Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi Test_wilson_tm_even_odd +bin_PROGRAMS += Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_cheby Test_contfrac_cg Test_contfrac_even_odd Test_contfrac_force Test_cshift Test_cshift_red_black Test_cshift_red_black_rotate Test_cshift_rotate Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_dwf_force Test_dwf_fpgcr Test_dwf_gpforce Test_dwf_hdcr Test_dwf_lanczos Test_dwf_mixedcg_prec Test_dwf_rb5d Test_gamma Test_GaugeAction Test_gparity Test_gpdwf_force Test_gp_rect_force Test_gpwilson_even_odd Test_hmc_EODWFRatio Test_hmc_EODWFRatio_Gparity Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_GparityIwasakiGauge Test_hmc_GparityWilsonGauge Test_hmc_IwasakiGauge Test_hmc_RectGauge Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_partfrac_force Test_quenched_update Test_rect_force Test_RectPlaq Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio Test_rng Test_rng_fixed Test_serialisation Test_simd Test_stencil Test_synthetic_lanczos Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi Test_wilson_tm_even_odd Test_cayley_cg_SOURCES=Test_cayley_cg.cc @@ -98,6 
+98,10 @@ Test_dwf_lanczos_SOURCES=Test_dwf_lanczos.cc Test_dwf_lanczos_LDADD=-lGrid +Test_dwf_mixedcg_prec_SOURCES=Test_dwf_mixedcg_prec.cc +Test_dwf_mixedcg_prec_LDADD=-lGrid + + Test_dwf_rb5d_SOURCES=Test_dwf_rb5d.cc Test_dwf_rb5d_LDADD=-lGrid diff --git a/tests/Test_dwf_mixedcg_prec.cc b/tests/Test_dwf_mixedcg_prec.cc new file mode 100644 index 00000000..637b8141 --- /dev/null +++ b/tests/Test_dwf_mixedcg_prec.cc @@ -0,0 +1,108 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_mixedcg_prec.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + const int Ls=8; + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + GridCartesian * UGrid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_f); + GridCartesian * FGrid_f = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_f); + GridRedBlackCartesian * FrbGrid_f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_f); + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeFermionD src(FGrid); random(RNG5,src); + LatticeFermionD result(FGrid); result=zero; + LatticeGaugeFieldD Umu(UGrid); + LatticeGaugeFieldF Umu_f(UGrid_f); + + SU3::HotConfiguration(RNG4,Umu); + + precisionChange(Umu_f,Umu); + + RealD mass=0.1; + RealD M5=1.8; + DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + DomainWallFermionF Ddwf_f(Umu_f,*FGrid_f,*FrbGrid_f,*UGrid_f,*UrbGrid_f,mass,M5); + + LatticeFermionD src_o(FrbGrid); + LatticeFermionD result_o(FrbGrid); + LatticeFermionD 
result_o_2(FrbGrid); + pickCheckerboard(Odd,src_o,src); + result_o.checkerboard = Odd; + result_o = zero; + result_o_2.checkerboard = Odd; + result_o_2 = zero; + + SchurDiagMooeeOperator HermOpEO(Ddwf); + SchurDiagMooeeOperator HermOpEO_f(Ddwf_f); + + std::cout << "Starting mixed CG" << std::endl; + MixedPrecisionConjugateGradient mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO); + mCG(src_o,result_o); + + std::cout << "Starting regular CG" << std::endl; + ConjugateGradient CG(1.0e-8,10000); + CG(HermOpEO,src_o,result_o_2); + + LatticeFermionD diff_o(FrbGrid); + RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2); + + std::cout << "Diff between mixed and regular CG: " << diff << std::endl; + + + Grid_finalize(); +} From 25fafa9a8927507eec0fc2b8160c98c29c28dbce Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Wed, 6 Jul 2016 16:19:41 -0400 Subject: [PATCH 4/7] Comment --- lib/algorithms/iterative/ConjugateGradientMixedPrec.h | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/algorithms/iterative/ConjugateGradientMixedPrec.h b/lib/algorithms/iterative/ConjugateGradientMixedPrec.h index 7931bbed..bac2d18a 100644 --- a/lib/algorithms/iterative/ConjugateGradientMixedPrec.h +++ b/lib/algorithms/iterative/ConjugateGradientMixedPrec.h @@ -30,6 +30,7 @@ Author: Christopher Kelly namespace Grid { + //Mixed precision restarted defect correction CG template::value == 2, int>::type = 0,typename std::enable_if< getPrecision::value == 1, int>::type = 0> class MixedPrecisionConjugateGradient : public LinearFunction { public: From 4774a3bcd27e2a8eb0ede7fd6274fec06ace34cc Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Wed, 6 Jul 2016 18:01:08 -0400 Subject: [PATCH 5/7] Generalized HotConfiguration and functions it calls to accept gauge fields with precision other than the default. 
--- lib/qcd/utils/SUn.h | 47 ++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/lib/qcd/utils/SUn.h b/lib/qcd/utils/SUn.h index 124e5f41..e9403836 100644 --- a/lib/qcd/utils/SUn.h +++ b/lib/qcd/utils/SUn.h @@ -43,7 +43,7 @@ public: template using iSUnMatrix = iScalar > > ; template using iSU2Matrix = iScalar > > ; - + ////////////////////////////////////////////////////////////////////////////////////////////////// // Types can be accessed as SU<2>::Matrix , SU<2>::vSUnMatrix, SU<2>::LatticeMatrix etc... ////////////////////////////////////////////////////////////////////////////////////////////////// @@ -552,15 +552,24 @@ Note that in step D setting B ~ X - A and using B in place of A in step E will g } // reunitarise?? - static void LieRandomize(GridParallelRNG &pRNG,LatticeMatrix &out,double scale=1.0){ + template + static void LieRandomize(GridParallelRNG &pRNG,LatticeMatrixType &out,double scale=1.0){ GridBase *grid = out._grid; - - LatticeComplex ca (grid); - LatticeMatrix lie(grid); - LatticeMatrix la (grid); - Complex ci(0.0,scale); - Complex cone(1.0,0.0); - Matrix ta; + + typedef typename LatticeMatrixType::vector_type vector_type; + typedef typename LatticeMatrixType::scalar_type scalar_type; + + typedef iSinglet vTComplexType; + + typedef Lattice LatticeComplexType; + typedef typename GridTypeMapper::scalar_object MatrixType; + + LatticeComplexType ca (grid); + LatticeMatrixType lie(grid); + LatticeMatrixType la (grid); + ComplexD ci(0.0,scale); + ComplexD cone(1.0,0.0); + MatrixType ta; lie=zero; for(int a=0;a + static void HotConfiguration(GridParallelRNG &pRNG,GaugeField &out){ + typedef typename GaugeField::vector_type vector_type; + typedef iSUnMatrix vMatrixType; + typedef Lattice LatticeMatrixType; + + LatticeMatrixType Umu(out._grid); for(int mu=0;mu(out,Umu,mu); @@ -622,13 +635,15 @@ Note that in step D setting B ~ X - A and using B in place of A in step E will g static void taProj( 
const LatticeMatrix &in, LatticeMatrix &out){ out = Ta(in); } - static void taExp( const LatticeMatrix &x, LatticeMatrix &ex){ - - LatticeMatrix xn(x._grid); + template + static void taExp( const LatticeMatrixType &x, LatticeMatrixType &ex){ + typedef typename LatticeMatrixType::scalar_type ComplexType; + + LatticeMatrixType xn(x._grid); RealD nfac = 1.0; xn = x; - ex =xn+Complex(1.0); // 1+x + ex =xn+ComplexType(1.0); // 1+x // Do a 12th order exponentiation for(int i=2; i <= 12; ++i) From 184642adb0d8daa2dcf4115be16a75152b7076f2 Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Wed, 6 Jul 2016 18:15:15 -0400 Subject: [PATCH 6/7] Fix for pedantic compilers --- lib/lattice/Lattice_transfer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 4a6e6112..4c9bbc31 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -493,7 +493,7 @@ typename std::enable_if::value && !isSIMDvectorized int ndim = in_grid->Nd(); int in_nsimd = vtype::Nsimd(); - std::vector in_icoor[in_nsimd]; + std::vector > in_icoor(in_nsimd); for(int lane=0; lane < in_nsimd; lane++){ in_icoor[lane].resize(ndim); From dd8cfff1118862de70b5eeccca5fd6c62bc15295 Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Wed, 6 Jul 2016 18:22:15 -0400 Subject: [PATCH 7/7] Another fix for pedantic compilers --- lib/lattice/Lattice_transfer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 4c9bbc31..bd53cde9 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -539,7 +539,7 @@ void precisionChange(Lattice &out, const Lattice &in){ int ndim = out._grid->Nd(); int out_nsimd = out_grid->Nsimd(); - std::vector out_icoor[out_nsimd]; + std::vector > out_icoor(out_nsimd); for(int lane=0; lane < out_nsimd; lane++){ out_icoor[lane].resize(ndim);