From 85ed8175cb5defd4b6697f85c33e0d71e1426e3b Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Wed, 6 Jul 2016 15:57:04 -0400 Subject: [PATCH] Implemented mixed precision CG. Fixed filelist to exclude lib/Old directory and include Config.h. --- lib/Algorithms.h | 1 + lib/Make.inc | 4 +- .../iterative/ConjugateGradientMixedPrec.h | 141 ++++++++++++++++++ lib/lattice/Lattice_ET.h | 6 + lib/lattice/Lattice_transfer.h | 90 +++++++++++ lib/tensors/Tensor_extract_merge.h | 28 ++++ lib/tensors/Tensor_traits.h | 30 ++++ scripts/filelist | 3 +- tests/Make.inc | 22 +-- 9 files changed, 311 insertions(+), 14 deletions(-) create mode 100644 lib/algorithms/iterative/ConjugateGradientMixedPrec.h diff --git a/lib/Algorithms.h b/lib/Algorithms.h index 0a3d34ce..a0c37b36 100644 --- a/lib/Algorithms.h +++ b/lib/Algorithms.h @@ -44,6 +44,7 @@ Author: Peter Boyle #include #include +#include // Lanczos support #include diff --git a/lib/Make.inc b/lib/Make.inc index 8763692a..90fe6b7f 100644 --- a/lib/Make.inc +++ b/lib/Make.inc @@ -1,4 +1,4 @@ -HFILES=./Algorithms.h ./AlignedAllocator.h ./Cartesian.h ./Communicator.h ./Cshift.h ./Grid.h ./Init.h ./Lattice.h ./Lexicographic.h ./Log.h ./Old/Tensor_peek.h ./Old/Tensor_poke.h ./PerfCount.h ./Simd.h ./Stencil.h ./Tensors.h ./Threads.h ./Timer.h ./algorithms/CoarsenedMatrix.h ./algorithms/LinearOperator.h ./algorithms/Preconditioner.h ./algorithms/SparseMatrix.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/iterative/AdefGeneric.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/DenseMatrix.h ./algorithms/iterative/EigenSort.h ./algorithms/iterative/Francis.h ./algorithms/iterative/Householder.h ./algorithms/iterative/ImplicitlyRestartedLanczos.h ./algorithms/iterative/Matrix.h ./algorithms/iterative/MatrixUtils.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/PrecConjugateResidual.h ./algorithms/iterative/PrecGeneralisedConjugateResidual.h ./algorithms/iterative/SchurRedBlack.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./communicator/Communicator_base.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./lattice/Lattice_ET.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./parallelIO/BinaryIO.h ./parallelIO/NerscIO.h ./pugixml/pugixml.h ./qcd/QCD.h ./qcd/action/ActionBase.h ./qcd/action/ActionParams.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/FermionOperatorImpl.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/fermion/WilsonTMFermion.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/WilsonKernelsAsmBody.h ./qcd/action/gauge/GaugeImpl.h ./qcd/action/gauge/PlaqPlusRectangleAction.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/action/pseudofermion/EvenOddSchurDifferentiable.h ./qcd/action/pseudofermion/OneFlavourEvenOddRational.h ./qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h ./qcd/action/pseudofermion/OneFlavourRational.h ./qcd/action/pseudofermion/OneFlavourRationalRatio.h ./qcd/action/pseudofermion/TwoFlavour.h ./qcd/action/pseudofermion/TwoFlavourEvenOdd.h ./qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h ./qcd/action/pseudofermion/TwoFlavourRatio.h ./qcd/hmc/HMC.h ./qcd/hmc/HmcRunner.h ./qcd/hmc/NerscCheckpointer.h ./qcd/hmc/integrators/Integrator.h ./qcd/hmc/integrators/Integrator_algorithm.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SUn.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/WilsonLoops.h ./serialisation/BaseIO.h ./serialisation/BinaryIO.h ./serialisation/MacroMagic.h ./serialisation/Serialisation.h ./serialisation/TextIO.h ./serialisation/XmlIO.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_empty.h ./simd/Grid_imci.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./simd/Intel512avx.h ./simd/Intel512wilson.h ./simd/Intel512common.h ./simd/Intel512double.h ./simd/Intel512imci.h ./simd/Intel512single.h ./stencil/Lebesgue.h ./tensors/Tensor_Ta.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_determinant.h ./tensors/Tensor_exp.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_index.h ./tensors/Tensor_inner.h ./tensors/Tensor_logical.h ./tensors/Tensor_outer.h ./tensors/Tensor_reality.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h +HFILES=./cshift/Cshift_none.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./Tensors.h ./qcd/utils/SUn.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/CovariantCshift.h ./qcd/utils/WilsonLoops.h ./qcd/utils/LinalgUtils.h ./qcd/QCD.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/FermionOperatorImpl.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonTMFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonKernelsAsmBody.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/gauge/GaugeImpl.h ./qcd/action/gauge/PlaqPlusRectangleAction.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/action/pseudofermion/TwoFlavour.h ./qcd/action/pseudofermion/OneFlavourRationalRatio.h ./qcd/action/pseudofermion/OneFlavourEvenOddRational.h ./qcd/action/pseudofermion/OneFlavourRational.h ./qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h ./qcd/action/pseudofermion/TwoFlavourEvenOdd.h ./qcd/action/pseudofermion/EvenOddSchurDifferentiable.h ./qcd/action/pseudofermion/TwoFlavourRatio.h ./qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h ./qcd/action/ActionBase.h ./qcd/action/ActionParams.h ./qcd/action/Actions.h ./qcd/hmc/HmcRunner.h ./qcd/hmc/integrators/Integrator.h ./qcd/hmc/integrators/Integrator_algorithm.h ./qcd/hmc/HMC.h ./qcd/hmc/NerscCheckpointer.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./simd/Intel512wilson.h ./simd/Intel512common.h ./simd/Grid_sse4.h ./simd/Grid_qpx.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./simd/Intel512imci.h ./simd/Intel512avx.h ./simd/Grid_neon.h ./simd/Grid_imci.h ./simd/Intel512single.h ./simd/Grid_empty.h ./simd/Intel512double.h ./simd/Grid_avx512.h ./simd/Grid_avx.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_unary.h ./tensors/Tensor_trace.h ./tensors/Tensor_determinant.h ./tensors/Tensor_reality.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_class.h ./tensors/Tensor_arith.h ./tensors/Tensor_index.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_logical.h ./tensors/Tensor_Ta.h ./tensors/Tensor_exp.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_outer.h ./tensors/Tensor_traits.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_inner.h ./tensors/Tensor_transpose.h ./Log.h ./Communicator.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./cartesian/Cartesian_base.h ./Timer.h ./Init.h ./Algorithms.h ./Lexicographic.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/bigfloat.h ./algorithms/Preconditioner.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/Francis.h ./algorithms/iterative/PrecConjugateResidual.h ./algorithms/iterative/Householder.h ./algorithms/iterative/ImplicitlyRestartedLanczos.h ./algorithms/iterative/Matrix.h ./algorithms/iterative/EigenSort.h ./algorithms/iterative/ConjugateGradientMixedPrec.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/PrecGeneralisedConjugateResidual.h ./algorithms/iterative/DenseMatrix.h ./algorithms/iterative/AdefGeneric.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/iterative/MatrixUtils.h ./algorithms/CoarsenedMatrix.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./serialisation/XmlIO.h ./serialisation/TextIO.h ./serialisation/BinaryIO.h ./serialisation/MacroMagic.h ./serialisation/BaseIO.h ./serialisation/Serialisation.h ./Stencil.h ./lattice/Lattice_base.h ./lattice/Lattice_arith.h ./lattice/Lattice_rng.h ./lattice/Lattice_unary.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_local.h ./lattice/Lattice_reality.h ./lattice/Lattice_comparison.h ./lattice/Lattice_trace.h ./lattice/Lattice_reduction.h ./lattice/Lattice_transpose.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_transfer.h ./lattice/Lattice_where.h ./lattice/Lattice_ET.h ./lattice/Lattice_conformable.h ./lattice/Lattice_overload.h ./Lattice.h ./communicator/Communicator_base.h ./Cshift.h ./AlignedAllocator.h ./PerfCount.h ./Simd.h ./stencil/Lebesgue.h ./Threads.h ./parallelIO/NerscIO.h ./parallelIO/BinaryIO.h ./pugixml/pugixml.h ./Grid.h ./Cartesian.h Config.h -CCFILES=./Init.cc ./Log.cc ./PerfCount.cc ./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./pugixml/pugixml.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsAsm.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/action/fermion/WilsonTMFermion.cc ./qcd/hmc/HMC.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./serialisation/BinaryIO.cc ./serialisation/TextIO.cc ./serialisation/XmlIO.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc +CCFILES=./Log.cc ./qcd/utils/SpaceTimeGrid.cc ./qcd/action/fermion/WilsonKernelsAsm.cc ./qcd/action/fermion/WilsonTMFermion.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/hmc/HMC.cc ./qcd/spin/Dirac.cc ./algorithms/approx/Zolotarev.cc ./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./serialisation/XmlIO.cc ./serialisation/TextIO.cc ./serialisation/BinaryIO.cc ./Init.cc ./stencil/Stencil_common.cc ./stencil/Lebesgue.cc ./PerfCount.cc ./pugixml/pugixml.cc diff --git a/lib/algorithms/iterative/ConjugateGradientMixedPrec.h b/lib/algorithms/iterative/ConjugateGradientMixedPrec.h new file mode 100644 index 00000000..7931bbed --- /dev/null +++ b/lib/algorithms/iterative/ConjugateGradientMixedPrec.h @@ -0,0 +1,141 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/algorithms/iterative/ConjugateGradientMixedPrec.h + + Copyright (C) 2015 + +Author: Christopher Kelly + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_CONJUGATE_GRADIENT_MIXED_PREC_H +#define GRID_CONJUGATE_GRADIENT_MIXED_PREC_H + +namespace Grid { + + template::value == 2, int>::type = 0,typename std::enable_if< getPrecision::value == 1, int>::type = 0> + class MixedPrecisionConjugateGradient : public LinearFunction { + public: + RealD Tolerance; + Integer MaxInnerIterations; + Integer MaxOuterIterations; + GridBase* SinglePrecGrid; //Grid for single-precision fields + RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance + LinearOperatorBase &Linop_f; + LinearOperatorBase &Linop_d; + + //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess + LinearFunction *guesser; + + MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase &_Linop_f, LinearOperatorBase &_Linop_d) : + Linop_f(_Linop_f), Linop_d(_Linop_d), + Tolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), + OuterLoopNormMult(100.), guesser(NULL){ }; + + void useGuesser(LinearFunction &g){ + guesser = g; + } + + void operator() (const FieldD &src_d_in, FieldD &sol_d){ + GridStopWatch TotalTimer; + TotalTimer.Start(); + + int cb = src_d_in.checkerboard; + sol_d.checkerboard = cb; + + RealD src_norm = norm2(src_d_in); + RealD stop = src_norm * Tolerance*Tolerance; + + GridBase* DoublePrecGrid = src_d_in._grid; + FieldD tmp_d(DoublePrecGrid); + tmp_d.checkerboard = cb; + + FieldD tmp2_d(DoublePrecGrid); + tmp2_d.checkerboard = cb; + + FieldD src_d(DoublePrecGrid); + src_d = src_d_in; //source for next inner iteration, computed from residual during operation + + RealD inner_tol = Tolerance; + + FieldF src_f(SinglePrecGrid); + src_f.checkerboard = cb; + + FieldF sol_f(SinglePrecGrid); + sol_f.checkerboard = cb; + + ConjugateGradient CG_f(inner_tol, MaxInnerIterations); + CG_f.ErrorOnNoConverge = false; + + GridStopWatch InnerCGtimer; + + GridStopWatch PrecChangeTimer; + + for(Integer outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){ + //Compute double precision rsd and also new RHS vector. + Linop_d.HermOp(sol_d, tmp_d); + RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector + + std::cout< CG_d(Tolerance, MaxInnerIterations); + CG_d(Linop_d, src_d_in, sol_d); + + TotalTimer.Stop(); + std::cout< using is_lattice = std::is_base_of; template using is_lattice_expr = std::is_base_of; +//Specialization of getVectorType for lattices +template +struct getVectorType >{ + typedef typename Lattice::vector_object type; +}; + template inline sobj eval(const unsigned int ss, const sobj &arg) { diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 638563a9..4a6e6112 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -482,6 +482,96 @@ void Replicate(Lattice &coarse,Lattice & fine) } +//Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order +template +typename std::enable_if::value && !isSIMDvectorized::value, void>::type unvectorizeToLexOrdArray(std::vector &out, const Lattice &in){ + typedef typename vobj::vector_type vtype; + + GridBase* in_grid = in._grid; + out.resize(in_grid->lSites()); + + int ndim = in_grid->Nd(); + int in_nsimd = vtype::Nsimd(); + std::vector in_icoor[in_nsimd]; + + for(int lane=0; lane < in_nsimd; lane++){ + in_icoor[lane].resize(ndim); + in_grid->iCoorFromIindex(in_icoor[lane], lane); + } + +PARALLEL_FOR_LOOP + for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index + //Assemble vector of pointers to output elements + std::vector out_ptrs(in_nsimd); + + std::vector in_ocoor(ndim); + in_grid->oCoorFromOindex(in_ocoor, in_oidx); + + std::vector lcoor(in_grid->Nd()); + + for(int lane=0; lane < in_nsimd; lane++){ + for(int mu=0;mu_rdimensions[mu]*in_icoor[lane][mu]; + + int lex; + Lexicographic::IndexFromCoor(lcoor, lex, in_grid->_ldimensions); + out_ptrs[lane] = &out[lex]; + } + + //Unpack into those ptrs + const vobj & in_vobj = in._odata[in_oidx]; + extract1(in_vobj, out_ptrs, 0); + } +} + +//Convert a Lattice from one precision to another +template +void precisionChange(Lattice &out, const Lattice &in){ + assert(out._grid->Nd() == in._grid->Nd()); + out.checkerboard = in.checkerboard; + GridBase *in_grid=in._grid; + GridBase *out_grid = out._grid; + + typedef typename VobjOut::scalar_object SobjOut; + typedef typename VobjIn::scalar_object SobjIn; + + int ndim = out._grid->Nd(); + int out_nsimd = out_grid->Nsimd(); + + std::vector out_icoor[out_nsimd]; + + for(int lane=0; lane < out_nsimd; lane++){ + out_icoor[lane].resize(ndim); + out_grid->iCoorFromIindex(out_icoor[lane], lane); + } + + std::vector in_slex_conv(in_grid->lSites()); + unvectorizeToLexOrdArray(in_slex_conv, in); + + PARALLEL_FOR_LOOP + for(int out_oidx=0;out_oidxoSites();out_oidx++){ + std::vector out_ocoor(ndim); + out_grid->oCoorFromOindex(out_ocoor, out_oidx); + + std::vector ptrs(out_nsimd); + + std::vector lcoor(out_grid->Nd()); + + for(int lane=0; lane < out_nsimd; lane++){ + for(int mu=0;mu_rdimensions[mu]*out_icoor[lane][mu]; + + int llex; Lexicographic::IndexFromCoor(lcoor, llex, out_grid->_ldimensions); + ptrs[lane] = &in_slex_conv[llex]; + } + merge(out._odata[out_oidx], ptrs, 0); + } +} + + + + + } #endif diff --git a/lib/tensors/Tensor_extract_merge.h b/lib/tensors/Tensor_extract_merge.h index ad98213d..41a431ad 100644 --- a/lib/tensors/Tensor_extract_merge.h +++ b/lib/tensors/Tensor_extract_merge.h @@ -10,6 +10,7 @@ Author: Azusa Yamaguchi Author: Peter Boyle Author: neo Author: paboyle +Author: Christopher Kelly This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -167,6 +168,33 @@ void extract(const vobj &vec,std::vector &extrac } } +//////////////////////////////////////////////////////////////////////// +// Extract to a bunch of scalar object pointers of different scalar type, with offset. Useful for precision change +//////////////////////////////////////////////////////////////////////// +template inline +void extract1(const vobj &vec,std::vector &extracted, int offset) +{ + typedef typename vobj::scalar_type vobj_scalar_type ; + typedef typename vobj::vector_type vobj_vector_type ; + + typedef typename sobj::scalar_type sobj_scalar_type ; + + static const int words=sizeof(vobj)/sizeof(vobj_vector_type); + static const int Nsimd=vobj_vector_type::Nsimd(); + + int Nextr=extracted.size(); + int s = Nsimd/Nextr; + vobj_scalar_type * vp = (vobj_scalar_type *)&vec; + + for(int w=0;w Author: Peter Boyle +Author: Christopher Kelly This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -230,6 +231,35 @@ namespace Grid { static const bool value = true; }; + //Get the SIMD vector type from a Grid tensor or Lattice + template + struct getVectorType{ + typedef T type; + }; + + //Query if a tensor or Lattice is SIMD vector or scalar + template + class isSIMDvectorized{ + template + static typename std::enable_if< !std::is_same< typename GridTypeMapper::type>::scalar_type, typename GridTypeMapper::type>::vector_type>::value, char>::type test(void *); + + template + static double test(...); + + public: + enum {value = sizeof(test(0)) == sizeof(char) }; + }; + + //Get the precision of a Lattice, tensor or scalar type in units of sizeof(float) + template + class getPrecision{ + typedef typename getVectorType::type vector_obj; //get the vector_obj (i.e. a grid Tensor) if its a Lattice, do nothing otherwise (i.e. if fundamental or grid Tensor) + + typedef typename GridTypeMapper::scalar_type scalar_type; //get the associated scalar type. Works on fundamental and tensor types + typedef typename GridTypeMapper::Realified real_scalar_type; //remove any std::complex wrapper, should get us to the fundamental type + public: + enum { value = sizeof(real_scalar_type)/sizeof(float) }; + }; } #endif diff --git a/scripts/filelist b/scripts/filelist index dcc5bfef..b5843cae 100755 --- a/scripts/filelist +++ b/scripts/filelist @@ -2,7 +2,8 @@ cd lib -HFILES=`find . -type f -name '*.h'` +HFILES=`find . -type f -name '*.h' -not -path '*/Old/*'` +HFILES="$HFILES Config.h" CCFILES=`find . -type f -name '*.cc' -not -name '*ommunicator*.cc'` echo> Make.inc echo HFILES=$HFILES >> Make.inc diff --git a/tests/Make.inc b/tests/Make.inc index f1e2cd9d..fad82247 100644 --- a/tests/Make.inc +++ b/tests/Make.inc @@ -1,13 +1,5 @@ -bin_PROGRAMS += Test_GaugeAction Test_RectPlaq Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_cheby Test_contfrac_cg Test_contfrac_even_odd Test_contfrac_force Test_cshift Test_cshift_red_black Test_cshift_red_black_rotate Test_cshift_rotate Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_dwf_force Test_dwf_fpgcr Test_dwf_gpforce Test_dwf_hdcr Test_dwf_lanczos Test_dwf_rb5d Test_gamma Test_gp_rect_force Test_gparity Test_gpdwf_force Test_gpwilson_even_odd Test_hmc_EODWFRatio Test_hmc_EODWFRatio_Gparity Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_GparityIwasakiGauge Test_hmc_GparityWilsonGauge Test_hmc_IwasakiGauge Test_hmc_RectGauge Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_partfrac_force Test_quenched_update Test_rect_force Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio Test_rng Test_rng_fixed Test_serialisation Test_simd Test_stencil Test_synthetic_lanczos Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi Test_wilson_tm_even_odd - - -Test_GaugeAction_SOURCES=Test_GaugeAction.cc -Test_GaugeAction_LDADD=-lGrid - - -Test_RectPlaq_SOURCES=Test_RectPlaq.cc -Test_RectPlaq_LDADD=-lGrid +bin_PROGRAMS += Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_cheby Test_contfrac_cg Test_contfrac_even_odd Test_contfrac_force Test_cshift Test_cshift_red_black Test_cshift_red_black_rotate Test_cshift_rotate Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_dwf_force Test_dwf_fpgcr Test_dwf_gpforce Test_dwf_hdcr Test_dwf_lanczos Test_dwf_rb5d Test_gamma Test_GaugeAction Test_gparity Test_gpdwf_force Test_gp_rect_force Test_gpwilson_even_odd Test_hmc_EODWFRatio Test_hmc_EODWFRatio_Gparity Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_GparityIwasakiGauge Test_hmc_GparityWilsonGauge Test_hmc_IwasakiGauge Test_hmc_RectGauge Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_partfrac_force Test_quenched_update Test_rect_force Test_RectPlaq Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio Test_rng Test_rng_fixed Test_serialisation Test_simd Test_stencil Test_synthetic_lanczos Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi Test_wilson_tm_even_odd Test_cayley_cg_SOURCES=Test_cayley_cg.cc @@ -114,8 +106,8 @@ Test_gamma_SOURCES=Test_gamma.cc Test_gamma_LDADD=-lGrid -Test_gp_rect_force_SOURCES=Test_gp_rect_force.cc -Test_gp_rect_force_LDADD=-lGrid +Test_GaugeAction_SOURCES=Test_GaugeAction.cc +Test_GaugeAction_LDADD=-lGrid Test_gparity_SOURCES=Test_gparity.cc @@ -126,6 +118,10 @@ Test_gpdwf_force_SOURCES=Test_gpdwf_force.cc Test_gpdwf_force_LDADD=-lGrid +Test_gp_rect_force_SOURCES=Test_gp_rect_force.cc +Test_gp_rect_force_LDADD=-lGrid + + Test_gpwilson_even_odd_SOURCES=Test_gpwilson_even_odd.cc Test_gpwilson_even_odd_LDADD=-lGrid @@ -202,6 +198,10 @@ Test_rect_force_SOURCES=Test_rect_force.cc Test_rect_force_LDADD=-lGrid +Test_RectPlaq_SOURCES=Test_RectPlaq.cc +Test_RectPlaq_LDADD=-lGrid + + Test_remez_SOURCES=Test_remez.cc Test_remez_LDADD=-lGrid