From a263e78f8d5eb655f266df99da5c7c073f37dabe Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 5 Jun 2015 18:16:25 +0100 Subject: [PATCH 01/24] Conjugate residual added --- lib/Algorithms.h | 1 + lib/Make.inc | 4 +- lib/algorithms/LinearOperator.h | 18 ++++- lib/algorithms/SparseMatrix.h | 1 + lib/algorithms/iterative/ConjugateResidual.h | 85 ++++++++++++++++++++ lib/qcd/QCD.h | 8 +- lib/qcd/action/Actions.h | 35 +------- lib/qcd/action/fermion/g5HermitianLinop.h | 38 +++++++++ lib/qcd/{ => spin}/Dirac.cc | 0 lib/qcd/{ => spin}/Dirac.h | 0 lib/qcd/{ => spin}/TwoSpinor.h | 0 lib/qcd/{ => utils}/LinalgUtils.h | 19 +++++ lib/qcd/{ => utils}/SpaceTimeGrid.cc | 0 lib/qcd/{ => utils}/SpaceTimeGrid.h | 0 tests/Make.inc | 12 +++ 15 files changed, 181 insertions(+), 40 deletions(-) create mode 100644 lib/algorithms/iterative/ConjugateResidual.h create mode 100644 lib/qcd/action/fermion/g5HermitianLinop.h rename lib/qcd/{ => spin}/Dirac.cc (100%) rename lib/qcd/{ => spin}/Dirac.h (100%) rename lib/qcd/{ => spin}/TwoSpinor.h (100%) rename lib/qcd/{ => utils}/LinalgUtils.h (88%) rename lib/qcd/{ => utils}/SpaceTimeGrid.cc (100%) rename lib/qcd/{ => utils}/SpaceTimeGrid.h (100%) diff --git a/lib/Algorithms.h b/lib/Algorithms.h index b5a2814a..c2491cf4 100644 --- a/lib/Algorithms.h +++ b/lib/Algorithms.h @@ -5,6 +5,7 @@ #include #include +#include #include #include diff --git a/lib/Make.inc b/lib/Make.inc index 94a566fb..ba35e25f 100644 --- a/lib/Make.inc +++ b/lib/Make.inc @@ -1,4 +1,4 @@ -HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Comparison.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/Dirac.h ./qcd/LinalgUtils.h ./qcd/QCD.h ./qcd/SpaceTimeGrid.h ./qcd/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./Tensors.h ./Threads.h +HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Comparison.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./Tensors.h ./Threads.h -CCFILES=./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/Dirac.cc ./qcd/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc +CCFILES=./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h index 710fcde7..492b34db 100644 --- a/lib/algorithms/LinearOperator.h +++ b/lib/algorithms/LinearOperator.h @@ -18,7 +18,8 @@ namespace Grid { public: virtual void Op (const Field &in, Field &out) = 0; // Abstract base virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base - virtual void HermOpAndNorm(const Field &in, Field &out,double &n1,double &n2)=0; + virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0; + virtual void HermOp(const Field &in, Field &out)=0; }; @@ -48,9 +49,13 @@ namespace Grid { void AdjOp (const Field &in, Field &out){ _Mat.Mdag(in,out); } - void HermOpAndNorm(const Field &in, Field &out,double &n1,double &n2){ + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ _Mat.MdagM(in,out,n1,n2); } + void HermOp(const Field &in, Field &out){ + RealD n1,n2; + HermOpAndNorm(in,out,n1,n2); + } }; //////////////////////////////////////////////////////////////////// @@ -67,7 +72,7 @@ namespace Grid { void AdjOp (const Field &in, Field &out){ _Mat.M(in,out); } - void HermOpAndNorm(const Field &in, Field &out,double &n1,double &n2){ + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ ComplexD dot; _Mat.M(in,out); @@ -78,6 +83,9 @@ namespace Grid { dot = innerProduct(out,out); n2=real(dot); } + void HermOp(const Field &in, Field &out){ + _Mat.M(in,out); + } }; ////////////////////////////////////////////////////////// @@ -98,6 +106,10 @@ namespace Grid { void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ MpcDagMpc(in,out,n1,n2); } + void HermOp(const Field &in, Field &out){ + RealD n1,n2; + HermOpAndNorm(in,out,n1,n2); + } void Op (const Field &in, Field &out){ Mpc(in,out); } diff --git a/lib/algorithms/SparseMatrix.h b/lib/algorithms/SparseMatrix.h index 9146648f..a51657dc 100644 --- a/lib/algorithms/SparseMatrix.h +++ b/lib/algorithms/SparseMatrix.h @@ -18,6 +18,7 @@ namespace Grid { Field tmp (in._grid); ni=M(in,tmp); no=Mdag(tmp,out); + std::cout << "MdagM "<< ni<<" "< + class ConjugateResidual : public OperatorFunction { + public: + RealD Tolerance; + Integer MaxIterations; + int verbose; + + ConjugateResidual(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) { + verbose=1; + }; + + void operator() (LinearOperatorBase &Linop,const Field &src, Field &psi){ + + RealD a, b, c, d; + RealD cp, ssq,rsq; + + RealD rAr, rAAr, rArp; + RealD pAp, pAAp; + + GridBase *grid = src._grid; + psi=zero; + Field r(grid), p(grid), Ap(grid), Ar(grid); + + r=src; + p=src; + + Linop.HermOpAndNorm(p,Ap,pAp,pAAp); + Linop.HermOpAndNorm(r,Ar,rAr,rAAr); + + std::cout << "pAp, pAAp"<< pAp<<" "< -#include -#include -#include +#include +#include +#include +#include #include #include #include diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h index edef1b53..56b453c6 100644 --- a/lib/qcd/action/Actions.h +++ b/lib/qcd/action/Actions.h @@ -65,36 +65,9 @@ #include #include - - // Chroma interface defining FermionAction - /* - template class FermAct4D : public FermionAction - virtual LinearOperator* linOp(Handle< FermState > state) const = 0; - virtual LinearOperator* lMdagM(Handle< FermState > state) const = 0; - virtual LinOpSystemSolver* invLinOp(Handle< FermState > state, - virtual MdagMSystemSolver* invMdagM(Handle< FermState > state, - virtual LinOpMultiSystemSolver* mInvLinOp(Handle< FermState > state, - virtual MdagMMultiSystemSolver* mInvMdagM(Handle< FermState > state, - virtual MdagMMultiSystemSolverAccumulate* mInvMdagMAcc(Handle< FermState > state, - virtual SystemSolver* qprop(Handle< FermState > state, - class DiffFermAct4D : public FermAct4D - virtual DiffLinearOperator* linOp(Handle< FermState > state) const = 0; - virtual DiffLinearOperator* lMdagM(Handle< FermState > state) const = 0; - */ - - - // Chroma interface defining GaugeAction - /* - template class GaugeAction - virtual const CreateGaugeState& getCreateState() const = 0; - virtual GaugeState* createState(const Q& q) const - virtual const GaugeBC& getGaugeBC() const - virtual const Set& getSet(void) const = 0; - virtual void deriv(P& result, const Handle< GaugeState >& state) const - virtual Double S(const Handle< GaugeState >& state) const = 0; - - class LinearGaugeAction : public GaugeAction< multi1d, multi1d > - typedef multi1d P; - */ +/////////////////////////////////////////////////////////////////////////////// +// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code +/////////////////////////////////////////////////////////////////////////////// +#include #endif diff --git a/lib/qcd/action/fermion/g5HermitianLinop.h b/lib/qcd/action/fermion/g5HermitianLinop.h new file mode 100644 index 00000000..9a8cc2eb --- /dev/null +++ b/lib/qcd/action/fermion/g5HermitianLinop.h @@ -0,0 +1,38 @@ +#ifndef G5_HERMITIAN_LINOP +#define G5_HERMITIAN_LINOP +namespace Grid { + namespace QCD { +//////////////////////////////////////////////////////////////////// +// Wrap an already herm matrix +//////////////////////////////////////////////////////////////////// +template +class Gamma5HermitianLinearOperator : public LinearOperatorBase { + Matrix &_Mat; +public: + Gamma5HermitianLinearOperator(Matrix &Mat): _Mat(Mat){}; + void Op (const Field &in, Field &out){ + _Mat.M(in,out); + } + void AdjOp (const Field &in, Field &out){ + _Mat.M(in,out); + } + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + + HermOp(in,out); + + ComplexD dot; + dot= innerProduct(in,out); + n1=real(dot); + + dot = innerProduct(out,out); + n2=real(dot); + } + void HermOp(const Field &in, Field &out){ + Field tmp(in._grid); + _Mat.M(in,tmp); + G5R5(out,tmp); + } +}; + +}} +#endif diff --git a/lib/qcd/Dirac.cc b/lib/qcd/spin/Dirac.cc similarity index 100% rename from lib/qcd/Dirac.cc rename to lib/qcd/spin/Dirac.cc diff --git a/lib/qcd/Dirac.h b/lib/qcd/spin/Dirac.h similarity index 100% rename from lib/qcd/Dirac.h rename to lib/qcd/spin/Dirac.h diff --git a/lib/qcd/TwoSpinor.h b/lib/qcd/spin/TwoSpinor.h similarity index 100% rename from lib/qcd/TwoSpinor.h rename to lib/qcd/spin/TwoSpinor.h diff --git a/lib/qcd/LinalgUtils.h b/lib/qcd/utils/LinalgUtils.h similarity index 88% rename from lib/qcd/LinalgUtils.h rename to lib/qcd/utils/LinalgUtils.h index 2b83d115..ef85e728 100644 --- a/lib/qcd/LinalgUtils.h +++ b/lib/qcd/utils/LinalgUtils.h @@ -109,5 +109,24 @@ PARALLEL_FOR_LOOP vstream(z._odata[ss+s],tmp); } } + +template +void G5R5(Lattice &z,const Lattice &x) +{ + GridBase *grid=x._grid; + z.checkerboard = x.checkerboard; + conformable(x,z); + int Ls = grid->_rdimensions[0]; +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + vobj tmp; + for(int s=0;s Date: Sun, 7 Jun 2015 17:06:25 +0100 Subject: [PATCH 02/24] Removed std::string calls from NerscIO map indexing --- lib/parallelIO/NerscIO.h | 44 ++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/lib/parallelIO/NerscIO.h b/lib/parallelIO/NerscIO.h index 8a1775f1..4db182e0 100644 --- a/lib/parallelIO/NerscIO.h +++ b/lib/parallelIO/NerscIO.h @@ -164,37 +164,37 @@ inline int readNerscHeader(std::string file,GridBase *grid, NerscField &field) // chomp the values ////////////////////////////////////////////////// - field.hdr_version = header[std::string("HDR_VERSION")]; - field.data_type = header[std::string("DATATYPE")]; - field.storage_format = header[std::string("STORAGE_FORMAT")]; + field.hdr_version = header["HDR_VERSION"]; + field.data_type = header["DATATYPE"]; + field.storage_format = header["STORAGE_FORMAT"]; - field.dimension[0] = std::stol(header[std::string("DIMENSION_1")]); - field.dimension[1] = std::stol(header[std::string("DIMENSION_2")]); - field.dimension[2] = std::stol(header[std::string("DIMENSION_3")]); - field.dimension[3] = std::stol(header[std::string("DIMENSION_4")]); + field.dimension[0] = std::stol(header["DIMENSION_1"]); + field.dimension[1] = std::stol(header["DIMENSION_2"]); + field.dimension[2] = std::stol(header["DIMENSION_3"]); + field.dimension[3] = std::stol(header["DIMENSION_4"]); assert(grid->_ndimension == 4); for(int d=0;d<4;d++){ assert(grid->_fdimensions[d]==field.dimension[d]); } - field.link_trace = std::stod(header[std::string("LINK_TRACE")]); - field.plaquette = std::stod(header[std::string("PLAQUETTE")]); + field.link_trace = std::stod(header["LINK_TRACE"]); + field.plaquette = std::stod(header["PLAQUETTE"]); - field.boundary[0] = header[std::string("BOUNDARY_1")]; - field.boundary[1] = header[std::string("BOUNDARY_2")]; - field.boundary[2] = header[std::string("BOUNDARY_3")]; - field.boundary[3] = header[std::string("BOUNDARY_4")]; + field.boundary[0] = header["BOUNDARY_1"]; + field.boundary[1] = header["BOUNDARY_2"]; + field.boundary[2] = header["BOUNDARY_3"]; + field.boundary[3] = header["BOUNDARY_4"]; - field.checksum = std::stoul(header[std::string("CHECKSUM")],0,16); - field.ensemble_id = header[std::string("ENSEMBLE_ID")]; - field.ensemble_label = header[std::string("ENSEMBLE_LABEL")]; - field.sequence_number = std::stol(header[std::string("SEQUENCE_NUMBER")]); - field.creator = header[std::string("CREATOR")]; - field.creator_hardware = header[std::string("CREATOR_HARDWARE")]; - field.creation_date = header[std::string("CREATION_DATE")]; - field.archive_date = header[std::string("ARCHIVE_DATE")]; - field.floating_point = header[std::string("FLOATING_POINT")]; + field.checksum = std::stoul(header["CHECKSUM"],0,16); + field.ensemble_id = header["ENSEMBLE_ID"]; + field.ensemble_label = header["ENSEMBLE_LABEL"]; + field.sequence_number = std::stol(header["SEQUENCE_NUMBER"]); + field.creator = header["CREATOR"]; + field.creator_hardware = header["CREATOR_HARDWARE"]; + field.creation_date = header["CREATION_DATE"]; + field.archive_date = header["ARCHIVE_DATE"]; + field.floating_point = header["FLOATING_POINT"]; return field.data_start; } From aad51ffe3acfae689b8d1d9e5a3856c708f8663d Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 8 Jun 2015 12:02:26 +0100 Subject: [PATCH 03/24] Prep for mgrid --- lib/Algorithms.h | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Algorithms.h b/lib/Algorithms.h index c2491cf4..0252606f 100644 --- a/lib/Algorithms.h +++ b/lib/Algorithms.h @@ -3,6 +3,7 @@ #include #include +#include #include #include From 42c22d4caecc6ed6afb0f8ac631cc54ffde2fa8a Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 8 Jun 2015 12:02:53 +0100 Subject: [PATCH 04/24] Prep for multigrid --- lib/algorithms/LinearOperator.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h index 492b34db..ee0b2ec1 100644 --- a/lib/algorithms/LinearOperator.h +++ b/lib/algorithms/LinearOperator.h @@ -16,6 +16,11 @@ namespace Grid { ///////////////////////////////////////////////////////////////////////////////////////////// template class LinearOperatorBase { public: + + // Support for coarsening to a multigrid + virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base + virtual void OpDir (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base + virtual void Op (const Field &in, Field &out) = 0; // Abstract base virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0; @@ -43,6 +48,14 @@ namespace Grid { Matrix &_Mat; public: MdagMLinearOperator(Matrix &Mat): _Mat(Mat){}; + + // Support for coarsening to a multigrid + void OpDiag (const Field &in, Field &out) { + _Mat.Mdiag(in,out); + } + void OpDir (const Field &in, Field &out,int dir,int disp) { + _Mat.Mdir(in,out,dir,disp); + } void Op (const Field &in, Field &out){ _Mat.M(in,out); } @@ -66,6 +79,13 @@ namespace Grid { Matrix &_Mat; public: HermitianLinearOperator(Matrix &Mat): _Mat(Mat){}; + // Support for coarsening to a multigrid + void OpDiag (const Field &in, Field &out) { + _Mat.Mdiag(in,out); + } + void OpDir (const Field &in, Field &out,int dir,int disp) { + _Mat.Mdir(in,out,dir,disp); + } void Op (const Field &in, Field &out){ _Mat.M(in,out); } @@ -116,6 +136,14 @@ namespace Grid { void AdjOp (const Field &in, Field &out){ MpcDag(in,out); } + // Support for coarsening to a multigrid + void OpDiag (const Field &in, Field &out) { + assert(0); // must coarsen the unpreconditioned system + } + void OpDir (const Field &in, Field &out,int dir,int disp) { + assert(0); + } + }; template class SchurDiagMooeeOperator : public SchurOperatorBase { From 769ef7b0f5d9e1edb45dc31d4ce51f430a5b0db8 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 8 Jun 2015 12:03:36 +0100 Subject: [PATCH 05/24] sqrt --- tests/Test_remez.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/Test_remez.cc b/tests/Test_remez.cc index b5993917..0d65ac7b 100644 --- a/tests/Test_remez.cc +++ b/tests/Test_remez.cc @@ -35,7 +35,7 @@ void MultiShiftFunction::csv(std::ostream &out) { for (double x=lo; x Date: Mon, 8 Jun 2015 12:04:59 +0100 Subject: [PATCH 06/24] Conjugate residual algorithm; some more unary functions --- lib/Make.inc | 2 +- lib/Simd.h | 3 + lib/Tensors.h | 1 + lib/algorithms/CoarsenedMatrix.h | 146 ++++++++++++++ lib/algorithms/SparseMatrix.h | 3 +- lib/algorithms/iterative/SchurRedBlack.h | 2 +- lib/lattice/Lattice_base.h | 1 + lib/lattice/Lattice_local.h | 3 +- lib/lattice/Lattice_reality.h | 8 +- lib/lattice/Lattice_transfer.h | 186 +++++++++++++----- lib/lattice/Lattice_unary.h | 32 +++ lib/qcd/action/fermion/CayleyFermion5D.cc | 21 ++ lib/qcd/action/fermion/CayleyFermion5D.h | 4 + .../fermion/ContinuedFractionFermion5D.cc | 12 ++ .../fermion/ContinuedFractionFermion5D.h | 3 + lib/qcd/action/fermion/FermionOperator.h | 3 + .../fermion/PartialFractionFermion5D.cc | 16 ++ .../action/fermion/PartialFractionFermion5D.h | 3 + lib/qcd/action/fermion/WilsonFermion.cc | 20 ++ lib/qcd/action/fermion/WilsonFermion.h | 6 +- lib/qcd/action/fermion/WilsonFermion5D.cc | 22 +++ lib/qcd/action/fermion/WilsonFermion5D.h | 4 + lib/qcd/action/fermion/WilsonKernels.cc | 148 ++++++++++++-- lib/qcd/action/fermion/WilsonKernels.h | 3 + lib/qcd/action/fermion/g5HermitianLinop.h | 15 +- lib/simd/Grid_avx.h | 5 +- lib/simd/Grid_vector_types.h | 30 ++- lib/simd/Grid_vector_unops.h | 60 ++++++ lib/tensors/Tensor_class.h | 7 + lib/tensors/Tensor_unary.h | 37 ++++ tests/Make.inc | 10 +- tests/Test_GaugeAction.cc | 2 +- tests/Test_cayley_coarsen_support.cc | 108 ++++++++++ tests/Test_cf_coarsen_support.cc | 87 ++++++++ tests/Test_cf_cr_unprec.cc | 58 ++++++ tests/Test_dwf_cr_unprec.cc | 63 ++++++ tests/Test_nersc_io.cc | 2 +- tests/Test_wilson_cr_unprec.cc | 59 ++++++ 38 files changed, 1116 insertions(+), 79 deletions(-) create mode 100644 lib/algorithms/CoarsenedMatrix.h create mode 100644 lib/lattice/Lattice_unary.h create mode 100644 lib/simd/Grid_vector_unops.h create mode 100644 lib/tensors/Tensor_unary.h create mode 100644 tests/Test_cayley_coarsen_support.cc create mode 100644 tests/Test_cf_coarsen_support.cc create mode 100644 tests/Test_cf_cr_unprec.cc create mode 100644 tests/Test_dwf_cr_unprec.cc create mode 100644 tests/Test_wilson_cr_unprec.cc diff --git a/lib/Make.inc b/lib/Make.inc index ba35e25f..069e818d 100644 --- a/lib/Make.inc +++ b/lib/Make.inc @@ -1,4 +1,4 @@ -HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Comparison.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./Tensors.h ./Threads.h +HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Comparison.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./Tensors.h ./Threads.h CCFILES=./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc diff --git a/lib/Simd.h b/lib/Simd.h index 26edb4c9..f93c3070 100644 --- a/lib/Simd.h +++ b/lib/Simd.h @@ -35,6 +35,8 @@ namespace Grid { inline RealD conjugate(const RealD & r){ return r; } inline RealD real(const RealD & r){ return r; } + inline RealD sqrt(const RealD & r){ return std::sqrt(r); } + inline ComplexD conjugate(const ComplexD& r){ return(conj(r)); } inline ComplexD adj(const ComplexD& r){ return(conjugate(r)); } inline ComplexF conjugate(const ComplexF& r ){ return(conj(r)); } @@ -112,6 +114,7 @@ namespace Grid { }; #include +#include namespace Grid { // Default precision diff --git a/lib/Tensors.h b/lib/Tensors.h index fa0ca7a9..e585b116 100644 --- a/lib/Tensors.h +++ b/lib/Tensors.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #endif diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h new file mode 100644 index 00000000..54ea279e --- /dev/null +++ b/lib/algorithms/CoarsenedMatrix.h @@ -0,0 +1,146 @@ +#ifndef GRID_ALGORITHM_COARSENED_MATRIX_H +#define GRID_ALGORITHM_COARSENED_MATRIX_H + +#include + +namespace Grid { + + class Geometry { + public: + int npoint; + int dimension; + std::vector directions ; + std::vector displacements; + + Geometry(int _d) : dimension(_d), npoint(2*_d+1), directions(npoint), displacements(npoint) { + for(int d=0;d GetDelta(int point) { + std::vector delta(dimension,0); + delta[directions[point]] = displacements[point]; + return delta; + }; + + }; + + // Fine Object == (per site) type of fine field + // nbasis == number of deflation vectors + template + class CoarsenedMatrix : public SparseMatrixBase > > { + public: + + typedef iVector siteVector; + typedef Lattice > CoarseVector; + typedef Lattice > CoarseMatrix; + + typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + //////////////////// + // Data members + //////////////////// + Geometry geom; + GridBase * _grid; + CartesianStencil Stencil; + std::vector A; + std::vector > comm_buf; + + /////////////////////// + // Interface + /////////////////////// + GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know + + RealD M (const CoarseVector &in, CoarseVector &out){ + + SimpleCompressor compressor; + Stencil.HaloExchange(in,comm_buf,compressor); + + //PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ + siteVector res = zero; + siteVector tmp; + siteVector nbr; + + int offset,local,perm; + for(int point=0;point > &linop,std::vector > & subspace){ + + FineField phi(FineGrid); + FineField Mphi(FineGrid); + CoarseVector Proj(Grid()); + CoarseScalar InnerProd(Grid()); + + // Orthogonalise the subblocks over the basis + blockOrthogonalise(InnerProd,subspace); + + // Compute the matrix elements of linop between this orthonormal + // set of vectors. + for(int i=0;ioSites();ss++){ + for(int j=0;j #include #include +#include #include diff --git a/lib/lattice/Lattice_local.h b/lib/lattice/Lattice_local.h index 7b8bdf9b..ad7a1ff5 100644 --- a/lib/lattice/Lattice_local.h +++ b/lib/lattice/Lattice_local.h @@ -25,8 +25,7 @@ PARALLEL_FOR_LOOP // localInnerProduct template - inline auto localInnerProduct (const Lattice &lhs,const Lattice &rhs) - -> Lattice + inline auto localInnerProduct (const Lattice &lhs,const Lattice &rhs) -> Lattice { Lattice ret(rhs._grid); PARALLEL_FOR_LOOP diff --git a/lib/lattice/Lattice_reality.h b/lib/lattice/Lattice_reality.h index 4ad6fb7b..8c8bc17a 100644 --- a/lib/lattice/Lattice_reality.h +++ b/lib/lattice/Lattice_reality.h @@ -27,9 +27,9 @@ PARALLEL_FOR_LOOP return ret; }; - template inline auto real(const Lattice &z) -> Lattice + template inline auto real(const Lattice &z) -> Lattice { - Lattice ret(z._grid); + Lattice ret(z._grid); PARALLEL_FOR_LOOP for(int ss=0;ssoSites();ss++){ ret._odata[ss] = real(z._odata[ss]); @@ -37,9 +37,9 @@ PARALLEL_FOR_LOOP return ret; } - template inline auto imag(const Lattice &z) -> Lattice + template inline auto imag(const Lattice &z) -> Lattice { - Lattice ret(z._grid); + Lattice ret(z._grid); PARALLEL_FOR_LOOP for(int ss=0;ssoSites();ss++){ ret._odata[ss] = imag(z._odata[ss]); diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index f0046b0e..871ed893 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -56,10 +56,10 @@ PARALLEL_FOR_LOOP } -template -inline void projectBlockBasis(Lattice > &coarseData, - const Lattice &fineData, - const std::vector > &Basis) +template +inline void blockProject(Lattice > &coarseData, + const Lattice &fineData, + const std::vector > &Basis) { GridBase * fine = fineData._grid; GridBase * coarse= coarseData._grid; @@ -69,13 +69,14 @@ inline void projectBlockBasis(Lattice > &coarseData, assert( nbasis == Basis.size() ); subdivides(coarse,fine); for(int i=0;i block_r (_ndimension); for(int d=0 ; d<_ndimension;d++){ block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; + assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]); } coarseData=zero; @@ -92,20 +93,149 @@ inline void projectBlockBasis(Lattice > &coarseData, for(int i=0;i +inline void blockZAXPY(Lattice &fineZ, + const Lattice &coarseA, + const Lattice &fineX, + const Lattice &fineY) +{ + GridBase * fine = fineZ._grid; + GridBase * coarse= coarseA._grid; + + fineZ.checkerboard=fineX.checkerboard; + subdivides(coarse,fine); // require they map + conformable(fineX,fineY); + conformable(fineX,fineZ); + + int _ndimension = coarse->_ndimension; + std::vector block_r (_ndimension); + + // FIXME merge with subdivide checking routine as this is redundant + for(int d=0 ; d<_ndimension;d++){ + block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; + assert(block_r[d]*coarse->_rdimensions[d]==fine->_rdimensions[d]); + } + +PARALLEL_FOR_LOOP + for(int sf=0;sfoSites();sf++){ + + int sc; + std::vector coor_c(_ndimension); + std::vector coor_f(_ndimension); + + GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions); + for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; + GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions); + + // z = A x + y + fineZ._odata[sf]=coarseA._odata[sc]*fineX._odata[sf]+fineY._odata[sf]; + + } + + return; +} +template + inline void blockInnerProduct(Lattice &CoarseInner, + const Lattice &fineX, + const Lattice &fineY) +{ + typedef decltype(innerProduct(fineX._odata[0],fineY._odata[0])) dotp; + + GridBase *coarse(CoarseInner._grid); + GridBase *fine (fineX._grid); + + Lattice fine_inner(fine); + Lattice coarse_inner(coarse); + + fine_inner = localInnerProduct(fineX,fineY); + blockSum(coarse_inner,fine_inner); + for(int ss=0;ssoSites();ss++){ + CoarseInner._odata[ss] = coarse_inner._odata[ss]; + } +} +template +inline void blockNormalise(Lattice &ip,Lattice &fineX) +{ + GridBase *coarse = ip._grid; + blockInnerProduct(ip,fineX,fineX); + ip = rsqrt(ip); + blockZAXPY(fineX,ip,fineX,fineX); +} +// useful in multigrid project; +// Generic name : Coarsen? +template +inline void blockSum(Lattice &coarseData,const Lattice &fineData) +{ + GridBase * fine = fineData._grid; + GridBase * coarse= coarseData._grid; + + subdivides(coarse,fine); // require they map + + int _ndimension = coarse->_ndimension; + + std::vector block_r (_ndimension); + + for(int d=0 ; d<_ndimension;d++){ + block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; + } + + coarseData=zero; + for(int sf=0;sfoSites();sf++){ + + int sc; + std::vector coor_c(_ndimension); + std::vector coor_f(_ndimension); + + GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions); + for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; + GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions); + + coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf]; + + } + return; } -template -inline void promoteBlockBasis(const Lattice > &coarseData, - Lattice &fineData, - const std::vector > &Basis) +template +inline void blockOrthogonalise(Lattice &ip,std::vector > &Basis) +{ + GridBase *coarse = ip._grid; + GridBase *fine = Basis[0]._grid; + + int nbasis = Basis.size() ; + int _ndimension = coarse->_ndimension; + + // checks + subdivides(coarse,fine); + for(int i=0;i (Basis[v],ip,Basis[u],Basis[v]); + } + blockNormalise(ip,Basis[v]); + } +} + +template +inline void blockPromote(const Lattice > &coarseData, + Lattice &fineData, + const std::vector > &Basis) { GridBase * fine = fineData._grid; GridBase * coarse= coarseData._grid; @@ -146,40 +276,6 @@ inline void promoteBlockBasis(const Lattice > &coarseD } -// useful in multigrid project; -// Generic name : Coarsen? -template -inline void sumBlocks(Lattice &coarseData,const Lattice &fineData) -{ - GridBase * fine = fineData._grid; - GridBase * coarse= coarseData._grid; - - subdivides(coarse,fine); // require they map - - int _ndimension = coarse->_ndimension; - - std::vector block_r (_ndimension); - - for(int d=0 ; d<_ndimension;d++){ - block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; - } - - coarseData=zero; - for(int sf=0;sfoSites();sf++){ - - int sc; - std::vector coor_c(_ndimension); - std::vector coor_f(_ndimension); - - GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions); - for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; - GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions); - - coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf]; - - } - return; -} } #endif diff --git a/lib/lattice/Lattice_unary.h b/lib/lattice/Lattice_unary.h new file mode 100644 index 00000000..131d7d68 --- /dev/null +++ b/lib/lattice/Lattice_unary.h @@ -0,0 +1,32 @@ +#ifndef GRID_LATTICE_UNARY_H +#define GRID_LATTICE_UNARY_H + +namespace Grid { + + ////////////////////////////////////////////////////////////////////////////////////////////////////// + // avoid copy back routines for mult, mac, sub, add + ////////////////////////////////////////////////////////////////////////////////////////////////////// + template Lattice sqrt(const Lattice &rhs){ + Lattice ret(rhs._grid); + ret.checkerboard = rhs.checkerboard; + conformable(ret,rhs); +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ + ret._odata[ss]=sqrt(rhs._odata[ss]); + } + return ret; + } + template Lattice rsqrt(const Lattice &rhs){ + Lattice ret(rhs._grid); + ret.checkerboard = rhs.checkerboard; + conformable(ret,rhs); +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ + ret._odata[ss]=rsqrt(rhs._odata[ss]); + } + return ret; + } + + +} +#endif diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc index e47ff331..7db67dd7 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.cc +++ b/lib/qcd/action/fermion/CayleyFermion5D.cc @@ -165,6 +165,27 @@ namespace QCD { } } + void CayleyFermion5D::Mdir (const LatticeFermion &psi, LatticeFermion &chi,int dir,int disp){ + LatticeFermion tmp(psi._grid); + // Assemble the 5d matrix + for(int s=0;s(in,comm_buf,compressor); + + assert( (disp==1)||(disp==-1) ); + + int skip = (disp==1) ? 0 : 1; + + int dirdisp = dir+skip*4; + +PARALLEL_FOR_LOOP + for(int sss=0;sssoSites();sss++){ + DiracOpt::DhopDir(Stencil,Umu,comm_buf,sss,sss,in,out,dirdisp); + } + +}; void WilsonFermion::DhopInternal(CartesianStencil & st,LatticeDoubledGaugeField & U, const LatticeFermion &in, LatticeFermion &out,int dag) diff --git a/lib/qcd/action/fermion/WilsonFermion.h b/lib/qcd/action/fermion/WilsonFermion.h index 6040d328..60726fdd 100644 --- a/lib/qcd/action/fermion/WilsonFermion.h +++ b/lib/qcd/action/fermion/WilsonFermion.h @@ -26,7 +26,7 @@ namespace Grid { void MeooeDag (const LatticeFermion &in, LatticeFermion &out); virtual void Mooee (const LatticeFermion &in, LatticeFermion &out); // remain virtual so we virtual void MooeeDag (const LatticeFermion &in, LatticeFermion &out); // can derive Clover - virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out); // from Wilson bas + virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out); // from Wilson base virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out); // non-hermitian hopping term; half cb or both @@ -34,6 +34,10 @@ namespace Grid { void DhopOE(const LatticeFermion &in, LatticeFermion &out,int dag); void DhopEO(const LatticeFermion &in, LatticeFermion &out,int dag); + // Multigrid assistance + void Mdir (const LatticeFermion &in, LatticeFermion &out,int dir,int disp); + void DhopDir(const LatticeFermion &in, LatticeFermion &out,int dir,int disp); + /////////////////////////////////////////////////////////////// // Extra methods added by derived /////////////////////////////////////////////////////////////// diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index d22701b0..4ddd14ab 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -82,6 +82,28 @@ void WilsonFermion5D::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGau pokeIndex(Uds,U,mu+4); } } +void WilsonFermion5D::DhopDir(const LatticeFermion &in, LatticeFermion &out,int dir,int disp) +{ + assert( (disp==1)||(disp==-1) ); + + WilsonCompressor compressor(DaggerNo); + Stencil.HaloExchange(in,comm_buf,compressor); + + int skip = (disp==1) ? 0 : 1; + + int dirdisp = dir+skip*4; + +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ + for(int s=0;s > &buf, + int sF,int sU,const LatticeFermion &in, LatticeFermion &out,int dirdisp) +{ + vHalfSpinColourVector tmp; + vHalfSpinColourVector chi; + vSpinColourVector result; + vHalfSpinColourVector Uchi; + int offset,local,perm, ptype; + int ss=sF; + + offset = st._offsets [dirdisp][ss]; + local = st._is_local[dirdisp][ss]; + perm = st._permute[dirdisp][ss]; + ptype = st._permute_type[dirdisp]; + + // Xp + if(dirdisp==Xp){ + if ( local && perm ) { + spProjXp(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjXp(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[sU](Xp),&chi()); + spReconXp(result,Uchi); + } + + // Yp + if ( dirdisp==Yp ){ + if ( local && perm ) { + spProjYp(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjYp(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[sU](Yp),&chi()); + spReconYp(result,Uchi); + } + + // Zp + if ( dirdisp ==Zp ){ + if ( local && perm ) { + spProjZp(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjZp(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[sU](Zp),&chi()); + spReconZp(result,Uchi); + } + + // Tp + if ( dirdisp ==Tp ){ + if ( local && perm ) { + spProjTp(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjTp(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[sU](Tp),&chi()); + spReconTp(result,Uchi); + } + + // Xm + if ( dirdisp==Xm ){ + if ( local && perm ) { + spProjXm(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjXm(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[sU](Xm),&chi()); + spReconXm(result,Uchi); + } + + // Ym + if ( dirdisp == Ym ){ + if ( local && perm ) { + spProjYm(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjYm(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[sU](Ym),&chi()); + spReconYm(result,Uchi); + } + + // Zm + if ( dirdisp == Zm ){ + if ( local && perm ) { + spProjZm(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjZm(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[sU](Zm),&chi()); + spReconZm(result,Uchi); + } + + // Tm + if ( dirdisp==Tm ) { + if ( local && perm ) { + spProjTm(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjTm(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[sU](Tm),&chi()); + spReconTm(result,Uchi); + } + + vstream(out._odata[ss],result*(-0.5)); +} + }} diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h index cc535022..db384ddf 100644 --- a/lib/qcd/action/fermion/WilsonKernels.h +++ b/lib/qcd/action/fermion/WilsonKernels.h @@ -20,6 +20,9 @@ namespace Grid { static void DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U, std::vector > &buf, int sF,int sU,const LatticeFermion &in, LatticeFermion &out); + static void DhopDir(CartesianStencil &st,LatticeDoubledGaugeField &U, + std::vector > &buf, + int sF,int sU,const LatticeFermion &in, LatticeFermion &out,int dirdisp); }; diff --git a/lib/qcd/action/fermion/g5HermitianLinop.h b/lib/qcd/action/fermion/g5HermitianLinop.h index 9a8cc2eb..5b3411ec 100644 --- a/lib/qcd/action/fermion/g5HermitianLinop.h +++ b/lib/qcd/action/fermion/g5HermitianLinop.h @@ -11,11 +11,22 @@ class Gamma5HermitianLinearOperator : public LinearOperatorBase { public: Gamma5HermitianLinearOperator(Matrix &Mat): _Mat(Mat){}; void Op (const Field &in, Field &out){ - _Mat.M(in,out); + HermOp(in,out); } void AdjOp (const Field &in, Field &out){ - _Mat.M(in,out); + HermOp(in,out); } + void OpDiag (const Field &in, Field &out) { + Field tmp(in._grid); + _Mat.Mdiag(in,tmp); + G5R5(out,tmp); + } + void OpDir (const Field &in, Field &out,int dir,int disp) { + Field tmp(in._grid); + _Mat.Mdir(in,tmp,dir,disp); + G5R5(out,tmp); + } + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ HermOp(in,out); diff --git a/lib/simd/Grid_avx.h b/lib/simd/Grid_avx.h index ee4d03d5..ba023146 100644 --- a/lib/simd/Grid_avx.h +++ b/lib/simd/Grid_avx.h @@ -314,9 +314,9 @@ namespace Optimization { template<> inline Grid::ComplexF Reduce::operator()(__m256 in){ __m256 v1,v2; - Optimization::permute(v1,in,0); // sse 128; paired complex single + Optimization::permute(v1,in,0); // avx 256; quad complex single v1 = _mm256_add_ps(v1,in); - Optimization::permute(v2,v1,1); // avx 256; quad complex single + Optimization::permute(v2,v1,1); v1 = _mm256_add_ps(v1,v2); u256f conv; conv.v = v1; return Grid::ComplexF(conv.f[0],conv.f[1]); @@ -367,7 +367,6 @@ namespace Optimization { assert(0); } - } ////////////////////////////////////////////////////////////////////////////////////// diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index aa1abb47..9525e45e 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -78,11 +78,18 @@ namespace Grid { typedef typename RealPart < Scalar_type >::type Real; typedef Vector_type vector_type; typedef Scalar_type scalar_type; + + + typedef union conv_t_union { + Vector_type v; + Scalar_type s[sizeof(Vector_type)/sizeof(Scalar_type)]; + conv_t_union(){}; + } conv_t; + Vector_type v; static inline int Nsimd(void) { return sizeof(Vector_type)/sizeof(Scalar_type);} - Grid_simd& operator=(const Grid_simd&& rhs){v=rhs.v;return *this;}; Grid_simd& operator=(const Grid_simd& rhs){v=rhs.v;return *this;}; //faster than not declaring it and leaving to the compiler @@ -192,6 +199,27 @@ namespace Grid { return *this; } + + /////////////////////////////////////// + // Not all functions are supported + // through SIMD and must breakout to + // scalar type and back again. This + // provides support + /////////////////////////////////////// + + template friend inline Grid_simd SimdApply (const functor &func,const Grid_simd &v) { + + Grid_simd ret; + Grid_simd::conv_t conv; + + conv.v = v.v; + for(int i=0;i struct SqrtRealFunctor { + scalar operator()(const scalar &a) const { + return sqrt(real(a)); + } + }; + + template struct RSqrtRealFunctor { + scalar operator()(const scalar &a) const { + return scalar(1.0/sqrt(real(a))); + } + }; + + template struct CosRealFunctor { + scalar operator()(const scalar &a) const { + return cos(real(a)); + } + }; + + template struct SinRealFunctor { + scalar operator()(const scalar &a) const { + return sin(real(a)); + } + }; + + template struct PowRealFunctor { + double y; + PowRealFunctor(double _y) : y(_y) {}; + scalar operator()(const scalar &a) const { + return pow(real(a),y); + } + }; + + template < class S, class V > + inline Grid_simd sqrt(const Grid_simd &r) { + return SimdApply(SqrtRealFunctor(),r); + } + template < class S, class V > + inline Grid_simd rsqrt(const Grid_simd &r) { + return SimdApply(RSqrtRealFunctor(),r); + } + template < class S, class V > + inline Grid_simd cos(const Grid_simd &r) { + return SimdApply(CosRealFunctor(),r); + } + template < class S, class V > + inline Grid_simd sin(const Grid_simd &r) { + return SimdApply(CosRealFunctor(),r); + } + template < class S, class V > + inline Grid_simd pow(const Grid_simd &r,double y) { + return SimdApply(PowRealFunctor(y),r); + } + +} +#endif diff --git a/lib/tensors/Tensor_class.h b/lib/tensors/Tensor_class.h index bd1902b8..3ed4f865 100644 --- a/lib/tensors/Tensor_class.h +++ b/lib/tensors/Tensor_class.h @@ -123,6 +123,13 @@ public: typedef iScalar tensor_reduced; typedef iVector scalar_object; + template::value, T>::type* = nullptr > strong_inline auto operator = (T arg) -> iVector + { + zeroit(*this); + for(int i=0;i::TensorLevel + 1}; iVector(const Zero &z){ *this = zero; }; diff --git a/lib/tensors/Tensor_unary.h b/lib/tensors/Tensor_unary.h new file mode 100644 index 00000000..1fbec985 --- /dev/null +++ b/lib/tensors/Tensor_unary.h @@ -0,0 +1,37 @@ +#ifndef GRID_TENSOR_UNARY_H +#define GRID_TENSOR_UNARY_H +namespace Grid { + +#define UNARY_REAL(func)\ +template inline auto func(const iScalar &z) -> iScalar\ +{\ + iScalar ret;\ + ret._internal = func( (z._internal));\ + return ret;\ +}\ +template inline auto func(const iVector &z) -> iVector\ +{\ + iVector ret;\ + for(int c1=0;c1 inline auto func(const iMatrix &z) -> iMatrix\ +{\ + iMatrix ret;\ + for(int c1=0;c1 + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + + + const int Ls=8; + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + // Construct a coarsened grid + std::vector clatt = GridDefaultLatt(); + for(int d=0;d seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeFermion src(FGrid); random(RNG5,src); + LatticeFermion result(FGrid); result=zero; + LatticeFermion ref(FGrid); ref=zero; + LatticeFermion tmp(FGrid); + LatticeFermion err(FGrid); + LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + + std::vector U(4,UGrid); + + for(int mu=0;mu(Umu,mu); + } + + RealD mass=0.5; + RealD M5=1.8; + + DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + Gamma5HermitianLinearOperator HermIndefOp(Ddwf); + + HermIndefOp.Op(src,ref); + HermIndefOp.OpDiag(src,result); + + for(int d=0;d<4;d++){ + HermIndefOp.OpDir(src,tmp,d,+1); result=result+tmp; + std::cout<<"dir "< subspace(nbasis,FGrid); + + for(int b=0;b LittleDiracOp(*Coarse5d); + + LittleDiracOp.CoarsenOperator(FGrid,HermIndefOp,subspace); + + typedef Lattice > coarse_vec; + + coarse_vec c_src (Coarse5d); c_src= zero; + coarse_vec c_res (Coarse5d); + + Complex one(1.0); + c_src = one; // 1 in every element for vector 1. + + // TODO + // -- promote from subspace, check we get the vector we wanted + // -- apply ldop; check we get the same as inner product of M times big vec + // -- pick blocks one by one. Evaluate matrix elements. + + std::cout << "Multiplying by LittleDiracOp "<< std::endl; + LittleDiracOp.M(c_src,c_res); + + std::cout << "Done "<< std::endl; + Grid_finalize(); +} diff --git a/tests/Test_cf_coarsen_support.cc b/tests/Test_cf_coarsen_support.cc new file mode 100644 index 00000000..9d81a798 --- /dev/null +++ b/tests/Test_cf_coarsen_support.cc @@ -0,0 +1,87 @@ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + const int Ls=9; + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeFermion src(FGrid); random(RNG5,src); + LatticeFermion result(FGrid); result=zero; + LatticeFermion ref(FGrid); ref=zero; + LatticeFermion tmp(FGrid); + LatticeFermion err(FGrid); + LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + + std::vector U(4,UGrid); + for(int mu=0;mu(Umu,mu); + } + + RealD mass=0.1; + RealD M5=1.8; + + { + OverlapWilsonContFracTanhFermion Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0); + HermitianLinearOperator HermIndefOp(Dcf); + + HermIndefOp.Op(src,ref); + HermIndefOp.OpDiag(src,result); + + for(int d=0;d<4;d++){ + HermIndefOp.OpDir(src,tmp,d,+1); result=result+tmp; + std::cout<<"dir "< HermIndefOp(Dpf); + + HermIndefOp.Op(src,ref); + HermIndefOp.OpDiag(src,result); + + for(int d=0;d<4;d++){ + HermIndefOp.OpDir(src,tmp,d,+1); result=result+tmp; + std::cout<<"dir "< + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + const int Ls=9; + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeFermion src(FGrid); random(RNG5,src); + LatticeFermion result(FGrid); result=zero; + LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + + std::vector U(4,UGrid); + for(int mu=0;mu(Umu,mu); + } + + RealD mass=0.1; + RealD M5=1.8; + + OverlapWilsonContFracTanhFermion Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0); + + ConjugateResidual MCR(1.0e-8,10000); + + MdagMLinearOperator HermPosDefOp(Dcf); + MCR(HermPosDefOp,src,result); + + HermitianLinearOperator HermIndefOp(Dcf); + MCR(HermIndefOp,src,result); + + Grid_finalize(); +} diff --git a/tests/Test_dwf_cr_unprec.cc b/tests/Test_dwf_cr_unprec.cc new file mode 100644 index 00000000..cffa726c --- /dev/null +++ b/tests/Test_dwf_cr_unprec.cc @@ -0,0 +1,63 @@ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + + + const int Ls=8; + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeFermion src(FGrid); random(RNG5,src); + LatticeFermion result(FGrid); result=zero; + LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + + std::vector U(4,UGrid); + + for(int mu=0;mu(Umu,mu); + } + + ConjugateResidual MCR(1.0e-8,10000); + + RealD mass=0.5; + RealD M5=1.8; + DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + + MdagMLinearOperator HermOp(Ddwf); + MCR(HermOp,src,result); + + Gamma5HermitianLinearOperator g5HermOp(Ddwf); + MCR(g5HermOp,src,result); + + + Grid_finalize(); +} diff --git a/tests/Test_nersc_io.cc b/tests/Test_nersc_io.cc index 80d78291..403d2cab 100644 --- a/tests/Test_nersc_io.cc +++ b/tests/Test_nersc_io.cc @@ -83,7 +83,7 @@ int main (int argc, char ** argv) Complex l = TensorRemove(Tl); std::cout << "calculated link trace " < + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + + std::vector seeds({1,2,3,4}); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); + + LatticeFermion src(&Grid); random(pRNG,src); + RealD nrm = norm2(src); + LatticeFermion result(&Grid); result=zero; + LatticeGaugeField Umu(&Grid); random(pRNG,Umu); + + std::vector U(4,&Grid); + + double volume=1; + for(int mu=0;mu(Umu,mu); + } + + RealD mass=0.5; + WilsonFermion Dw(Umu,Grid,RBGrid,mass); + + MdagMLinearOperator HermOp(Dw); + + ConjugateResidual MCR(1.0e-8,10000); + + MCR(HermOp,src,result); + + Grid_finalize(); +} From a6ac2abb649148a13cd6f4f294663445e0bca703 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 8 Jun 2015 12:12:13 +0100 Subject: [PATCH 07/24] No compile fix after merge --- tests/Test_multishift_sqrt.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/Test_multishift_sqrt.cc b/tests/Test_multishift_sqrt.cc index 8e6b08d4..1ecc1fde 100644 --- a/tests/Test_multishift_sqrt.cc +++ b/tests/Test_multishift_sqrt.cc @@ -20,12 +20,20 @@ public: sqrtscale = sqrtscale * adj(sqrtscale);// force real pos def scale = sqrtscale * sqrtscale; } + // Support for coarsening to a multigrid + void OpDiag (const Field &in, Field &out) {}; + void OpDir (const Field &in, Field &out,int dir,int disp){}; + void Op (const Field &in, Field &out){ out = scale * in; } void AdjOp (const Field &in, Field &out){ out = scale * in; } + void HermOp(const Field &in, Field &out){ + double n1, n2; + HermOpAndNorm(in,out,n1,n2); + } void HermOpAndNorm(const Field &in, Field &out,double &n1,double &n2){ ComplexD dot; From 744ac33e8b9d479069ac2f822fc57b521fd49548 Mon Sep 17 00:00:00 2001 From: neo Date: Tue, 9 Jun 2015 15:46:21 +0900 Subject: [PATCH 08/24] Experimental support for ARM --- .gitignore | 2 +- INSTALL | 2 +- configure | 29 ++- configure.ac | 22 ++- lib/GridConfig.h | 14 +- lib/GridConfig.h.in | 14 +- lib/Make.inc | 4 +- lib/algorithms/approx/.dirstamp | 0 lib/communicator/.dirstamp | 0 lib/qcd/spin/.dirstamp | 0 lib/qcd/utils/.dirstamp | 0 lib/simd/Grid_avx.h | 8 +- lib/simd/Grid_avx512.h | 8 +- lib/simd/Grid_empty.h | 289 +++++++++++++++++++++++++++ lib/simd/Grid_neon.h | 308 +++++++++++++++++++++++++++++ lib/simd/Grid_sse4.h | 9 +- lib/simd/Grid_vector_types.h | 10 +- lib/stencil/.dirstamp | 0 scripts/arm_configure.experimental | 1 + tests/Make.inc | 10 +- 20 files changed, 693 insertions(+), 37 deletions(-) create mode 100644 lib/algorithms/approx/.dirstamp create mode 100644 lib/communicator/.dirstamp create mode 100644 lib/qcd/spin/.dirstamp create mode 100644 lib/qcd/utils/.dirstamp create mode 100644 lib/simd/Grid_empty.h create mode 100644 lib/simd/Grid_neon.h create mode 100644 lib/stencil/.dirstamp create mode 100644 scripts/arm_configure.experimental diff --git a/.gitignore b/.gitignore index 82e09bc0..be00eb30 100644 --- a/.gitignore +++ b/.gitignore @@ -49,7 +49,7 @@ config.status /stamp-h1 /config.sub /config.guess - +/INSTALL # Packages # ############ diff --git a/INSTALL b/INSTALL index 80a61507..f812f5a2 120000 --- a/INSTALL +++ b/INSTALL @@ -1 +1 @@ -/opt/local/share/automake-1.15/INSTALL \ No newline at end of file +/usr/share/automake-1.14/INSTALL \ No newline at end of file diff --git a/configure b/configure index 36f85edb..702a52db 100755 --- a/configure +++ b/configure @@ -2574,7 +2574,7 @@ test -n "$target_alias" && NONENONEs,x,x, && program_prefix=${target_alias}- -am__api_version='1.15' +am__api_version='1.14' # Find a good install program. We prefer a C program (faster), # so one script is as good as another. But avoid the broken or @@ -2746,8 +2746,8 @@ test "$program_suffix" != NONE && ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"` -# Expand $ac_aux_dir to an absolute path. -am_aux_dir=`cd "$ac_aux_dir" && pwd` +# expand $ac_aux_dir to an absolute path +am_aux_dir=`cd $ac_aux_dir && pwd` if test x"${MISSING+set}" != xset; then case $am_aux_dir in @@ -2766,7 +2766,7 @@ else $as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;} fi -if test x"${install_sh+set}" != xset; then +if test x"${install_sh}" != xset; then case $am_aux_dir in *\ * | *\ *) install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; @@ -3094,8 +3094,8 @@ MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} # mkdir_p='$(MKDIR_P)' -# We need awk for the "check" target (and possibly the TAP driver). The -# system "awk" is bad on some platforms. +# We need awk for the "check" target. The system "awk" is bad on +# some platforms. # Always define AMTAR for backward compatibility. Yes, it's still used # in the wild :-( We should find a proper way to deprecate it ... AMTAR='$${TAR-tar}' @@ -3154,7 +3154,6 @@ END fi - ac_config_headers="$ac_config_headers lib/GridConfig.h" # Check whether --enable-silent-rules was given. @@ -6003,6 +6002,7 @@ $as_echo "$as_me: WARNING: Your processor supports fma instructions but not your + # Checks for libraries. #AX_GCC_VAR_ATTRIBUTE(aligned) @@ -6709,8 +6709,21 @@ $as_echo "#define AVX512 1" >>confdefs.h supported="cross compilation" ;; + NEONv7) + echo Configuring for experimental ARMv7 support + +$as_echo "#define NEONv7 1" >>confdefs.h + + supported="cross compilation" + ;; + DEBUG) + echo Configuring without SIMD support - only for compiler DEBUGGING! + +$as_echo "#define EMPTY_SIMD 1" >>confdefs.h + + ;; *) - as_fn_error $? "${ac_SIMD} unsupported --enable-simd option" "$LINENO" 5; + as_fn_error $? "${ac_SIMD} flag unsupported as --enable-simd option\nRun ./configure --help for the list of options" "$LINENO" 5; ;; esac diff --git a/configure.ac b/configure.ac index 59792fb0..cf0da9eb 100644 --- a/configure.ac +++ b/configure.ac @@ -3,7 +3,7 @@ # # Project Grid package # -# Time-stamp: <2015-05-27 18:51:47 neo> +# Time-stamp: <2015-06-09 15:26:39 neo> AC_PREREQ([2.63]) AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk]) @@ -29,6 +29,7 @@ AC_PROG_RANLIB AX_CXX_COMPILE_STDCXX_11(noext, mandatory) AX_EXT + # Checks for libraries. #AX_GCC_VAR_ATTRIBUTE(aligned) @@ -75,7 +76,7 @@ supported=no case ${ac_SIMD} in SSE4) echo Configuring for SSE4 - AC_DEFINE([SSE4],[1],[SSE4] ) + AC_DEFINE([SSE4],[1],[SSE4 Intrinsics] ) if test x"$ax_cv_support_ssse3_ext" = x"yes"; then dnl minimal support for SSE4 supported=yes else @@ -84,7 +85,7 @@ case ${ac_SIMD} in ;; AVX) echo Configuring for AVX - AC_DEFINE([AVX1],[1],[AVX] ) + AC_DEFINE([AVX1],[1],[AVX Intrinsics] ) if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX supported=yes else @@ -93,7 +94,7 @@ case ${ac_SIMD} in ;; AVX2) echo Configuring for AVX2 - AC_DEFINE([AVX2],[1],[AVX2] ) + AC_DEFINE([AVX2],[1],[AVX2 Intrinsics] ) if test x"$ax_cv_support_avx2_ext" = x"yes"; then dnl minimal support for AVX2 supported=yes else @@ -102,11 +103,20 @@ case ${ac_SIMD} in ;; AVX512|MIC) echo Configuring for AVX512 and MIC - AC_DEFINE([AVX512],[1],[AVX512] ) + AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Corner] ) supported="cross compilation" ;; + NEONv7) + echo Configuring for experimental ARMv7 support + AC_DEFINE([NEONv7],[1],[NEON ARMv7 Experimental support ] ) + supported="cross compilation" + ;; + DEBUG) + echo Configuring without SIMD support - only for compiler DEBUGGING! + AC_DEFINE([EMPTY_SIMD],[1],[EMPTY_SIMD only for DEBUGGING] ) + ;; *) - AC_MSG_ERROR([${ac_SIMD} unsupported --enable-simd option]); + AC_MSG_ERROR([${ac_SIMD} flag unsupported as --enable-simd option\nRun ./configure --help for the list of options]); ;; esac diff --git a/lib/GridConfig.h b/lib/GridConfig.h index e5b75382..c9dea1cb 100644 --- a/lib/GridConfig.h +++ b/lib/GridConfig.h @@ -1,15 +1,18 @@ /* lib/GridConfig.h. Generated from GridConfig.h.in by configure. */ /* lib/GridConfig.h.in. Generated from configure.ac by autoheader. */ -/* AVX */ +/* AVX Intrinsics */ /* #undef AVX1 */ -/* AVX2 */ +/* AVX2 Intrinsics */ /* #undef AVX2 */ -/* AVX512 */ +/* AVX512 Intrinsics for Knights Corner */ /* #undef AVX512 */ +/* EMPTY_SIMD only for DEBUGGING */ +/* #undef EMPTY_SIMD */ + /* GRID_COMMS_MPI */ /* #undef GRID_COMMS_MPI */ @@ -111,6 +114,9 @@ /* Define to 1 if you have the header file. */ #define HAVE_UNISTD_H 1 +/* NEON ARMv7 Experimental support */ +/* #undef NEONv7 */ + /* Name of package */ #define PACKAGE "grid" @@ -132,7 +138,7 @@ /* Define to the version of this package. */ #define PACKAGE_VERSION "1.0" -/* SSE4 */ +/* SSE4 Intrinsics */ #define SSE4 1 /* Define to 1 if you have the ANSI C header files. */ diff --git a/lib/GridConfig.h.in b/lib/GridConfig.h.in index afa1d1e4..8ff77f3d 100644 --- a/lib/GridConfig.h.in +++ b/lib/GridConfig.h.in @@ -1,14 +1,17 @@ /* lib/GridConfig.h.in. Generated from configure.ac by autoheader. */ -/* AVX */ +/* AVX Intrinsics */ #undef AVX1 -/* AVX2 */ +/* AVX2 Intrinsics */ #undef AVX2 -/* AVX512 */ +/* AVX512 Intrinsics for Knights Corner */ #undef AVX512 +/* EMPTY_SIMD only for DEBUGGING */ +#undef EMPTY_SIMD + /* GRID_COMMS_MPI */ #undef GRID_COMMS_MPI @@ -110,6 +113,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_UNISTD_H +/* NEON ARMv7 Experimental support */ +#undef NEONv7 + /* Name of package */ #undef PACKAGE @@ -131,7 +137,7 @@ /* Define to the version of this package. */ #undef PACKAGE_VERSION -/* SSE4 */ +/* SSE4 Intrinsics */ #undef SSE4 /* Define to 1 if you have the ANSI C header files. */ diff --git a/lib/Make.inc b/lib/Make.inc index c38606e2..edbcff7f 100644 --- a/lib/Make.inc +++ b/lib/Make.inc @@ -1,4 +1,4 @@ -HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Comparison.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h +HFILES=./Cshift.h ./simd/Grid_avx.h ./simd/Grid_vector_types.h ./simd/Grid_sse4.h ./simd/Grid_avx512.h ./simd/Grid_empty.h ./simd/Grid_vector_unops.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./Tensors.h ./Algorithms.h ./communicator/Communicator_base.h ./lattice/Lattice_rng.h ./lattice/Lattice_reduction.h ./lattice/Lattice_transfer.h ./lattice/Lattice_unary.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_comparison.h ./lattice/Lattice_overload.h ./lattice/Lattice_reality.h ./lattice/Lattice_local.h ./lattice/Lattice_conformable.h ./lattice/Lattice_where.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_ET.h ./lattice/Lattice_transpose.h ./lattice/Lattice_trace.h ./Stencil.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_poke.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_class.h ./tensors/Tensor_transpose.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_reality.h ./tensors/Tensor_trace.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_outer.h ./tensors/Tensor_inner.h ./tensors/Tensor_traits.h ./tensors/Tensor_Ta.h ./tensors/Tensor_unary.h ./tensors/Tensor_peek.h ./tensors/Tensor_arith.h ./tensors/Tensor_extract_merge.h ./Communicator.h ./Cartesian.h ./parallelIO/NerscIO.h ./qcd/QCD.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/LinalgUtils.h ./qcd/utils/CovariantCshift.h ./qcd/utils/WilsonLoops.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/spin/TwoSpinor.h ./qcd/spin/Dirac.h ./cshift/Cshift_common.h ./cshift/Cshift_none.h ./cshift/Cshift_mpi.h ./Simd.h ./GridConfig.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_red_black.h ./cartesian/Cartesian_full.h ./AlignedAllocator.h ./Lattice.h ./Threads.h ./Comparison.h ./Grid.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Zolotarev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Remez.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./algorithms/CoarsenedMatrix.h ./stencil/Lebesgue.h -CCFILES=./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc +CCFILES=./qcd/utils/SpaceTimeGrid.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/spin/Dirac.cc ./GridInit.cc ./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc diff --git a/lib/algorithms/approx/.dirstamp b/lib/algorithms/approx/.dirstamp new file mode 100644 index 00000000..e69de29b diff --git a/lib/communicator/.dirstamp b/lib/communicator/.dirstamp new file mode 100644 index 00000000..e69de29b diff --git a/lib/qcd/spin/.dirstamp b/lib/qcd/spin/.dirstamp new file mode 100644 index 00000000..e69de29b diff --git a/lib/qcd/utils/.dirstamp b/lib/qcd/utils/.dirstamp new file mode 100644 index 00000000..e69de29b diff --git a/lib/simd/Grid_avx.h b/lib/simd/Grid_avx.h index ba023146..2dc38cca 100644 --- a/lib/simd/Grid_avx.h +++ b/lib/simd/Grid_avx.h @@ -4,7 +4,7 @@ Using intrinsics */ -// Time-stamp: <2015-05-29 14:13:30 neo> +// Time-stamp: <2015-06-09 14:26:59 neo> //---------------------------------------------------------------------- #include @@ -383,6 +383,12 @@ namespace Grid { _mm_prefetch(ptr+i+512,_MM_HINT_T0); } } + inline void prefetch_HINT_T0(const char *ptr){ + _mm_prefetch(ptr,_MM_HINT_T0); + } + + + template < typename VectorSIMD > inline void Gpermute(VectorSIMD &y,const VectorSIMD &b, int perm ) { Optimization::permute(y.v,b.v,perm); diff --git a/lib/simd/Grid_avx512.h b/lib/simd/Grid_avx512.h index bb914270..10741174 100644 --- a/lib/simd/Grid_avx512.h +++ b/lib/simd/Grid_avx512.h @@ -4,7 +4,7 @@ Using intrinsics */ -// Time-stamp: <2015-05-27 12:08:50 neo> +// Time-stamp: <2015-06-09 14:27:28 neo> //---------------------------------------------------------------------- #include @@ -309,6 +309,12 @@ namespace Grid { _mm_prefetch(ptr+i+512,_MM_HINT_T0); } } + inline void prefetch_HINT_T0(const char *ptr){ + _mm_prefetch(ptr,_MM_HINT_T0); + } + + + // Gpermute utilities consider coalescing into 1 Gpermute template < typename VectorSIMD > diff --git a/lib/simd/Grid_empty.h b/lib/simd/Grid_empty.h new file mode 100644 index 00000000..1c088d87 --- /dev/null +++ b/lib/simd/Grid_empty.h @@ -0,0 +1,289 @@ +//---------------------------------------------------------------------- +/*! @file Grid_sse4.h + @brief Empty Optimization libraries for debugging + + Using intrinsics +*/ +// Time-stamp: <2015-06-09 14:28:02 neo> +//---------------------------------------------------------------------- + +namespace Optimization { + + template + union uconv { + float f; + vtype v; + }; + + union u128f { + float v; + float f[4]; + }; + union u128d { + double v; + double f[2]; + }; + + struct Vsplat{ + //Complex float + inline float operator()(float a, float b){ + return 0; + } + // Real float + inline float operator()(float a){ + return 0; + } + //Complex double + inline double operator()(double a, double b){ + return 0; + } + //Real double + inline double operator()(double a){ + return 0; + } + //Integer + inline int operator()(Integer a){ + return 0; + } + }; + + struct Vstore{ + //Float + inline void operator()(float a, float* F){ + + } + //Double + inline void operator()(double a, double* D){ + + } + //Integer + inline void operator()(int a, Integer* I){ + + } + + }; + + struct Vstream{ + //Float + inline void operator()(float * a, float b){ + + } + //Double + inline void operator()(double * a, double b){ + + } + + + }; + + struct Vset{ + // Complex float + inline float operator()(Grid::ComplexF *a){ + return 0; + } + // Complex double + inline double operator()(Grid::ComplexD *a){ + return 0; + } + // Real float + inline float operator()(float *a){ + return 0; + } + // Real double + inline double operator()(double *a){ + return 0; + } + // Integer + inline int operator()(Integer *a){ + return 0; + } + + + }; + + template + struct Reduce{ + //Need templated class to overload output type + //General form must generate error if compiled + inline Out_type operator()(In_type in){ + printf("Error, using wrong Reduce function\n"); + exit(1); + return 0; + } + }; + + ///////////////////////////////////////////////////// + // Arithmetic operations + ///////////////////////////////////////////////////// + struct Sum{ + //Complex/Real float + inline float operator()(float a, float b){ + return 0; + } + //Complex/Real double + inline double operator()(double a, double b){ + return 0; + } + //Integer + inline int operator()(int a, int b){ + return 0; + } + }; + + struct Sub{ + //Complex/Real float + inline float operator()(float a, float b){ + return 0; + } + //Complex/Real double + inline double operator()(double a, double b){ + return 0; + } + //Integer + inline int operator()(int a, int b){ + return 0; + } + }; + + struct MultComplex{ + // Complex float + inline float operator()(float a, float b){ + return 0; + } + // Complex double + inline double operator()(double a, double b){ + return 0; + } + }; + + struct Mult{ + // Real float + inline float operator()(float a, float b){ + return 0; + } + // Real double + inline double operator()(double a, double b){ + return 0; + } + // Integer + inline int operator()(int a, int b){ + return 0; + } + }; + + struct Conj{ + // Complex single + inline float operator()(float in){ + return 0; + } + // Complex double + inline double operator()(double in){ + return 0; + } + // do not define for integer input + }; + + struct TimesMinusI{ + //Complex single + inline float operator()(float in, float ret){ + return 0; + } + //Complex double + inline double operator()(double in, double ret){ + return 0; + } + + + }; + + struct TimesI{ + //Complex single + inline float operator()(float in, float ret){ + return 0; + } + //Complex double + inline double operator()(double in, double ret){ + return 0; + } + }; + + ////////////////////////////////////////////// + // Some Template specialization + template < typename vtype > + void permute(vtype &a, vtype b, int perm) { + }; + + //Complex float Reduce + template<> + inline Grid::ComplexF Reduce::operator()(float in){ + return 0; + } + //Real float Reduce + template<> + inline Grid::RealF Reduce::operator()(float in){ + return 0; + } + + + //Complex double Reduce + template<> + inline Grid::ComplexD Reduce::operator()(double in){ + return 0; + } + + //Real double Reduce + template<> + inline Grid::RealD Reduce::operator()(double in){ + return 0; + } + + //Integer Reduce + template<> + inline Integer Reduce::operator()(int in){ + // FIXME unimplemented + printf("Reduce : Missing integer implementation -> FIX\n"); + assert(0); + } +} + +////////////////////////////////////////////////////////////////////////////////////// +// Here assign types +namespace Grid { + + typedef float SIMD_Ftype; // Single precision type + typedef double SIMD_Dtype; // Double precision type + typedef int SIMD_Itype; // Integer type + + // prefetch utilities + inline void v_prefetch0(int size, const char *ptr){}; + inline void prefetch_HINT_T0(const char *ptr){}; + + + + // Gpermute function + template < typename VectorSIMD > + inline void Gpermute(VectorSIMD &y,const VectorSIMD &b, int perm ) { + Optimization::permute(y.v,b.v,perm); + } + + + // Function name aliases + typedef Optimization::Vsplat VsplatSIMD; + typedef Optimization::Vstore VstoreSIMD; + typedef Optimization::Vset VsetSIMD; + typedef Optimization::Vstream VstreamSIMD; + template using ReduceSIMD = Optimization::Reduce; + + + + + // Arithmetic operations + typedef Optimization::Sum SumSIMD; + typedef Optimization::Sub SubSIMD; + typedef Optimization::Mult MultSIMD; + typedef Optimization::MultComplex MultComplexSIMD; + typedef Optimization::Conj ConjSIMD; + typedef Optimization::TimesMinusI TimesMinusISIMD; + typedef Optimization::TimesI TimesISIMD; + +} diff --git a/lib/simd/Grid_neon.h b/lib/simd/Grid_neon.h new file mode 100644 index 00000000..73660b40 --- /dev/null +++ b/lib/simd/Grid_neon.h @@ -0,0 +1,308 @@ +//---------------------------------------------------------------------- +/*! @file Grid_sse4.h + @brief Optimization libraries for NEON (ARM) instructions set ARMv7 + + Experimental - Using intrinsics - DEVELOPING! +*/ +// Time-stamp: <2015-06-09 15:25:40 neo> +//---------------------------------------------------------------------- + +#include + +namespace Optimization { + + template + union uconv { + float32x4_t f; + vtype v; + }; + + union u128f { + float32x4_t v; + float f[4]; + }; + union u128d { + float32x4_t v; + float f[4]; + }; + + struct Vsplat{ + //Complex float + inline float32x4_t operator()(float a, float b){ + float32x4_t foo; + return foo; + } + // Real float + inline float32x4_t operator()(float a){ + float32x4_t foo; + return foo; + } + //Complex double + inline float32x4_t operator()(double a, double b){ + float32x4_t foo; + return foo; + } + //Real double + inline float32x4_t operator()(double a){ + float32x4_t foo; + return foo; + } + //Integer + inline uint32x4_t operator()(Integer a){ + uint32x4_t foo; + return foo; + } + }; + + struct Vstore{ + //Float + inline void operator()(float32x4_t a, float* F){ + + } + //Double + inline void operator()(float32x4_t a, double* D){ + + } + //Integer + inline void operator()(uint32x4_t a, Integer* I){ + + } + + }; + + struct Vstream{ + //Float + inline void operator()(float * a, float32x4_t b){ + + } + //Double + inline void operator()(double * a, float32x4_t b){ + + } + + + }; + + struct Vset{ + // Complex float + inline float32x4_t operator()(Grid::ComplexF *a){ + float32x4_t foo; + return foo; + } + // Complex double + inline float32x4_t operator()(Grid::ComplexD *a){ + float32x4_t foo; + return foo; + } + // Real float + inline float32x4_t operator()(float *a){ + float32x4_t foo; + return foo; + } + // Real double + inline float32x4_t operator()(double *a){ + float32x4_t foo; + return foo; + } + // Integer + inline uint32x4_t operator()(Integer *a){ + uint32x4_t foo; + return foo; + } + + + }; + + template + struct Reduce{ + //Need templated class to overload output type + //General form must generate error if compiled + inline Out_type operator()(In_type in){ + printf("Error, using wrong Reduce function\n"); + exit(1); + return 0; + } + }; + + ///////////////////////////////////////////////////// + // Arithmetic operations + ///////////////////////////////////////////////////// + struct Sum{ + //Complex/Real float + inline float32x4_t operator()(float32x4_t a, float32x4_t b){ + float32x4_t foo; + return foo; + } + //Complex/Real double + //inline float32x4_t operator()(float32x4_t a, float32x4_t b){ + // float32x4_t foo; + // return foo; + //} + //Integer + inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){ + uint32x4_t foo; + return foo; + } + }; + + struct Sub{ + //Complex/Real float + inline float32x4_t operator()(float32x4_t a, float32x4_t b){ + float32x4_t foo; + return foo; + } + //Complex/Real double + //inline float32x4_t operator()(float32x4_t a, float32x4_t b){ + // float32x4_t foo; + // return foo; + //} + //Integer + inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){ + uint32x4_t foo; + return foo; + } + }; + + struct MultComplex{ + // Complex float + inline float32x4_t operator()(float32x4_t a, float32x4_t b){ + float32x4_t foo; + return foo; + } + // Complex double + //inline float32x4_t operator()(float32x4_t a, float32x4_t b){ + // float32x4_t foo; + // return foo; + //} + }; + + struct Mult{ + // Real float + inline float32x4_t operator()(float32x4_t a, float32x4_t b){ + return a; + } + // Real double + //inline float32x4_t operator()(float32x4_t a, float32x4_t b){ + // return 0; + //} + // Integer + inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){ + return a; + } + }; + + struct Conj{ + // Complex single + inline float32x4_t operator()(float32x4_t in){ + return in; + } + // Complex double + //inline float32x4_t operator()(float32x4_t in){ + // return 0; + //} + // do not define for integer input + }; + + struct TimesMinusI{ + //Complex single + inline float32x4_t operator()(float32x4_t in, float32x4_t ret){ + return in; + } + //Complex double + //inline float32x4_t operator()(float32x4_t in, float32x4_t ret){ + // return in; + //} + + + }; + + struct TimesI{ + //Complex single + inline float32x4_t operator()(float32x4_t in, float32x4_t ret){ + return in; + } + //Complex double + //inline float32x4_t operator()(float32x4_t in, float32x4_t ret){ + // return 0; + //} + }; + + ////////////////////////////////////////////// + // Some Template specialization + template < typename vtype > + void permute(vtype &a, vtype b, int perm) { + + }; + + //Complex float Reduce + template<> + inline Grid::ComplexF Reduce::operator()(float32x4_t in){ + return 0; + } + //Real float Reduce + template<> + inline Grid::RealF Reduce::operator()(float32x4_t in){ + return 0; + } + + + //Complex double Reduce + template<> + inline Grid::ComplexD Reduce::operator()(float32x4_t in){ + return 0; + } + + //Real double Reduce + template<> + inline Grid::RealD Reduce::operator()(float32x4_t in){ + return 0; + } + + //Integer Reduce + template<> + inline Integer Reduce::operator()(uint32x4_t in){ + // FIXME unimplemented + printf("Reduce : Missing integer implementation -> FIX\n"); + assert(0); + } +} + +////////////////////////////////////////////////////////////////////////////////////// +// Here assign types +namespace Grid { + + typedef float32x4_t SIMD_Ftype; // Single precision type + typedef float32x4_t SIMD_Dtype; // Double precision type - no double on ARMv7 + typedef uint32x4_t SIMD_Itype; // Integer type + + inline void v_prefetch0(int size, const char *ptr){}; // prefetch utilities + inline void prefetch_HINT_T0(const char *ptr){}; + + + // Gpermute function + template < typename VectorSIMD > + inline void Gpermute(VectorSIMD &y,const VectorSIMD &b, int perm ) { + Optimization::permute(y.v,b.v,perm); + } + + + // Function name aliases + typedef Optimization::Vsplat VsplatSIMD; + typedef Optimization::Vstore VstoreSIMD; + typedef Optimization::Vset VsetSIMD; + typedef Optimization::Vstream VstreamSIMD; + template using ReduceSIMD = Optimization::Reduce; + + + + + // Arithmetic operations + typedef Optimization::Sum SumSIMD; + typedef Optimization::Sub SubSIMD; + typedef Optimization::Mult MultSIMD; + typedef Optimization::MultComplex MultComplexSIMD; + typedef Optimization::Conj ConjSIMD; + typedef Optimization::TimesMinusI TimesMinusISIMD; + typedef Optimization::TimesI TimesISIMD; + +} diff --git a/lib/simd/Grid_sse4.h b/lib/simd/Grid_sse4.h index 0da888f7..3a54c0b3 100644 --- a/lib/simd/Grid_sse4.h +++ b/lib/simd/Grid_sse4.h @@ -4,7 +4,7 @@ Using intrinsics */ -// Time-stamp: <2015-05-27 12:02:07 neo> +// Time-stamp: <2015-06-09 14:24:01 neo> //---------------------------------------------------------------------- #include @@ -297,7 +297,12 @@ namespace Grid { typedef __m128d SIMD_Dtype; // Double precision type typedef __m128i SIMD_Itype; // Integer type - inline void v_prefetch0(int size, const char *ptr){}; // prefetch utilities + // prefetch utilities + inline void v_prefetch0(int size, const char *ptr){}; + inline void prefetch_HINT_T0(const char *ptr){ + _mm_prefetch(ptr,_MM_HINT_T0); + } + // Gpermute function template < typename VectorSIMD > diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index 9525e45e..c670f531 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -2,11 +2,14 @@ /*! @file Grid_vector_types.h @brief Defines templated class Grid_simd to deal with inner vector types */ -// Time-stamp: <2015-05-29 14:19:48 neo> +// Time-stamp: <2015-06-09 15:00:47 neo> //--------------------------------------------------------------------------- #ifndef GRID_VECTOR_TYPES #define GRID_VECTOR_TYPES +#ifdef EMPTY_SIMD +#include "Grid_empty.h" +#endif #ifdef SSE4 #include "Grid_sse4.h" #endif @@ -19,6 +22,9 @@ #if defined QPX #include "Grid_qpx.h" #endif +#ifdef NEONv7 +#include "Grid_neon.h" +#endif namespace Grid { @@ -152,7 +158,7 @@ namespace Grid { /////////////////////// friend inline void vprefetch(const Grid_simd &v) { - _mm_prefetch((const char*)&v.v,_MM_HINT_T0); + prefetch_HINT_T0((const char*)&v.v); } /////////////////////// diff --git a/lib/stencil/.dirstamp b/lib/stencil/.dirstamp new file mode 100644 index 00000000..e69de29b diff --git a/scripts/arm_configure.experimental b/scripts/arm_configure.experimental new file mode 100644 index 00000000..fbad84a5 --- /dev/null +++ b/scripts/arm_configure.experimental @@ -0,0 +1 @@ +./configure --host=arm-linux-gnueabihf CXX=clang++-3.5 CXXFLAGS='-std=c++11 -O3 -target arm-linux-gnueabihf -I/usr/arm-linux-gnueabihf/include/ -I/home/neo/Codes/gmp6.0/gmp-arm/include/ -I/usr/arm-linux-gnueabihf/include/c++/4.8.2/arm-linux-gnueabihf/ -L/home/neo/Codes/gmp6.0/gmp-arm/lib/ -I/home/neo/Codes/mpfr3.1.2/mpfr-arm/include/ -L/home/neo/Codes/mpfr3.1.2/mpfr-arm/lib/ -static -mcpu=cortex-a7' --enable-simd=NEONv7 diff --git a/tests/Make.inc b/tests/Make.inc index 2b132fd0..1f5573ca 100644 --- a/tests/Make.inc +++ b/tests/Make.inc @@ -1,9 +1,5 @@ -bin_PROGRAMS = Test_GaugeAction Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cf_coarsen_support Test_cf_cr_unprec Test_contfrac_cg Test_contfrac_even_odd Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_gamma Test_main Test_multishift_sqrt Test_nersc_io Test_remez Test_rng Test_rng_fixed Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd - - -Test_GaugeAction_SOURCES=Test_GaugeAction.cc -Test_GaugeAction_LDADD=-lGrid +bin_PROGRAMS = Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cf_coarsen_support Test_cf_cr_unprec Test_contfrac_cg Test_contfrac_even_odd Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_gamma Test_GaugeAction Test_main Test_multishift_sqrt Test_nersc_io Test_remez Test_rng Test_rng_fixed Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_cayley_cg_SOURCES=Test_cayley_cg.cc @@ -66,6 +62,10 @@ Test_gamma_SOURCES=Test_gamma.cc Test_gamma_LDADD=-lGrid +Test_GaugeAction_SOURCES=Test_GaugeAction.cc +Test_GaugeAction_LDADD=-lGrid + + Test_main_SOURCES=Test_main.cc Test_main_LDADD=-lGrid From f19518d56466b59344c208680e9e14d954ea7c4c Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 9 Jun 2015 10:25:29 +0100 Subject: [PATCH 09/24] Unary ops and coarse grid support --- lib/tensors/Tensor_traits.h | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/tensors/Tensor_traits.h b/lib/tensors/Tensor_traits.h index c47dca10..b5715c93 100644 --- a/lib/tensors/Tensor_traits.h +++ b/lib/tensors/Tensor_traits.h @@ -119,7 +119,6 @@ namespace Grid { static const bool value = true; static const bool notvalue = false; }; - template<> struct isGridTensor { static const bool value = false; static const bool notvalue = true; From 1048304f3081c1e4b7661c943e0dfb43f781f447 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 9 Jun 2015 10:26:19 +0100 Subject: [PATCH 10/24] Some unary ops and coarse grid support --- lib/Grid.h | 1 - lib/Make.inc | 2 +- lib/algorithms/CoarsenedMatrix.h | 134 +++++++++++++++--- lib/lattice/Lattice_base.h | 4 +- .../Lattice_comparison_utils.h} | 2 - lib/lattice/Lattice_coordinate.h | 14 +- lib/lattice/Lattice_transfer.h | 23 ++- lib/lattice/Lattice_unary.h | 41 ++++++ lib/qcd/action/fermion/WilsonFermion5D.cc | 4 + lib/simd/Grid_vector_unops.h | 12 ++ lib/tensors/Tensor_class.h | 9 +- lib/tensors/Tensor_unary.h | 29 ++++ scripts/linecount | 11 +- tests/Test_cayley_coarsen_support.cc | 19 ++- tests/Test_cshift.cc | 1 - 15 files changed, 258 insertions(+), 48 deletions(-) rename lib/{Comparison.h => lattice/Lattice_comparison_utils.h} (98%) diff --git a/lib/Grid.h b/lib/Grid.h index 16530434..aec0e9ac 100644 --- a/lib/Grid.h +++ b/lib/Grid.h @@ -55,7 +55,6 @@ #include // subdir aggregate #include // subdir aggregate #include // subdir aggregate -#include #include // subdir aggregate #include // subdir aggregate #include // subdir aggregate diff --git a/lib/Make.inc b/lib/Make.inc index c38606e2..7c7e02c8 100644 --- a/lib/Make.inc +++ b/lib/Make.inc @@ -1,4 +1,4 @@ -HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Comparison.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h +HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h CCFILES=./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h index 54ea279e..006da07f 100644 --- a/lib/algorithms/CoarsenedMatrix.h +++ b/lib/algorithms/CoarsenedMatrix.h @@ -6,12 +6,42 @@ namespace Grid { class Geometry { + // int dimension; public: int npoint; - int dimension; std::vector directions ; std::vector displacements; + std::vector opdirs; + // FIXME -- don't like xposing the operator directions + // as different to the geometrical dirs + // Also don't like special casing five dim.. should pass an object in template + Geometry(int _d) { + + int base = (_d==5) ? 1:0; + + // make coarse grid stencil for 4d , not 5d + if ( _d==5 ) _d=4; + + npoint = 2*_d+1; + directions.resize(npoint); + displacements.resize(npoint); + opdirs.resize(npoint); + for(int d=0;d<_d;d++){ + directions[2*d ] = d+base; + directions[2*d+1] = d+base; + opdirs[2*d ] = d; + opdirs[2*d+1] = d; + displacements[2*d ] = +1; + displacements[2*d+1] = -1; + } + directions [2*_d]=0; + displacements[2*_d]=0; + opdirs [2*_d]=0; + } + + /* + // Original cleaner code Geometry(int _d) : dimension(_d), npoint(2*_d+1), directions(npoint), displacements(npoint) { for(int d=0;d GetDelta(int point) { std::vector delta(dimension,0); delta[directions[point]] = displacements[point]; return delta; }; + */ }; @@ -50,7 +80,10 @@ namespace Grid { Geometry geom; GridBase * _grid; CartesianStencil Stencil; + std::vector A; + std::vector Aslow; + std::vector > comm_buf; /////////////////////// @@ -63,7 +96,7 @@ namespace Grid { SimpleCompressor compressor; Stencil.HaloExchange(in,comm_buf,compressor); - //PARALLEL_FOR_LOOP +PARALLEL_FOR_LOOP for(int ss=0;ssoSites();ss++){ siteVector res = zero; siteVector tmp; @@ -101,43 +134,112 @@ namespace Grid { _grid(&CoarseGrid), geom(CoarseGrid._ndimension), Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements), - A(geom.npoint,&CoarseGrid) + A(geom.npoint,&CoarseGrid), + Aslow(geom.npoint,&CoarseGrid) { comm_buf.resize(Stencil._unified_buffer_size); }; void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase > &linop,std::vector > & subspace){ - FineField phi(FineGrid); - FineField Mphi(FineGrid); - CoarseVector Proj(Grid()); + FineField iblock(FineGrid); // contributions from within this block + FineField oblock(FineGrid); // contributions from outwith this block + + FineField phi(FineGrid); + FineField tmp(FineGrid); + FineField zz(FineGrid); zz=zero; + FineField Mphi(FineGrid); + + Lattice > coor(FineGrid); + + CoarseVector iProj(Grid()); + CoarseVector oProj(Grid()); CoarseScalar InnerProd(Grid()); // Orthogonalise the subblocks over the basis blockOrthogonalise(InnerProd,subspace); + blockProject(iProj,subspace[0],subspace); // Compute the matrix elements of linop between this orthonormal // set of vectors. + int self_stencil=-1; + for(int p=0;p_rdimensions[dir])/(Grid()->_rdimensions[dir]); - blockProject(Proj,Mphi,subspace); + LatticeCoordinate(coor,dir); - for(int ss=0;ssoSites();ss++){ - for(int j=0;joSites();ss++){ + for(int j=0;j bc(FineGrid->_ndimension,0); + + blockPick(Grid(),phi,tmp,bc); // Pick out a block + linop.Op(tmp,Mphi); // Apply big dop + blockProject(iProj,Mphi,subspace); // project it and print it + std::cout<< " Computed matrix elements from block zero only "< #include #include +#include +#include #include +#include #include #include #include - #endif diff --git a/lib/Comparison.h b/lib/lattice/Lattice_comparison_utils.h similarity index 98% rename from lib/Comparison.h rename to lib/lattice/Lattice_comparison_utils.h index ecd6ece0..d5e3b3e5 100644 --- a/lib/Comparison.h +++ b/lib/lattice/Lattice_comparison_utils.h @@ -141,7 +141,5 @@ namespace Grid { } } -#include -#include #endif diff --git a/lib/lattice/Lattice_coordinate.h b/lib/lattice/Lattice_coordinate.h index c49aed89..a83dbe49 100644 --- a/lib/lattice/Lattice_coordinate.h +++ b/lib/lattice/Lattice_coordinate.h @@ -3,20 +3,8 @@ namespace Grid { - /* - depbase=`echo Grid_main.o | sed 's|[^/]*$|.deps/&|;s|\.o$||'`;\ - icpc -DHAVE_CONFIG_H -I. -I../lib -I../lib -mmic -O3 -std=c++11 -fopenmp -MT Grid_main.o -MD -MP -MF $depbase.Tpo -c -o Grid_main.o Grid_main.cc &&\ - mv -f $depbase.Tpo $depbase.Po - ../lib/lattice/Grid_lattice_coordinate.h(25): error: no suitable user-defined conversion from "vector_type" to "const Grid::iScalar>>" exists - l._odata[o]=vI; - ^ - detected during instantiation of "void Grid::LatticeCoordinate(Grid::Lattice &, int) [with iobj=Grid::QCD::vTInteger]" at line 283 of "Grid_main.cc" - - compilation aborted for Grid_main.cc (code 2) -*/ template inline void LatticeCoordinate(Lattice &l,int mu) { - typedef typename iobj::scalar_object scalar_object; typedef typename iobj::scalar_type scalar_type; typedef typename iobj::vector_type vector_type; @@ -33,7 +21,7 @@ namespace Grid { mergebuf[i]=(Integer)gcoor[mu]; } merge(vI,mergebuf); - l._odata[o]._internal._internal._internal=vI; + l._odata[o]=vI; } }; diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 871ed893..12993680 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -166,9 +166,10 @@ template inline void blockNormalise(Lattice &ip,Lattice &fineX) { GridBase *coarse = ip._grid; + Lattice zz(fineX._grid); zz=zero; blockInnerProduct(ip,fineX,fineX); ip = rsqrt(ip); - blockZAXPY(fineX,ip,fineX,fineX); + blockZAXPY(fineX,ip,fineX,zz); } // useful in multigrid project; // Generic name : Coarsen? @@ -205,6 +206,26 @@ inline void blockSum(Lattice &coarseData,const Lattice &fineData) return; } +template +inline void blockPick(GridBase *coarse,const Lattice &unpicked,Lattice &picked,std::vector coor) +{ + GridBase * fine = unpicked._grid; + + Lattice zz(fine); + Lattice > fcoor(fine); + + zz = zero; + + picked = unpicked; + for(int d=0;d_ndimension;d++){ + LatticeCoordinate(fcoor,d); + int block= fine->_rdimensions[d] / coarse->_rdimensions[d]; + int lo = (coor[d])*block; + int hi = (coor[d]+1)*block; + picked = where( (fcoor=lo), picked, zz); + } +} template inline void blockOrthogonalise(Lattice &ip,std::vector > &Basis) diff --git a/lib/lattice/Lattice_unary.h b/lib/lattice/Lattice_unary.h index 131d7d68..fcdcc484 100644 --- a/lib/lattice/Lattice_unary.h +++ b/lib/lattice/Lattice_unary.h @@ -26,7 +26,48 @@ PARALLEL_FOR_LOOP } return ret; } + template Lattice sin(const Lattice &rhs){ + Lattice ret(rhs._grid); + ret.checkerboard = rhs.checkerboard; + conformable(ret,rhs); +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ + ret._odata[ss]=sin(rhs._odata[ss]); + } + return ret; + } + template Lattice cos(const Lattice &rhs){ + Lattice ret(rhs._grid); + ret.checkerboard = rhs.checkerboard; + conformable(ret,rhs); +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ + ret._odata[ss]=cos(rhs._odata[ss]); + } + return ret; + } + template Lattice pow(const Lattice &rhs,RealD y){ + Lattice ret(rhs._grid); + ret.checkerboard = rhs.checkerboard; + conformable(ret,rhs); +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ + ret._odata[ss]=pow(rhs._odata[ss],y); + } + return ret; + } + template Lattice mod(const Lattice &rhs,Integer y){ + Lattice ret(rhs._grid); + ret.checkerboard = rhs.checkerboard; + conformable(ret,rhs); +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ + ret._odata[ss]=mod(rhs._odata[ss],y); + } + return ret; + } + } #endif diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index 4ddd14ab..7d9ffe22 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -85,6 +85,7 @@ void WilsonFermion5D::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGau void WilsonFermion5D::DhopDir(const LatticeFermion &in, LatticeFermion &out,int dir,int disp) { assert( (disp==1)||(disp==-1) ); + assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t; WilsonCompressor compressor(DaggerNo); Stencil.HaloExchange(in,comm_buf,compressor); @@ -93,6 +94,9 @@ void WilsonFermion5D::DhopDir(const LatticeFermion &in, LatticeFermion &out,int int dirdisp = dir+skip*4; + assert(dirdisp<=7); + assert(dirdisp>=0); + PARALLEL_FOR_LOOP for(int ss=0;ssoSites();ss++){ for(int s=0;s struct ModIntFunctor { + Integer y; + ModIntFunctor(Integer _y) : y(_y) {}; + scalar operator()(const scalar &a) const { + return Integer(a)%y; + } + }; + template < class S, class V > inline Grid_simd sqrt(const Grid_simd &r) { return SimdApply(SqrtRealFunctor(),r); @@ -55,6 +63,10 @@ namespace Grid { inline Grid_simd pow(const Grid_simd &r,double y) { return SimdApply(PowRealFunctor(y),r); } + template < class S, class V > + inline Grid_simd mod(const Grid_simd &r,Integer y) { + return SimdApply(ModIntFunctor(y),r); + } } #endif diff --git a/lib/tensors/Tensor_class.h b/lib/tensors/Tensor_class.h index 3ed4f865..019e6fb1 100644 --- a/lib/tensors/Tensor_class.h +++ b/lib/tensors/Tensor_class.h @@ -90,10 +90,10 @@ public: operator ComplexD () const { return(TensorRemove(_internal)); }; operator RealD () const { return(real(TensorRemove(_internal))); } - // convert from a something to a scalar - template::value, T>::type* = nullptr > strong_inline auto operator = (T arg) -> iScalar + // convert from a something to a scalar via constructor of something arg + template::value, T>::type* = nullptr > strong_inline iScalar operator = (T arg) { - _internal = vtype(arg); + _internal = arg; return *this; } @@ -316,7 +316,8 @@ public: stream< inline auto func(const iMatrix &z) -> iMatrix inline iScalar func(const iScalar &z,scal y) \ +{\ + iScalar ret;\ + ret._internal = func(z._internal,y); \ + return ret;\ +}\ + template inline iVector func(const iVector &z,scal y) \ +{\ + iVector ret;\ + for(int c1=0;c1 inline iMatrix func(const iMatrix &z, scal y) \ +{\ + iMatrix ret;\ + for(int c1=0;c1 U(4,UGrid); - - for(int mu=0;mu(Umu,mu); + Umu=zero; + Complex cone(1.0,0.0); + for(int nn=0;nn(Umu,U[nn],nn); } - +#endif RealD mass=0.5; RealD M5=1.8; @@ -75,7 +80,7 @@ int main (int argc, char ** argv) err = result-ref; std::cout<<"Error "< subspace(nbasis,FGrid); for(int b=0;b Date: Tue, 9 Jun 2015 18:59:45 +0900 Subject: [PATCH 11/24] Adding support for iMatrix exponentiation --- lib/simd/Old/Grid_vComplexD.h | 430 ------------------------------- lib/simd/Old/Grid_vComplexF.h | 463 ---------------------------------- lib/simd/Old/Grid_vInteger.h | 259 ------------------- lib/simd/Old/Grid_vRealD.h | 276 -------------------- lib/simd/Old/Grid_vRealF.h | 313 ----------------------- lib/tensors/Tensor_Ta.h | 84 +++++- tests/InvSqrt.gnu | 0 tests/Sqrt.gnu | 2 + tests/Test_main.cc | 3 + 9 files changed, 83 insertions(+), 1747 deletions(-) delete mode 100644 lib/simd/Old/Grid_vComplexD.h delete mode 100644 lib/simd/Old/Grid_vComplexF.h delete mode 100644 lib/simd/Old/Grid_vInteger.h delete mode 100644 lib/simd/Old/Grid_vRealD.h delete mode 100644 lib/simd/Old/Grid_vRealF.h create mode 100644 tests/InvSqrt.gnu create mode 100644 tests/Sqrt.gnu diff --git a/lib/simd/Old/Grid_vComplexD.h b/lib/simd/Old/Grid_vComplexD.h deleted file mode 100644 index 116c2866..00000000 --- a/lib/simd/Old/Grid_vComplexD.h +++ /dev/null @@ -1,430 +0,0 @@ -#ifndef GRID_VCOMPLEXD_H -#define GRID_VCOMPLEXD_H - -namespace Grid { - class vComplexD { - public: - zvec v; - public: - typedef zvec vector_type; - typedef ComplexD scalar_type; - - vComplexD & operator = ( Zero & z){ - vzero(*this); - return (*this); - } - vComplexD( Zero & z){ - vzero(*this); - } - vComplexD()=default; - vComplexD(ComplexD a){ - vsplat(*this,a); - }; - vComplexD(double a){ - vsplat(*this,ComplexD(a)); - }; - - /////////////////////////////////////////////// - // mac, mult, sub, add, adj - // Should do an AVX2 version with mac. - /////////////////////////////////////////////// - friend inline void mac (vComplexD * __restrict__ y,const vComplexD * __restrict__ a,const vComplexD *__restrict__ x) {*y = (*a)*(*x)+(*y);}; - friend inline void mult(vComplexD * __restrict__ y,const vComplexD * __restrict__ l,const vComplexD *__restrict__ r) {*y = (*l) * (*r);} - friend inline void sub (vComplexD * __restrict__ y,const vComplexD * __restrict__ l,const vComplexD *__restrict__ r) {*y = (*l) - (*r);} - friend inline void add (vComplexD * __restrict__ y,const vComplexD * __restrict__ l,const vComplexD *__restrict__ r) {*y = (*l) + (*r);} - friend inline vComplexD adj(const vComplexD &in){ return conjugate(in); } - - ////////////////////////////////// - // Initialise to 1,0,i - ////////////////////////////////// - friend inline void vone (vComplexD &ret){ vsplat(ret,1.0,0.0);} - friend inline void vzero (vComplexD &ret){ vsplat(ret,0.0,0.0);} - friend inline void vcomplex_i(vComplexD &ret){ vsplat(ret,0.0,1.0);} - - //////////////////////////////////// - // Arithmetic operator overloads +,-,* - //////////////////////////////////// - friend inline vComplexD operator + (vComplexD a, vComplexD b) - { - vComplexD ret; -#if defined (AVX1)|| defined (AVX2) - ret.v = _mm256_add_pd(a.v,b.v); -#endif -#ifdef SSE4 - ret.v = _mm_add_pd(a.v,b.v); -#endif -#ifdef AVX512 - ret.v = _mm512_add_pd(a.v,b.v); -#endif -#ifdef QPX - ret.v = vec_add(a.v,b.v); -#endif - return ret; - }; - - friend inline vComplexD operator - (vComplexD a, vComplexD b) - { - vComplexD ret; -#if defined (AVX1)|| defined (AVX2) - ret.v = _mm256_sub_pd(a.v,b.v); -#endif -#ifdef SSE4 - ret.v = _mm_sub_pd(a.v,b.v); -#endif -#ifdef AVX512 - ret.v = _mm512_sub_pd(a.v,b.v); -#endif -#ifdef QPX - ret.v = vec_sub(a.v,b.v); -#endif - return ret; - }; - - friend inline vComplexD operator * (vComplexD a, vComplexD b) - { - vComplexD ret; - - //Multiplicationof (ak+ibk)*(ck+idk) - // a + i b can be stored as a data structure - //From intel optimisation reference guide - /* - movsldup xmm0, Src1; load real parts into the destination, - ; a1, a1, a0, a0 - movaps xmm1, src2; load the 2nd pair of complex values, ; i.e. d1, c1, d0, c0 - mulps xmm0, xmm1; temporary results, a1d1, a1c1, a0d0, ; a0c0 - shufps xmm1, xmm1, b1; reorder the real and imaginary ; parts, c1, d1, c0, d0 - movshdup xmm2, Src1; load the imaginary parts into the ; destination, b1, b1, b0, b0 - mulps xmm2, xmm1; temporary results, b1c1, b1d1, b0c0, ; b0d0 - addsubps xmm0, xmm2; b1c1+a1d1, a1c1 -b1d1, b0c0+a0d - VSHUFPD (VEX.256 encoded version) - IF IMM0[0] = 0 - THEN DEST[63:0]=SRC1[63:0] ELSE DEST[63:0]=SRC1[127:64] FI; - IF IMM0[1] = 0 - THEN DEST[127:64]=SRC2[63:0] ELSE DEST[127:64]=SRC2[127:64] FI; - IF IMM0[2] = 0 - THEN DEST[191:128]=SRC1[191:128] ELSE DEST[191:128]=SRC1[255:192] FI; - IF IMM0[3] = 0 - THEN DEST[255:192]=SRC2[191:128] ELSE DEST[255:192]=SRC2[255:192] FI; // Ox5 r<->i ; 0xC unchanged - */ -#if defined (AVX1)|| defined (AVX2) - zvec ymm0,ymm1,ymm2; - ymm0 = _mm256_shuffle_pd(a.v,a.v,0x0); // ymm0 <- ar ar, ar,ar b'00,00 - ymm0 = _mm256_mul_pd(ymm0,b.v); // ymm0 <- ar bi, ar br - ymm1 = _mm256_shuffle_pd(b.v,b.v,0x5); // ymm1 <- br,bi b'01,01 - ymm2 = _mm256_shuffle_pd(a.v,a.v,0xF); // ymm2 <- ai,ai b'11,11 - ymm1 = _mm256_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi - ret.v= _mm256_addsub_pd(ymm0,ymm1); -#endif -#ifdef SSE4 - zvec ymm0,ymm1,ymm2; - ymm0 = _mm_shuffle_pd(a.v,a.v,0x0); // ymm0 <- ar ar, - ymm0 = _mm_mul_pd(ymm0,b.v); // ymm0 <- ar bi, ar br - ymm1 = _mm_shuffle_pd(b.v,b.v,0x1); // ymm1 <- br,bi b01 - ymm2 = _mm_shuffle_pd(a.v,a.v,0x3); // ymm2 <- ai,ai b11 - ymm1 = _mm_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi - ret.v= _mm_addsub_pd(ymm0,ymm1); -#endif -#ifdef AVX512 - /* This is from - * Automatic SIMD Vectorization of Fast Fourier Transforms for the Larrabee and AVX Instruction Sets - * @inproceedings{McFarlin:2011:ASV:1995896.1995938, - * author = {McFarlin, Daniel S. and Arbatov, Volodymyr and Franchetti, Franz and P\"{u}schel, Markus}, - * title = {Automatic SIMD Vectorization of Fast Fourier Transforms for the Larrabee and AVX Instruction Sets}, - * booktitle = {Proceedings of the International Conference on Supercomputing}, - * series = {ICS '11}, - * year = {2011}, - * isbn = {978-1-4503-0102-2}, - * location = {Tucson, Arizona, USA}, - * pages = {265--274}, - * numpages = {10}, - * url = {http://doi.acm.org/10.1145/1995896.1995938}, - * doi = {10.1145/1995896.1995938}, - * acmid = {1995938}, - * publisher = {ACM}, - * address = {New York, NY, USA}, - * keywords = {autovectorization, fourier transform, program generation, simd, super-optimization}, - * } - */ - zvec vzero,ymm0,ymm1,real,imag; - vzero =(zvec)_mm512_setzero(); - ymm0 = _mm512_swizzle_pd(a.v, _MM_SWIZ_REG_CDAB); // - real =(zvec)_mm512_mask_or_epi64((__m512i)a.v, 0xAA,(__m512i)vzero,(__m512i) ymm0); - imag = _mm512_mask_sub_pd(a.v, 0x55,vzero, ymm0); - ymm1 = _mm512_mul_pd(real, b.v); - ymm0 = _mm512_swizzle_pd(b.v, _MM_SWIZ_REG_CDAB); // OK - ret.v= _mm512_fmadd_pd(ymm0,imag,ymm1); - /* Imag OK */ -#endif -#ifdef QPX - ret.v = vec_mul(a.v,b.v); -#endif - return ret; - }; - - //////////////////////////////////////////////////////////////////// - // General permute; assumes vector length is same across - // all subtypes; may not be a good assumption, but could - // add the vector width as a template param for BG/Q for example - //////////////////////////////////////////////////////////////////// - friend inline void permute(vComplexD &y,vComplexD b,int perm) - { - Gpermute(y,b,perm); - } - /* - friend inline void merge(vComplexD &y,std::vector &extracted) - { - Gmerge(y,extracted); - } - friend inline void extract(const vComplexD &y,std::vector &extracted) - { - Gextract(y,extracted); - } - friend inline void merge(vComplexD &y,std::vector &extracted) - { - Gmerge(y,extracted); - } - friend inline void extract(const vComplexD &y,std::vector &extracted) - { - Gextract(y,extracted); - } - */ - - /////////////////////// - // Splat - /////////////////////// - friend inline void vsplat(vComplexD &ret,ComplexD c){ - float a= real(c); - float b= imag(c); - vsplat(ret,a,b); - } - - - friend inline void vsplat(vComplexD &ret,double rl,double ig){ -#if defined (AVX1)|| defined (AVX2) - ret.v = _mm256_set_pd(ig,rl,ig,rl); -#endif -#ifdef SSE4 - ret.v = _mm_set_pd(ig,rl); -#endif -#ifdef AVX512 - ret.v = _mm512_set_pd(ig,rl,ig,rl,ig,rl,ig,rl); -#endif -#ifdef QPX - ret.v = {ig,rl,ig,rl}; -#endif - } - - friend inline void vset(vComplexD &ret,ComplexD *a){ -#if defined (AVX1)|| defined (AVX2) - ret.v = _mm256_set_pd(a[1].imag(),a[1].real(),a[0].imag(),a[0].real()); -#endif -#ifdef SSE4 - ret.v = _mm_set_pd(a[0].imag(),a[0].real()); -#endif -#ifdef AVX512 - ret.v = _mm512_set_pd(a[3].imag(),a[3].real(),a[2].imag(),a[2].real(),a[1].imag(),a[1].real(),a[0].imag(),a[0].real()); - // Note v has a0 a1 a2 a3 -#endif -#ifdef QPX - ret.v = {a[0].real(),a[0].imag(),a[1].real(),a[3].imag()}; -#endif - } - -friend inline void vstore(const vComplexD &ret, ComplexD *a){ -#if defined (AVX1)|| defined (AVX2) - _mm256_store_pd((double *)a,ret.v); -#endif -#ifdef SSE4 - _mm_store_pd((double *)a,ret.v); -#endif -#ifdef AVX512 - _mm512_store_pd((double *)a,ret.v); - //Note v has a3 a2 a1 a0 -#endif -#ifdef QPX - assert(0); -#endif - } - friend inline void vstream(vComplexD &out,const vComplexD &in){ -#if defined (AVX1)|| defined (AVX2) - _mm256_stream_pd((double *)&out.v,in.v); -#endif -#ifdef SSE4 - _mm_stream_pd((double *)&out.v,in.v); -#endif -#ifdef AVX512 - _mm512_storenrngo_pd((double *)&out.v,in.v); - // _mm512_stream_pd((double *)&out.v,in.v); - //Note v has a3 a2 a1 a0 -#endif -#ifdef QPX - assert(0); -#endif - } - friend inline void prefetch(const vComplexD &v) - { - _mm_prefetch((const char*)&v.v,_MM_HINT_T0); - } - - //////////////////////// - // Conjugate - //////////////////////// - friend inline vComplexD conjugate(const vComplexD &in){ - vComplexD ret ; vzero(ret); -#if defined (AVX1)|| defined (AVX2) - // addsubps 0, inv=>0+in.v[3] 0-in.v[2], 0+in.v[1], 0-in.v[0], ... - zvec tmp = _mm256_addsub_pd(ret.v,_mm256_shuffle_pd(in.v,in.v,0x5)); - ret.v =_mm256_shuffle_pd(tmp,tmp,0x5); -#endif -#ifdef SSE4 - zvec tmp = _mm_addsub_pd(ret.v,_mm_shuffle_pd(in.v,in.v,0x1)); - ret.v = _mm_shuffle_pd(tmp,tmp,0x1); -#endif -#ifdef AVX512 - ret.v = _mm512_mask_sub_pd(in.v, 0xaa,ret.v, in.v); -#endif -#ifdef QPX - assert(0); -#endif - return ret; - } - - friend inline void timesMinusI(vComplexD &ret,const vComplexD &in){ - vzero(ret); - vComplexD tmp; -#if defined (AVX1)|| defined (AVX2) - tmp.v =_mm256_addsub_pd(ret.v,in.v); // r,-i - ret.v =_mm256_shuffle_pd(tmp.v,tmp.v,0x5); -#endif -#ifdef SSE4 - tmp.v =_mm_addsub_pd(ret.v,in.v); // r,-i - ret.v =_mm_shuffle_pd(tmp.v,tmp.v,0x1); -#endif -#ifdef AVX512 - ret.v = _mm512_mask_sub_pd(in.v,0xaa,ret.v,in.v); // real -imag - ret.v = _mm512_swizzle_pd(ret.v, _MM_SWIZ_REG_CDAB);// OK -#endif -#ifdef QPX - assert(0); -#endif - } - - friend inline void timesI(vComplexD &ret, const vComplexD &in){ - vzero(ret); - vComplexD tmp; -#if defined (AVX1)|| defined (AVX2) - tmp.v =_mm256_shuffle_pd(in.v,in.v,0x5); - ret.v =_mm256_addsub_pd(ret.v,tmp.v); // i,-r -#endif -#ifdef SSE4 - tmp.v =_mm_shuffle_pd(in.v,in.v,0x1); - ret.v =_mm_addsub_pd(ret.v,tmp.v); // r,-i -#endif -#ifdef AVX512 - tmp.v = _mm512_swizzle_pd(in.v, _MM_SWIZ_REG_CDAB);// OK - ret.v = _mm512_mask_sub_pd(tmp.v,0xaa,ret.v,tmp.v); // real -imag -#endif -#ifdef QPX - assert(0); -#endif - } - - friend inline vComplexD timesMinusI(const vComplexD &in){ - vComplexD ret; - timesMinusI(ret,in); - return ret; - } - - friend inline vComplexD timesI(const vComplexD &in){ - vComplexD ret; - timesI(ret,in); - return ret; - } - - -// REDUCE FIXME must be a cleaner implementation - friend inline ComplexD Reduce(const vComplexD & in) - { - vComplexD v1,v2; - union { - zvec v; - double f[sizeof(zvec)/sizeof(double)]; - } conv; - -#ifdef SSE4 - v1=in; -#endif -#if defined(AVX1) || defined (AVX2) - permute(v1,in,0); // sse 128; paired complex single - v1=v1+in; -#endif -#ifdef AVX512 - permute(v1,in,0); // sse 128; paired complex single - v1=v1+in; - permute(v2,v1,1); // avx 256; quad complex single - v1=v1+v2; -#endif -#ifdef QPX -#error -#endif - conv.v = v1.v; - return ComplexD(conv.f[0],conv.f[1]); - } - - // Unary negation - friend inline vComplexD operator -(const vComplexD &r) { - vComplexD ret; - vzero(ret); - ret = ret - r; - return ret; - } - // *=,+=,-= operators - inline vComplexD &operator *=(const vComplexD &r) { - *this = (*this)*r; - return *this; - } - inline vComplexD &operator +=(const vComplexD &r) { - *this = *this+r; - return *this; - } - inline vComplexD &operator -=(const vComplexD &r) { - *this = *this-r; - return *this; - } - - public: - static int Nsimd(void) { return sizeof(zvec)/sizeof(double)/2;} - }; - - - inline vComplexD innerProduct(const vComplexD & l, const vComplexD & r) { return conjugate(l)*r; } - - - typedef vComplexD vDComplex; - inline void zeroit(vComplexD &z){ vzero(z);} - - inline vComplexD outerProduct(const vComplexD &l, const vComplexD& r) - { - return l*r; - } - inline vComplexD trace(const vComplexD &arg){ - return arg; - } -///////////////////////////////////////////////////////////////////////// -//// Generic routine to promote object -> object -//// Supports the array reordering transformation that gives me SIMD utilisation -/////////////////////////////////////////////////////////////////////////// -/* -template class object> -inline object splat(objects){ - object ret; - vComplex * v_ptr = (vComplex *)& ret; - Complex * s_ptr = (Complex *) &s; - for(int i=0;i(y,b,perm); - } - friend inline ComplexF Reduce(const vComplexF & in) - { - vComplexF v1,v2; - union { - cvec v; - float f[sizeof(cvec)/sizeof(float)]; - } conv; -#ifdef SSE4 - permute(v1,in,0); // sse 128; paired complex single - v1=v1+in; -#endif -#if defined(AVX1) || defined (AVX2) - permute(v1,in,0); // sse 128; paired complex single - v1=v1+in; - permute(v2,v1,1); // avx 256; quad complex single - v1=v1+v2; -#endif -#ifdef AVX512 - permute(v1,in,0); // avx512 octo-complex single - v1=v1+in; - permute(v2,v1,1); - v1=v1+v2; - permute(v2,v1,2); - v1=v1+v2; -#endif -#ifdef QPX -#error -#endif - conv.v = v1.v; - return ComplexF(conv.f[0],conv.f[1]); - } - - friend inline vComplexF operator * (const ComplexF &a, vComplexF b){ - vComplexF va; - vsplat(va,a); - return va*b; - } - friend inline vComplexF operator * (vComplexF b,const ComplexF &a){ - return a*b; - } - - /* - template - friend inline vComplexF operator * (vComplexF b,const real &a){ - vComplexF va; - Complex ca(a,0); - vsplat(va,ca); - return va*b; - } - template - friend inline vComplexF operator * (const real &a,vComplexF b){ - return a*b; - } - - friend inline vComplexF operator + (const Complex &a, vComplexF b){ - vComplexF va; - vsplat(va,a); - return va+b; - } - friend inline vComplexF operator + (vComplexF b,const Complex &a){ - return a+b; - } - template - friend inline vComplexF operator + (vComplexF b,const real &a){ - vComplexF va; - Complex ca(a,0); - vsplat(va,ca); - return va+b; - } - template - friend inline vComplexF operator + (const real &a,vComplexF b){ - return a+b; - } - friend inline vComplexF operator - (const Complex &a, vComplexF b){ - vComplexF va; - vsplat(va,a); - return va-b; - } - friend inline vComplexF operator - (vComplexF b,const Complex &a){ - vComplexF va; - vsplat(va,a); - return b-va; - } - template - friend inline vComplexF operator - (vComplexF b,const real &a){ - vComplexF va; - Complex ca(a,0); - vsplat(va,ca); - return b-va; - } - template - friend inline vComplexF operator - (const real &a,vComplexF b){ - vComplexF va; - Complex ca(a,0); - vsplat(va,ca); - return va-b; - } - */ - - - /////////////////////// - // Conjugate - /////////////////////// - - friend inline vComplexF conjugate(const vComplexF &in){ - vComplexF ret ; vzero(ret); -#if defined (AVX1)|| defined (AVX2) - ret.v = _mm256_xor_ps(_mm256_addsub_ps(ret.v,in.v), _mm256_set1_ps(-0.f)); -#endif -#ifdef SSE4 - ret.v = _mm_xor_ps(_mm_addsub_ps(ret.v,in.v), _mm_set1_ps(-0.f)); -#endif -#ifdef AVX512 - ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // Zero out 0+real 0-imag -#endif -#ifdef QPX - assert(0); -#endif - return ret; - } - friend inline void timesMinusI( vComplexF &ret,const vComplexF &in){ - vzero(ret); -#if defined (AVX1)|| defined (AVX2) - cvec tmp =_mm256_addsub_ps(ret.v,in.v); // r,-i - ret.v = _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1)); //-i,r -#endif -#ifdef SSE4 - cvec tmp =_mm_addsub_ps(ret.v,in.v); // r,-i - ret.v = _mm_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1)); -#endif -#ifdef AVX512 - ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // real -imag - ret.v = _mm512_swizzle_ps(ret.v, _MM_SWIZ_REG_CDAB);// OK -#endif -#ifdef QPX - assert(0); -#endif - } - friend inline void timesI(vComplexF &ret,const vComplexF &in){ - vzero(ret); -#if defined (AVX1)|| defined (AVX2) - cvec tmp =_mm256_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1));//i,r - ret.v =_mm256_addsub_ps(ret.v,tmp); //i,-r -#endif -#ifdef SSE4 - cvec tmp =_mm_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1)); - ret.v = _mm_addsub_ps(ret.v,tmp); // r,-i -#endif -#ifdef AVX512 - cvec tmp = _mm512_swizzle_ps(in.v, _MM_SWIZ_REG_CDAB);// OK - ret.v = _mm512_mask_sub_ps(tmp,0xaaaa,ret.v,tmp); // real -imag -#endif -#ifdef QPX - assert(0); -#endif - } - friend inline vComplexF timesMinusI(const vComplexF &in){ - vComplexF ret; - timesMinusI(ret,in); - return ret; - } - friend inline vComplexF timesI(const vComplexF &in){ - vComplexF ret; - timesI(ret,in); - return ret; - } - - - // Unary negation - friend inline vComplexF operator -(const vComplexF &r) { - vComplexF ret; - vzero(ret); - ret = ret - r; - return ret; - } - // *=,+=,-= operators - inline vComplexF &operator *=(const vComplexF &r) { - *this = (*this)*r; - return *this; - } - inline vComplexF &operator +=(const vComplexF &r) { - *this = *this+r; - return *this; - } - inline vComplexF &operator -=(const vComplexF &r) { - *this = *this-r; - return *this; - } - - /* - friend inline void merge(vComplexF &y,std::vector &extracted) - { - Gmerge(y,extracted); - } - friend inline void extract(const vComplexF &y,std::vector &extracted) - { - Gextract(y,extracted); - } - friend inline void merge(vComplexF &y,std::vector &extracted) - { - Gmerge(y,extracted); - } - friend inline void extract(const vComplexF &y,std::vector &extracted) - { - Gextract(y,extracted); - } - */ - - }; - - inline vComplexF innerProduct(const vComplexF & l, const vComplexF & r) - { - return conjugate(l)*r; - } - - inline void zeroit(vComplexF &z){ vzero(z);} - - inline vComplexF outerProduct(const vComplexF &l, const vComplexF& r) - { - return l*r; - } - inline vComplexF trace(const vComplexF &arg){ - return arg; - } - - -} - -#endif diff --git a/lib/simd/Old/Grid_vInteger.h b/lib/simd/Old/Grid_vInteger.h deleted file mode 100644 index ee4a3633..00000000 --- a/lib/simd/Old/Grid_vInteger.h +++ /dev/null @@ -1,259 +0,0 @@ -#ifndef GRID_VINTEGER_H -#define GRID_VINTEGER_H - -namespace Grid { - - - class vInteger { - protected: - - public: - - ivec v; - - typedef ivec vector_type; - typedef Integer scalar_type; - - vInteger(){}; - vInteger & operator = (const Zero & z){ - vzero(*this); - return (*this); - } - vInteger(Integer a){ - vsplat(*this,a); - }; - //////////////////////////////////// - // Arithmetic operator overloads +,-,* - //////////////////////////////////// - friend inline vInteger operator + ( vInteger a, vInteger b) - { - vInteger ret; -#if defined (AVX1) - __m128i a0,a1; - __m128i b0,b1; - a0 = _mm256_extractf128_si256(a.v,0); - b0 = _mm256_extractf128_si256(b.v,0); - a1 = _mm256_extractf128_si256(a.v,1); - b1 = _mm256_extractf128_si256(b.v,1); - a0 = _mm_add_epi32(a0,b0); - a1 = _mm_add_epi32(a1,b1); - ret.v = _mm256_set_m128i(a1,a0); -#endif -#if defined (AVX2) - ret.v = _mm256_add_epi32(a.v,b.v); -#endif -#ifdef SSE4 - ret.v = _mm_add_epi32(a.v,b.v); -#endif -#ifdef AVX512 - ret.v = _mm512_add_epi32(a.v,b.v); -#endif -#ifdef QPX - // Implement as array of ints is only option -#error -#endif - return ret; - }; - - friend inline vInteger operator - ( vInteger a, vInteger b) - { - vInteger ret; -#if defined (AVX1) - __m128i a0,a1; - __m128i b0,b1; - a0 = _mm256_extractf128_si256(a.v,0); - b0 = _mm256_extractf128_si256(b.v,0); - a1 = _mm256_extractf128_si256(a.v,1); - b1 = _mm256_extractf128_si256(b.v,1); - a0 = _mm_sub_epi32(a0,b0); - a1 = _mm_sub_epi32(a1,b1); - ret.v = _mm256_set_m128i(a1,a0); -#endif -#if defined (AVX2) - ret.v = _mm256_sub_epi32(a.v,b.v); -#endif -#ifdef SSE4 - ret.v = _mm_sub_epi32(a.v,b.v); -#endif -#ifdef AVX512 - ret.v = _mm512_sub_epi32(a.v,b.v); -#endif -#ifdef QPX - // Implement as array of ints is only option -#error -#endif - return ret; - }; - - friend inline vInteger operator * ( vInteger a, vInteger b) - { - vInteger ret; -#if defined (AVX1) - __m128i a0,a1; - __m128i b0,b1; - a0 = _mm256_extractf128_si256(a.v,0); - b0 = _mm256_extractf128_si256(b.v,0); - a1 = _mm256_extractf128_si256(a.v,1); - b1 = _mm256_extractf128_si256(b.v,1); - a0 = _mm_mul_epi32(a0,b0); - a1 = _mm_mul_epi32(a1,b1); - ret.v = _mm256_set_m128i(a1,a0); -#endif -#if defined (AVX2) - ret.v = _mm256_mul_epi32(a.v,b.v); -#endif -#ifdef SSE4 - ret.v = _mm_mul_epi32(a.v,b.v); -#endif -#ifdef AVX512 - // ret.v = _mm512_mul_epi32(a.v,b.v); - ret.v = _mm512_mullo_epi32(a.v,b.v); -#endif -#ifdef QPX - // Implement as array of ints is only option -#error -#endif - return ret; - }; - - /////////////////////////////////////////////// - // mult, sub, add, adj,conjugate, mac functions - /////////////////////////////////////////////// - friend inline void mult(vInteger * __restrict__ y,const vInteger * __restrict__ l,const vInteger *__restrict__ r) {*y = (*l) * (*r);} - friend inline void sub (vInteger * __restrict__ y,const vInteger * __restrict__ l,const vInteger *__restrict__ r) {*y = (*l) - (*r);} - friend inline void add (vInteger * __restrict__ y,const vInteger * __restrict__ l,const vInteger *__restrict__ r) {*y = (*l) + (*r);} - friend inline void mac (vInteger &y,const vInteger a,const vInteger x){ - y = a*x+y; - } - - ////////////////////////////////// - // Initialise to 1,0,i - ////////////////////////////////// - friend inline void vone (vInteger &ret){vsplat(ret,1);} - friend inline void vzero(vInteger &ret){vsplat(ret,0);} - friend inline void vtrue (vInteger &ret){vsplat(ret,0xFFFFFFFF);} - friend inline void vfalse(vInteger &ret){vsplat(ret,0);} - - - ///////////////////////////////////////////////////// - // Broadcast a value across Nsimd copies. - ///////////////////////////////////////////////////// - friend inline void vsplat(vInteger &ret,scalar_type a){ -#if defined (AVX1)|| defined (AVX2) - ret.v = _mm256_set1_epi32(a); -#endif -#ifdef SSE4 - ret.v = _mm_set1_epi32(a); -#endif -#ifdef AVX512 - ret.v = _mm512_set1_epi32(a); -#endif -#ifdef QPX -#error -#endif - } - friend inline void vset(vInteger &ret,scalar_type *a){ -#if defined (AVX1)|| defined (AVX2) - ret.v = _mm256_set_epi32(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]); -#endif -#ifdef SSE4 - ret.v = _mm_set_epi32(a[0],a[1],a[2],a[3]); -#endif -#ifdef AVX512 - ret.v = _mm512_set_epi32( a[15],a[14],a[13],a[12],a[11],a[10],a[9],a[8], - a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]); -#endif -#ifdef QPX -#error -#endif - } - - friend inline void vstore(const vInteger &ret, Integer *a){ -#if defined (AVX1)|| defined (AVX2) - _mm256_store_si256((__m256i*)a,ret.v); -#endif -#ifdef SSE4 - _mm_store_si128((__m128i *)a,ret.v); -#endif -#ifdef AVX512 - _mm512_store_si512(a,ret.v); -#endif -#ifdef QPX - assert(0); -#endif - } - - friend inline void vstream(vInteger & out,const vInteger &in){ - out=in; - } - - friend inline void prefetch(const vInteger &v) - { - _mm_prefetch((const char*)&v.v,_MM_HINT_T0); - } - - // Unary negation - friend inline vInteger operator -(const vInteger &r) { - vInteger ret; - vzero(ret); - ret = ret - r; - return ret; - } - friend inline Integer Reduce(const vInteger & in) - { - // unimplemented - assert(0); - } - // *=,+=,-= operators - inline vInteger &operator *=(const vInteger &r) { - *this = (*this)*r; - return *this; - } - inline vInteger &operator +=(const vInteger &r) { - *this = *this+r; - return *this; - } - inline vInteger &operator -=(const vInteger &r) { - *this = *this-r; - return *this; - } - - friend inline void permute(vInteger &y,const vInteger b,int perm) - { - Gpermute(y,b,perm); - } - /* - friend inline void merge(vInteger &y,std::vector &extracted) - { - Gmerge(y,extracted); - } - friend inline void extract(const vInteger &y,std::vector &extracted) - { - Gextract(y,extracted); - } - friend inline void merge(vInteger &y,std::vector &extracted) - { - Gmerge(y,extracted); - } - friend inline void extract(const vInteger &y,std::vector &extracted) - { - Gextract(y,extracted); - } - */ - - public: - static inline int Nsimd(void) { return sizeof(ivec)/sizeof(Integer);} - }; - - inline vInteger localInnerProduct(const vInteger & l, const vInteger & r) { return l*r; } - - inline void zeroit(vInteger &z){ vzero(z);} - - inline vInteger outerProduct(const vInteger &l, const vInteger& r) - { - return l*r; - } - -} - -#endif diff --git a/lib/simd/Old/Grid_vRealD.h b/lib/simd/Old/Grid_vRealD.h deleted file mode 100644 index a32e579e..00000000 --- a/lib/simd/Old/Grid_vRealD.h +++ /dev/null @@ -1,276 +0,0 @@ -#ifndef GRID_VREALD_H -#define GRID_VREALD_H - -namespace Grid { - class vRealD { - public: - dvec v; // dvec is double precision vector - - public: - typedef dvec vector_type; - typedef RealD scalar_type; - - vRealD()=default; - vRealD(RealD a){ - vsplat(*this,a); - }; - vRealD(Zero &zero){ - zeroit(*this); - } - vRealD & operator = ( Zero & z){ - vzero(*this); - return (*this); - } - - friend inline void mult(vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) * (*r);} - friend inline void sub (vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) - (*r);} - friend inline void add (vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) + (*r);} - friend inline vRealD adj(const vRealD &in) { return in; } - friend inline vRealD conjugate(const vRealD &in){ return in; } - - friend inline void mac (vRealD &y,const vRealD a,const vRealD x){ -#if defined (AVX1) || defined (SSE4) - y = a*x+y; -#endif -#ifdef AVX2 // AVX 2 introduced FMA support. FMA4 eliminates a copy, but AVX only has FMA3 - // accelerates multiply accumulate, but not general multiply add - y.v = _mm256_fmadd_pd(a.v,x.v,y.v); -#endif -#ifdef AVX512 - // here precision of vector are still single - y.v = _mm512_fmadd_pd(a.v,x.v,y.v); -#endif -#ifdef QPX - y.v = vec_madd(a.v,x.v,y.v); -#endif - } - ////////////////////////////////// - // Initialise to 1,0 - ////////////////////////////////// - friend inline void vone (vRealD &ret){ vsplat(ret,1.0);} - friend inline void vzero(vRealD &ret){ vsplat(ret,0.0);} - - - //////////////////////////////////// - // Arithmetic operator overloads +,-,* - //////////////////////////////////// - friend inline vRealD operator + (vRealD a, vRealD b) - { - vRealD ret; -#if defined (AVX1)|| defined (AVX2) - ret.v = _mm256_add_pd(a.v,b.v); -#endif -#ifdef SSE4 - ret.v = _mm_add_pd(a.v,b.v); -#endif -#ifdef AVX512 - ret.v = _mm512_add_pd(a.v,b.v); -#endif -#ifdef QPX - ret.v = vec_add(a.v,b.v); -#endif - return ret; - }; - friend inline vRealD operator - (vRealD a, vRealD b) - { - vRealD ret; -#if defined (AVX1)|| defined (AVX2) - ret.v = _mm256_sub_pd(a.v,b.v); -#endif -#ifdef SSE4 - ret.v = _mm_sub_pd(a.v,b.v); -#endif -#ifdef AVX512 - ret.v = _mm512_sub_pd(a.v,b.v); -#endif -#ifdef QPX - ret.v = vec_sub(a.v,b.v); -#endif - return ret; - }; - - friend inline vRealD operator * (vRealD a, vRealD b) - { - vRealD ret; -#if defined (AVX1)|| defined (AVX2) - ret.v = _mm256_mul_pd(a.v,b.v); -#endif -#ifdef SSE4 - ret.v = _mm_mul_pd(a.v,b.v); -#endif -#ifdef AVX512 - ret.v = _mm512_mul_pd(a.v,b.v); -#endif -#ifdef QPX - ret.v = vec_mul(a.v,b.v); -#endif - return ret; - }; - - //////////////////////////////////////////////////////////////////// - // General permute; assumes vector length is same across - // all subtypes; may not be a good assumption, but could - // add the vector width as a template param for BG/Q for example - //////////////////////////////////////////////////////////////////// - - friend inline void permute(vRealD &y,vRealD b,int perm) - { - Gpermute(y,b,perm); - } - /* - friend inline void merge(vRealD &y,std::vector &extracted) - { - Gmerge(y,extracted); - } - friend inline void extract(const vRealD &y,std::vector &extracted) - { - Gextract(y,extracted); - } - friend inline void merge(vRealD &y,std::vector &extracted) - { - Gmerge(y,extracted); - } - friend inline void extract(const vRealD &y,std::vector &extracted) - { - Gextract(y,extracted); - } - */ - - friend inline void vsplat(vRealD &ret,double a){ -#if defined (AVX1)|| defined (AVX2) - ret.v = _mm256_set_pd(a,a,a,a); -#endif -#ifdef SSE4 - ret.v = _mm_set_pd(a,a); -#endif -#ifdef AVX512 - ret.v = _mm512_set1_pd(a); -#endif -#ifdef QPX - ret.v = {a,a,a,a}; -#endif - } - friend inline void vset(vRealD &ret, double *a){ -#if defined (AVX1)|| defined (AVX2) - ret.v = _mm256_set_pd(a[3],a[2],a[1],a[0]); -#endif -#ifdef SSE4 - ret.v = _mm_set_pd(a[1],a[0]); -#endif -#ifdef AVX512 - ret.v = _mm512_set_pd(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]); - // Note v has a0 a1 a2 a3 a4 a5 a6 a7 -#endif -#ifdef QPX - ret.v = {a[0],a[1],a[2],a[3]}; -#endif - } - - friend inline void vstore(const vRealD &ret, double *a){ -#if defined (AVX1)|| defined (AVX2) - _mm256_store_pd(a,ret.v); -#endif -#ifdef SSE4 - _mm_store_pd(a,ret.v); -#endif -#ifdef AVX512 - _mm512_store_pd(a,ret.v); - // Note v has a7 a6 a5ba4 a3 a2 a1 a0 -#endif -#ifdef QPX - assert(0); -#endif - } - friend inline void vstream(vRealD &out,const vRealD &in){ -#if defined (AVX1)|| defined (AVX2) - _mm256_stream_pd((double *)&out.v,in.v); -#endif -#ifdef SSE4 - _mm_stream_pd((double *)&out.v,in.v); -#endif -#ifdef AVX512 - _mm512_storenrngo_pd((double *)&out.v,in.v); - //Note v has a3 a2 a1 a0 -#endif -#ifdef QPX - assert(0); -#endif - } - friend inline void prefetch(const vRealD &v) - { - _mm_prefetch((const char*)&v.v,_MM_HINT_T0); - } - // Unary negation - friend inline vRealD operator -(const vRealD &r) { - vRealD ret; - vzero(ret); - ret = ret - r; - return ret; - } - - friend inline RealD Reduce(const vRealD & in) - { - vRealD v1,v2; - union { - dvec v; - double f[sizeof(dvec)/sizeof(double)]; - } conv; -#ifdef SSE4 - permute(v1,in,0); // sse 128; paired real double - v1=v1+in; -#endif -#if defined(AVX1) || defined (AVX2) - permute(v1,in,0); // avx 256; quad double - v1=v1+in; - permute(v2,v1,1); - v1=v1+v2; -#endif -#ifdef AVX512 - permute(v1,in,0); // avx 512; octo-double - v1=v1+in; - permute(v2,v1,1); - v1=v1+v2; - permute(v2,v1,2); - v1=v1+v2; -#endif -#ifdef QPX -#endif - conv.v=v1.v; - return conv.f[0]; - } - - // *=,+=,-= operators - inline vRealD &operator *=(const vRealD &r) { - *this = (*this)*r; - return *this; - } - inline vRealD &operator +=(const vRealD &r) { - *this = *this+r; - return *this; - } - inline vRealD &operator -=(const vRealD &r) { - *this = *this-r; - return *this; - } - - public: - static int Nsimd(void) { return sizeof(dvec)/sizeof(double);} - }; - - inline vRealD innerProduct(const vRealD & l, const vRealD & r) { return conjugate(l)*r; } - inline void zeroit(vRealD &z){ vzero(z);} - - inline vRealD outerProduct(const vRealD &l, const vRealD& r) - { - return l*r; - } - inline vRealD trace(const vRealD &arg){ - return arg; - } - inline vRealD real(const vRealD &arg){ - return arg; - } - - -} -#endif diff --git a/lib/simd/Old/Grid_vRealF.h b/lib/simd/Old/Grid_vRealF.h deleted file mode 100644 index 05bfa2f5..00000000 --- a/lib/simd/Old/Grid_vRealF.h +++ /dev/null @@ -1,313 +0,0 @@ -#ifndef GRID_VREALF_H -#define GRID_VREALF_H - - -namespace Grid { - class vRealF { - public: - fvec v; - - public: - typedef fvec vector_type; - typedef RealF scalar_type; - - vRealF()=default; - vRealF(RealF a){ - vsplat(*this,a); - }; - vRealF(Zero &zero){ - zeroit(*this); - } - vRealF & operator = ( Zero & z){ - vzero(*this); - return (*this); - } - //////////////////////////////////// - // Arithmetic operator overloads +,-,* - //////////////////////////////////// - friend inline vRealF operator + ( vRealF a, vRealF b) - { - vRealF ret; -#if defined (AVX1)|| defined (AVX2) - ret.v = _mm256_add_ps(a.v,b.v); -#endif -#ifdef SSE4 - ret.v = _mm_add_ps(a.v,b.v); -#endif -#ifdef AVX512 - ret.v = _mm512_add_ps(a.v,b.v); -#endif -#ifdef QPX - vector4double aa,bb,cc; - aa = vec_lda(0,(float *)&a); - bb = vec_lda(0,(float *)&b); - cc = vec_add(aa,bb); - vec_sta(cc,0,(float *)&ret.v); -#endif - return ret; - }; - - friend inline vRealF operator - ( vRealF a, vRealF b) - { - vRealF ret; -#if defined (AVX1)|| defined (AVX2) - ret.v = _mm256_sub_ps(a.v,b.v); -#endif -#ifdef SSE4 - ret.v = _mm_sub_ps(a.v,b.v); -#endif -#ifdef AVX512 - ret.v = _mm512_sub_ps(a.v,b.v); -#endif -#ifdef QPX - vector4double aa,bb,cc; - aa = vec_lda(0,(float *)&a); - bb = vec_lda(0,(float *)&b); - cc = vec_sub(aa,bb); - vec_sta(cc,0,(float *)&ret.v); -#endif - return ret; - }; - - friend inline vRealF operator * ( vRealF a, vRealF b) - { - vRealF ret; -#if defined (AVX1)|| defined (AVX2) - ret.v = _mm256_mul_ps(a.v,b.v); -#endif -#ifdef SSE4 - ret.v = _mm_mul_ps(a.v,b.v); -#endif -#ifdef AVX512 - ret.v = _mm512_mul_ps(a.v,b.v); -#endif -#ifdef QPX - vector4double aa,bb,cc; // QPX single we are forced to load as this promotes single mem->double regs. - aa = vec_lda(0,(float *)&a); - bb = vec_lda(0,(float *)&b); - cc = vec_mul(aa,bb); - vec_sta(cc,0,(float *)&ret.v); -#endif - return ret; - }; - - /////////////////////////////////////////////// - // mult, sub, add, adj,conjugate, mac functions - /////////////////////////////////////////////// - friend inline void mult(vRealF * __restrict__ y,const vRealF * __restrict__ l,const vRealF *__restrict__ r) {*y = (*l) * (*r);} - friend inline void sub (vRealF * __restrict__ y,const vRealF * __restrict__ l,const vRealF *__restrict__ r) {*y = (*l) - (*r);} - friend inline void add (vRealF * __restrict__ y,const vRealF * __restrict__ l,const vRealF *__restrict__ r) {*y = (*l) + (*r);} - friend inline vRealF adj(const vRealF &in) { return in; } - friend inline vRealF conjugate(const vRealF &in){ return in; } - - friend inline void mac (vRealF &y,const vRealF a,const vRealF x){ -#if defined (AVX1) || defined (SSE4) - y = a*x+y; -#endif -#ifdef AVX2 // AVX 2 introduced FMA support. FMA4 eliminates a copy, but AVX only has FMA3 - // accelerates multiply accumulate, but not general multiply add - y.v = _mm256_fmadd_ps(a.v,x.v,y.v); -#endif -#ifdef AVX512 - y.v = _mm512_fmadd_ps(a.v,x.v,y.v); -#endif -#ifdef QPX - vector4double aa,xx,yy; // QPX single we are forced to load as this promotes single mem->double regs. - aa = vec_lda(0,(float *)&a.v); - xx = vec_lda(0,(float *)&x.v); - yy = vec_lda(0,(float *)&y.v); - yy = vec_madd(aa,xx,yy); - vec_sta(yy,0,(float *)&y.v); -#endif - } - - ////////////////////////////////// - // Initialise to 1,0,i - ////////////////////////////////// - friend inline void vone (vRealF &ret){vsplat(ret,1.0);} - friend inline void vzero(vRealF &ret){vsplat(ret,0.0);} - - - //////////////////////////////////////////////////////////////////// - // General permute; assumes vector length is same across - // all subtypes; may not be a good assumption, but could - // add the vector width as a template param for BG/Q for example - //////////////////////////////////////////////////////////////////// - - friend inline void permute(vRealF &y,vRealF b,int perm) - { - Gpermute(y,b,perm); - } - /* - friend inline void merge(vRealF &y,std::vector &extracted) - { - Gmerge(y,extracted); - } - friend inline void extract(const vRealF &y,std::vector &extracted) - { - Gextract(y,extracted); - } - friend inline void merge(vRealF &y,std::vector &extracted) - { - Gmerge(y,extracted); - } - friend inline void extract(const vRealF &y,std::vector &extracted) - { - Gextract(y,extracted); - } - */ - - ///////////////////////////////////////////////////// - // Broadcast a value across Nsimd copies. - ///////////////////////////////////////////////////// - friend inline void vsplat(vRealF &ret,float a){ -#if defined (AVX1)|| defined (AVX2) - ret.v = _mm256_set_ps(a,a,a,a,a,a,a,a); -#endif -#ifdef SSE4 - ret.v = _mm_set_ps(a,a,a,a); -#endif -#ifdef AVX512 - //ret.v = _mm512_set_ps(a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a); - ret.v = _mm512_set1_ps(a); -#endif -#ifdef QPX - ret.v = {a,a,a,a}; -#endif - } - - - friend inline void vset(vRealF &ret, float *a){ -#if defined (AVX1)|| defined (AVX2) - ret.v = _mm256_set_ps(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]); -#endif -#ifdef SSE4 - ret.v = _mm_set_ps(a[3],a[2],a[1],a[0]); -#endif -#ifdef AVX512 - ret.v = _mm512_set_ps( a[15],a[14],a[13],a[12],a[11],a[10],a[9],a[8], - a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]); - // Note v has a0 a1 a2 a3 a4 a5 a6 a7 -#endif -#ifdef QPX - ret.v = {a[0],a[1],a[2],a[3],a[4],a[5],a[6],a[7]}; -#endif - } - - //////////////////////////////////////////////////////////////////////// - // FIXME: gonna remove these load/store, get, set, prefetch - //////////////////////////////////////////////////////////////////////// -friend inline void vstore(const vRealF &ret, float *a){ -#if defined (AVX1)|| defined (AVX2) - _mm256_store_ps(a,ret.v); -#endif -#ifdef SSE4 - _mm_store_ps(a,ret.v); -#endif -#ifdef AVX512 - _mm512_store_ps(a,ret.v); - // Note v has a7 a6 a5ba4 a3 a2 a1 a0 -#endif -#ifdef QPX - assert(0); -#endif - } - friend inline void vstream(vRealF &out,const vRealF &in){ -#if defined (AVX1)|| defined (AVX2) - _mm256_stream_ps((float *)&out.v,in.v); -#endif -#ifdef SSE4 - _mm_stream_ps((float *)&out.v,in.v); -#endif -#ifdef AVX512 - _mm512_storenrngo_ps((float *)&out.v,in.v); - // _mm512_stream_ps((float *)&out.v,in.v); - //Note v has a3 a2 a1 a0 -#endif -#ifdef QPX - assert(0); -#endif - } - - - friend inline void prefetch(const vRealF &v) - { - _mm_prefetch((const char*)&v.v,_MM_HINT_T0); - } - // Unary negation - friend inline vRealF operator -(const vRealF &r) { - vRealF ret; - vzero(ret); - ret = ret - r; - return ret; - } - friend inline RealF Reduce(const vRealF & in) - { - vRealF v1,v2; - union { - fvec v; - float f[sizeof(fvec)/sizeof(double)]; - } conv; -#ifdef SSE4 - permute(v1,in,0); // sse 128; quad single - v1=v1+in; - permute(v2,v1,1); - v1=v1+v2; -#endif -#if defined(AVX1) || defined (AVX2) - permute(v1,in,0); // avx 256; octo-double - v1=v1+in; - permute(v2,v1,1); - v1=v1+v2; - permute(v2,v1,2); - v1=v1+v2; -#endif -#ifdef AVX512 - permute(v1,in,0); // avx 256; octo-double - v1=v1+in; - permute(v2,v1,1); - v1=v1+v2; - permute(v2,v1,2); - v1=v1+v2; - permute(v2,v1,3); - v1=v1+v2; -#endif -#ifdef QPX -#endif - conv.v=v1.v; - return conv.f[0]; - } - - // *=,+=,-= operators - inline vRealF &operator *=(const vRealF &r) { - *this = (*this)*r; - return *this; - } - inline vRealF &operator +=(const vRealF &r) { - *this = *this+r; - return *this; - } - inline vRealF &operator -=(const vRealF &r) { - *this = *this-r; - return *this; - } - public: - static inline int Nsimd(void) { return sizeof(fvec)/sizeof(float);} - }; - inline vRealF innerProduct(const vRealF & l, const vRealF & r) { return conjugate(l)*r; } - inline void zeroit(vRealF &z){ vzero(z);} - - inline vRealF outerProduct(const vRealF &l, const vRealF& r) - { - return l*r; - } - inline vRealF trace(const vRealF &arg){ - return arg; - } - inline vRealF real(const vRealF &arg){ - return arg; - } - - -} -#endif diff --git a/lib/tensors/Tensor_Ta.h b/lib/tensors/Tensor_Ta.h index 3a903364..f06dbf04 100644 --- a/lib/tensors/Tensor_Ta.h +++ b/lib/tensors/Tensor_Ta.h @@ -29,15 +29,87 @@ namespace Grid { { iMatrix ret(arg); double factor = (1/(double)N); - for(int c1=0;c1 inline iScalar ProjectOnGroup(const iScalar&r) + { + iScalar ret; + ret._internal = ProjectOnGroup(r._internal); + return ret; + } + template inline iVector ProjectOnGroup(const iVector&r) + { + iVector ret; + for(int i=0;i::TensorLevel == 0 >::type * =nullptr> + inline iMatrix ProjectOnGroup(const iMatrix &arg) + { + // need a check for the group type? + iMatrix ret(arg); + double nrm; + for(int c1=0;c1 inline iScalar Exponentiate(const iScalar&r, double alpha, int Nexp) + { + iScalar ret; + ret._internal = Exponentiate(r._internal, alpha, Nexp); + return ret; + } + + + template::TensorLevel == 0 >::type * =nullptr> + inline iMatrix Exponentiate(const iMatrix &arg, double alpha, int Nexp) + { + iMatrix unit(1.0); + iMatrix temp(unit); + + for(int i=Nexp; i>=1;--i){ + temp *= alpha/double(i); + temp = unit + temp*arg; + } + return ProjectOnGroup(temp); + } + + + } #endif diff --git a/tests/InvSqrt.gnu b/tests/InvSqrt.gnu new file mode 100644 index 00000000..e69de29b diff --git a/tests/Sqrt.gnu b/tests/Sqrt.gnu new file mode 100644 index 00000000..ae56ab97 --- /dev/null +++ b/tests/Sqrt.gnu @@ -0,0 +1,2 @@ +f(x) = 6.81384+(-2.34645e-06/(x+0.000228091))+(-1.51593e-05/(x+0.00112084))+(-6.89254e-05/(x+0.003496))+(-0.000288983/(x+0.00954309))+(-0.00119277/(x+0.024928))+(-0.0050183/(x+0.0646627))+(-0.0226449/(x+0.171576))+(-0.123767/(x+0.491792))+(-1.1705/(x+1.78667))+(-102.992/(x+18.4866)); +f(x) = 0.14676+(0.00952992/(x+5.40933e-05))+(0.0115952/(x+0.000559699))+(0.0161824/(x+0.00203338))+(0.0243252/(x+0.00582831))+(0.0379533/(x+0.0154649))+(0.060699/(x+0.0401156))+(0.100345/(x+0.104788))+(0.178335/(x+0.286042))+(0.381586/(x+0.892189))+(1.42625/(x+4.38422)); diff --git a/tests/Test_main.cc b/tests/Test_main.cc index cef2d848..88d69192 100644 --- a/tests/Test_main.cc +++ b/tests/Test_main.cc @@ -223,6 +223,9 @@ int main (int argc, char ** argv) //TComplex tracecm= trace(cm); //std::cout << cm << " "<< tracecm << std::endl; + cm = ProjectOnGroup(cm); + + cm = Exponentiate(cm, 1.0, 10); // Foo = Foo+scalar; // LatticeColourMatrix+Scalar // Foo = Foo*scalar; // LatticeColourMatrix*Scalar From 4d11fc03301df7da821e08ba50795879c6f68e4c Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 9 Jun 2015 22:36:48 +0100 Subject: [PATCH 12/24] silly change --- tests/Test_nersc_io.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/Test_nersc_io.cc b/tests/Test_nersc_io.cc index 403d2cab..f473781e 100644 --- a/tests/Test_nersc_io.cc +++ b/tests/Test_nersc_io.cc @@ -25,7 +25,6 @@ int main (int argc, char ** argv) std::vector U(4,&Fine); NerscField header; - std::string file("./ckpoint_lat.4000"); readNerscConfiguration(Umu,header,file); From 6403e3021ff976f186deec45073057fad3cd5438 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 9 Jun 2015 22:37:21 +0100 Subject: [PATCH 13/24] Debugged finally. A silly mistake in permute cost me a day of debug. --- lib/algorithms/CoarsenedMatrix.h | 104 ++++++++++++++++++++++++------- 1 file changed, 82 insertions(+), 22 deletions(-) diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h index 006da07f..ba417be3 100644 --- a/lib/algorithms/CoarsenedMatrix.h +++ b/lib/algorithms/CoarsenedMatrix.h @@ -11,7 +11,6 @@ namespace Grid { int npoint; std::vector directions ; std::vector displacements; - std::vector opdirs; // FIXME -- don't like xposing the operator directions // as different to the geometrical dirs @@ -26,18 +25,22 @@ namespace Grid { npoint = 2*_d+1; directions.resize(npoint); displacements.resize(npoint); - opdirs.resize(npoint); for(int d=0;d<_d;d++){ directions[2*d ] = d+base; directions[2*d+1] = d+base; - opdirs[2*d ] = d; - opdirs[2*d+1] = d; displacements[2*d ] = +1; displacements[2*d+1] = -1; } directions [2*_d]=0; displacements[2*_d]=0; - opdirs [2*_d]=0; + + //// report back + std::cout<<"directions :"; + for(int d=0;d - class CoarsenedMatrix : public SparseMatrixBase > > { + class CoarsenedMatrix : public SparseMatrixBase > > { public: - typedef iVector siteVector; - typedef Lattice > CoarseVector; - typedef Lattice > CoarseMatrix; + typedef iVector siteVector; + typedef Lattice CoarseVector; + typedef Lattice > CoarseMatrix; typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field typedef Lattice FineField; @@ -82,7 +85,6 @@ namespace Grid { CartesianStencil Stencil; std::vector A; - std::vector Aslow; std::vector > comm_buf; @@ -91,25 +93,28 @@ namespace Grid { /////////////////////// GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know - RealD M (const CoarseVector &in, CoarseVector &out){ - + RealD M (const CoarseVector &in, CoarseVector &out){ + + conformable(_grid,in._grid); + conformable(in._grid,out._grid); + SimpleCompressor compressor; Stencil.HaloExchange(in,comm_buf,compressor); -PARALLEL_FOR_LOOP + //PARALLEL_FOR_LOOP for(int ss=0;ssoSites();ss++){ siteVector res = zero; - siteVector tmp; siteVector nbr; + int offset,local,perm,ptype; - int offset,local,perm; for(int point=0;point_rdimensions[dir])/(Grid()->_rdimensions[dir]); @@ -187,7 +191,7 @@ PARALLEL_FOR_LOOP linop.OpDiag(phi,Mphi); } else { - linop.OpDir(phi,Mphi,opdir,disp); + linop.OpDir(phi,Mphi,dir,disp); } //////////////////////////////////////////////////////////////////////// @@ -240,6 +244,62 @@ PARALLEL_FOR_LOOP std::cout<< iProj < > val(Grid()); random(RNG,val); + + Complex one(1.0); + + iMatrix ident; ident=one; + + val = val*adj(val); + val = val + 1.0; + + A[8] = val*ident; + + // for(int s=0;soSites();s++) { + // A[8]._odata[s]=val._odata[s]; + // } + } + void ForceHermitian(void) { + for(int d=0;d<4;d++){ + int dd=d+1; + A[2*d] = adj(Cshift(A[2*d+1],dd,1)); + } + A[8] = 0.5*(A[8] + adj(A[8])); + } + void AssertHermitian(void) { + CoarseMatrix AA (Grid()); + CoarseMatrix AAc (Grid()); + CoarseMatrix Diff (Grid()); + for(int d=0;d<4;d++){ + + int dd=d+1; + AAc = Cshift(A[2*d+1],dd,1); + AA = A[2*d]; + + Diff = AA - adj(AAc); + + std::cout<<"Norm diff dim "< Date: Tue, 9 Jun 2015 22:38:13 +0100 Subject: [PATCH 14/24] Starting to use --- lib/algorithms/approx/Chebyshev.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/lib/algorithms/approx/Chebyshev.h b/lib/algorithms/approx/Chebyshev.h index 5c7741e4..0dad7179 100644 --- a/lib/algorithms/approx/Chebyshev.h +++ b/lib/algorithms/approx/Chebyshev.h @@ -42,6 +42,14 @@ namespace Grid { double lo; public: + void csv(std::ostream &out){ + for (double x=lo; x Date: Tue, 9 Jun 2015 22:39:13 +0100 Subject: [PATCH 15/24] Got this sorted with the promote working in a test --- lib/lattice/Lattice_transfer.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 12993680..fb6b39f1 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -94,7 +94,7 @@ inline void blockProject(Lattice > &coarseData, for(int i=0;i > &coarseData, assert( nbasis == Basis.size() ); subdivides(coarse,fine); for(int i=0;i block_r (_ndimension); @@ -287,9 +287,8 @@ inline void blockPromote(const Lattice > &coarseData, GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions); for(int i=0;i Date: Tue, 9 Jun 2015 22:39:37 +0100 Subject: [PATCH 16/24] Prettier reporting --- scripts/linecount | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/scripts/linecount b/scripts/linecount index 0be67142..4463ce84 100755 --- a/scripts/linecount +++ b/scripts/linecount @@ -1,12 +1,30 @@ #!/bin/sh +if [ $# -eq 1 ] +then + wc -l lib/*.h lib/*/*.h lib/*/*/*.h lib/*.cc lib/*/*.cc lib/*/*/*.cc tests/*.cc benchmarks/*.cc lib/*/*/*/*.cc lib/*/*/*/*.h >& tmp.fil + count=`grep total tmp.fil` + echo $count " in Grid library" +else + wc -l lib/*.h lib/*/*.h lib/*/*/*.h lib/*.cc lib/*/*.cc lib/*/*/*.cc tests/*.cc benchmarks/*.cc lib/*/*/*/*.cc lib/*/*/*/*.h +fi -wc -l lib/*.h lib/*/*.h lib/*/*/*.h lib/*.cc lib/*/*.cc lib/*/*/*.cc tests/*.cc benchmarks/*.cc lib/*/*/*/*.cc lib/*/*/*/*.h +wc -l lib/*.h lib/*/*.h lib/*/*/*.h lib/*.cc lib/*/*.cc lib/*/*/*.cc lib/*/*/*/*.cc lib/*/*/*/*.h >& tmp.fil +count=`grep total tmp.fil` +echo $count " in lib" -echo "lib" -wc -l lib/*.h lib/*/*.h lib/*/*/*.h lib/*.cc lib/*/*.cc lib/*/*/*.cc lib/*/*/*/*.cc lib/*/*/*/*.h | grep total +for sdir in `ls -F lib/| grep /` +do +wc -l lib/${sdir}/*.h lib/${sdir}/*/*.h lib/${sdir}/*.cc lib/${sdir}/*/*.cc lib/${sdir}/*/*/*.cc lib/${sdir}/*/*/*.h >& tmp.fil +count=`grep total tmp.fil` +echo $count " in lib/${sdir}" +done -echo "tests" -wc -l tests/*.cc | grep total +wc -l tests/*.cc | grep total >& tmp.fil +count=`grep total tmp.fil` +echo $count " in tests" -echo "benchmarks" -wc -l benchmarks/*.cc | grep total +wc -l benchmarks/*.cc | grep total >& tmp.fil +count=`grep total tmp.fil` +echo $count " in benchmarks" + +rm tmp.fil From 61770d447242dee8976c018631bc09721327bd2a Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 9 Jun 2015 22:40:12 +0100 Subject: [PATCH 17/24] Files update --- tests/Make.inc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/Make.inc b/tests/Make.inc index 2b132fd0..64ba4768 100644 --- a/tests/Make.inc +++ b/tests/Make.inc @@ -1,5 +1,5 @@ -bin_PROGRAMS = Test_GaugeAction Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cf_coarsen_support Test_cf_cr_unprec Test_contfrac_cg Test_contfrac_even_odd Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_gamma Test_main Test_multishift_sqrt Test_nersc_io Test_remez Test_rng Test_rng_fixed Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd +bin_PROGRAMS = Test_GaugeAction Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cg Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_contfrac_cg Test_contfrac_even_odd Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_gamma Test_main Test_multishift_sqrt Test_nersc_io Test_remez Test_rng Test_rng_fixed Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_GaugeAction_SOURCES=Test_GaugeAction.cc @@ -18,6 +18,14 @@ Test_cayley_even_odd_SOURCES=Test_cayley_even_odd.cc Test_cayley_even_odd_LDADD=-lGrid +Test_cayley_ldop_cg_SOURCES=Test_cayley_ldop_cg.cc +Test_cayley_ldop_cg_LDADD=-lGrid + + +Test_cayley_ldop_cr_SOURCES=Test_cayley_ldop_cr.cc +Test_cayley_ldop_cr_LDADD=-lGrid + + Test_cf_coarsen_support_SOURCES=Test_cf_coarsen_support.cc Test_cf_coarsen_support_LDADD=-lGrid From 708d4f7533c2c89cd7358bae073d672622e112ca Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 9 Jun 2015 22:40:58 +0100 Subject: [PATCH 18/24] g5 and g5R5 hermitian are now differentiated --- lib/qcd/action/fermion/g5HermitianLinop.h | 47 ++++++++++++++++++++++- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/lib/qcd/action/fermion/g5HermitianLinop.h b/lib/qcd/action/fermion/g5HermitianLinop.h index 5b3411ec..e5b691d8 100644 --- a/lib/qcd/action/fermion/g5HermitianLinop.h +++ b/lib/qcd/action/fermion/g5HermitianLinop.h @@ -6,10 +6,10 @@ namespace Grid { // Wrap an already herm matrix //////////////////////////////////////////////////////////////////// template -class Gamma5HermitianLinearOperator : public LinearOperatorBase { +class Gamma5R5HermitianLinearOperator : public LinearOperatorBase { Matrix &_Mat; public: - Gamma5HermitianLinearOperator(Matrix &Mat): _Mat(Mat){}; + Gamma5R5HermitianLinearOperator(Matrix &Mat): _Mat(Mat){}; void Op (const Field &in, Field &out){ HermOp(in,out); } @@ -45,5 +45,48 @@ public: } }; + +template +class Gamma5HermitianLinearOperator : public LinearOperatorBase { + Matrix &_Mat; + Gamma g5; +public: + Gamma5HermitianLinearOperator(Matrix &Mat): _Mat(Mat), g5(Gamma::Gamma5) {}; + void Op (const Field &in, Field &out){ + HermOp(in,out); + } + void AdjOp (const Field &in, Field &out){ + HermOp(in,out); + } + void OpDiag (const Field &in, Field &out) { + Field tmp(in._grid); + _Mat.Mdiag(in,tmp); + out=g5*tmp; + } + void OpDir (const Field &in, Field &out,int dir,int disp) { + Field tmp(in._grid); + _Mat.Mdir(in,tmp,dir,disp); + out=g5*tmp; + } + + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + + HermOp(in,out); + + ComplexD dot; + dot= innerProduct(in,out); + n1=real(dot); + + dot = innerProduct(out,out); + n2=real(dot); + } + void HermOp(const Field &in, Field &out){ + Field tmp(in._grid); + _Mat.M(in,tmp); + out=g5*tmp; + } +}; + + }} #endif From 3a52bc4ce1a79fb75869e54a345c20748fccdc53 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 9 Jun 2015 22:41:27 +0100 Subject: [PATCH 19/24] G5R5 update --- tests/Test_dwf_cr_unprec.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Test_dwf_cr_unprec.cc b/tests/Test_dwf_cr_unprec.cc index cffa726c..6cd2b32a 100644 --- a/tests/Test_dwf_cr_unprec.cc +++ b/tests/Test_dwf_cr_unprec.cc @@ -55,7 +55,7 @@ int main (int argc, char ** argv) MdagMLinearOperator HermOp(Ddwf); MCR(HermOp,src,result); - Gamma5HermitianLinearOperator g5HermOp(Ddwf); + Gamma5R5HermitianLinearOperator g5HermOp(Ddwf); MCR(g5HermOp,src,result); From 4963f7356ac0973171c52fbe5bcb503fe77c1062 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 9 Jun 2015 22:41:59 +0100 Subject: [PATCH 20/24] 5d OpDir direction interface refers to the 5d dims, not 4d to present a sensible and consistent external interface. --- lib/qcd/action/fermion/WilsonFermion5D.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index 7d9ffe22..7bbeeef9 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -72,6 +72,7 @@ namespace QCD { } void WilsonFermion5D::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu) { + assert(GaugeGrid()->_ndimension==4); conformable(Uds._grid,GaugeGrid()); conformable(Umu._grid,GaugeGrid()); LatticeColourMatrix U(GaugeGrid()); @@ -82,8 +83,10 @@ void WilsonFermion5D::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGau pokeIndex(Uds,U,mu+4); } } -void WilsonFermion5D::DhopDir(const LatticeFermion &in, LatticeFermion &out,int dir,int disp) +void WilsonFermion5D::DhopDir(const LatticeFermion &in, LatticeFermion &out,int dir5,int disp) { + int dir = dir5-1; // Maps to the ordering above in "directions" that is passed to stencil + // we drop off the innermost fifth dimension assert( (disp==1)||(disp==-1) ); assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t; From 4357f5acdc6038f9b157271eea0c536496053c93 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 9 Jun 2015 22:43:10 +0100 Subject: [PATCH 21/24] Some extra tests --- tests/Test_cayley_coarsen_support.cc | 99 ++++++++++++++++++++++------ 1 file changed, 80 insertions(+), 19 deletions(-) diff --git a/tests/Test_cayley_coarsen_support.cc b/tests/Test_cayley_coarsen_support.cc index 064f5f13..b68a5cc3 100644 --- a/tests/Test_cayley_coarsen_support.cc +++ b/tests/Test_cayley_coarsen_support.cc @@ -21,9 +21,7 @@ int main (int argc, char ** argv) { Grid_init(&argc,&argv); - - - const int Ls=4; + const int Ls=8; GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); @@ -41,8 +39,10 @@ int main (int argc, char ** argv) std::vector seeds4({1,2,3,4}); std::vector seeds5({5,6,7,8}); - GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); - GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + std::vector cseeds({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds); LatticeFermion src(FGrid); random(RNG5,src); LatticeFermion result(FGrid); result=zero; @@ -50,31 +50,33 @@ int main (int argc, char ** argv) LatticeFermion tmp(FGrid); LatticeFermion err(FGrid); LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + #if 0 std::vector U(4,UGrid); Umu=zero; Complex cone(1.0,0.0); for(int nn=0;nn2) { U[nn]=zero; std::cout << "zeroing gauge field in dir "<(Umu,U[nn],nn); } #endif + RealD mass=0.5; RealD M5=1.8; DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); - Gamma5HermitianLinearOperator HermIndefOp(Ddwf); + Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); HermIndefOp.Op(src,ref); HermIndefOp.OpDiag(src,result); for(int d=0;d<4;d++){ - HermIndefOp.OpDir(src,tmp,d,+1); result=result+tmp; + HermIndefOp.OpDir(src,tmp,d+1,+1); result=result+tmp; std::cout<<"dir "< subspace(nbasis,FGrid); - + LatticeFermion prom(FGrid); + for(int b=0;b LittleDiracOp(*Coarse5d); + typedef CoarsenedMatrix LittleDiracOperator; + typedef LittleDiracOperator::CoarseVector CoarseVector; + + LittleDiracOperator LittleDiracOp(*Coarse5d); LittleDiracOp.CoarsenOperator(FGrid,HermIndefOp,subspace); - - typedef Lattice > coarse_vec; - coarse_vec c_src (Coarse5d); c_src= zero; - coarse_vec c_res (Coarse5d); + CoarseVector c_src (Coarse5d); + CoarseVector c_res (Coarse5d); + CoarseVector c_proj(Coarse5d); - Complex one(1.0); - c_src = one; // 1 in every element for vector 1. - // TODO // -- promote from subspace, check we get the vector we wanted // -- apply ldop; check we get the same as inner product of M times big vec // -- pick blocks one by one. Evaluate matrix elements. + Complex one(1.0); + c_src = one; // 1 in every element for vector 1. + + blockPromote(c_src,err,subspace); + + + prom=zero; + for(int b=0;b Date: Tue, 9 Jun 2015 22:43:41 +0100 Subject: [PATCH 22/24] Remove extra layers of checks now it works --- lib/algorithms/CoarsenedMatrix.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h index ba417be3..96a40fe7 100644 --- a/lib/algorithms/CoarsenedMatrix.h +++ b/lib/algorithms/CoarsenedMatrix.h @@ -244,7 +244,7 @@ namespace Grid { std::cout<< iProj < Date: Tue, 9 Jun 2015 22:50:45 +0100 Subject: [PATCH 23/24] Solver converges --- tests/Test_cayley_ldop_cg.cc | 136 +++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 tests/Test_cayley_ldop_cg.cc diff --git a/tests/Test_cayley_ldop_cg.cc b/tests/Test_cayley_ldop_cg.cc new file mode 100644 index 00000000..b146002d --- /dev/null +++ b/tests/Test_cayley_ldop_cg.cc @@ -0,0 +1,136 @@ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + +double lowpass(double x) +{ + return pow(x*x+1.0,-2); +} + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + Chebyshev filter(-150.0, 150.0,16, lowpass); + ofstream csv(std::string("filter.dat"),std::ios::out|std::ios::trunc); + filter.csv(csv); + csv.close(); + + const int Ls=8; + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + // Construct a coarsened grid + std::vector clatt = GridDefaultLatt(); + for(int d=0;d seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + std::vector cseeds({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds); + + LatticeFermion src(FGrid); gaussian(RNG5,src); + LatticeFermion result(FGrid); result=zero; + LatticeFermion ref(FGrid); ref=zero; + LatticeFermion tmp(FGrid); + LatticeFermion err(FGrid); + LatticeGaugeField Umu(UGrid); gaussian(RNG4,Umu); + + + //random(RNG4,Umu); + NerscField header; + std::string file("./ckpoint_lat.400"); + readNerscConfiguration(Umu,header,file); + +#if 0 + LatticeColourMatrix U(UGrid); + U=zero; + for(int nn=0;nn2) { + pokeIndex(Umu,U,nn); + } + } +#endif + RealD mass=0.1; + RealD M5=1.0; + + DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); + + const int nbasis = 8; + std::vector subspace(nbasis,FGrid); + LatticeFermion noise(FGrid); + LatticeFermion ms(FGrid); + for(int b=0;b HermDefOp(Ddwf); + ConjugateGradient CG(1.0e-5,10000); + + for(int i=0;i<4;i++){ + + CG(HermDefOp,noise,subspace[b]); + noise = subspace[b]; + + scale = pow(norm2(noise),-0.5); + noise=noise*scale; + HermDefOp.HermOp(noise,ms); std::cout << "filt "< "< LittleDiracOperator; + typedef LittleDiracOperator::CoarseVector CoarseVector; + + LittleDiracOperator LittleDiracOp(*Coarse5d); + LittleDiracOp.CoarsenOperator(FGrid,HermIndefOp,subspace); + + CoarseVector c_src (Coarse5d); + CoarseVector c_res (Coarse5d); + gaussian(CRNG,c_src); + c_res=zero; + + std::cout << "Solving CG on coarse space "<< std::endl; + MdagMLinearOperator PosdefLdop(LittleDiracOp); + ConjugateGradient CG(1.0e-8,10000); + CG(PosdefLdop,c_src,c_res); + + std::cout << "Done "<< std::endl; + Grid_finalize(); +} From 5a53086e41d959140460cdc96cfca575547b3918 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 9 Jun 2015 22:51:02 +0100 Subject: [PATCH 24/24] Solver converges --- tests/Test_cayley_ldop_cr.cc | 138 +++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 tests/Test_cayley_ldop_cr.cc diff --git a/tests/Test_cayley_ldop_cr.cc b/tests/Test_cayley_ldop_cr.cc new file mode 100644 index 00000000..afa773b2 --- /dev/null +++ b/tests/Test_cayley_ldop_cr.cc @@ -0,0 +1,138 @@ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + +double lowpass(double x) +{ + return pow(x*x+1.0,-2); +} + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + Chebyshev filter(-150.0, 150.0,16, lowpass); + ofstream csv(std::string("filter.dat"),std::ios::out|std::ios::trunc); + filter.csv(csv); + csv.close(); + + const int Ls=8; + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + // Construct a coarsened grid + std::vector clatt = GridDefaultLatt(); + for(int d=0;d seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + std::vector cseeds({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds); + + LatticeFermion src(FGrid); gaussian(RNG5,src); + LatticeFermion result(FGrid); result=zero; + LatticeFermion ref(FGrid); ref=zero; + LatticeFermion tmp(FGrid); + LatticeFermion err(FGrid); + LatticeGaugeField Umu(UGrid); gaussian(RNG4,Umu); + + + //random(RNG4,Umu); + NerscField header; + std::string file("./ckpoint_lat.400"); + readNerscConfiguration(Umu,header,file); + +#if 0 + LatticeColourMatrix U(UGrid); + Complex cone(1.0,0.0); + for(int nn=0;nn(Umu,U,nn); + } + } +#endif + RealD mass=0.5; + RealD M5=1.8; + + DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); + + const int nbasis = 8; + std::vector subspace(nbasis,FGrid); + LatticeFermion noise(FGrid); + LatticeFermion ms(FGrid); + for(int b=0;b HermDefOp(Ddwf); + ConjugateGradient CG(1.0e-5,10000); + + for(int i=0;i<4;i++){ + + CG(HermDefOp,noise,subspace[b]); + noise = subspace[b]; + + scale = pow(norm2(noise),-0.5); + noise=noise*scale; + HermDefOp.HermOp(noise,ms); std::cout << "filt "< "< LittleDiracOperator; + typedef LittleDiracOperator::CoarseVector CoarseVector; + + LittleDiracOperator LittleDiracOp(*Coarse5d); + LittleDiracOp.CoarsenOperator(FGrid,HermIndefOp,subspace); + + CoarseVector c_src (Coarse5d); + CoarseVector c_res (Coarse5d); + gaussian(CRNG,c_src); + c_res=zero; + + std::cout << "Solving MCR on coarse space "<< std::endl; + HermitianLinearOperator HermIndefLdop(LittleDiracOp); + ConjugateResidual MCR(1.0e-8,10000); + MCR(HermIndefLdop,c_src,c_res); + + std::cout << "Done "<< std::endl; + Grid_finalize(); +}