From 1e5b015ee3d8cb268aae10f943b10985a211f2b8 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 9 Jun 2015 10:26:19 +0100 Subject: [PATCH] Some unary ops and coarse grid support --- lib/Grid.h | 1 - lib/Make.inc | 2 +- lib/algorithms/CoarsenedMatrix.h | 134 +++++++++++++++--- lib/lattice/Lattice_base.h | 4 +- .../Lattice_comparison_utils.h} | 2 - lib/lattice/Lattice_coordinate.h | 14 +- lib/lattice/Lattice_transfer.h | 23 ++- lib/lattice/Lattice_unary.h | 41 ++++++ lib/qcd/action/fermion/WilsonFermion5D.cc | 4 + lib/simd/Grid_vector_unops.h | 12 ++ lib/tensors/Tensor_class.h | 9 +- lib/tensors/Tensor_unary.h | 29 ++++ scripts/linecount | 11 +- tests/Test_cayley_coarsen_support.cc | 19 ++- tests/Test_cshift.cc | 1 - 15 files changed, 258 insertions(+), 48 deletions(-) rename lib/{Comparison.h => lattice/Lattice_comparison_utils.h} (98%) diff --git a/lib/Grid.h b/lib/Grid.h index 16530434..aec0e9ac 100644 --- a/lib/Grid.h +++ b/lib/Grid.h @@ -55,7 +55,6 @@ #include // subdir aggregate #include // subdir aggregate #include // subdir aggregate -#include #include // subdir aggregate #include // subdir aggregate #include // subdir aggregate diff --git a/lib/Make.inc b/lib/Make.inc index c38606e2..7c7e02c8 100644 --- a/lib/Make.inc +++ b/lib/Make.inc @@ -1,4 +1,4 @@ -HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Comparison.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h +HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h CCFILES=./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h index 54ea279e..006da07f 100644 --- a/lib/algorithms/CoarsenedMatrix.h +++ b/lib/algorithms/CoarsenedMatrix.h @@ -6,12 +6,42 @@ namespace Grid { class Geometry { + // int dimension; public: int npoint; - int dimension; std::vector directions ; std::vector displacements; + std::vector opdirs; + // FIXME -- don't like xposing the operator directions + // as different to the geometrical dirs + // Also don't like special casing five dim.. should pass an object in template + Geometry(int _d) { + + int base = (_d==5) ? 1:0; + + // make coarse grid stencil for 4d , not 5d + if ( _d==5 ) _d=4; + + npoint = 2*_d+1; + directions.resize(npoint); + displacements.resize(npoint); + opdirs.resize(npoint); + for(int d=0;d<_d;d++){ + directions[2*d ] = d+base; + directions[2*d+1] = d+base; + opdirs[2*d ] = d; + opdirs[2*d+1] = d; + displacements[2*d ] = +1; + displacements[2*d+1] = -1; + } + directions [2*_d]=0; + displacements[2*_d]=0; + opdirs [2*_d]=0; + } + + /* + // Original cleaner code Geometry(int _d) : dimension(_d), npoint(2*_d+1), directions(npoint), displacements(npoint) { for(int d=0;d GetDelta(int point) { std::vector delta(dimension,0); delta[directions[point]] = displacements[point]; return delta; }; + */ }; @@ -50,7 +80,10 @@ namespace Grid { Geometry geom; GridBase * _grid; CartesianStencil Stencil; + std::vector A; + std::vector Aslow; + std::vector > comm_buf; /////////////////////// @@ -63,7 +96,7 @@ namespace Grid { SimpleCompressor compressor; Stencil.HaloExchange(in,comm_buf,compressor); - //PARALLEL_FOR_LOOP +PARALLEL_FOR_LOOP for(int ss=0;ssoSites();ss++){ siteVector res = zero; siteVector tmp; @@ -101,43 +134,112 @@ namespace Grid { _grid(&CoarseGrid), geom(CoarseGrid._ndimension), Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements), - A(geom.npoint,&CoarseGrid) + A(geom.npoint,&CoarseGrid), + Aslow(geom.npoint,&CoarseGrid) { comm_buf.resize(Stencil._unified_buffer_size); }; void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase > &linop,std::vector > & subspace){ - FineField phi(FineGrid); - FineField Mphi(FineGrid); - CoarseVector Proj(Grid()); + FineField iblock(FineGrid); // contributions from within this block + FineField oblock(FineGrid); // contributions from outwith this block + + FineField phi(FineGrid); + FineField tmp(FineGrid); + FineField zz(FineGrid); zz=zero; + FineField Mphi(FineGrid); + + Lattice > coor(FineGrid); + + CoarseVector iProj(Grid()); + CoarseVector oProj(Grid()); CoarseScalar InnerProd(Grid()); // Orthogonalise the subblocks over the basis blockOrthogonalise(InnerProd,subspace); + blockProject(iProj,subspace[0],subspace); // Compute the matrix elements of linop between this orthonormal // set of vectors. + int self_stencil=-1; + for(int p=0;p_rdimensions[dir])/(Grid()->_rdimensions[dir]); - blockProject(Proj,Mphi,subspace); + LatticeCoordinate(coor,dir); - for(int ss=0;ssoSites();ss++){ - for(int j=0;joSites();ss++){ + for(int j=0;j bc(FineGrid->_ndimension,0); + + blockPick(Grid(),phi,tmp,bc); // Pick out a block + linop.Op(tmp,Mphi); // Apply big dop + blockProject(iProj,Mphi,subspace); // project it and print it + std::cout<< " Computed matrix elements from block zero only "< #include #include +#include +#include #include +#include #include #include #include - #endif diff --git a/lib/Comparison.h b/lib/lattice/Lattice_comparison_utils.h similarity index 98% rename from lib/Comparison.h rename to lib/lattice/Lattice_comparison_utils.h index ecd6ece0..d5e3b3e5 100644 --- a/lib/Comparison.h +++ b/lib/lattice/Lattice_comparison_utils.h @@ -141,7 +141,5 @@ namespace Grid { } } -#include -#include #endif diff --git a/lib/lattice/Lattice_coordinate.h b/lib/lattice/Lattice_coordinate.h index c49aed89..a83dbe49 100644 --- a/lib/lattice/Lattice_coordinate.h +++ b/lib/lattice/Lattice_coordinate.h @@ -3,20 +3,8 @@ namespace Grid { - /* - depbase=`echo Grid_main.o | sed 's|[^/]*$|.deps/&|;s|\.o$||'`;\ - icpc -DHAVE_CONFIG_H -I. -I../lib -I../lib -mmic -O3 -std=c++11 -fopenmp -MT Grid_main.o -MD -MP -MF $depbase.Tpo -c -o Grid_main.o Grid_main.cc &&\ - mv -f $depbase.Tpo $depbase.Po - ../lib/lattice/Grid_lattice_coordinate.h(25): error: no suitable user-defined conversion from "vector_type" to "const Grid::iScalar>>" exists - l._odata[o]=vI; - ^ - detected during instantiation of "void Grid::LatticeCoordinate(Grid::Lattice &, int) [with iobj=Grid::QCD::vTInteger]" at line 283 of "Grid_main.cc" - - compilation aborted for Grid_main.cc (code 2) -*/ template inline void LatticeCoordinate(Lattice &l,int mu) { - typedef typename iobj::scalar_object scalar_object; typedef typename iobj::scalar_type scalar_type; typedef typename iobj::vector_type vector_type; @@ -33,7 +21,7 @@ namespace Grid { mergebuf[i]=(Integer)gcoor[mu]; } merge(vI,mergebuf); - l._odata[o]._internal._internal._internal=vI; + l._odata[o]=vI; } }; diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 871ed893..12993680 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -166,9 +166,10 @@ template inline void blockNormalise(Lattice &ip,Lattice &fineX) { GridBase *coarse = ip._grid; + Lattice zz(fineX._grid); zz=zero; blockInnerProduct(ip,fineX,fineX); ip = rsqrt(ip); - blockZAXPY(fineX,ip,fineX,fineX); + blockZAXPY(fineX,ip,fineX,zz); } // useful in multigrid project; // Generic name : Coarsen? @@ -205,6 +206,26 @@ inline void blockSum(Lattice &coarseData,const Lattice &fineData) return; } +template +inline void blockPick(GridBase *coarse,const Lattice &unpicked,Lattice &picked,std::vector coor) +{ + GridBase * fine = unpicked._grid; + + Lattice zz(fine); + Lattice > fcoor(fine); + + zz = zero; + + picked = unpicked; + for(int d=0;d_ndimension;d++){ + LatticeCoordinate(fcoor,d); + int block= fine->_rdimensions[d] / coarse->_rdimensions[d]; + int lo = (coor[d])*block; + int hi = (coor[d]+1)*block; + picked = where( (fcoor=lo), picked, zz); + } +} template inline void blockOrthogonalise(Lattice &ip,std::vector > &Basis) diff --git a/lib/lattice/Lattice_unary.h b/lib/lattice/Lattice_unary.h index 131d7d68..fcdcc484 100644 --- a/lib/lattice/Lattice_unary.h +++ b/lib/lattice/Lattice_unary.h @@ -26,7 +26,48 @@ PARALLEL_FOR_LOOP } return ret; } + template Lattice sin(const Lattice &rhs){ + Lattice ret(rhs._grid); + ret.checkerboard = rhs.checkerboard; + conformable(ret,rhs); +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ + ret._odata[ss]=sin(rhs._odata[ss]); + } + return ret; + } + template Lattice cos(const Lattice &rhs){ + Lattice ret(rhs._grid); + ret.checkerboard = rhs.checkerboard; + conformable(ret,rhs); +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ + ret._odata[ss]=cos(rhs._odata[ss]); + } + return ret; + } + template Lattice pow(const Lattice &rhs,RealD y){ + Lattice ret(rhs._grid); + ret.checkerboard = rhs.checkerboard; + conformable(ret,rhs); +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ + ret._odata[ss]=pow(rhs._odata[ss],y); + } + return ret; + } + template Lattice mod(const Lattice &rhs,Integer y){ + Lattice ret(rhs._grid); + ret.checkerboard = rhs.checkerboard; + conformable(ret,rhs); +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ + ret._odata[ss]=mod(rhs._odata[ss],y); + } + return ret; + } + } #endif diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index 4ddd14ab..7d9ffe22 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -85,6 +85,7 @@ void WilsonFermion5D::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGau void WilsonFermion5D::DhopDir(const LatticeFermion &in, LatticeFermion &out,int dir,int disp) { assert( (disp==1)||(disp==-1) ); + assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t; WilsonCompressor compressor(DaggerNo); Stencil.HaloExchange(in,comm_buf,compressor); @@ -93,6 +94,9 @@ void WilsonFermion5D::DhopDir(const LatticeFermion &in, LatticeFermion &out,int int dirdisp = dir+skip*4; + assert(dirdisp<=7); + assert(dirdisp>=0); + PARALLEL_FOR_LOOP for(int ss=0;ssoSites();ss++){ for(int s=0;s struct ModIntFunctor { + Integer y; + ModIntFunctor(Integer _y) : y(_y) {}; + scalar operator()(const scalar &a) const { + return Integer(a)%y; + } + }; + template < class S, class V > inline Grid_simd sqrt(const Grid_simd &r) { return SimdApply(SqrtRealFunctor(),r); @@ -55,6 +63,10 @@ namespace Grid { inline Grid_simd pow(const Grid_simd &r,double y) { return SimdApply(PowRealFunctor(y),r); } + template < class S, class V > + inline Grid_simd mod(const Grid_simd &r,Integer y) { + return SimdApply(ModIntFunctor(y),r); + } } #endif diff --git a/lib/tensors/Tensor_class.h b/lib/tensors/Tensor_class.h index 3ed4f865..019e6fb1 100644 --- a/lib/tensors/Tensor_class.h +++ b/lib/tensors/Tensor_class.h @@ -90,10 +90,10 @@ public: operator ComplexD () const { return(TensorRemove(_internal)); }; operator RealD () const { return(real(TensorRemove(_internal))); } - // convert from a something to a scalar - template::value, T>::type* = nullptr > strong_inline auto operator = (T arg) -> iScalar + // convert from a something to a scalar via constructor of something arg + template::value, T>::type* = nullptr > strong_inline iScalar operator = (T arg) { - _internal = vtype(arg); + _internal = arg; return *this; } @@ -316,7 +316,8 @@ public: stream< inline auto func(const iMatrix &z) -> iMatrix inline iScalar func(const iScalar &z,scal y) \ +{\ + iScalar ret;\ + ret._internal = func(z._internal,y); \ + return ret;\ +}\ + template inline iVector func(const iVector &z,scal y) \ +{\ + iVector ret;\ + for(int c1=0;c1 inline iMatrix func(const iMatrix &z, scal y) \ +{\ + iMatrix ret;\ + for(int c1=0;c1 U(4,UGrid); - - for(int mu=0;mu(Umu,mu); + Umu=zero; + Complex cone(1.0,0.0); + for(int nn=0;nn(Umu,U[nn],nn); } - +#endif RealD mass=0.5; RealD M5=1.8; @@ -75,7 +80,7 @@ int main (int argc, char ** argv) err = result-ref; std::cout<<"Error "< subspace(nbasis,FGrid); for(int b=0;b