diff --git a/lib/Make.inc b/lib/Make.inc index ba35e25f..069e818d 100644 --- a/lib/Make.inc +++ b/lib/Make.inc @@ -1,4 +1,4 @@ -HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Comparison.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./Tensors.h ./Threads.h +HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Comparison.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./Tensors.h ./Threads.h CCFILES=./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc diff --git a/lib/Simd.h b/lib/Simd.h index 26edb4c9..f93c3070 100644 --- a/lib/Simd.h +++ b/lib/Simd.h @@ -35,6 +35,8 @@ namespace Grid { inline RealD conjugate(const RealD & r){ return r; } inline RealD real(const RealD & r){ return r; } + inline RealD sqrt(const RealD & r){ return std::sqrt(r); } + inline ComplexD conjugate(const ComplexD& r){ return(conj(r)); } inline ComplexD adj(const ComplexD& r){ return(conjugate(r)); } inline ComplexF conjugate(const ComplexF& r ){ return(conj(r)); } @@ -112,6 +114,7 @@ namespace Grid { }; #include +#include namespace Grid { // Default precision diff --git a/lib/Tensors.h b/lib/Tensors.h index fa0ca7a9..e585b116 100644 --- a/lib/Tensors.h +++ b/lib/Tensors.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #endif diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h new file mode 100644 index 00000000..54ea279e --- /dev/null +++ b/lib/algorithms/CoarsenedMatrix.h @@ -0,0 +1,146 @@ +#ifndef GRID_ALGORITHM_COARSENED_MATRIX_H +#define GRID_ALGORITHM_COARSENED_MATRIX_H + +#include + +namespace Grid { + + class Geometry { + public: + int npoint; + int dimension; + std::vector directions ; + std::vector displacements; + + Geometry(int _d) : dimension(_d), npoint(2*_d+1), directions(npoint), displacements(npoint) { + for(int d=0;d GetDelta(int point) { + std::vector delta(dimension,0); + delta[directions[point]] = displacements[point]; + return delta; + }; + + }; + + // Fine Object == (per site) type of fine field + // nbasis == number of deflation vectors + template + class CoarsenedMatrix : public SparseMatrixBase > > { + public: + + typedef iVector siteVector; + typedef Lattice > CoarseVector; + typedef Lattice > CoarseMatrix; + + typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + //////////////////// + // Data members + //////////////////// + Geometry geom; + GridBase * _grid; + CartesianStencil Stencil; + std::vector A; + std::vector > comm_buf; + + /////////////////////// + // Interface + /////////////////////// + GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know + + RealD M (const CoarseVector &in, CoarseVector &out){ + + SimpleCompressor compressor; + Stencil.HaloExchange(in,comm_buf,compressor); + + //PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ + siteVector res = zero; + siteVector tmp; + siteVector nbr; + + int offset,local,perm; + for(int point=0;point > &linop,std::vector > & subspace){ + + FineField phi(FineGrid); + FineField Mphi(FineGrid); + CoarseVector Proj(Grid()); + CoarseScalar InnerProd(Grid()); + + // Orthogonalise the subblocks over the basis + blockOrthogonalise(InnerProd,subspace); + + // Compute the matrix elements of linop between this orthonormal + // set of vectors. + for(int i=0;ioSites();ss++){ + for(int j=0;j #include #include +#include #include diff --git a/lib/lattice/Lattice_local.h b/lib/lattice/Lattice_local.h index 7b8bdf9b..ad7a1ff5 100644 --- a/lib/lattice/Lattice_local.h +++ b/lib/lattice/Lattice_local.h @@ -25,8 +25,7 @@ PARALLEL_FOR_LOOP // localInnerProduct template - inline auto localInnerProduct (const Lattice &lhs,const Lattice &rhs) - -> Lattice + inline auto localInnerProduct (const Lattice &lhs,const Lattice &rhs) -> Lattice { Lattice ret(rhs._grid); PARALLEL_FOR_LOOP diff --git a/lib/lattice/Lattice_reality.h b/lib/lattice/Lattice_reality.h index 4ad6fb7b..8c8bc17a 100644 --- a/lib/lattice/Lattice_reality.h +++ b/lib/lattice/Lattice_reality.h @@ -27,9 +27,9 @@ PARALLEL_FOR_LOOP return ret; }; - template inline auto real(const Lattice &z) -> Lattice + template inline auto real(const Lattice &z) -> Lattice { - Lattice ret(z._grid); + Lattice ret(z._grid); PARALLEL_FOR_LOOP for(int ss=0;ssoSites();ss++){ ret._odata[ss] = real(z._odata[ss]); @@ -37,9 +37,9 @@ PARALLEL_FOR_LOOP return ret; } - template inline auto imag(const Lattice &z) -> Lattice + template inline auto imag(const Lattice &z) -> Lattice { - Lattice ret(z._grid); + Lattice ret(z._grid); PARALLEL_FOR_LOOP for(int ss=0;ssoSites();ss++){ ret._odata[ss] = imag(z._odata[ss]); diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index f0046b0e..871ed893 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -56,10 +56,10 @@ PARALLEL_FOR_LOOP } -template -inline void projectBlockBasis(Lattice > &coarseData, - const Lattice &fineData, - const std::vector > &Basis) +template +inline void blockProject(Lattice > &coarseData, + const Lattice &fineData, + const std::vector > &Basis) { GridBase * fine = fineData._grid; GridBase * coarse= coarseData._grid; @@ -69,13 +69,14 @@ inline void projectBlockBasis(Lattice > &coarseData, assert( nbasis == Basis.size() ); subdivides(coarse,fine); for(int i=0;i block_r (_ndimension); for(int d=0 ; d<_ndimension;d++){ block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; + assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]); } coarseData=zero; @@ -92,20 +93,149 @@ inline void projectBlockBasis(Lattice > &coarseData, for(int i=0;i +inline void blockZAXPY(Lattice &fineZ, + const Lattice &coarseA, + const Lattice &fineX, + const Lattice &fineY) +{ + GridBase * fine = fineZ._grid; + GridBase * coarse= coarseA._grid; + + fineZ.checkerboard=fineX.checkerboard; + subdivides(coarse,fine); // require they map + conformable(fineX,fineY); + conformable(fineX,fineZ); + + int _ndimension = coarse->_ndimension; + std::vector block_r (_ndimension); + + // FIXME merge with subdivide checking routine as this is redundant + for(int d=0 ; d<_ndimension;d++){ + block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; + assert(block_r[d]*coarse->_rdimensions[d]==fine->_rdimensions[d]); + } + +PARALLEL_FOR_LOOP + for(int sf=0;sfoSites();sf++){ + + int sc; + std::vector coor_c(_ndimension); + std::vector coor_f(_ndimension); + + GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions); + for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; + GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions); + + // z = A x + y + fineZ._odata[sf]=coarseA._odata[sc]*fineX._odata[sf]+fineY._odata[sf]; + + } + + return; +} +template + inline void blockInnerProduct(Lattice &CoarseInner, + const Lattice &fineX, + const Lattice &fineY) +{ + typedef decltype(innerProduct(fineX._odata[0],fineY._odata[0])) dotp; + + GridBase *coarse(CoarseInner._grid); + GridBase *fine (fineX._grid); + + Lattice fine_inner(fine); + Lattice coarse_inner(coarse); + + fine_inner = localInnerProduct(fineX,fineY); + blockSum(coarse_inner,fine_inner); + for(int ss=0;ssoSites();ss++){ + CoarseInner._odata[ss] = coarse_inner._odata[ss]; + } +} +template +inline void blockNormalise(Lattice &ip,Lattice &fineX) +{ + GridBase *coarse = ip._grid; + blockInnerProduct(ip,fineX,fineX); + ip = rsqrt(ip); + blockZAXPY(fineX,ip,fineX,fineX); +} +// useful in multigrid project; +// Generic name : Coarsen? +template +inline void blockSum(Lattice &coarseData,const Lattice &fineData) +{ + GridBase * fine = fineData._grid; + GridBase * coarse= coarseData._grid; + + subdivides(coarse,fine); // require they map + + int _ndimension = coarse->_ndimension; + + std::vector block_r (_ndimension); + + for(int d=0 ; d<_ndimension;d++){ + block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; + } + + coarseData=zero; + for(int sf=0;sfoSites();sf++){ + + int sc; + std::vector coor_c(_ndimension); + std::vector coor_f(_ndimension); + + GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions); + for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; + GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions); + + coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf]; + + } + return; } -template -inline void promoteBlockBasis(const Lattice > &coarseData, - Lattice &fineData, - const std::vector > &Basis) +template +inline void blockOrthogonalise(Lattice &ip,std::vector > &Basis) +{ + GridBase *coarse = ip._grid; + GridBase *fine = Basis[0]._grid; + + int nbasis = Basis.size() ; + int _ndimension = coarse->_ndimension; + + // checks + subdivides(coarse,fine); + for(int i=0;i (Basis[v],ip,Basis[u],Basis[v]); + } + blockNormalise(ip,Basis[v]); + } +} + +template +inline void blockPromote(const Lattice > &coarseData, + Lattice &fineData, + const std::vector > &Basis) { GridBase * fine = fineData._grid; GridBase * coarse= coarseData._grid; @@ -146,40 +276,6 @@ inline void promoteBlockBasis(const Lattice > &coarseD } -// useful in multigrid project; -// Generic name : Coarsen? -template -inline void sumBlocks(Lattice &coarseData,const Lattice &fineData) -{ - GridBase * fine = fineData._grid; - GridBase * coarse= coarseData._grid; - - subdivides(coarse,fine); // require they map - - int _ndimension = coarse->_ndimension; - - std::vector block_r (_ndimension); - - for(int d=0 ; d<_ndimension;d++){ - block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; - } - - coarseData=zero; - for(int sf=0;sfoSites();sf++){ - - int sc; - std::vector coor_c(_ndimension); - std::vector coor_f(_ndimension); - - GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions); - for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; - GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions); - - coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf]; - - } - return; -} } #endif diff --git a/lib/lattice/Lattice_unary.h b/lib/lattice/Lattice_unary.h new file mode 100644 index 00000000..131d7d68 --- /dev/null +++ b/lib/lattice/Lattice_unary.h @@ -0,0 +1,32 @@ +#ifndef GRID_LATTICE_UNARY_H +#define GRID_LATTICE_UNARY_H + +namespace Grid { + + ////////////////////////////////////////////////////////////////////////////////////////////////////// + // avoid copy back routines for mult, mac, sub, add + ////////////////////////////////////////////////////////////////////////////////////////////////////// + template Lattice sqrt(const Lattice &rhs){ + Lattice ret(rhs._grid); + ret.checkerboard = rhs.checkerboard; + conformable(ret,rhs); +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ + ret._odata[ss]=sqrt(rhs._odata[ss]); + } + return ret; + } + template Lattice rsqrt(const Lattice &rhs){ + Lattice ret(rhs._grid); + ret.checkerboard = rhs.checkerboard; + conformable(ret,rhs); +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ + ret._odata[ss]=rsqrt(rhs._odata[ss]); + } + return ret; + } + + +} +#endif diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc index e47ff331..7db67dd7 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.cc +++ b/lib/qcd/action/fermion/CayleyFermion5D.cc @@ -165,6 +165,27 @@ namespace QCD { } } + void CayleyFermion5D::Mdir (const LatticeFermion &psi, LatticeFermion &chi,int dir,int disp){ + LatticeFermion tmp(psi._grid); + // Assemble the 5d matrix + for(int s=0;s(in,comm_buf,compressor); + + assert( (disp==1)||(disp==-1) ); + + int skip = (disp==1) ? 0 : 1; + + int dirdisp = dir+skip*4; + +PARALLEL_FOR_LOOP + for(int sss=0;sssoSites();sss++){ + DiracOpt::DhopDir(Stencil,Umu,comm_buf,sss,sss,in,out,dirdisp); + } + +}; void WilsonFermion::DhopInternal(CartesianStencil & st,LatticeDoubledGaugeField & U, const LatticeFermion &in, LatticeFermion &out,int dag) diff --git a/lib/qcd/action/fermion/WilsonFermion.h b/lib/qcd/action/fermion/WilsonFermion.h index 6040d328..60726fdd 100644 --- a/lib/qcd/action/fermion/WilsonFermion.h +++ b/lib/qcd/action/fermion/WilsonFermion.h @@ -26,7 +26,7 @@ namespace Grid { void MeooeDag (const LatticeFermion &in, LatticeFermion &out); virtual void Mooee (const LatticeFermion &in, LatticeFermion &out); // remain virtual so we virtual void MooeeDag (const LatticeFermion &in, LatticeFermion &out); // can derive Clover - virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out); // from Wilson bas + virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out); // from Wilson base virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out); // non-hermitian hopping term; half cb or both @@ -34,6 +34,10 @@ namespace Grid { void DhopOE(const LatticeFermion &in, LatticeFermion &out,int dag); void DhopEO(const LatticeFermion &in, LatticeFermion &out,int dag); + // Multigrid assistance + void Mdir (const LatticeFermion &in, LatticeFermion &out,int dir,int disp); + void DhopDir(const LatticeFermion &in, LatticeFermion &out,int dir,int disp); + /////////////////////////////////////////////////////////////// // Extra methods added by derived /////////////////////////////////////////////////////////////// diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index d22701b0..4ddd14ab 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -82,6 +82,28 @@ void WilsonFermion5D::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGau pokeIndex(Uds,U,mu+4); } } +void WilsonFermion5D::DhopDir(const LatticeFermion &in, LatticeFermion &out,int dir,int disp) +{ + assert( (disp==1)||(disp==-1) ); + + WilsonCompressor compressor(DaggerNo); + Stencil.HaloExchange(in,comm_buf,compressor); + + int skip = (disp==1) ? 0 : 1; + + int dirdisp = dir+skip*4; + +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ + for(int s=0;s > &buf, + int sF,int sU,const LatticeFermion &in, LatticeFermion &out,int dirdisp) +{ + vHalfSpinColourVector tmp; + vHalfSpinColourVector chi; + vSpinColourVector result; + vHalfSpinColourVector Uchi; + int offset,local,perm, ptype; + int ss=sF; + + offset = st._offsets [dirdisp][ss]; + local = st._is_local[dirdisp][ss]; + perm = st._permute[dirdisp][ss]; + ptype = st._permute_type[dirdisp]; + + // Xp + if(dirdisp==Xp){ + if ( local && perm ) { + spProjXp(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjXp(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[sU](Xp),&chi()); + spReconXp(result,Uchi); + } + + // Yp + if ( dirdisp==Yp ){ + if ( local && perm ) { + spProjYp(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjYp(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[sU](Yp),&chi()); + spReconYp(result,Uchi); + } + + // Zp + if ( dirdisp ==Zp ){ + if ( local && perm ) { + spProjZp(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjZp(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[sU](Zp),&chi()); + spReconZp(result,Uchi); + } + + // Tp + if ( dirdisp ==Tp ){ + if ( local && perm ) { + spProjTp(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjTp(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[sU](Tp),&chi()); + spReconTp(result,Uchi); + } + + // Xm + if ( dirdisp==Xm ){ + if ( local && perm ) { + spProjXm(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjXm(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[sU](Xm),&chi()); + spReconXm(result,Uchi); + } + + // Ym + if ( dirdisp == Ym ){ + if ( local && perm ) { + spProjYm(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjYm(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[sU](Ym),&chi()); + spReconYm(result,Uchi); + } + + // Zm + if ( dirdisp == Zm ){ + if ( local && perm ) { + spProjZm(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjZm(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[sU](Zm),&chi()); + spReconZm(result,Uchi); + } + + // Tm + if ( dirdisp==Tm ) { + if ( local && perm ) { + spProjTm(tmp,in._odata[offset]); + permute(chi,tmp,ptype); + } else if ( local ) { + spProjTm(chi,in._odata[offset]); + } else { + chi=buf[offset]; + } + mult(&Uchi(),&U._odata[sU](Tm),&chi()); + spReconTm(result,Uchi); + } + + vstream(out._odata[ss],result*(-0.5)); +} + }} diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h index cc535022..db384ddf 100644 --- a/lib/qcd/action/fermion/WilsonKernels.h +++ b/lib/qcd/action/fermion/WilsonKernels.h @@ -20,6 +20,9 @@ namespace Grid { static void DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U, std::vector > &buf, int sF,int sU,const LatticeFermion &in, LatticeFermion &out); + static void DhopDir(CartesianStencil &st,LatticeDoubledGaugeField &U, + std::vector > &buf, + int sF,int sU,const LatticeFermion &in, LatticeFermion &out,int dirdisp); }; diff --git a/lib/qcd/action/fermion/g5HermitianLinop.h b/lib/qcd/action/fermion/g5HermitianLinop.h index 9a8cc2eb..5b3411ec 100644 --- a/lib/qcd/action/fermion/g5HermitianLinop.h +++ b/lib/qcd/action/fermion/g5HermitianLinop.h @@ -11,11 +11,22 @@ class Gamma5HermitianLinearOperator : public LinearOperatorBase { public: Gamma5HermitianLinearOperator(Matrix &Mat): _Mat(Mat){}; void Op (const Field &in, Field &out){ - _Mat.M(in,out); + HermOp(in,out); } void AdjOp (const Field &in, Field &out){ - _Mat.M(in,out); + HermOp(in,out); } + void OpDiag (const Field &in, Field &out) { + Field tmp(in._grid); + _Mat.Mdiag(in,tmp); + G5R5(out,tmp); + } + void OpDir (const Field &in, Field &out,int dir,int disp) { + Field tmp(in._grid); + _Mat.Mdir(in,tmp,dir,disp); + G5R5(out,tmp); + } + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ HermOp(in,out); diff --git a/lib/simd/Grid_avx.h b/lib/simd/Grid_avx.h index ee4d03d5..ba023146 100644 --- a/lib/simd/Grid_avx.h +++ b/lib/simd/Grid_avx.h @@ -314,9 +314,9 @@ namespace Optimization { template<> inline Grid::ComplexF Reduce::operator()(__m256 in){ __m256 v1,v2; - Optimization::permute(v1,in,0); // sse 128; paired complex single + Optimization::permute(v1,in,0); // avx 256; quad complex single v1 = _mm256_add_ps(v1,in); - Optimization::permute(v2,v1,1); // avx 256; quad complex single + Optimization::permute(v2,v1,1); v1 = _mm256_add_ps(v1,v2); u256f conv; conv.v = v1; return Grid::ComplexF(conv.f[0],conv.f[1]); @@ -367,7 +367,6 @@ namespace Optimization { assert(0); } - } ////////////////////////////////////////////////////////////////////////////////////// diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index aa1abb47..9525e45e 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -78,11 +78,18 @@ namespace Grid { typedef typename RealPart < Scalar_type >::type Real; typedef Vector_type vector_type; typedef Scalar_type scalar_type; + + + typedef union conv_t_union { + Vector_type v; + Scalar_type s[sizeof(Vector_type)/sizeof(Scalar_type)]; + conv_t_union(){}; + } conv_t; + Vector_type v; static inline int Nsimd(void) { return sizeof(Vector_type)/sizeof(Scalar_type);} - Grid_simd& operator=(const Grid_simd&& rhs){v=rhs.v;return *this;}; Grid_simd& operator=(const Grid_simd& rhs){v=rhs.v;return *this;}; //faster than not declaring it and leaving to the compiler @@ -192,6 +199,27 @@ namespace Grid { return *this; } + + /////////////////////////////////////// + // Not all functions are supported + // through SIMD and must breakout to + // scalar type and back again. This + // provides support + /////////////////////////////////////// + + template friend inline Grid_simd SimdApply (const functor &func,const Grid_simd &v) { + + Grid_simd ret; + Grid_simd::conv_t conv; + + conv.v = v.v; + for(int i=0;i struct SqrtRealFunctor { + scalar operator()(const scalar &a) const { + return sqrt(real(a)); + } + }; + + template struct RSqrtRealFunctor { + scalar operator()(const scalar &a) const { + return scalar(1.0/sqrt(real(a))); + } + }; + + template struct CosRealFunctor { + scalar operator()(const scalar &a) const { + return cos(real(a)); + } + }; + + template struct SinRealFunctor { + scalar operator()(const scalar &a) const { + return sin(real(a)); + } + }; + + template struct PowRealFunctor { + double y; + PowRealFunctor(double _y) : y(_y) {}; + scalar operator()(const scalar &a) const { + return pow(real(a),y); + } + }; + + template < class S, class V > + inline Grid_simd sqrt(const Grid_simd &r) { + return SimdApply(SqrtRealFunctor(),r); + } + template < class S, class V > + inline Grid_simd rsqrt(const Grid_simd &r) { + return SimdApply(RSqrtRealFunctor(),r); + } + template < class S, class V > + inline Grid_simd cos(const Grid_simd &r) { + return SimdApply(CosRealFunctor(),r); + } + template < class S, class V > + inline Grid_simd sin(const Grid_simd &r) { + return SimdApply(CosRealFunctor(),r); + } + template < class S, class V > + inline Grid_simd pow(const Grid_simd &r,double y) { + return SimdApply(PowRealFunctor(y),r); + } + +} +#endif diff --git a/lib/tensors/Tensor_class.h b/lib/tensors/Tensor_class.h index bd1902b8..3ed4f865 100644 --- a/lib/tensors/Tensor_class.h +++ b/lib/tensors/Tensor_class.h @@ -123,6 +123,13 @@ public: typedef iScalar tensor_reduced; typedef iVector scalar_object; + template::value, T>::type* = nullptr > strong_inline auto operator = (T arg) -> iVector + { + zeroit(*this); + for(int i=0;i::TensorLevel + 1}; iVector(const Zero &z){ *this = zero; }; diff --git a/lib/tensors/Tensor_unary.h b/lib/tensors/Tensor_unary.h new file mode 100644 index 00000000..1fbec985 --- /dev/null +++ b/lib/tensors/Tensor_unary.h @@ -0,0 +1,37 @@ +#ifndef GRID_TENSOR_UNARY_H +#define GRID_TENSOR_UNARY_H +namespace Grid { + +#define UNARY_REAL(func)\ +template inline auto func(const iScalar &z) -> iScalar\ +{\ + iScalar ret;\ + ret._internal = func( (z._internal));\ + return ret;\ +}\ +template inline auto func(const iVector &z) -> iVector\ +{\ + iVector ret;\ + for(int c1=0;c1 inline auto func(const iMatrix &z) -> iMatrix\ +{\ + iMatrix ret;\ + for(int c1=0;c1 + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + + + const int Ls=8; + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + // Construct a coarsened grid + std::vector clatt = GridDefaultLatt(); + for(int d=0;d seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeFermion src(FGrid); random(RNG5,src); + LatticeFermion result(FGrid); result=zero; + LatticeFermion ref(FGrid); ref=zero; + LatticeFermion tmp(FGrid); + LatticeFermion err(FGrid); + LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + + std::vector U(4,UGrid); + + for(int mu=0;mu(Umu,mu); + } + + RealD mass=0.5; + RealD M5=1.8; + + DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + Gamma5HermitianLinearOperator HermIndefOp(Ddwf); + + HermIndefOp.Op(src,ref); + HermIndefOp.OpDiag(src,result); + + for(int d=0;d<4;d++){ + HermIndefOp.OpDir(src,tmp,d,+1); result=result+tmp; + std::cout<<"dir "< subspace(nbasis,FGrid); + + for(int b=0;b LittleDiracOp(*Coarse5d); + + LittleDiracOp.CoarsenOperator(FGrid,HermIndefOp,subspace); + + typedef Lattice > coarse_vec; + + coarse_vec c_src (Coarse5d); c_src= zero; + coarse_vec c_res (Coarse5d); + + Complex one(1.0); + c_src = one; // 1 in every element for vector 1. + + // TODO + // -- promote from subspace, check we get the vector we wanted + // -- apply ldop; check we get the same as inner product of M times big vec + // -- pick blocks one by one. Evaluate matrix elements. + + std::cout << "Multiplying by LittleDiracOp "<< std::endl; + LittleDiracOp.M(c_src,c_res); + + std::cout << "Done "<< std::endl; + Grid_finalize(); +} diff --git a/tests/Test_cf_coarsen_support.cc b/tests/Test_cf_coarsen_support.cc new file mode 100644 index 00000000..9d81a798 --- /dev/null +++ b/tests/Test_cf_coarsen_support.cc @@ -0,0 +1,87 @@ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + const int Ls=9; + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeFermion src(FGrid); random(RNG5,src); + LatticeFermion result(FGrid); result=zero; + LatticeFermion ref(FGrid); ref=zero; + LatticeFermion tmp(FGrid); + LatticeFermion err(FGrid); + LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + + std::vector U(4,UGrid); + for(int mu=0;mu(Umu,mu); + } + + RealD mass=0.1; + RealD M5=1.8; + + { + OverlapWilsonContFracTanhFermion Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0); + HermitianLinearOperator HermIndefOp(Dcf); + + HermIndefOp.Op(src,ref); + HermIndefOp.OpDiag(src,result); + + for(int d=0;d<4;d++){ + HermIndefOp.OpDir(src,tmp,d,+1); result=result+tmp; + std::cout<<"dir "< HermIndefOp(Dpf); + + HermIndefOp.Op(src,ref); + HermIndefOp.OpDiag(src,result); + + for(int d=0;d<4;d++){ + HermIndefOp.OpDir(src,tmp,d,+1); result=result+tmp; + std::cout<<"dir "< + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + const int Ls=9; + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeFermion src(FGrid); random(RNG5,src); + LatticeFermion result(FGrid); result=zero; + LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + + std::vector U(4,UGrid); + for(int mu=0;mu(Umu,mu); + } + + RealD mass=0.1; + RealD M5=1.8; + + OverlapWilsonContFracTanhFermion Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0); + + ConjugateResidual MCR(1.0e-8,10000); + + MdagMLinearOperator HermPosDefOp(Dcf); + MCR(HermPosDefOp,src,result); + + HermitianLinearOperator HermIndefOp(Dcf); + MCR(HermIndefOp,src,result); + + Grid_finalize(); +} diff --git a/tests/Test_dwf_cr_unprec.cc b/tests/Test_dwf_cr_unprec.cc new file mode 100644 index 00000000..cffa726c --- /dev/null +++ b/tests/Test_dwf_cr_unprec.cc @@ -0,0 +1,63 @@ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + + + const int Ls=8; + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeFermion src(FGrid); random(RNG5,src); + LatticeFermion result(FGrid); result=zero; + LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + + std::vector U(4,UGrid); + + for(int mu=0;mu(Umu,mu); + } + + ConjugateResidual MCR(1.0e-8,10000); + + RealD mass=0.5; + RealD M5=1.8; + DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + + MdagMLinearOperator HermOp(Ddwf); + MCR(HermOp,src,result); + + Gamma5HermitianLinearOperator g5HermOp(Ddwf); + MCR(g5HermOp,src,result); + + + Grid_finalize(); +} diff --git a/tests/Test_nersc_io.cc b/tests/Test_nersc_io.cc index 80d78291..403d2cab 100644 --- a/tests/Test_nersc_io.cc +++ b/tests/Test_nersc_io.cc @@ -83,7 +83,7 @@ int main (int argc, char ** argv) Complex l = TensorRemove(Tl); std::cout << "calculated link trace " < + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + + std::vector seeds({1,2,3,4}); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); + + LatticeFermion src(&Grid); random(pRNG,src); + RealD nrm = norm2(src); + LatticeFermion result(&Grid); result=zero; + LatticeGaugeField Umu(&Grid); random(pRNG,Umu); + + std::vector U(4,&Grid); + + double volume=1; + for(int mu=0;mu(Umu,mu); + } + + RealD mass=0.5; + WilsonFermion Dw(Umu,Grid,RBGrid,mass); + + MdagMLinearOperator HermOp(Dw); + + ConjugateResidual MCR(1.0e-8,10000); + + MCR(HermOp,src,result); + + Grid_finalize(); +}