diff --git a/lib/AlignedAllocator.h b/lib/AlignedAllocator.h index fd7bbb78..3e90371a 100644 --- a/lib/AlignedAllocator.h +++ b/lib/AlignedAllocator.h @@ -49,9 +49,9 @@ public: void deallocate(pointer __p, size_type) { #ifdef HAVE_MM_MALLOC_H - _mm_free(__p); + _mm_free((void *)__p); #else - free(__p); + free((void *)__p); #endif } void construct(pointer __p, const _Tp& __val) { }; diff --git a/lib/Make.inc b/lib/Make.inc index 11362e27..dab78b37 100644 --- a/lib/Make.inc +++ b/lib/Make.inc @@ -1,4 +1,4 @@ -HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_empty.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h +HFILES=./Cshift.h ./simd/Grid_avx.h ./simd/Grid_vector_types.h ./simd/Grid_sse4.h ./simd/Grid_avx512.h ./simd/Grid_empty.h ./simd/Grid_vector_unops.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./Tensors.h ./Algorithms.h ./communicator/Communicator_base.h ./lattice/Lattice_rng.h ./lattice/Lattice_reduction.h ./lattice/Lattice_transfer.h ./lattice/Lattice_unary.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_comparison.h ./lattice/Lattice_overload.h ./lattice/Lattice_reality.h ./lattice/Lattice_local.h ./lattice/Lattice_conformable.h ./lattice/Lattice_where.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_ET.h ./lattice/Lattice_transpose.h ./lattice/Lattice_trace.h ./Stencil.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_poke.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_class.h ./tensors/Tensor_logical.h ./tensors/Tensor_transpose.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_reality.h ./tensors/Tensor_trace.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_outer.h ./tensors/Tensor_inner.h ./tensors/Tensor_traits.h ./tensors/Tensor_Ta.h ./tensors/Tensor_unary.h ./tensors/Tensor_determinant.h ./tensors/Tensor_peek.h ./tensors/Tensor_arith.h ./tensors/Tensor_extract_merge.h ./Communicator.h ./Cartesian.h ./parallelIO/NerscIO.h ./qcd/QCD.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/SUn.h ./qcd/utils/LinalgUtils.h ./qcd/utils/CovariantCshift.h ./qcd/utils/WilsonLoops.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/spin/TwoSpinor.h ./qcd/spin/Dirac.h ./cshift/Cshift_common.h ./cshift/Cshift_none.h ./cshift/Cshift_mpi.h ./Simd.h ./GridConfig.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_red_black.h ./cartesian/Cartesian_full.h ./AlignedAllocator.h ./Lattice.h ./Threads.h ./Grid.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Zolotarev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Remez.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./algorithms/CoarsenedMatrix.h ./stencil/Lebesgue.h -CCFILES=./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc +CCFILES=./qcd/utils/SpaceTimeGrid.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/spin/Dirac.cc ./GridInit.cc ./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc diff --git a/lib/Simd.h b/lib/Simd.h index f93c3070..7da0d7ad 100644 --- a/lib/Simd.h +++ b/lib/Simd.h @@ -46,6 +46,18 @@ namespace Grid { inline ComplexF innerProduct(const ComplexF & l, const ComplexF & r) { return conjugate(l)*r; } inline RealD innerProduct(const RealD & l, const RealD & r) { return l*r; } inline RealF innerProduct(const RealF & l, const RealF & r) { return l*r; } + + inline ComplexD Reduce(const ComplexD& r){ return r; } + inline ComplexF Reduce(const ComplexF& r){ return r; } + inline RealD Reduce(const RealD& r){ return r; } + inline RealF Reduce(const RealF& r){ return r; } + + inline RealD toReal(const ComplexD& r){ return real(r); } + inline RealF toReal(const ComplexF& r){ return real(r); } + inline RealD toReal(const RealD& r){ return r; } + inline RealF toReal(const RealF& r){ return r; } + + //////////////////////////////////////////////////////////////////////////////// //Provide support functions for basic real and complex data types required by Grid diff --git a/lib/Tensors.h b/lib/Tensors.h index a2246f4a..90c9d7a8 100644 --- a/lib/Tensors.h +++ b/lib/Tensors.h @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include #include #include diff --git a/lib/cartesian/Cartesian_red_black.h b/lib/cartesian/Cartesian_red_black.h index 3a84ed49..2d885d36 100644 --- a/lib/cartesian/Cartesian_red_black.h +++ b/lib/cartesian/Cartesian_red_black.h @@ -142,7 +142,10 @@ public: // Use a reduced simd grid _simd_layout[d] = simd_layout[d]; _rdimensions[d]= _ldimensions[d]/_simd_layout[d]; - + + // all elements of a simd vector must have same checkerboard. + if ( simd_layout[d]>1 ) assert((_rdimensions[d]&0x1)==0); + _osites *= _rdimensions[d]; _isites *= _simd_layout[d]; diff --git a/lib/lattice/Lattice_ET.h b/lib/lattice/Lattice_ET.h index caa45e69..82d11218 100644 --- a/lib/lattice/Lattice_ET.h +++ b/lib/lattice/Lattice_ET.h @@ -1,4 +1,3 @@ - #ifndef GRID_LATTICE_ET_H #define GRID_LATTICE_ET_H diff --git a/lib/lattice/Lattice_base.h b/lib/lattice/Lattice_base.h index cd2fa855..0503732c 100644 --- a/lib/lattice/Lattice_base.h +++ b/lib/lattice/Lattice_base.h @@ -158,7 +158,7 @@ PARALLEL_FOR_LOOP vobj tmp = eval(ss,expr); vstream(_odata[ss] ,tmp); #else - _odata[ss]=_eval(ss,expr); + _odata[ss]=eval(ss,expr); #endif } }; @@ -196,7 +196,7 @@ PARALLEL_FOR_LOOP _odata.resize(_grid->oSites()); PARALLEL_FOR_LOOP for(int ss=0;ss<_grid->oSites();ss++){ - _odata[ss]=eval(ss,expr); + vstream(_odata[ss] ,eval(ss,expr)); } }; diff --git a/lib/lattice/Lattice_unary.h b/lib/lattice/Lattice_unary.h index 91b39007..6d72ef31 100644 --- a/lib/lattice/Lattice_unary.h +++ b/lib/lattice/Lattice_unary.h @@ -24,5 +24,18 @@ PARALLEL_FOR_LOOP return ret; } + template Lattice expMat(const Lattice &rhs, ComplexD alpha, Integer Nexp = DEFAULT_MAT_EXP){ + Lattice ret(rhs._grid); + ret.checkerboard = rhs.checkerboard; + conformable(ret,rhs); +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ + ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp); + } + return ret; + } + + + } #endif diff --git a/lib/qcd/utils/SUn.h b/lib/qcd/utils/SUn.h index 3a8b5f4d..0ede7965 100644 --- a/lib/qcd/utils/SUn.h +++ b/lib/qcd/utils/SUn.h @@ -1,7 +1,6 @@ #ifndef QCD_UTIL_SUN_H #define QCD_UTIL_SUN_H - namespace Grid { namespace QCD { @@ -13,6 +12,7 @@ public: static int su2subgroups(void) { return (ncolour*(ncolour-1))/2; } template using iSUnMatrix = iScalar > > ; + template using iSU2Matrix = iScalar > > ; ////////////////////////////////////////////////////////////////////////////////////////////////// // Types can be accessed as SU<2>::Matrix , SU<2>::vSUnMatrix, SU<2>::LatticeMatrix etc... @@ -29,6 +29,19 @@ public: typedef Lattice LatticeMatrixF; typedef Lattice LatticeMatrixD; + typedef iSU2Matrix SU2Matrix; + typedef iSU2Matrix SU2MatrixF; + typedef iSU2Matrix SU2MatrixD; + + typedef iSU2Matrix vSU2Matrix; + typedef iSU2Matrix vSU2MatrixF; + typedef iSU2Matrix vSU2MatrixD; + + typedef Lattice LatticeSU2Matrix; + typedef Lattice LatticeSU2MatrixF; + typedef Lattice LatticeSU2MatrixD; + + //////////////////////////////////////////////////////////////////////// // There are N^2-1 generators for SU(N). // @@ -122,6 +135,7 @@ public: RealD nrm = 1.0/std::sqrt(2.0*trsq); ta = ta *nrm; } + //////////////////////////////////////////////////////////////////////// // Map a su2 subgroup number to the pair of rows that are non zero //////////////////////////////////////////////////////////////////////// @@ -135,208 +149,333 @@ public: } i2=i1+1+spare; } - template - static void su2Extract(std::vector > > &r, - const Lattice > &source, - int su2_index) + + ////////////////////////////////////////////////////////////////////////////////////////// + // Pull out a subgroup and project on to real coeffs x pauli basis + ////////////////////////////////////////////////////////////////////////////////////////// + template + static void su2Extract( Lattice > &Determinant, + Lattice > &subgroup, + const Lattice > &source, + int su2_index) { GridBase *grid(source._grid); + conformable(subgroup,source); + conformable(subgroup,Determinant); + int i0,i1; + su2SubGroupIndex(i0,i1,su2_index); + +PARALLEL_FOR_LOOP + for(int ss=0;ss!=grid->oSites();ss++){ + subgroup._odata[ss]()()(0,0) = source._odata[ss]()()(i0,i0); + subgroup._odata[ss]()()(0,1) = source._odata[ss]()()(i0,i1); + subgroup._odata[ss]()()(1,0) = source._odata[ss]()()(i1,i0); + subgroup._odata[ss]()()(1,1) = source._odata[ss]()()(i1,i1); + + iSU2Matrix Sigma = subgroup._odata[ss]; + + Sigma = Sigma-adj(Sigma)+trace(adj(Sigma)); + + subgroup._odata[ss] = Sigma; + + // this should be purely real + Determinant._odata[ss] = Sigma()()(0,0)*Sigma()()(1,1) + - Sigma()()(0,1)*Sigma()()(1,0); - assert(r.size() == 4); // store in 4 real parts - - for(int i=0;i<4;i++){ - conformable(r[i],source); } - - int i1,i2; - su2SubGroupIndex(i1,i2,su2_index); - /* Compute the b(k) of A_SU(2) = b0 + i sum_k bk sigma_k */ - - // r[0] = toReal(real(peekColour(source,i1,i1)) + real(peekColour(source,i2,i2))); - // r[1] = toReal(imag(peekColour(source,i1,i2)) + imag(peekColour(source,i2,i1))); - // r[2] = toReal(real(peekColour(source,i1,i2)) - real(peekColour(source,i2,i1))); - // r[3] = toReal(imag(peekColour(source,i1,i1)) - imag(peekColour(source,i2,i2))); - r[0] = toReal(real(peekColour(source,i1,i1)) + real(peekColour(source,i2,i2))); - r[1] = toReal(imag(peekColour(source,i1,i2)) + imag(peekColour(source,i2,i1))); - r[2] = toReal(real(peekColour(source,i1,i2)) - real(peekColour(source,i2,i1))); - r[3] = toReal(imag(peekColour(source,i1,i1)) - imag(peekColour(source,i2,i2))); - } - template - static void su2Insert(const std::vector > > &r, - Lattice > &dest, + ////////////////////////////////////////////////////////////////////////////////////////// + // Set matrix to one and insert a pauli subgroup + ////////////////////////////////////////////////////////////////////////////////////////// + template + static void su2Insert( const Lattice > &subgroup, + Lattice > &dest, int su2_index) { - typedef typename Lattice >::scalar_type cplx; - typedef Lattice > Lcomplex; - GridBase * grid = dest._grid; + GridBase *grid(dest._grid); + conformable(subgroup,dest); + int i0,i1; + su2SubGroupIndex(i0,i1,su2_index); - assert(r.size() == 4); // store in 4 real parts - - Lcomplex tmp(grid); - - std::vector cr(4,grid); - for(int i=0;ioSites();ss++){ + dest._odata[ss]()()(i0,i0) = subgroup._odata[ss]()()(0,0); + dest._odata[ss]()()(i0,i1) = subgroup._odata[ss]()()(0,1); + dest._odata[ss]()()(i1,i0) = subgroup._odata[ss]()()(1,0); + dest._odata[ss]()()(i1,i1) = subgroup._odata[ss]()()(1,1); } - - int i1,i2; - su2SubGroupIndex(i1,i2,su2_index); - - cplx one (1.0,0.0); - cplx cplx_i(0.0,1.0); - - tmp = cr[0]*one+ cr[3]*cplx_i; pokeColour(dest,tmp,i1,i2); - tmp = cr[2]*one+ cr[1]*cplx_i; pokeColour(dest,tmp,i1,i2); - tmp = -cr[2]*one+ cr[1]*cplx_i; pokeColour(dest,tmp,i2,i1); - tmp = cr[0]*one- cr[3]*cplx_i; pokeColour(dest,tmp,i2,i2); } + + /////////////////////////////////////////////// + // Generate e^{ Re Tr Staple Link} dlink + // + // *** Note Staple should be appropriate linear compbination between all staples. + // *** If already by beta pass coefficient 1.0. + // *** This routine applies the additional 1/Nc factor that comes after trace in action. + // + /////////////////////////////////////////////// static void SubGroupHeatBath( GridSerialRNG &sRNG, GridParallelRNG &pRNG, - RealD beta, + RealD beta,//coeff multiplying staple in action (with no 1/Nc) LatticeMatrix &link, - const LatticeMatrix &staple, + const LatticeMatrix &barestaple, // multiplied by action coeffs so th int su2_subgroup, int nheatbath, - int& ntrials, - int& nfails, LatticeInteger &wheremask) { GridBase *grid = link._grid; - LatticeMatrix V(grid); - V = link*staple; - - std::vector r(4,grid); - std::vector a(4,grid); - su2Extract(r,V,su2_subgroup); // HERE - - LatticeReal r_l(grid); - r_l = r[0]*r[0]+r[1]*r[1]+r[2]*r[2]+r[3]*r[3]; - r_l = sqrt(r_l); - - LatticeReal ftmp(grid); - LatticeReal ftmp1(grid); - LatticeReal ftmp2(grid); - LatticeReal one (grid); one = 1.0; - LatticeReal zz (grid); zz = zero; - LatticeReal recip(grid); recip = one/r_l; - - Real machine_epsilon= 1.0e-14; - - ftmp = where(r_l>machine_epsilon,recip,one); - a[0] = where(r_l>machine_epsilon, r[0] * ftmp , one); - a[1] = where(r_l>machine_epsilon, -(r[1] * ftmp), zz); - a[2] = where(r_l>machine_epsilon, -(r[2] * ftmp), zz); - a[3] = where(r_l>machine_epsilon, -(r[3] * ftmp), zz); - - r_l *= beta / ncolour; - - ftmp1 = where(wheremask,one,zz); - Real num_sites = TensorRemove(sum(ftmp1)); - - Integer itrials = (Integer)num_sites; - ntrials = 0; - nfails = 0; - - LatticeInteger lupdate(grid); - LatticeInteger lbtmp(grid); - LatticeInteger lbtmp2(grid); lbtmp2=zero; - - int n_done = 0; - int nhb = 0; - - r[0] = a[0]; - lupdate = 1; - - LatticeReal ones (grid); ones = 1.0; - LatticeReal zeros(grid); zeros=zero; - + int ntrials=0; + int nfails=0; const RealD twopi=2.0*M_PI; - while ( nhb < nheatbath && n_done < num_sites ) { - ntrials += itrials; + LatticeMatrix staple(grid); - random(pRNG,r[1]); - std::cout<<"RANDOM SPARSE FLOAT r[1]"< b(4,grid); - b[0] = r[0]*a[0] - r[1]*a[1] - r[2]*a[2] - r[3]*a[3]; - b[1] = r[0]*a[1] + r[1]*a[0] - r[2]*a[3] + r[3]*a[2]; - b[2] = r[0]*a[2] + r[2]*a[0] - r[3]*a[1] + r[1]*a[3]; - b[3] = r[0]*a[3] + r[3]*a[0] - r[1]*a[2] + r[2]*a[1]; + Tr h Sigma = h_i Sigma_j Tr (sigma_i sigma_j) = h_i Sigma_j 2 delta_ij + Re Tr h Sigma = 2 h_j Re Sigma_j - // - // Now fill an SU(3) matrix V with the SU(2) submatrix su2_index - // parametrized by a_k in the sigma matrix basis. - // +Normalised re Sigma_j = xi u_j + +With u_j a unit vector and U can be in SU(2); + +Re Tr h Sigma = 2 h_j Re Sigma_j = 2 xi (h.u) + +4xi^2 = Det [ Sig - Sig^dag + 1 Tr Sigdag] + u = 1/2xi [ Sig - Sig^dag + 1 Tr Sigdag] + + xi = sqrt(Det)/2; + +Write a= u h in SU(2); a has pauli decomp a_j; + +Note: Product b' xi is unvariant because scaling Sigma leaves + normalised vector "u" fixed; Can rescale Sigma so b' = 1. + */ + + //////////////////////////////////////////////////////// + // Real part of Pauli decomposition + // Note a subgroup can project to zero in cold start + //////////////////////////////////////////////////////// + su2Extract(udet,u,V,su2_subgroup); + + ////////////////////////////////////////////////////// + // Normalising this vector if possible; else identity + ////////////////////////////////////////////////////// + LatticeComplex xi(grid); + + LatticeSU2Matrix lident(grid); + + SU2Matrix ident = Complex(1.0); + SU2Matrix pauli1; SU<2>::generator(0,pauli1); + SU2Matrix pauli2; SU<2>::generator(1,pauli2); + SU2Matrix pauli3; SU<2>::generator(2,pauli3); + pauli1 = timesI(pauli1)*2.0; + pauli2 = timesI(pauli2)*2.0; + pauli3 = timesI(pauli3)*2.0; + + LatticeComplex cone(grid); + LatticeReal adet(grid); + adet = abs(toReal(udet)); + lident=Complex(1.0); + cone =Complex(1.0); + Real machine_epsilon=1.0e-7; + u = where(adet>machine_epsilon,u,lident); + udet= where(adet>machine_epsilon,udet,cone); + + xi = 0.5*sqrt(udet); //4xi^2 = Det [ Sig - Sig^dag + 1 Tr Sigdag] + u = 0.5*u*pow(xi,-1.0); // u = 1/2xi [ Sig - Sig^dag + 1 Tr Sigdag] + + // Debug test for sanity + uinv=adj(u); + b=u*uinv-1.0; + assert(norm2(b)<1.0e-4); + + /* +Measure: Haar measure dh has d^4a delta(1-|a^2|) +In polars: + da = da0 r^2 sin theta dr dtheta dphi delta( 1 - r^2 -a0^2) + = da0 r^2 sin theta dr dtheta dphi delta( (sqrt(1-a0^) - r)(sqrt(1-a0^) + r) ) + = da0 r/2 sin theta dr dtheta dphi delta( (sqrt(1-a0^) - r) ) + +Action factor Q(h) dh = e^-S[h] dh = e^{ xi Tr uh} dh // beta enters through xi + = e^{2 xi (h.u)} dh + = e^{2 xi h0u0}.e^{2 xi h1u1}.e^{2 xi h2u2}.e^{2 xi h3u3} dh + +Therefore for each site, take xi for that site +i) generate |a0|<1 with dist + (1-a0^2)^0.5 e^{2 xi a0 } da0 + +Take alpha = 2 xi = 2 xi [ recall 2 beta/Nc unmod staple norm]; hence 2.0/Nc factor in Chroma ] +A. Generate two uniformly distributed pseudo-random numbers R and R', R'', R''' in the unit interval; +B. Set X = -(ln R)/alpha, X' =-(ln R')/alpha; +C. Set C = cos^2(2pi R"), with R" another uniform random number in [0,1] ; +D. Set A = XC; +E. Let d = X'+A; +F. If R'''^2 :> 1 - 0.5 d, go back to A; +G. Set a0 = 1 - d; + +Note that in step D setting B ~ X - A and using B in place of A in step E will generate a second independent a 0 value. + */ + + ///////////////////////////////////////////////////////// + // count the number of sites by picking "1"'s out of hat + ///////////////////////////////////////////////////////// + Integer hit=0; + LatticeReal rtmp(grid); + rtmp=where(wheremask,rones,rzeros); + RealD numSites = sum(rtmp); + RealD numAccepted; + LatticeInteger Accepted(grid); Accepted = zero; + LatticeInteger newlyAccepted(grid); + + std::vector xr(4,grid); + std::vector a(4,grid); + LatticeReal d(grid); d=zero; + LatticeReal alpha(grid); + + // std::cout<<"xi "< 1 - 0.5 d, go back to A; + LatticeReal thresh(grid); thresh = 1.0-d*0.5; + xrsq = xr[0]*xr[0]; + LatticeInteger ione(grid); ione = 1; + LatticeInteger izero(grid); izero=zero; + + newlyAccepted = where ( xrsq < thresh,ione,izero); + Accepted = where ( newlyAccepted, newlyAccepted,Accepted); + Accepted = where ( wheremask, Accepted,izero); + + // FIXME need an iSum for integer to avoid overload on return type?? + rtmp=where(Accepted,rones,rzeros); + numAccepted = sum(rtmp); + + hit++; + + } while ( (numAccepted < numSites) && ( hit < nheatbath) ); + + // G. Set a0 = 1 - d; + a[0]=zero; + a[0]=where(wheremask,1.0-d,a[0]); + + ////////////////////////////////////////// + // ii) generate a_i uniform on two sphere radius (1-a0^2)^0.5 + ////////////////////////////////////////// + + LatticeReal a123mag(grid); + a123mag = sqrt(abs(1.0-a[0]*a[0])); + + LatticeReal cos_theta (grid); + LatticeReal sin_theta (grid); + LatticeReal phi (grid); + + random(pRNG,phi); phi = phi * twopi; // uniform in [0,2pi] + random(pRNG,cos_theta); cos_theta=(cos_theta*2.0)-1.0; // uniform in [-1,1] + sin_theta = sqrt(abs(1.0-cos_theta*cos_theta)); + + a[1] = a123mag * sin_theta * cos(phi); + a[2] = a123mag * sin_theta * sin(phi); + a[3] = a123mag * cos_theta; + + ua = toComplex(a[0])*ident + + toComplex(a[1])*pauli1 + + toComplex(a[2])*pauli2 + + toComplex(a[3])*pauli3; + + b = 1.0; + b = where(wheremask,uinv*ua,b); su2Insert(b,V,su2_subgroup); - // U = V*U - LatticeMatrix tmp(grid); - tmp = V * link; + //mask the assignment back based on Accptance + link = where(Accepted,V * link,link); - //mask the assignment back - link = where(wheremask,tmp,link); + ////////////////////////////// + // Debug Checks + // SU2 check + LatticeSU2Matrix check(grid); // rotated matrix after hb + u = zero; + check = ua * adj(ua) - 1.0; + check = where(Accepted,check,u); + assert(norm2(check)<1.0e-4); + check = b*adj(b)-1.0; + check = where(Accepted,check,u); + assert(norm2(check)<1.0e-4); + + LatticeMatrix Vcheck(grid); + Vcheck = zero; + Vcheck = where(Accepted,V*adj(V) - 1.0,Vcheck); + // std::cout << "SU3 check " < &U) { LatticeComplex sitePlaq(U[0]._grid); + Plaq=zero; for(int mu=1;mu U(4,Umu._grid); + for(int mu=0;mu(Umu,mu); } @@ -72,13 +74,15 @@ public: ////////////////////////////////////////////////// static void Staple(GaugeMat &staple,const GaugeLorentz &Umu,int mu){ - std::vector U(4,Umu._grid); + GridBase *grid = Umu._grid; + + std::vector U(4,grid); for(int d=0;d(Umu,d); } staple = zero; - GaugeMat tmp(U[0]._grid); + GaugeMat tmp(grid); for(int nu=0;nu +// Time-stamp: <2015-06-16 23:30:41 neo> //---------------------------------------------------------------------- #include @@ -248,7 +248,7 @@ namespace Optimization { return _mm256_set_m128i(a1,a0); #endif #if defined (AVX2) - return _mm256_mul_epi32(a,b); + return _mm256_mullo_epi32(a,b); #endif } diff --git a/lib/simd/Grid_sse4.h b/lib/simd/Grid_sse4.h index 3a54c0b3..62516201 100644 --- a/lib/simd/Grid_sse4.h +++ b/lib/simd/Grid_sse4.h @@ -4,7 +4,7 @@ Using intrinsics */ -// Time-stamp: <2015-06-09 14:24:01 neo> +// Time-stamp: <2015-06-16 23:27:54 neo> //---------------------------------------------------------------------- #include @@ -97,7 +97,7 @@ namespace Optimization { } // Integer inline __m128i operator()(Integer *a){ - return _mm_set_epi32(a[0],a[1],a[2],a[3]); + return _mm_set_epi32(a[3],a[2],a[1],a[0]); } @@ -181,7 +181,7 @@ namespace Optimization { } // Integer inline __m128i operator()(__m128i a, __m128i b){ - return _mm_mul_epi32(a,b); + return _mm_mullo_epi32(a,b); } }; diff --git a/lib/simd/Grid_vector_unops.h b/lib/simd/Grid_vector_unops.h index 3ba77d82..37d1f6de 100644 --- a/lib/simd/Grid_vector_unops.h +++ b/lib/simd/Grid_vector_unops.h @@ -100,7 +100,7 @@ namespace Grid { } template < class S, class V > inline Grid_simd sin(const Grid_simd &r) { - return SimdApply(CosRealFunctor(),r); + return SimdApply(SinRealFunctor(),r); } template < class S, class V > inline Grid_simd log(const Grid_simd &r) { diff --git a/lib/tensors/Tensor_Ta.h b/lib/tensors/Tensor_Ta.h index f06dbf04..e49ede35 100644 --- a/lib/tensors/Tensor_Ta.h +++ b/lib/tensors/Tensor_Ta.h @@ -1,5 +1,7 @@ #ifndef GRID_MATH_TA_H #define GRID_MATH_TA_H + + namespace Grid { /////////////////////////////////////////////// @@ -36,7 +38,8 @@ namespace Grid { /////////////////////////////////////////////// - // ProjectOnGroup function for scalar, vector, matrix + // ProjectOnGroup function for scalar, vector, matrix + // Projects on orthogonal, unitary group /////////////////////////////////////////////// @@ -59,19 +62,22 @@ namespace Grid { { // need a check for the group type? iMatrix ret(arg); - double nrm; + RealD nrm; + vtype inner; for(int c1=0;c1 inline iScalar Exponentiate(const iScalar&r, double alpha, int Nexp) - { - iScalar ret; - ret._internal = Exponentiate(r._internal, alpha, Nexp); - return ret; - } - - - template::TensorLevel == 0 >::type * =nullptr> - inline iMatrix Exponentiate(const iMatrix &arg, double alpha, int Nexp) - { - iMatrix unit(1.0); - iMatrix temp(unit); - - for(int i=Nexp; i>=1;--i){ - temp *= alpha/double(i); - temp = unit + temp*arg; - } - return ProjectOnGroup(temp); - } - - } #endif diff --git a/lib/tensors/Tensor_determinant.h b/lib/tensors/Tensor_determinant.h new file mode 100644 index 00000000..7c8af9ed --- /dev/null +++ b/lib/tensors/Tensor_determinant.h @@ -0,0 +1,45 @@ +#ifndef GRID_MATH_DET_H +#define GRID_MATH_DET_H +namespace Grid { + /////////////////////////////////////////////// + // Determinant function for scalar, vector, matrix + /////////////////////////////////////////////// + inline ComplexF Determinant( const ComplexF &arg){ return arg;} + inline ComplexD Determinant( const ComplexD &arg){ return arg;} + inline RealF Determinant( const RealF &arg){ return arg;} + inline RealD Determinant( const RealD &arg){ return arg;} + + template inline auto Determinant(const iScalar&r) -> iScalar + { + iScalar ret; + ret._internal = Determinant(r._internal); + return ret; + } + + template::TensorLevel == 0 >::type * =nullptr> + inline iScalar Determinant(const iMatrix &arg) + { + iMatrix ret(arg); + iScalar det = vtype(1.0); + /* Conversion of matrix to upper triangular */ + for(int i = 0; i < N; i++){ + for(int j = 0; j < N; j++){ + if(j>i){ + vtype ratio = ret._internal[j][i]/ret._internal[i][i]; + for(int k = 0; k < N; k++){ + ret._internal[j][k] -= ratio * ret._internal[i][k]; + } + } + } + } + + for(int i = 0; i < N; i++) + det *= ret._internal[i][i]; + + return det; + } + + + +} +#endif diff --git a/lib/tensors/Tensor_exp.h b/lib/tensors/Tensor_exp.h new file mode 100644 index 00000000..f8437116 --- /dev/null +++ b/lib/tensors/Tensor_exp.h @@ -0,0 +1,37 @@ +#ifndef GRID_MATH_EXP_H +#define GRID_MATH_EXP_H + +#define DEFAULT_MAT_EXP 12 + +namespace Grid { + + /////////////////////////////////////////////// + // Exponentiate function for scalar, vector, matrix + /////////////////////////////////////////////// + + + template inline iScalar Exponentiate(const iScalar&r, ComplexD alpha , Integer Nexp = DEFAULT_MAT_EXP) + { + iScalar ret; + ret._internal = Exponentiate(r._internal, alpha, Nexp); + return ret; + } + + + template::TensorLevel == 0 >::type * =nullptr> + inline iMatrix Exponentiate(const iMatrix &arg, ComplexD alpha , Integer Nexp = DEFAULT_MAT_EXP ) + { + iMatrix unit(1.0); + iMatrix temp(unit); + + for(int i=Nexp; i>=1;--i){ + temp *= alpha/ComplexD(i); + temp = unit + temp*arg; + } + return ProjectOnGroup(temp);//maybe not strictly necessary + } + + + +} +#endif diff --git a/lib/tensors/Tensor_extract_merge.h b/lib/tensors/Tensor_extract_merge.h index cea9bb64..21ffd778 100644 --- a/lib/tensors/Tensor_extract_merge.h +++ b/lib/tensors/Tensor_extract_merge.h @@ -57,7 +57,7 @@ inline void extract(typename std::enable_if::value, const v extracted[i]=buf[i*s]; for(int ii=1;ii pseeds({1,2,3,4,5}); // once I caught a fish alive - std::vector sseeds({6,7,8,9,10});// then i let it go again - GridParallelRNG pRNG(grid); pRNG.SeedFixedIntegers(pseeds); - GridSerialRNG sRNG; sRNG.SeedFixedIntegers(sseeds); - - // SU3 colour operatoions - LatticeColourMatrix link(grid); - LatticeColourMatrix staple(grid); - int mu=0; - - // Get Staple - ColourWilsonLoops::Staple(staple,Umu,mu); - // Get Link - link = peekIndex(Umu,mu); - - // Apply heatbath to the link - RealD beta=6.0; - int subgroup=0; - int nhb=1; - int trials=0; - int fails=0; - - LatticeInteger one(rbGrid); one = 1; // fill with ones - LatticeInteger mask(grid); mask= zero; - one.checkerboard=Even; - setCheckerboard(mask,one); - - // update Even checkerboard - - SU3::SubGroupHeatBath(sRNG,pRNG,beta,link,staple,subgroup, - nhb,trials,fails,mask); - - Grid_finalize(); } diff --git a/tests/Test_main.cc b/tests/Test_main.cc index daa6c3aa..8eecf0c3 100644 --- a/tests/Test_main.cc +++ b/tests/Test_main.cc @@ -1,9 +1,5 @@ #include "Grid.h" -//DEBUG -#ifdef SSE4 -#include "simd/Grid_vector_types.h" -#endif using namespace std; using namespace Grid; @@ -216,16 +212,27 @@ int main (int argc, char ** argv) scm=transposeIndex<1>(scm); - //random(SerialRNG, cm); - //std::cout << cm << std::endl; + random(SerialRNG, cm); + std::cout << cm << std::endl; cm = Ta(cm); - //TComplex tracecm= trace(cm); - //std::cout << cm << " "<< tracecm << std::endl; + TComplex tracecm= trace(cm); + std::cout << cm << std::endl; + + + cm = Exponentiate(cm, 1.0, 12); + std::cout << cm << " " << std::endl; + Complex det = Determinant(cm); + std::cout << "determinant: " << det << std::endl; cm = ProjectOnGroup(cm); + std::cout << cm << " " << std::endl; + cm = ProjectOnGroup(cm); + std::cout << cm << " " << std::endl; + + det = Determinant(cm); + std::cout << "determinant: " << det << std::endl; - cm = Exponentiate(cm, 1.0, 10); // Foo = Foo+scalar; // LatticeColourMatrix+Scalar // Foo = Foo*scalar; // LatticeColourMatrix*Scalar @@ -237,6 +244,9 @@ int main (int argc, char ** argv) LatticeComplex trscMat(&Fine); trscMat = trace(scMat); // Trace + // Exponentiate test + cMat = expMat(cMat, ComplexD(1.0, 0.0)); + // LatticeComplex trlcMat(&Fine); // trlcMat = trace(lcMat); // Trace involving iVector - now generates error @@ -296,10 +306,12 @@ int main (int argc, char ** argv) LatticeInteger coor(&Fine); LatticeCoordinate(coor,d); lex = lex + coor*mm[d]; + } Bar = zero; Bar = where(lex coor(4); for(coor[3]=0;coor[3]