diff --git a/lib/AlignedAllocator.h b/lib/AlignedAllocator.h index fd7bbb78..3e90371a 100644 --- a/lib/AlignedAllocator.h +++ b/lib/AlignedAllocator.h @@ -49,9 +49,9 @@ public: void deallocate(pointer __p, size_type) { #ifdef HAVE_MM_MALLOC_H - _mm_free(__p); + _mm_free((void *)__p); #else - free(__p); + free((void *)__p); #endif } void construct(pointer __p, const _Tp& __val) { }; diff --git a/lib/cartesian/Cartesian_red_black.h b/lib/cartesian/Cartesian_red_black.h index 3a84ed49..2d885d36 100644 --- a/lib/cartesian/Cartesian_red_black.h +++ b/lib/cartesian/Cartesian_red_black.h @@ -142,7 +142,10 @@ public: // Use a reduced simd grid _simd_layout[d] = simd_layout[d]; _rdimensions[d]= _ldimensions[d]/_simd_layout[d]; - + + // all elements of a simd vector must have same checkerboard. + if ( simd_layout[d]>1 ) assert((_rdimensions[d]&0x1)==0); + _osites *= _rdimensions[d]; _isites *= _simd_layout[d]; diff --git a/lib/lattice/Lattice_base.h b/lib/lattice/Lattice_base.h index cd2fa855..0503732c 100644 --- a/lib/lattice/Lattice_base.h +++ b/lib/lattice/Lattice_base.h @@ -158,7 +158,7 @@ PARALLEL_FOR_LOOP vobj tmp = eval(ss,expr); vstream(_odata[ss] ,tmp); #else - _odata[ss]=_eval(ss,expr); + _odata[ss]=eval(ss,expr); #endif } }; @@ -196,7 +196,7 @@ PARALLEL_FOR_LOOP _odata.resize(_grid->oSites()); PARALLEL_FOR_LOOP for(int ss=0;ss<_grid->oSites();ss++){ - _odata[ss]=eval(ss,expr); + vstream(_odata[ss] ,eval(ss,expr)); } }; diff --git a/lib/qcd/utils/SUn.h b/lib/qcd/utils/SUn.h index 3a8b5f4d..0ede7965 100644 --- a/lib/qcd/utils/SUn.h +++ b/lib/qcd/utils/SUn.h @@ -1,7 +1,6 @@ #ifndef QCD_UTIL_SUN_H #define QCD_UTIL_SUN_H - namespace Grid { namespace QCD { @@ -13,6 +12,7 @@ public: static int su2subgroups(void) { return (ncolour*(ncolour-1))/2; } template using iSUnMatrix = iScalar > > ; + template using iSU2Matrix = iScalar > > ; ////////////////////////////////////////////////////////////////////////////////////////////////// // Types can be accessed as SU<2>::Matrix , SU<2>::vSUnMatrix, SU<2>::LatticeMatrix etc... @@ -29,6 +29,19 @@ public: typedef Lattice LatticeMatrixF; typedef Lattice LatticeMatrixD; + typedef iSU2Matrix SU2Matrix; + typedef iSU2Matrix SU2MatrixF; + typedef iSU2Matrix SU2MatrixD; + + typedef iSU2Matrix vSU2Matrix; + typedef iSU2Matrix vSU2MatrixF; + typedef iSU2Matrix vSU2MatrixD; + + typedef Lattice LatticeSU2Matrix; + typedef Lattice LatticeSU2MatrixF; + typedef Lattice LatticeSU2MatrixD; + + //////////////////////////////////////////////////////////////////////// // There are N^2-1 generators for SU(N). // @@ -122,6 +135,7 @@ public: RealD nrm = 1.0/std::sqrt(2.0*trsq); ta = ta *nrm; } + //////////////////////////////////////////////////////////////////////// // Map a su2 subgroup number to the pair of rows that are non zero //////////////////////////////////////////////////////////////////////// @@ -135,208 +149,333 @@ public: } i2=i1+1+spare; } - template - static void su2Extract(std::vector > > &r, - const Lattice > &source, - int su2_index) + + ////////////////////////////////////////////////////////////////////////////////////////// + // Pull out a subgroup and project on to real coeffs x pauli basis + ////////////////////////////////////////////////////////////////////////////////////////// + template + static void su2Extract( Lattice > &Determinant, + Lattice > &subgroup, + const Lattice > &source, + int su2_index) { GridBase *grid(source._grid); + conformable(subgroup,source); + conformable(subgroup,Determinant); + int i0,i1; + su2SubGroupIndex(i0,i1,su2_index); + +PARALLEL_FOR_LOOP + for(int ss=0;ss!=grid->oSites();ss++){ + subgroup._odata[ss]()()(0,0) = source._odata[ss]()()(i0,i0); + subgroup._odata[ss]()()(0,1) = source._odata[ss]()()(i0,i1); + subgroup._odata[ss]()()(1,0) = source._odata[ss]()()(i1,i0); + subgroup._odata[ss]()()(1,1) = source._odata[ss]()()(i1,i1); + + iSU2Matrix Sigma = subgroup._odata[ss]; + + Sigma = Sigma-adj(Sigma)+trace(adj(Sigma)); + + subgroup._odata[ss] = Sigma; + + // this should be purely real + Determinant._odata[ss] = Sigma()()(0,0)*Sigma()()(1,1) + - Sigma()()(0,1)*Sigma()()(1,0); - assert(r.size() == 4); // store in 4 real parts - - for(int i=0;i<4;i++){ - conformable(r[i],source); } - - int i1,i2; - su2SubGroupIndex(i1,i2,su2_index); - /* Compute the b(k) of A_SU(2) = b0 + i sum_k bk sigma_k */ - - // r[0] = toReal(real(peekColour(source,i1,i1)) + real(peekColour(source,i2,i2))); - // r[1] = toReal(imag(peekColour(source,i1,i2)) + imag(peekColour(source,i2,i1))); - // r[2] = toReal(real(peekColour(source,i1,i2)) - real(peekColour(source,i2,i1))); - // r[3] = toReal(imag(peekColour(source,i1,i1)) - imag(peekColour(source,i2,i2))); - r[0] = toReal(real(peekColour(source,i1,i1)) + real(peekColour(source,i2,i2))); - r[1] = toReal(imag(peekColour(source,i1,i2)) + imag(peekColour(source,i2,i1))); - r[2] = toReal(real(peekColour(source,i1,i2)) - real(peekColour(source,i2,i1))); - r[3] = toReal(imag(peekColour(source,i1,i1)) - imag(peekColour(source,i2,i2))); - } - template - static void su2Insert(const std::vector > > &r, - Lattice > &dest, + ////////////////////////////////////////////////////////////////////////////////////////// + // Set matrix to one and insert a pauli subgroup + ////////////////////////////////////////////////////////////////////////////////////////// + template + static void su2Insert( const Lattice > &subgroup, + Lattice > &dest, int su2_index) { - typedef typename Lattice >::scalar_type cplx; - typedef Lattice > Lcomplex; - GridBase * grid = dest._grid; + GridBase *grid(dest._grid); + conformable(subgroup,dest); + int i0,i1; + su2SubGroupIndex(i0,i1,su2_index); - assert(r.size() == 4); // store in 4 real parts - - Lcomplex tmp(grid); - - std::vector cr(4,grid); - for(int i=0;ioSites();ss++){ + dest._odata[ss]()()(i0,i0) = subgroup._odata[ss]()()(0,0); + dest._odata[ss]()()(i0,i1) = subgroup._odata[ss]()()(0,1); + dest._odata[ss]()()(i1,i0) = subgroup._odata[ss]()()(1,0); + dest._odata[ss]()()(i1,i1) = subgroup._odata[ss]()()(1,1); } - - int i1,i2; - su2SubGroupIndex(i1,i2,su2_index); - - cplx one (1.0,0.0); - cplx cplx_i(0.0,1.0); - - tmp = cr[0]*one+ cr[3]*cplx_i; pokeColour(dest,tmp,i1,i2); - tmp = cr[2]*one+ cr[1]*cplx_i; pokeColour(dest,tmp,i1,i2); - tmp = -cr[2]*one+ cr[1]*cplx_i; pokeColour(dest,tmp,i2,i1); - tmp = cr[0]*one- cr[3]*cplx_i; pokeColour(dest,tmp,i2,i2); } + + /////////////////////////////////////////////// + // Generate e^{ Re Tr Staple Link} dlink + // + // *** Note Staple should be appropriate linear compbination between all staples. + // *** If already by beta pass coefficient 1.0. + // *** This routine applies the additional 1/Nc factor that comes after trace in action. + // + /////////////////////////////////////////////// static void SubGroupHeatBath( GridSerialRNG &sRNG, GridParallelRNG &pRNG, - RealD beta, + RealD beta,//coeff multiplying staple in action (with no 1/Nc) LatticeMatrix &link, - const LatticeMatrix &staple, + const LatticeMatrix &barestaple, // multiplied by action coeffs so th int su2_subgroup, int nheatbath, - int& ntrials, - int& nfails, LatticeInteger &wheremask) { GridBase *grid = link._grid; - LatticeMatrix V(grid); - V = link*staple; - - std::vector r(4,grid); - std::vector a(4,grid); - su2Extract(r,V,su2_subgroup); // HERE - - LatticeReal r_l(grid); - r_l = r[0]*r[0]+r[1]*r[1]+r[2]*r[2]+r[3]*r[3]; - r_l = sqrt(r_l); - - LatticeReal ftmp(grid); - LatticeReal ftmp1(grid); - LatticeReal ftmp2(grid); - LatticeReal one (grid); one = 1.0; - LatticeReal zz (grid); zz = zero; - LatticeReal recip(grid); recip = one/r_l; - - Real machine_epsilon= 1.0e-14; - - ftmp = where(r_l>machine_epsilon,recip,one); - a[0] = where(r_l>machine_epsilon, r[0] * ftmp , one); - a[1] = where(r_l>machine_epsilon, -(r[1] * ftmp), zz); - a[2] = where(r_l>machine_epsilon, -(r[2] * ftmp), zz); - a[3] = where(r_l>machine_epsilon, -(r[3] * ftmp), zz); - - r_l *= beta / ncolour; - - ftmp1 = where(wheremask,one,zz); - Real num_sites = TensorRemove(sum(ftmp1)); - - Integer itrials = (Integer)num_sites; - ntrials = 0; - nfails = 0; - - LatticeInteger lupdate(grid); - LatticeInteger lbtmp(grid); - LatticeInteger lbtmp2(grid); lbtmp2=zero; - - int n_done = 0; - int nhb = 0; - - r[0] = a[0]; - lupdate = 1; - - LatticeReal ones (grid); ones = 1.0; - LatticeReal zeros(grid); zeros=zero; - + int ntrials=0; + int nfails=0; const RealD twopi=2.0*M_PI; - while ( nhb < nheatbath && n_done < num_sites ) { - ntrials += itrials; + LatticeMatrix staple(grid); - random(pRNG,r[1]); - std::cout<<"RANDOM SPARSE FLOAT r[1]"< b(4,grid); - b[0] = r[0]*a[0] - r[1]*a[1] - r[2]*a[2] - r[3]*a[3]; - b[1] = r[0]*a[1] + r[1]*a[0] - r[2]*a[3] + r[3]*a[2]; - b[2] = r[0]*a[2] + r[2]*a[0] - r[3]*a[1] + r[1]*a[3]; - b[3] = r[0]*a[3] + r[3]*a[0] - r[1]*a[2] + r[2]*a[1]; + Tr h Sigma = h_i Sigma_j Tr (sigma_i sigma_j) = h_i Sigma_j 2 delta_ij + Re Tr h Sigma = 2 h_j Re Sigma_j - // - // Now fill an SU(3) matrix V with the SU(2) submatrix su2_index - // parametrized by a_k in the sigma matrix basis. - // +Normalised re Sigma_j = xi u_j + +With u_j a unit vector and U can be in SU(2); + +Re Tr h Sigma = 2 h_j Re Sigma_j = 2 xi (h.u) + +4xi^2 = Det [ Sig - Sig^dag + 1 Tr Sigdag] + u = 1/2xi [ Sig - Sig^dag + 1 Tr Sigdag] + + xi = sqrt(Det)/2; + +Write a= u h in SU(2); a has pauli decomp a_j; + +Note: Product b' xi is unvariant because scaling Sigma leaves + normalised vector "u" fixed; Can rescale Sigma so b' = 1. + */ + + //////////////////////////////////////////////////////// + // Real part of Pauli decomposition + // Note a subgroup can project to zero in cold start + //////////////////////////////////////////////////////// + su2Extract(udet,u,V,su2_subgroup); + + ////////////////////////////////////////////////////// + // Normalising this vector if possible; else identity + ////////////////////////////////////////////////////// + LatticeComplex xi(grid); + + LatticeSU2Matrix lident(grid); + + SU2Matrix ident = Complex(1.0); + SU2Matrix pauli1; SU<2>::generator(0,pauli1); + SU2Matrix pauli2; SU<2>::generator(1,pauli2); + SU2Matrix pauli3; SU<2>::generator(2,pauli3); + pauli1 = timesI(pauli1)*2.0; + pauli2 = timesI(pauli2)*2.0; + pauli3 = timesI(pauli3)*2.0; + + LatticeComplex cone(grid); + LatticeReal adet(grid); + adet = abs(toReal(udet)); + lident=Complex(1.0); + cone =Complex(1.0); + Real machine_epsilon=1.0e-7; + u = where(adet>machine_epsilon,u,lident); + udet= where(adet>machine_epsilon,udet,cone); + + xi = 0.5*sqrt(udet); //4xi^2 = Det [ Sig - Sig^dag + 1 Tr Sigdag] + u = 0.5*u*pow(xi,-1.0); // u = 1/2xi [ Sig - Sig^dag + 1 Tr Sigdag] + + // Debug test for sanity + uinv=adj(u); + b=u*uinv-1.0; + assert(norm2(b)<1.0e-4); + + /* +Measure: Haar measure dh has d^4a delta(1-|a^2|) +In polars: + da = da0 r^2 sin theta dr dtheta dphi delta( 1 - r^2 -a0^2) + = da0 r^2 sin theta dr dtheta dphi delta( (sqrt(1-a0^) - r)(sqrt(1-a0^) + r) ) + = da0 r/2 sin theta dr dtheta dphi delta( (sqrt(1-a0^) - r) ) + +Action factor Q(h) dh = e^-S[h] dh = e^{ xi Tr uh} dh // beta enters through xi + = e^{2 xi (h.u)} dh + = e^{2 xi h0u0}.e^{2 xi h1u1}.e^{2 xi h2u2}.e^{2 xi h3u3} dh + +Therefore for each site, take xi for that site +i) generate |a0|<1 with dist + (1-a0^2)^0.5 e^{2 xi a0 } da0 + +Take alpha = 2 xi = 2 xi [ recall 2 beta/Nc unmod staple norm]; hence 2.0/Nc factor in Chroma ] +A. Generate two uniformly distributed pseudo-random numbers R and R', R'', R''' in the unit interval; +B. Set X = -(ln R)/alpha, X' =-(ln R')/alpha; +C. Set C = cos^2(2pi R"), with R" another uniform random number in [0,1] ; +D. Set A = XC; +E. Let d = X'+A; +F. If R'''^2 :> 1 - 0.5 d, go back to A; +G. Set a0 = 1 - d; + +Note that in step D setting B ~ X - A and using B in place of A in step E will generate a second independent a 0 value. + */ + + ///////////////////////////////////////////////////////// + // count the number of sites by picking "1"'s out of hat + ///////////////////////////////////////////////////////// + Integer hit=0; + LatticeReal rtmp(grid); + rtmp=where(wheremask,rones,rzeros); + RealD numSites = sum(rtmp); + RealD numAccepted; + LatticeInteger Accepted(grid); Accepted = zero; + LatticeInteger newlyAccepted(grid); + + std::vector xr(4,grid); + std::vector a(4,grid); + LatticeReal d(grid); d=zero; + LatticeReal alpha(grid); + + // std::cout<<"xi "< 1 - 0.5 d, go back to A; + LatticeReal thresh(grid); thresh = 1.0-d*0.5; + xrsq = xr[0]*xr[0]; + LatticeInteger ione(grid); ione = 1; + LatticeInteger izero(grid); izero=zero; + + newlyAccepted = where ( xrsq < thresh,ione,izero); + Accepted = where ( newlyAccepted, newlyAccepted,Accepted); + Accepted = where ( wheremask, Accepted,izero); + + // FIXME need an iSum for integer to avoid overload on return type?? + rtmp=where(Accepted,rones,rzeros); + numAccepted = sum(rtmp); + + hit++; + + } while ( (numAccepted < numSites) && ( hit < nheatbath) ); + + // G. Set a0 = 1 - d; + a[0]=zero; + a[0]=where(wheremask,1.0-d,a[0]); + + ////////////////////////////////////////// + // ii) generate a_i uniform on two sphere radius (1-a0^2)^0.5 + ////////////////////////////////////////// + + LatticeReal a123mag(grid); + a123mag = sqrt(abs(1.0-a[0]*a[0])); + + LatticeReal cos_theta (grid); + LatticeReal sin_theta (grid); + LatticeReal phi (grid); + + random(pRNG,phi); phi = phi * twopi; // uniform in [0,2pi] + random(pRNG,cos_theta); cos_theta=(cos_theta*2.0)-1.0; // uniform in [-1,1] + sin_theta = sqrt(abs(1.0-cos_theta*cos_theta)); + + a[1] = a123mag * sin_theta * cos(phi); + a[2] = a123mag * sin_theta * sin(phi); + a[3] = a123mag * cos_theta; + + ua = toComplex(a[0])*ident + + toComplex(a[1])*pauli1 + + toComplex(a[2])*pauli2 + + toComplex(a[3])*pauli3; + + b = 1.0; + b = where(wheremask,uinv*ua,b); su2Insert(b,V,su2_subgroup); - // U = V*U - LatticeMatrix tmp(grid); - tmp = V * link; + //mask the assignment back based on Accptance + link = where(Accepted,V * link,link); - //mask the assignment back - link = where(wheremask,tmp,link); + ////////////////////////////// + // Debug Checks + // SU2 check + LatticeSU2Matrix check(grid); // rotated matrix after hb + u = zero; + check = ua * adj(ua) - 1.0; + check = where(Accepted,check,u); + assert(norm2(check)<1.0e-4); + check = b*adj(b)-1.0; + check = where(Accepted,check,u); + assert(norm2(check)<1.0e-4); + + LatticeMatrix Vcheck(grid); + Vcheck = zero; + Vcheck = where(Accepted,V*adj(V) - 1.0,Vcheck); + // std::cout << "SU3 check " < &U) { LatticeComplex sitePlaq(U[0]._grid); + Plaq=zero; for(int mu=1;mu U(4,Umu._grid); + for(int mu=0;mu(Umu,mu); } @@ -72,13 +74,15 @@ public: ////////////////////////////////////////////////// static void Staple(GaugeMat &staple,const GaugeLorentz &Umu,int mu){ - std::vector U(4,Umu._grid); + GridBase *grid = Umu._grid; + + std::vector U(4,grid); for(int d=0;d(Umu,d); } staple = zero; - GaugeMat tmp(U[0]._grid); + GaugeMat tmp(grid); for(int nu=0;nu inline Grid_simd sin(const Grid_simd &r) { - return SimdApply(CosRealFunctor(),r); + return SimdApply(SinRealFunctor(),r); } template < class S, class V > inline Grid_simd log(const Grid_simd &r) { diff --git a/tests/Test_lie_generators.cc b/tests/Test_lie_generators.cc index 91f2d33f..d36e463e 100644 --- a/tests/Test_lie_generators.cc +++ b/tests/Test_lie_generators.cc @@ -44,49 +44,6 @@ int main (int argc, char ** argv) // SU5::printGenerators(); // SU5::testGenerators(); - /////////////////////////////// - // Configuration of known size - /////////////////////////////// - NerscField header; - std::string file("./ckpoint_lat.400"); - LatticeGaugeField Umu(grid); - // readNerscConfiguration(Umu,header,file); - Umu=1.0; // Cold start - - // RNG set up for test - std::vector pseeds({1,2,3,4,5}); // once I caught a fish alive - std::vector sseeds({6,7,8,9,10});// then i let it go again - GridParallelRNG pRNG(grid); pRNG.SeedFixedIntegers(pseeds); - GridSerialRNG sRNG; sRNG.SeedFixedIntegers(sseeds); - - // SU3 colour operatoions - LatticeColourMatrix link(grid); - LatticeColourMatrix staple(grid); - int mu=0; - - // Get Staple - ColourWilsonLoops::Staple(staple,Umu,mu); - // Get Link - link = peekIndex(Umu,mu); - - // Apply heatbath to the link - RealD beta=6.0; - int subgroup=0; - int nhb=1; - int trials=0; - int fails=0; - - LatticeInteger one(rbGrid); one = 1; // fill with ones - LatticeInteger mask(grid); mask= zero; - one.checkerboard=Even; - setCheckerboard(mask,one); - - // update Even checkerboard - - SU3::SubGroupHeatBath(sRNG,pRNG,beta,link,staple,subgroup, - nhb,trials,fails,mask); - - Grid_finalize(); }