From d5b2323a579ca3b8e95fb6686a0fdf7d151a4503 Mon Sep 17 00:00:00 2001 From: Felix Ziegler Date: Mon, 7 Mar 2022 14:44:24 +0000 Subject: [PATCH] included Cayley-Hamilton exponentiation for the compact Wilson exp clover, bug fix for inverse of exp clover --- Grid/qcd/action/fermion/CloverHelpers.h | 296 +++++++++++++++++++----- 1 file changed, 240 insertions(+), 56 deletions(-) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index 7f7ad2b7..134ee161 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -35,7 +35,7 @@ #include //////////////////////////////////////////// -// Standard Clover +// Standard Clover // (4+m0) + csw * clover_term // Exp Clover // (4+m0) * exp(csw/(4+m0) clover_term) @@ -46,10 +46,10 @@ NAMESPACE_BEGIN(Grid); ////////////////////////////////// -// Generic Standard Clover +// Generic Standard Clover ////////////////////////////////// -template +template class CloverHelpers: public WilsonCloverHelpers { public: @@ -61,7 +61,7 @@ public: static void Instantiate(CloverField& CloverTerm, CloverField& CloverTermInv, RealD csw_t, RealD diag_mass) { GridBase *grid = CloverTerm.Grid(); CloverTerm += diag_mass; - + int lvol = grid->lSites(); int DimRep = Impl::Dimension; { @@ -83,7 +83,7 @@ public: EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); } // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; - + EigenInvCloverOp = EigenCloverOp.inverse(); //std::cout << EigenInvCloverOp << std::endl; for (int j = 0; j < Ns; j++) @@ -106,10 +106,10 @@ public: ////////////////////////////////// -// Generic Exp Clover +// Generic Exp Clover ////////////////////////////////// -template +template class ExpCloverHelpers: public WilsonCloverHelpers { public: @@ -122,9 +122,9 @@ public: // Can this be avoided? static void IdentityTimesC(const CloverField& in, RealD c) { int DimRep = Impl::Dimension; - + autoView(in_v, in, AcceleratorWrite); - + accelerator_for(ss, in.Grid()->oSites(), 1, { for (int sa=0; sa=0; i--) ExpClover = ExpClover * Clover + cn[i]; - + + // prepare inverse + CloverInv = (-1.0)*Clover; + Clover = ExpClover * diag_mass; - CloverInv = adj(ExpClover) * (1.0/diag_mass); + + ExpClover = Zero(); + IdentityTimesC(ExpClover, cn[NMAX]); + for (int i=NMAX-1; i>=0; i--) + ExpClover = ExpClover * CloverInv + cn[i]; + + CloverInv = ExpClover * (1.0/diag_mass); + } static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { @@ -182,11 +192,11 @@ public: ////////////////////////////////// -// Compact Standard Clover +// Compact Standard Clover ////////////////////////////////// -template +template class CompactCloverHelpers: public CompactWilsonCloverHelpers, public WilsonCloverHelpers { public: @@ -197,7 +207,7 @@ public: typedef WilsonCloverHelpers Helpers; typedef CompactWilsonCloverHelpers CompactHelpers; - + static void MassTerm(CloverField& Clover, RealD diag_mass) { Clover += diag_mass; } @@ -219,10 +229,9 @@ public: }; ////////////////////////////////// -// Compact Exp Clover +// Compact Exp Clover ////////////////////////////////// - template class CompactExpCloverHelpers: public CompactWilsonCloverHelpers { public: @@ -230,22 +239,28 @@ public: INHERIT_IMPL_TYPES(Impl); INHERIT_CLOVER_TYPES(Impl); INHERIT_COMPACT_CLOVER_TYPES(Impl); - - template using iImplClover = iScalar, Ns>>; + + template using iImplClover = iScalar, Ns>>; typedef CompactWilsonCloverHelpers CompactHelpers; - + +/* static void plusIdentity(const CloverField& in) { int DimRep = Impl::Dimension; - + autoView(in_v, in, AcceleratorWrite); - + accelerator_for(ss, in.Grid()->oSites(), 1, { for (int sa=0; sa> &t, RealD R) {return getNMAX(1e-12,R);} static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-6,R);} - - static void MassTerm(CloverField& Clover, RealD diag_mass) { - // csw/(diag_mass) * clover - Clover = Clover * (1.0/diag_mass); - } - - static void Instantiate(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, + + static void ExponentiateHermitean6by6(const iMatrix &arg, const RealD& alpha, const std::vector& cN, const int Niter, iMatrix& dest){ + + typedef iMatrix mat; + + RealD qn[6]; + RealD qnold[6]; + RealD p[5]; + RealD trA2, trA3, trA4; + + mat A2, A3, A4, A5; + A2 = alpha * alpha * arg * arg; + A3 = alpha * arg * A2; + A4 = A2 * A2; + A5 = A2 * A3; + + trA2 = toReal( trace(A2) ); + trA3 = toReal( trace(A3) ); + trA4 = toReal( trace(A4)); + + p[0] = toReal( trace(A3 * A3)) / 6.0 - 0.125 * trA4 * trA2 - trA3 * trA3 / 18.0 + trA2 * trA2 * trA2/ 48.0; + p[1] = toReal( trace(A5)) / 5.0 - trA3 * trA2 / 6.0; + p[2] = toReal( trace(A4)) / 4.0 - 0.125 * trA2 * trA2; + p[3] = trA3 / 3.0; + p[4] = 0.5 * trA2; + + qnold[0] = cN[Niter]; + qnold[1] = 0.0; + qnold[2] = 0.0; + qnold[3] = 0.0; + qnold[4] = 0.0; + qnold[5] = 0.0; + + for(int i = Niter-1; i >= 0; i--) + { + qn[0] = p[0] * qnold[5] + cN[i]; + qn[1] = p[1] * qnold[5] + qnold[0]; + qn[2] = p[2] * qnold[5] + qnold[1]; + qn[3] = p[3] * qnold[5] + qnold[2]; + qn[4] = p[4] * qnold[5] + qnold[3]; + qn[5] = qnold[4]; + + qnold[0] = qn[0]; + qnold[1] = qn[1]; + qnold[2] = qn[2]; + qnold[3] = qn[3]; + qnold[4] = qn[4]; + qnold[5] = qn[5]; + } + + mat unit(1.0); + + dest = (qn[0] * unit + qn[1] * alpha * arg + qn[2] * A2 + qn[3] * A3 + qn[4] * A4 + qn[5] * A5); + + } + + static void Instantiate(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, CloverDiagonalField& DiagonalInv, CloverTriangleField& TriangleInv, RealD csw_t, RealD diag_mass) { + GridBase* grid = Diagonal.Grid(); int NMAX = getNMAX(Diagonal, 3.*csw_t/diag_mass); - - // To be optimized: implement exp in improved layout - // START: code to be replaced - CloverField Clover(grid), ExpClover(grid); - - CompactHelpers::ConvertLayout(Diagonal, Triangle, Clover); - - ExpClover = Clover; - plusIdentity(ExpClover); // 1 + Clover - - RealD pref = 1.0; - for (int i=2; i<=NMAX; i++) { - Clover = Clover * Clover; - pref /= (RealD)(i); - ExpClover += pref * Clover; + + // + // Implementation completely in Daniel's layout + // + + // Taylor expansion with Cayley-Hamilton recursion + // underlying Horner scheme as above + std::vector cn(NMAX+1); + cn[0] = 1.0; + for (int i=1; i<=NMAX; i++){ + cn[i] = cn[i-1] / RealD(i); } - - // Convert the data layout of the clover terms - CompactHelpers::ConvertLayout(ExpClover, Diagonal, Triangle); - CompactHelpers::ConvertLayout(adj(ExpClover), DiagonalInv, TriangleInv); - // END: code to be replaced - + + // Taken over from Daniel's implementation + conformable(Diagonal, Triangle); + + long lsites = grid->lSites(); + + typedef typename SiteCloverDiagonal::scalar_object scalar_object_diagonal; + typedef typename SiteCloverTriangle::scalar_object scalar_object_triangle; + typedef iMatrix mat; + + autoView(diagonal_v, Diagonal, CpuRead); + autoView(triangle_v, Triangle, CpuRead); + autoView(diagonalExp_v, Diagonal, CpuWrite); + autoView(triangleExp_v, Triangle, CpuWrite); + autoView(diagonalExpInv_v, DiagonalInv, CpuWrite); + autoView(triangleExpInv_v, TriangleInv, CpuWrite); + + thread_for(site, lsites, { // NOTE: Not on GPU because of (peek/poke)LocalSite + + mat srcCloverOpUL(0.0); // upper left block + mat srcCloverOpLR(0.0); // lower right block + mat ExpCloverOp; + + scalar_object_diagonal diagonal_tmp = Zero(); + scalar_object_diagonal diagonal_exp_tmp = Zero(); + scalar_object_triangle triangle_tmp = Zero(); + scalar_object_triangle triangle_exp_tmp = Zero(); + + Coordinate lcoor; + grid->LocalIndexToLocalCoor(site, lcoor); + + peekLocalSite(diagonal_tmp, diagonal_v, lcoor); + peekLocalSite(triangle_tmp, triangle_v, lcoor); + + // block = 0 + int block; + block = 0; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + srcCloverOpUL(i,j) = static_cast(TensorRemove(diagonal_tmp()(block)(i))); + } + else{ + srcCloverOpUL(i,j) = static_cast(TensorRemove(CompactHelpers::triangle_elem(triangle_tmp, block, i, j))); + } + } + } + // block = 1 + block = 1; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + srcCloverOpLR(i,j) = static_cast(TensorRemove(diagonal_tmp()(block)(i))); + } + else{ + srcCloverOpLR(i,j) = static_cast(TensorRemove(CompactHelpers::triangle_elem(triangle_tmp, block, i, j))); + } + } + } + + // exp(Clover) + + ExponentiateHermitean6by6(srcCloverOpUL,1.0/diag_mass,cn,NMAX,ExpCloverOp); + + block = 0; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); + } + else if(i < j){ + triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); + } + } + } + + ExponentiateHermitean6by6(srcCloverOpLR,1.0/diag_mass,cn,NMAX,ExpCloverOp); + + block = 1; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); + } + else if(i < j){ + triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); + } + } + } + + pokeLocalSite(diagonal_exp_tmp, diagonalExp_v, lcoor); + pokeLocalSite(triangle_exp_tmp, triangleExp_v, lcoor); + + // inverse exp(-Clover) + + ExponentiateHermitean6by6(srcCloverOpUL,-1.0/diag_mass,cn,NMAX,ExpCloverOp); + + block = 0; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); + } + else if(i < j){ + triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); + } + } + } + + ExponentiateHermitean6by6(srcCloverOpLR,-1.0/diag_mass,cn,NMAX,ExpCloverOp); + + block = 1; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); + } + else if(i < j){ + triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); + } + } + } + + pokeLocalSite(diagonal_exp_tmp, diagonalExpInv_v, lcoor); + pokeLocalSite(triangle_exp_tmp, triangleExpInv_v, lcoor); + }); + Diagonal = Diagonal * diag_mass; Triangle = Triangle * diag_mass; + DiagonalInv = DiagonalInv*(1.0/diag_mass); TriangleInv = TriangleInv*(1.0/diag_mass); } - + static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { assert(0); } - + };