diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index 57e71998..cd469ea7 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -204,15 +204,18 @@ public: typedef WilsonCloverHelpers Helpers; typedef CompactWilsonCloverHelpers CompactHelpers; - static void MassTerm(CloverField& Clover, RealD diag_mass) { + static void InstantiateClover(CloverField& Clover, CloverField& CloverInv, RealD csw_t, RealD diag_mass) { Clover += diag_mass; } - static void Exponentiate_Clover(CloverDiagonalField& Diagonal, - CloverTriangleField& Triangle, - RealD csw_t, RealD diag_mass) { + static void InvertClover(CloverField& InvClover, + const CloverDiagonalField& diagonal, + const CloverTriangleField& triangle, + CloverDiagonalField& diagonalInv, + CloverTriangleField& triangleInv, + bool fixedBoundaries) { - // Do nothing + CompactHelpers::Invert(diagonal, triangle, diagonalInv, triangleInv); } // TODO: implement Cmunu for better performances with compact layout, but don't do it @@ -237,9 +240,17 @@ public: template using iImplClover = iScalar, Ns>>; typedef CompactWilsonCloverHelpers CompactHelpers; - static void MassTerm(CloverField& Clover, RealD diag_mass) { - // do nothing! - // mass term is multiplied to exp(Clover) below + // Can this be avoided? + static void IdentityTimesC(const CloverField& in, RealD c) { + int DimRep = Impl::Dimension; + + autoView(in_v, in, AcceleratorWrite); + + accelerator_for(ss, in.Grid()->oSites(), 1, { + for (int sa=0; sa> &t, RealD R) {return getNMAX(1e-12,R);} - static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-6,R);} + static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-12,R);} + static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-6,R);} - static void ExponentiateHermitean6by6(const iMatrix &arg, const RealD& alpha, const std::vector& cN, const int Niter, iMatrix& dest){ + static void InstantiateClover(CloverField& Clover, CloverField& CloverInv, RealD csw_t, RealD diag_mass) { - typedef iMatrix mat; + GridBase* grid = Clover.Grid(); + CloverField ExpClover(grid); - RealD qn[6]; - RealD qnold[6]; - RealD p[5]; - RealD trA2, trA3, trA4; + int NMAX = getNMAX(Clover, 3.*csw_t/diag_mass); - mat A2, A3, A4, A5; - A2 = alpha * alpha * arg * arg; - A3 = alpha * arg * A2; - A4 = A2 * A2; - A5 = A2 * A3; + Clover *= (1.0/diag_mass); - trA2 = toReal( trace(A2) ); - trA3 = toReal( trace(A3) ); - trA4 = toReal( trace(A4)); - - p[0] = toReal( trace(A3 * A3)) / 6.0 - 0.125 * trA4 * trA2 - trA3 * trA3 / 18.0 + trA2 * trA2 * trA2/ 48.0; - p[1] = toReal( trace(A5)) / 5.0 - trA3 * trA2 / 6.0; - p[2] = toReal( trace(A4)) / 4.0 - 0.125 * trA2 * trA2; - p[3] = trA3 / 3.0; - p[4] = 0.5 * trA2; - - qnold[0] = cN[Niter]; - qnold[1] = 0.0; - qnold[2] = 0.0; - qnold[3] = 0.0; - qnold[4] = 0.0; - qnold[5] = 0.0; - - for(int i = Niter-1; i >= 0; i--) - { - qn[0] = p[0] * qnold[5] + cN[i]; - qn[1] = p[1] * qnold[5] + qnold[0]; - qn[2] = p[2] * qnold[5] + qnold[1]; - qn[3] = p[3] * qnold[5] + qnold[2]; - qn[4] = p[4] * qnold[5] + qnold[3]; - qn[5] = qnold[4]; - - qnold[0] = qn[0]; - qnold[1] = qn[1]; - qnold[2] = qn[2]; - qnold[3] = qn[3]; - qnold[4] = qn[4]; - qnold[5] = qn[5]; - } - - mat unit(1.0); - - dest = (qn[0] * unit + qn[1] * alpha * arg + qn[2] * A2 + qn[3] * A3 + qn[4] * A4 + qn[5] * A5); - - } - - static void Exponentiate_Clover(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, RealD csw_t, RealD diag_mass) { - - GridBase* grid = Diagonal.Grid(); - int NMAX = getNMAX(Diagonal, 3.*csw_t/diag_mass); - - // - // Implementation completely in Daniel's layout - // - - // Taylor expansion with Cayley-Hamilton recursion - // underlying Horner scheme as above + // Taylor expansion, slow but generic + // Horner scheme: a0 + a1 x + a2 x^2 + .. = a0 + x (a1 + x(...)) + // qN = cN + // qn = cn + qn+1 X std::vector cn(NMAX+1); cn[0] = 1.0; - for (int i=1; i<=NMAX; i++){ + for (int i=1; i<=NMAX; i++) cn[i] = cn[i-1] / RealD(i); - } - // Taken over from Daniel's implementation - conformable(Diagonal, Triangle); + ExpClover = Zero(); + IdentityTimesC(ExpClover, cn[NMAX]); + for (int i=NMAX-1; i>=0; i--) + ExpClover = ExpClover * Clover + cn[i]; - long lsites = grid->lSites(); - { - typedef typename SiteCloverDiagonal::scalar_object scalar_object_diagonal; - typedef typename SiteCloverTriangle::scalar_object scalar_object_triangle; - typedef iMatrix mat; + // prepare inverse + CloverInv = (-1.0)*Clover; - autoView(diagonal_v, Diagonal, CpuRead); - autoView(triangle_v, Triangle, CpuRead); - autoView(diagonalExp_v, Diagonal, CpuWrite); - autoView(triangleExp_v, Triangle, CpuWrite); + Clover = ExpClover * diag_mass; - thread_for(site, lsites, { // NOTE: Not on GPU because of (peek/poke)LocalSite + ExpClover = Zero(); + IdentityTimesC(ExpClover, cn[NMAX]); + for (int i=NMAX-1; i>=0; i--) + ExpClover = ExpClover * CloverInv + cn[i]; - mat srcCloverOpUL(0.0); // upper left block - mat srcCloverOpLR(0.0); // lower right block - mat ExpCloverOp; + CloverInv = ExpClover * (1.0/diag_mass); - scalar_object_diagonal diagonal_tmp = Zero(); - scalar_object_diagonal diagonal_exp_tmp = Zero(); - scalar_object_triangle triangle_tmp = Zero(); - scalar_object_triangle triangle_exp_tmp = Zero(); - - Coordinate lcoor; - grid->LocalIndexToLocalCoor(site, lcoor); - - peekLocalSite(diagonal_tmp, diagonal_v, lcoor); - peekLocalSite(triangle_tmp, triangle_v, lcoor); - - int block; - block = 0; - for(int i = 0; i < 6; i++){ - for(int j = 0; j < 6; j++){ - if (i == j){ - srcCloverOpUL(i,j) = static_cast(TensorRemove(diagonal_tmp()(block)(i))); - } - else{ - srcCloverOpUL(i,j) = static_cast(TensorRemove(CompactHelpers::triangle_elem(triangle_tmp, block, i, j))); - } - } - } - block = 1; - for(int i = 0; i < 6; i++){ - for(int j = 0; j < 6; j++){ - if (i == j){ - srcCloverOpLR(i,j) = static_cast(TensorRemove(diagonal_tmp()(block)(i))); - } - else{ - srcCloverOpLR(i,j) = static_cast(TensorRemove(CompactHelpers::triangle_elem(triangle_tmp, block, i, j))); - } - } - } - - // exp(Clover) - - ExponentiateHermitean6by6(srcCloverOpUL,1.0/diag_mass,cn,NMAX,ExpCloverOp); - - block = 0; - for(int i = 0; i < 6; i++){ - for(int j = 0; j < 6; j++){ - if (i == j){ - diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); - } - else if(i < j){ - triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); - } - } - } - - ExponentiateHermitean6by6(srcCloverOpLR,1.0/diag_mass,cn,NMAX,ExpCloverOp); - - block = 1; - for(int i = 0; i < 6; i++){ - for(int j = 0; j < 6; j++){ - if (i == j){ - diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); - } - else if(i < j){ - triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); - } - } - } - - pokeLocalSite(diagonal_exp_tmp, diagonalExp_v, lcoor); - pokeLocalSite(triangle_exp_tmp, triangleExp_v, lcoor); - }); - } - - Diagonal *= diag_mass; - Triangle *= diag_mass; } + static void InvertClover(CloverField& InvClover, + const CloverDiagonalField& diagonal, + const CloverTriangleField& triangle, + CloverDiagonalField& diagonalInv, + CloverTriangleField& triangleInv, + bool fixedBoundaries) { + + if (fixedBoundaries) + { + CompactHelpers::Invert(diagonal, triangle, diagonalInv, triangleInv); + } + else + { + CompactHelpers::ConvertLayout(InvClover, diagonalInv, triangleInv); + } + } static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { assert(0); diff --git a/Grid/qcd/action/fermion/CompactWilsonCloverFermion.h b/Grid/qcd/action/fermion/CompactWilsonCloverFermion.h index 249b20bd..d79b34d4 100644 --- a/Grid/qcd/action/fermion/CompactWilsonCloverFermion.h +++ b/Grid/qcd/action/fermion/CompactWilsonCloverFermion.h @@ -225,7 +225,7 @@ public: RealD csw_t; RealD cF; - bool open_boundaries; + bool fixedBoundaries; CloverDiagonalField Diagonal, DiagonalEven, DiagonalOdd; CloverDiagonalField DiagonalInv, DiagonalInvEven, DiagonalInvOdd; diff --git a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h index e4864730..7e3b7f00 100644 --- a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h @@ -48,7 +48,7 @@ CompactWilsonCloverFermion::CompactWilsonCloverFermion(Gaug , csw_r(_csw_r) , csw_t(_csw_t) , cF(_cF) - , open_boundaries(impl_p.boundary_phases[Nd-1] == 0.0) + , fixedBoundaries(impl_p.boundary_phases[Nd-1] == 0.0) , Diagonal(&Fgrid), Triangle(&Fgrid) , DiagonalEven(&Hgrid), TriangleEven(&Hgrid) , DiagonalOdd(&Hgrid), TriangleOdd(&Hgrid) @@ -67,7 +67,7 @@ CompactWilsonCloverFermion::CompactWilsonCloverFermion(Gaug csw_r /= clover_anisotropy.xi_0; ImportGauge(_Umu); - if (open_boundaries) { + if (fixedBoundaries) { this->BoundaryMaskEven.Checkerboard() = Even; this->BoundaryMaskOdd.Checkerboard() = Odd; CompactHelpers::SetupMasks(this->BoundaryMask, this->BoundaryMaskEven, this->BoundaryMaskOdd); @@ -77,31 +77,31 @@ CompactWilsonCloverFermion::CompactWilsonCloverFermion(Gaug template void CompactWilsonCloverFermion::Dhop(const FermionField& in, FermionField& out, int dag) { WilsonBase::Dhop(in, out, dag); - if(open_boundaries) ApplyBoundaryMask(out); + if(fixedBoundaries) ApplyBoundaryMask(out); } template void CompactWilsonCloverFermion::DhopOE(const FermionField& in, FermionField& out, int dag) { WilsonBase::DhopOE(in, out, dag); - if(open_boundaries) ApplyBoundaryMask(out); + if(fixedBoundaries) ApplyBoundaryMask(out); } template void CompactWilsonCloverFermion::DhopEO(const FermionField& in, FermionField& out, int dag) { WilsonBase::DhopEO(in, out, dag); - if(open_boundaries) ApplyBoundaryMask(out); + if(fixedBoundaries) ApplyBoundaryMask(out); } template void CompactWilsonCloverFermion::DhopDir(const FermionField& in, FermionField& out, int dir, int disp) { WilsonBase::DhopDir(in, out, dir, disp); - if(this->open_boundaries) ApplyBoundaryMask(out); + if(this->fixedBoundaries) ApplyBoundaryMask(out); } template void CompactWilsonCloverFermion::DhopDirAll(const FermionField& in, std::vector& out) { WilsonBase::DhopDirAll(in, out); - if(this->open_boundaries) { + if(this->fixedBoundaries) { for(auto& o : out) ApplyBoundaryMask(o); } } @@ -112,7 +112,7 @@ void CompactWilsonCloverFermion::M(const FermionField& in, WilsonBase::Dhop(in, out, DaggerNo); // call base to save applying bc Mooee(in, Tmp); axpy(out, 1.0, out, Tmp); - if(open_boundaries) ApplyBoundaryMask(out); + if(fixedBoundaries) ApplyBoundaryMask(out); } template @@ -121,19 +121,19 @@ void CompactWilsonCloverFermion::Mdag(const FermionField& i WilsonBase::Dhop(in, out, DaggerYes); // call base to save applying bc MooeeDag(in, Tmp); axpy(out, 1.0, out, Tmp); - if(open_boundaries) ApplyBoundaryMask(out); + if(fixedBoundaries) ApplyBoundaryMask(out); } template void CompactWilsonCloverFermion::Meooe(const FermionField& in, FermionField& out) { WilsonBase::Meooe(in, out); - if(open_boundaries) ApplyBoundaryMask(out); + if(fixedBoundaries) ApplyBoundaryMask(out); } template void CompactWilsonCloverFermion::MeooeDag(const FermionField& in, FermionField& out) { WilsonBase::MeooeDag(in, out); - if(open_boundaries) ApplyBoundaryMask(out); + if(fixedBoundaries) ApplyBoundaryMask(out); } template @@ -147,7 +147,7 @@ void CompactWilsonCloverFermion::Mooee(const FermionField& } else { MooeeInternal(in, out, Diagonal, Triangle); } - if(open_boundaries) ApplyBoundaryMask(out); + if(fixedBoundaries) ApplyBoundaryMask(out); } template @@ -166,7 +166,7 @@ void CompactWilsonCloverFermion::MooeeInv(const FermionFiel } else { MooeeInternal(in, out, DiagonalInv, TriangleInv); } - if(open_boundaries) ApplyBoundaryMask(out); + if(fixedBoundaries) ApplyBoundaryMask(out); } template @@ -186,7 +186,7 @@ void CompactWilsonCloverFermion::MdirAll(const FermionField template void CompactWilsonCloverFermion::MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) { - assert(!open_boundaries); // TODO check for changes required for open bc + assert(!fixedBoundaries); // TODO check for changes required for open bc // NOTE: code copied from original clover term conformable(X.Grid(), Y.Grid()); @@ -305,6 +305,7 @@ void CompactWilsonCloverFermion::ImportGauge(const GaugeFie GridBase* grid = _Umu.Grid(); typename Impl::GaugeLinkField Bx(grid), By(grid), Bz(grid), Ex(grid), Ey(grid), Ez(grid); CloverField TmpOriginal(grid); + CloverField TmpInverse(grid); // Compute the field strength terms mu>nu double t2 = usecond(); @@ -324,24 +325,27 @@ void CompactWilsonCloverFermion::ImportGauge(const GaugeFie TmpOriginal += Helpers::fillCloverXT(Ex) * csw_t; TmpOriginal += Helpers::fillCloverYT(Ey) * csw_t; TmpOriginal += Helpers::fillCloverZT(Ez) * csw_t; - // Handle mass term based on clover policy - CloverHelpers::MassTerm(TmpOriginal, this->diag_mass); - - // Convert the data layout of the clover term + + // Instantiate the clover term + // - In case of the standard clover the mass term is added + // - In case of the exponential clover the clover term is exponentiated double t4 = usecond(); + CloverHelpers::InstantiateClover(TmpOriginal, TmpInverse, csw_t, this->diag_mass); + + // Convert the data layout of the clover term + double t5 = usecond(); CompactHelpers::ConvertLayout(TmpOriginal, Diagonal, Triangle); - // Exponentiate the clover (nothing happens in case of the standard clover) - double t5 = usecond(); - CloverHelpers::Exponentiate_Clover(Diagonal, Triangle, csw_t, this->diag_mass); - - // Possible modify the boundary values + // Modify the clover term at the temporal boundaries in case of open boundary conditions double t6 = usecond(); - if(open_boundaries) CompactHelpers::ModifyBoundaries(Diagonal, Triangle, csw_t, cF, this->diag_mass); + if(fixedBoundaries) CompactHelpers::ModifyBoundaries(Diagonal, Triangle, csw_t, cF, this->diag_mass); - // Invert the Clover term (explicit inversion needed for the improvement in case of open boundary conditions) + // Invert the Clover term + // In case of the exponential clover with (anti-)periodic boundary conditions exp(-Clover) saved + // in TmpInverse can be used. In all other cases the clover term has to be explictly inverted. + // TODO: For now this inversion is explictly done on the CPU double t7 = usecond(); - CompactHelpers::Invert(Diagonal, Triangle, DiagonalInv, TriangleInv); + CloverHelpers::InvertClover(TmpInverse, Diagonal, Triangle, DiagonalInv, TriangleInv, fixedBoundaries); // Fill the remaining clover fields double t8 = usecond(); @@ -362,10 +366,10 @@ void CompactWilsonCloverFermion::ImportGauge(const GaugeFie std::cout << GridLogDebug << "allocations = " << (t2 - t1) / 1e6 << std::endl; std::cout << GridLogDebug << "field strength = " << (t3 - t2) / 1e6 << std::endl; std::cout << GridLogDebug << "fill clover = " << (t4 - t3) / 1e6 << std::endl; - std::cout << GridLogDebug << "convert = " << (t5 - t4) / 1e6 << std::endl; - std::cout << GridLogDebug << "exponentiation = " << (t6 - t5) / 1e6 << std::endl; - std::cout << GridLogDebug << "boundaries = " << (t7 - t6) / 1e6 << std::endl; - std::cout << GridLogDebug << "inversions = " << (t8 - t7) / 1e6 << std::endl; + std::cout << GridLogDebug << "instantiate clover = " << (t5 - t4) / 1e6 << std::endl; + std::cout << GridLogDebug << "convert layout = " << (t6 - t5) / 1e6 << std::endl; + std::cout << GridLogDebug << "modify boundaries = " << (t7 - t6) / 1e6 << std::endl; + std::cout << GridLogDebug << "invert clover = " << (t8 - t7) / 1e6 << std::endl; std::cout << GridLogDebug << "pick cbs = " << (t9 - t8) / 1e6 << std::endl; std::cout << GridLogDebug << "total = " << (t9 - t0) / 1e6 << std::endl; }