diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml
new file mode 100644
index 00000000..514c0c48
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -0,0 +1,54 @@
+name: Bug report
+description: Report a bug.
+title: ""
+labels: [bug]
+
+body:
+  - type: markdown
+    attributes:
+      value: >
+        Thank you for taking the time to file a bug report.
+        Please check that the code is pointing to the HEAD of develop
+        or any commit in master which is tagged with a version number.
+
+  - type: textarea
+    attributes:
+      label: "Describe the issue:"
+      description: >
+        Describe the issue and any previous attempt to solve it.
+    validations:
+      required: true
+
+  - type: textarea
+    attributes:
+      label: "Code example:"
+      description: >
+        If relevant, show how to reproduce the issue using a minimal working
+        example.
+      placeholder: |
+        << your code here >>
+      render: shell
+    validations:
+      required: false
+
+  - type: textarea
+    attributes:
+      label: "Target platform:"
+      description: >
+        Give a description of the target platform (CPU, network, compiler).
+        Please give the full CPU part description, using for example
+        `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux)
+        or `sysctl machdep.cpu.brand_string` (macOS) and the full output
+        of the `--version` option of your compiler.
+    validations:
+      required: true
+
+  - type: textarea
+    attributes:
+      label: "Configure options:"
+      description: >
+        Please give the exact configure command used and attach
+        `config.log`, `grid.config.summary` and the output of `make V=1`.
+      render: shell
+    validations:
+      required: true
diff --git a/Grid/GridCore.h b/Grid/GridCore.h
index 2209f960..41c64ef6 100644
--- a/Grid/GridCore.h
+++ b/Grid/GridCore.h
@@ -44,9 +44,10 @@ Author: paboyle
 #include
 #include
 #include
-#include
+//#include
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/Grid/GridQCDcore.h b/Grid/GridQCDcore.h
index cae6f43f..065b62cd 100644
--- a/Grid/GridQCDcore.h
+++ b/Grid/GridQCDcore.h
@@ -36,6 +36,7 @@ Author: paboyle
 #include
 #include
 #include
+#include
 #include
 #include
 NAMESPACE_CHECK(GridQCDCore);
diff --git a/Grid/algorithms/Algorithms.h b/Grid/algorithms/Algorithms.h
index 7f27784b..9eaf89f3 100644
--- a/Grid/algorithms/Algorithms.h
+++ b/Grid/algorithms/Algorithms.h
@@ -54,6 +54,8 @@ NAMESPACE_CHECK(BiCGSTAB);
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
diff --git a/Grid/algorithms/CoarsenedMatrix.h b/Grid/algorithms/CoarsenedMatrix.h
index ba4abecd..7008008c 100644
--- a/Grid/algorithms/CoarsenedMatrix.h
+++ b/Grid/algorithms/CoarsenedMatrix.h
@@ -324,9 +324,9 @@ public:
   GridBase* _cbgrid;
   int hermitian;
 
-  CartesianStencil Stencil;
-  CartesianStencil StencilEven;
-  CartesianStencil StencilOdd;
+  CartesianStencil Stencil;
+  CartesianStencil StencilEven;
+  CartesianStencil StencilOdd;
 
   std::vector A;
   std::vector Aeven;
   std::vector Aodd;
@@ -631,7 +631,7 @@ public:
     assert(Aself != nullptr);
   }
 
-  void DselfInternal(CartesianStencil &st, CoarseMatrix &a,
+  void DselfInternal(CartesianStencil &st, CoarseMatrix &a,
      const CoarseVector &in, CoarseVector &out, int dag) {
     int point = geom.npoint-1;
     autoView( out_v, out, AcceleratorWrite);
@@ -694,7 +694,7 @@ public:
     }
   }
 
-  void DhopInternal(CartesianStencil &st, std::vector &a,
+  void DhopInternal(CartesianStencil &st, std::vector &a,
      const CoarseVector &in, CoarseVector &out, int dag) {
 
     SimpleCompressor compressor;
@@ -784,9 +784,9 @@ public:
     _cbgrid(new GridRedBlackCartesian(&CoarseGrid)),
     geom(CoarseGrid._ndimension),
     hermitian(hermitian_),
-
Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0), - StencilEven(_cbgrid,geom.npoint,Even,geom.directions,geom.displacements,0), - StencilOdd(_cbgrid,geom.npoint,Odd,geom.directions,geom.displacements,0), + Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements), + StencilEven(_cbgrid,geom.npoint,Even,geom.directions,geom.displacements), + StencilOdd(_cbgrid,geom.npoint,Odd,geom.directions,geom.displacements), A(geom.npoint,&CoarseGrid), Aeven(geom.npoint,_cbgrid), Aodd(geom.npoint,_cbgrid), @@ -804,9 +804,9 @@ public: _cbgrid(&CoarseRBGrid), geom(CoarseGrid._ndimension), hermitian(hermitian_), - Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0), - StencilEven(&CoarseRBGrid,geom.npoint,Even,geom.directions,geom.displacements,0), - StencilOdd(&CoarseRBGrid,geom.npoint,Odd,geom.directions,geom.displacements,0), + Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements), + StencilEven(&CoarseRBGrid,geom.npoint,Even,geom.directions,geom.displacements), + StencilOdd(&CoarseRBGrid,geom.npoint,Odd,geom.directions,geom.displacements), A(geom.npoint,&CoarseGrid), Aeven(geom.npoint,&CoarseRBGrid), Aodd(geom.npoint,&CoarseRBGrid), diff --git a/Grid/algorithms/LinearOperator.h b/Grid/algorithms/LinearOperator.h index b1cf4d97..5096231d 100644 --- a/Grid/algorithms/LinearOperator.h +++ b/Grid/algorithms/LinearOperator.h @@ -526,6 +526,7 @@ public: (*this)(Linop,in[k],out[k]); } }; + virtual ~OperatorFunction(){}; }; template class LinearFunction { @@ -541,6 +542,7 @@ public: (*this)(in[i], out[i]); } } + virtual ~LinearFunction(){}; }; template class IdentityLinearFunction : public LinearFunction { diff --git a/Grid/algorithms/approx/Chebyshev.h b/Grid/algorithms/approx/Chebyshev.h index 7c93f0b8..1d6984f3 100644 --- a/Grid/algorithms/approx/Chebyshev.h +++ b/Grid/algorithms/approx/Chebyshev.h @@ -258,26 +258,12 @@ public: for(int n=2;nView(); - auto Tnp_v = Tnp->View(); - auto Tnm_v = Tnm->View(); - constexpr int Nsimd = vector_type::Nsimd(); - accelerator_for(ss, in.Grid()->oSites(), Nsimd, { - coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss)); - coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss)); - }); - if ( Coeffs[n] != 0.0) { - axpy(out,Coeffs[n],*Tnp,out); - } -#else axpby(y,xscale,mscale,y,(*Tn)); axpby(*Tnp,2.0,-1.0,y,(*Tnm)); if ( Coeffs[n] != 0.0) { axpy(out,Coeffs[n],*Tnp,out); } -#endif + // Cycle pointers to avoid copies Field *swizzle = Tnm; Tnm =Tn; diff --git a/Grid/algorithms/iterative/ConjugateGradient.h b/Grid/algorithms/iterative/ConjugateGradient.h index 14f3d306..3308d8fe 100644 --- a/Grid/algorithms/iterative/ConjugateGradient.h +++ b/Grid/algorithms/iterative/ConjugateGradient.h @@ -58,6 +58,7 @@ public: void operator()(LinearOperatorBase &Linop, const Field &src, Field &psi) { + GRID_TRACE("ConjugateGradient"); psi.Checkerboard() = src.Checkerboard(); conformable(psi, src); @@ -117,9 +118,13 @@ public: GridStopWatch MatrixTimer; GridStopWatch SolverTimer; + RealD usecs = -usecond(); SolverTimer.Start(); int k; for (k = 1; k <= MaxIterations; k++) { + + GridStopWatch IterationTimer; + IterationTimer.Start(); c = cp; MatrixTimer.Start(); @@ -152,31 +157,41 @@ public: LinearCombTimer.Stop(); LinalgTimer.Stop(); - std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k + IterationTimer.Stop(); + if ( (k % 500) == 0 ) { + std::cout << GridLogMessage << "ConjugateGradient: Iteration " << k << " residual " << sqrt(cp/ssq) << " target " << Tolerance << std::endl; + } else { + std::cout 
<< GridLogIterative << "ConjugateGradient: Iteration " << k + << " residual " << sqrt(cp/ssq) << " target " << Tolerance << " took " << IterationTimer.Elapsed() << std::endl; + } // Stopping condition if (cp <= rsq) { + usecs +=usecond(); SolverTimer.Stop(); Linop.HermOpAndNorm(psi, mmp, d, qq); p = mmp - src; - + GridBase *grid = src.Grid(); + RealD DwfFlops = (1452. )*grid->gSites()*4*k + + (8+4+8+4+4)*12*grid->gSites()*k; // CG linear algebra RealD srcnorm = std::sqrt(norm2(src)); RealD resnorm = std::sqrt(norm2(p)); RealD true_residual = resnorm / srcnorm; - std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << "\tComputed residual " << std::sqrt(cp / ssq) << "\tTrue residual " << true_residual << "\tTarget " << Tolerance << std::endl; - std::cout << GridLogIterative << "Time breakdown "< *guesser; @@ -68,6 +69,7 @@ NAMESPACE_BEGIN(Grid); } void operator() (const FieldD &src_d_in, FieldD &sol_d){ + std::cout << GridLogMessage << "MixedPrecisionConjugateGradient: Starting mixed precision CG with outer tolerance " << Tolerance << " and inner tolerance " << InnerTolerance << std::endl; TotalInnerIterations = 0; GridStopWatch TotalTimer; @@ -97,6 +99,7 @@ NAMESPACE_BEGIN(Grid); FieldF sol_f(SinglePrecGrid); sol_f.Checkerboard() = cb; + std::cout< CG_f(inner_tol, MaxInnerIterations); CG_f.ErrorOnNoConverge = false; @@ -105,7 +108,10 @@ NAMESPACE_BEGIN(Grid); GridStopWatch PrecChangeTimer; Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count - + + precisionChangeWorkspace pc_wk_sp_to_dp(DoublePrecGrid, SinglePrecGrid); + precisionChangeWorkspace pc_wk_dp_to_sp(SinglePrecGrid, DoublePrecGrid); + for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){ //Compute double precision rsd and also new RHS vector. Linop_d.HermOp(sol_d, tmp_d); @@ -120,7 +126,7 @@ NAMESPACE_BEGIN(Grid); while(norm * inner_tol * inner_tol < stop) inner_tol *= 2; // inner_tol = sqrt(stop/norm) ?? PrecChangeTimer.Start(); - precisionChange(src_f, src_d); + precisionChange(src_f, src_d, pc_wk_dp_to_sp); PrecChangeTimer.Stop(); sol_f = Zero(); @@ -130,6 +136,7 @@ NAMESPACE_BEGIN(Grid); (*guesser)(src_f, sol_f); //Inner CG + std::cout< CG_d(Tolerance, MaxInnerIterations); CG_d(Linop_d, src_d_in, sol_d); TotalFinalStepIterations = CG_d.IterationsToComplete; + TrueResidual = CG_d.TrueResidual; TotalTimer.Stop(); std::cout< + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_CONJUGATE_GRADIENT_MIXED_PREC_BATCHED_H +#define GRID_CONJUGATE_GRADIENT_MIXED_PREC_BATCHED_H + +NAMESPACE_BEGIN(Grid); + +//Mixed precision restarted defect correction CG +template::value == 2, int>::type = 0, + typename std::enable_if< getPrecision::value == 1, int>::type = 0> +class MixedPrecisionConjugateGradientBatched : public LinearFunction { +public: + using LinearFunction::operator(); + RealD Tolerance; + RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed + Integer MaxInnerIterations; + Integer MaxOuterIterations; + Integer MaxPatchupIterations; + GridBase* SinglePrecGrid; //Grid for single-precision fields + RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance + LinearOperatorBase &Linop_f; + LinearOperatorBase &Linop_d; + + //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess + LinearFunction *guesser; + bool updateResidual; + + MixedPrecisionConjugateGradientBatched(RealD tol, + Integer maxinnerit, + Integer maxouterit, + Integer maxpatchit, + GridBase* _sp_grid, + LinearOperatorBase &_Linop_f, + LinearOperatorBase &_Linop_d, + bool _updateResidual=true) : + Linop_f(_Linop_f), Linop_d(_Linop_d), + Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), MaxPatchupIterations(maxpatchit), SinglePrecGrid(_sp_grid), + OuterLoopNormMult(100.), guesser(NULL), updateResidual(_updateResidual) { }; + + void useGuesser(LinearFunction &g){ + guesser = &g; + } + + void operator() (const FieldD &src_d_in, FieldD &sol_d){ + std::vector srcs_d_in{src_d_in}; + std::vector sols_d{sol_d}; + + (*this)(srcs_d_in,sols_d); + + sol_d = sols_d[0]; + } + + void operator() (const std::vector &src_d_in, std::vector &sol_d){ + assert(src_d_in.size() == sol_d.size()); + int NBatch = src_d_in.size(); + + std::cout << GridLogMessage << "NBatch = " << NBatch << std::endl; + + Integer TotalOuterIterations = 0; //Number of restarts + std::vector TotalInnerIterations(NBatch,0); //Number of inner CG iterations + std::vector TotalFinalStepIterations(NBatch,0); //Number of CG iterations in final patch-up step + + GridStopWatch TotalTimer; + TotalTimer.Start(); + + GridStopWatch InnerCGtimer; + GridStopWatch PrecChangeTimer; + + int cb = src_d_in[0].Checkerboard(); + + std::vector src_norm; + std::vector norm; + std::vector stop; + + GridBase* DoublePrecGrid = src_d_in[0].Grid(); + FieldD tmp_d(DoublePrecGrid); + tmp_d.Checkerboard() = cb; + + FieldD tmp2_d(DoublePrecGrid); + tmp2_d.Checkerboard() = cb; + + std::vector src_d; + std::vector src_f; + std::vector sol_f; + + for (int i=0; i CG_f(inner_tol, MaxInnerIterations); + CG_f.ErrorOnNoConverge = false; + + Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count + + for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){ + std::cout << GridLogMessage << std::endl; + std::cout << GridLogMessage << "Outer iteration " << outer_iter << std::endl; + + bool allConverged = true; + + for (int i=0; i OuterLoopNormMult * stop[i]) { + allConverged = false; + } + } + if (allConverged) break; + + if (updateResidual) { + RealD normMax = *std::max_element(std::begin(norm), std::end(norm)); + RealD stopMax = 
*std::max_element(std::begin(stop), std::end(stop)); + while( normMax * inner_tol * inner_tol < stopMax) inner_tol *= 2; // inner_tol = sqrt(stop/norm) ?? + CG_f.Tolerance = inner_tol; + } + + //Optionally improve inner solver guess (eg using known eigenvectors) + if(guesser != NULL) { + (*guesser)(src_f, sol_f); + } + + for (int i=0; i CG_d(Tolerance, MaxPatchupIterations); + CG_d(Linop_d, src_d_in[i], sol_d[i]); + TotalFinalStepIterations[i] += CG_d.IterationsToComplete; + } + + TotalTimer.Stop(); + + std::cout << GridLogMessage << std::endl; + for (int i=0; i::operator(); - RealD Tolerance; + // RealD Tolerance; Integer MaxIterations; Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion std::vector IterationsToCompleteShift; // Iterations for this shift @@ -52,7 +52,7 @@ public: MultiShiftFunction shifts; std::vector TrueResidualShift; - ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) : + ConjugateGradientMultiShift(Integer maxit, const MultiShiftFunction &_shifts) : MaxIterations(maxit), shifts(_shifts) { @@ -84,6 +84,7 @@ public: void operator() (LinearOperatorBase &Linop, const Field &src, std::vector &psi) { + GRID_TRACE("ConjugateGradientMultiShift"); GridBase *grid = src.Grid(); @@ -182,6 +183,9 @@ public: for(int s=0;s +Author: Peter Boyle +Author: Christopher Kelly + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + +//CK 2020: A variant of the multi-shift conjugate gradient with the matrix multiplication in single precision. +//The residual is stored in single precision, but the search directions and solution are stored in double precision. +//Every update_freq iterations the residual is corrected in double precision. +//For safety the a final regular CG is applied to clean up if necessary + +//PB Pure single, then double fixup + +template::value == 2, int>::type = 0, + typename std::enable_if< getPrecision::value == 1, int>::type = 0> +class ConjugateGradientMultiShiftMixedPrecCleanup : public OperatorMultiFunction, + public OperatorFunction +{ +public: + + using OperatorFunction::operator(); + + RealD Tolerance; + Integer MaxIterationsMshift; + Integer MaxIterations; + Integer IterationsToComplete; //Number of iterations the CG took to finish. 
Filled in upon completion + std::vector IterationsToCompleteShift; // Iterations for this shift + int verbose; + MultiShiftFunction shifts; + std::vector TrueResidualShift; + + int ReliableUpdateFreq; //number of iterations between reliable updates + + GridBase* SinglePrecGrid; //Grid for single-precision fields + LinearOperatorBase &Linop_f; //single precision + + ConjugateGradientMultiShiftMixedPrecCleanup(Integer maxit, const MultiShiftFunction &_shifts, + GridBase* _SinglePrecGrid, LinearOperatorBase &_Linop_f, + int _ReliableUpdateFreq) : + MaxIterationsMshift(maxit), shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq), + MaxIterations(20000) + { + verbose=1; + IterationsToCompleteShift.resize(_shifts.order); + TrueResidualShift.resize(_shifts.order); + } + + void operator() (LinearOperatorBase &Linop, const FieldD &src, FieldD &psi) + { + GridBase *grid = src.Grid(); + int nshift = shifts.order; + std::vector results(nshift,grid); + (*this)(Linop,src,results,psi); + } + void operator() (LinearOperatorBase &Linop, const FieldD &src, std::vector &results, FieldD &psi) + { + int nshift = shifts.order; + + (*this)(Linop,src,results); + + psi = shifts.norm*src; + for(int i=0;i &Linop_d, const FieldD &src_d, std::vector &psi_d) + { + GRID_TRACE("ConjugateGradientMultiShiftMixedPrecCleanup"); + GridBase *DoublePrecGrid = src_d.Grid(); + + //////////////////////////////////////////////////////////////////////// + // Convenience references to the info stored in "MultiShiftFunction" + //////////////////////////////////////////////////////////////////////// + int nshift = shifts.order; + + std::vector &mass(shifts.poles); // Make references to array in "shifts" + std::vector &mresidual(shifts.tolerances); + std::vector alpha(nshift,1.0); + + //Double precision search directions + FieldD p_d(DoublePrecGrid); + std::vector ps_f (nshift, SinglePrecGrid);// Search directions (single precision) + std::vector psi_f(nshift, SinglePrecGrid);// solutions (single precision) + + FieldD tmp_d(DoublePrecGrid); + FieldD r_d(DoublePrecGrid); + FieldF r_f(SinglePrecGrid); + FieldD mmp_d(DoublePrecGrid); + + assert(psi_d.size()==nshift); + assert(mass.size()==nshift); + assert(mresidual.size()==nshift); + + // dynamic sized arrays on stack; 2d is a pain with vector + RealD bs[nshift]; + RealD rsq[nshift]; + RealD rsqf[nshift]; + RealD z[nshift][2]; + int converged[nshift]; + + const int primary =0; + + //Primary shift fields CG iteration + RealD a,b,c,d; + RealD cp,bp,qq; //prev + + // Matrix mult fields + FieldF p_f(SinglePrecGrid); + FieldF mmp_f(SinglePrecGrid); + + // Check lightest mass + for(int s=0;s= mass[primary] ); + converged[s]=0; + } + + // Wire guess to zero + // Residuals "r" are src + // First search direction "p" is also src + cp = norm2(src_d); + + // Handle trivial case of zero src. + if( cp == 0. 
){ + for(int s=0;s= rsq[s]){ + CleanupTimer.Start(); + std::cout< Linop_shift_d(Linop_d, mass[s]); + ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop Linop_shift_f(Linop_f, mass[s]); + + MixedPrecisionConjugateGradient cg(mresidual[s], MaxIterations, MaxIterations, SinglePrecGrid, Linop_shift_f, Linop_shift_d); + cg(src_d, psi_d[s]); + + TrueResidualShift[s] = cg.TrueResidual; + CleanupTimer.Stop(); + } + } + + std::cout << GridLogMessage << "ConjugateGradientMultiShiftMixedPrecCleanup: Time Breakdown for body"< +Author: Peter Boyle +Author: Christopher Kelly + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H +#define GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H + +NAMESPACE_BEGIN(Grid); + +//CK 2020: A variant of the multi-shift conjugate gradient with the matrix multiplication in single precision. +//The residual is stored in single precision, but the search directions and solution are stored in double precision. +//Every update_freq iterations the residual is corrected in double precision. + +//For safety the a final regular CG is applied to clean up if necessary + +//Linop to add shift to input linop, used in cleanup CG +namespace ConjugateGradientMultiShiftMixedPrecSupport{ +template +class ShiftedLinop: public LinearOperatorBase{ +public: + LinearOperatorBase &linop_base; + RealD shift; + + ShiftedLinop(LinearOperatorBase &_linop_base, RealD _shift): linop_base(_linop_base), shift(_shift){} + + void OpDiag (const Field &in, Field &out){ assert(0); } + void OpDir (const Field &in, Field &out,int dir,int disp){ assert(0); } + void OpDirAll (const Field &in, std::vector &out){ assert(0); } + + void Op (const Field &in, Field &out){ assert(0); } + void AdjOp (const Field &in, Field &out){ assert(0); } + + void HermOp(const Field &in, Field &out){ + linop_base.HermOp(in, out); + axpy(out, shift, in, out); + } + + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + HermOp(in,out); + ComplexD dot = innerProduct(in,out); + n1=real(dot); + n2=norm2(out); + } +}; +}; + + +template::value == 2, int>::type = 0, + typename std::enable_if< getPrecision::value == 1, int>::type = 0> +class ConjugateGradientMultiShiftMixedPrec : public OperatorMultiFunction, + public OperatorFunction +{ +public: + + using OperatorFunction::operator(); + + RealD Tolerance; + Integer MaxIterationsMshift; + Integer MaxIterations; + Integer IterationsToComplete; //Number of iterations the CG took to finish. 
Filled in upon completion + std::vector IterationsToCompleteShift; // Iterations for this shift + int verbose; + MultiShiftFunction shifts; + std::vector TrueResidualShift; + + int ReliableUpdateFreq; //number of iterations between reliable updates + + GridBase* SinglePrecGrid; //Grid for single-precision fields + LinearOperatorBase &Linop_f; //single precision + + ConjugateGradientMultiShiftMixedPrec(Integer maxit, const MultiShiftFunction &_shifts, + GridBase* _SinglePrecGrid, LinearOperatorBase &_Linop_f, + int _ReliableUpdateFreq) : + MaxIterationsMshift(maxit), shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq), + MaxIterations(20000) + { + verbose=1; + IterationsToCompleteShift.resize(_shifts.order); + TrueResidualShift.resize(_shifts.order); + } + + void operator() (LinearOperatorBase &Linop, const FieldD &src, FieldD &psi) + { + GridBase *grid = src.Grid(); + int nshift = shifts.order; + std::vector results(nshift,grid); + (*this)(Linop,src,results,psi); + } + void operator() (LinearOperatorBase &Linop, const FieldD &src, std::vector &results, FieldD &psi) + { + int nshift = shifts.order; + + (*this)(Linop,src,results); + + psi = shifts.norm*src; + for(int i=0;i &Linop_d, const FieldD &src_d, std::vector &psi_d) + { + GRID_TRACE("ConjugateGradientMultiShiftMixedPrec"); + GridBase *DoublePrecGrid = src_d.Grid(); + + precisionChangeWorkspace pc_wk_s_to_d(DoublePrecGrid,SinglePrecGrid); + precisionChangeWorkspace pc_wk_d_to_s(SinglePrecGrid,DoublePrecGrid); + + //////////////////////////////////////////////////////////////////////// + // Convenience references to the info stored in "MultiShiftFunction" + //////////////////////////////////////////////////////////////////////// + int nshift = shifts.order; + + std::vector &mass(shifts.poles); // Make references to array in "shifts" + std::vector &mresidual(shifts.tolerances); + std::vector alpha(nshift,1.0); + + //Double precision search directions + FieldD p_d(DoublePrecGrid); + std::vector ps_d(nshift, DoublePrecGrid);// Search directions (double precision) + + FieldD tmp_d(DoublePrecGrid); + FieldD r_d(DoublePrecGrid); + FieldD mmp_d(DoublePrecGrid); + + assert(psi_d.size()==nshift); + assert(mass.size()==nshift); + assert(mresidual.size()==nshift); + + // dynamic sized arrays on stack; 2d is a pain with vector + RealD bs[nshift]; + RealD rsq[nshift]; + RealD rsqf[nshift]; + RealD z[nshift][2]; + int converged[nshift]; + + const int primary =0; + + //Primary shift fields CG iteration + RealD a,b,c,d; + RealD cp,bp,qq; //prev + + // Matrix mult fields + FieldF p_f(SinglePrecGrid); + FieldF mmp_f(SinglePrecGrid); + + // Check lightest mass + for(int s=0;s= mass[primary] ); + converged[s]=0; + } + + // Wire guess to zero + // Residuals "r" are src + // First search direction "p" is also src + cp = norm2(src_d); + + // Handle trivial case of zero src. + if( cp == 0. 
){ + for(int s=0;s= rsq[s]){ + CleanupTimer.Start(); + std::cout< Linop_shift_d(Linop_d, mass[s]); + ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop Linop_shift_f(Linop_f, mass[s]); + + MixedPrecisionConjugateGradient cg(mresidual[s], MaxIterations, MaxIterations, SinglePrecGrid, Linop_shift_f, Linop_shift_d); + cg(src_d, psi_d[s]); + + TrueResidualShift[s] = cg.TrueResidual; + CleanupTimer.Stop(); + } + } + + std::cout << GridLogMessage << "ConjugateGradientMultiShiftMixedPrec: Time Breakdown for body"< &Linop_f; LinearOperatorBase &Linop_d; GridBase* SinglePrecGrid; - RealD Delta; //reliable update parameter + RealD Delta; //reliable update parameter. A reliable update is performed when the residual drops by a factor of Delta relative to its value at the last update //Optional ability to switch to a different linear operator once the tolerance reaches a certain point. Useful for single/half -> single/single LinearOperatorBase *Linop_fallback; @@ -65,7 +65,9 @@ public: ErrorOnNoConverge(err_on_no_conv), DoFinalCleanup(true), Linop_fallback(NULL) - {}; + { + assert(Delta > 0. && Delta < 1. && "Expect 0 < Delta < 1"); + }; void setFallbackLinop(LinearOperatorBase &_Linop_fallback, const RealD _fallback_transition_tol){ Linop_fallback = &_Linop_fallback; @@ -73,6 +75,7 @@ public: } void operator()(const FieldD &src, FieldD &psi) { + GRID_TRACE("ConjugateGradientReliableUpdate"); LinearOperatorBase *Linop_f_use = &Linop_f; bool using_fallback = false; @@ -115,9 +118,12 @@ public: } //Single prec initialization + precisionChangeWorkspace pc_wk_sp_to_dp(src.Grid(), SinglePrecGrid); + precisionChangeWorkspace pc_wk_dp_to_sp(SinglePrecGrid, src.Grid()); + FieldF r_f(SinglePrecGrid); r_f.Checkerboard() = r.Checkerboard(); - precisionChange(r_f, r); + precisionChange(r_f, r, pc_wk_dp_to_sp); FieldF psi_f(r_f); psi_f = Zero(); @@ -133,7 +139,8 @@ public: GridStopWatch LinalgTimer; GridStopWatch MatrixTimer; GridStopWatch SolverTimer; - + GridStopWatch PrecChangeTimer; + SolverTimer.Start(); int k = 0; int l = 0; @@ -172,7 +179,9 @@ public: // Stopping condition if (cp <= rsq) { //Although not written in the paper, I assume that I have to add on the final solution - precisionChange(mmp, psi_f); + PrecChangeTimer.Start(); + precisionChange(mmp, psi_f, pc_wk_sp_to_dp); + PrecChangeTimer.Stop(); psi = psi + mmp; @@ -193,7 +202,10 @@ public: std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() < +Author: Yong-Chull Jang +Author: Chulwoo Jung + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_IRBL_H +#define GRID_IRBL_H + +#include //memset +#ifdef USE_LAPACK +#include +#endif + +#undef USE_LAPACK +#define Glog std::cout << GridLogMessage + +#ifdef GRID_CUDA +#include "cublas_v2.h" +#endif + +#if 0 +#define CUDA_COMPLEX cuDoubleComplex +#define CUDA_FLOAT double +#define MAKE_CUDA_COMPLEX make_cuDoubleComplex +#define CUDA_GEMM cublasZgemm +#else +#define CUDA_COMPLEX cuComplex +#define CUDA_FLOAT float +#define MAKE_CUDA_COMPLEX make_cuComplex +#define CUDA_GEMM cublasCgemm +#endif + +namespace Grid { + +//////////////////////////////////////////////////////////////////////////////// +// Helper class for sorting the evalues AND evectors by Field +// Use pointer swizzle on vectors SHOULD GET RID OF IT SOON! +//////////////////////////////////////////////////////////////////////////////// +template +class SortEigen { + private: + static bool less_lmd(RealD left,RealD right){ + return left > right; + } + static bool less_pair(std::pair& left, + std::pair& right){ + return left.first > (right.first); + } + + public: + void push(std::vector& lmd,std::vector& evec,int N) { + + //////////////////////////////////////////////////////////////////////// + // PAB: FIXME: VERY VERY VERY wasteful: takes a copy of the entire vector set. + // : The vector reorder should be done by pointer swizzle somehow + //////////////////////////////////////////////////////////////////////// + std::vector cpy(lmd.size(),evec[0].Grid()); + for(int i=0;i > emod(lmd.size()); + + for(int i=0;i(lmd[i],&cpy[i]); + + partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair); + + typename std::vector >::iterator it = emod.begin(); + for(int i=0;ifirst; + evec[i]=*(it->second); + ++it; + } + } + void push(std::vector& lmd,int N) { + std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd); + } + bool saturated(RealD lmd, RealD thrs) { + return fabs(lmd) > fabs(thrs); + } +}; + +enum class LanczosType { irbl, rbl }; + +enum IRBLdiagonalisation { + IRBLdiagonaliseWithDSTEGR, + IRBLdiagonaliseWithQR, + IRBLdiagonaliseWithEigen +}; + +///////////////////////////////////////////////////////////// +// Implicitly restarted block lanczos +///////////////////////////////////////////////////////////// +template +class ImplicitlyRestartedBlockLanczos { + +private: + + std::string cname = std::string("ImplicitlyRestartedBlockLanczos"); + int MaxIter; // Max iterations + int Nstop; // Number of evecs checked for convergence + int Nu; // Number of vecs in the unit block + int Nk; // Number of converged sought + int Nm; // total number of vectors + int Nblock_k; // Nk/Nu + int Nblock_m; // Nm/Nu + int Nconv_test_interval; // Number of skipped vectors when checking a convergence + RealD eresid; + IRBLdiagonalisation diagonalisation; + //////////////////////////////////// + // Embedded objects + //////////////////////////////////// + SortEigen _sort; + LinearOperatorBase &_Linop; + LinearOperatorBase &_SLinop;//for split + OperatorFunction &_poly; + GridRedBlackCartesian * f_grid; + GridRedBlackCartesian * sf_grid; + int mrhs; + ///////////////////////// + // BLAS objects + ///////////////////////// +#ifdef GRID_CUDA + cudaError_t cudaStat; + CUDA_COMPLEX *w_acc, *evec_acc, *c_acc; +#endif + int Nevec_acc; // Number of eigenvectors stored in the buffer evec_acc + + ///////////////////////// + // Constructor + 
///////////////////////// +public: + int split_test; //test split in the first iteration + ImplicitlyRestartedBlockLanczos(LinearOperatorBase &Linop, // op + LinearOperatorBase &SLinop, // op + GridRedBlackCartesian * FrbGrid, + GridRedBlackCartesian * SFrbGrid, + int _mrhs, + OperatorFunction & poly, // polynomial + int _Nstop, // really sought vecs + int _Nconv_test_interval, // conv check interval + int _Nu, // vecs in the unit block + int _Nk, // sought vecs + int _Nm, // total vecs + RealD _eresid, // resid in lmd deficit + int _MaxIter, // Max iterations + IRBLdiagonalisation _diagonalisation = IRBLdiagonaliseWithEigen) + : _Linop(Linop), _SLinop(SLinop), _poly(poly),sf_grid(SFrbGrid),f_grid(FrbGrid), + Nstop(_Nstop), Nconv_test_interval(_Nconv_test_interval), mrhs(_mrhs), + Nu(_Nu), Nk(_Nk), Nm(_Nm), + Nblock_m(_Nm/_Nu), Nblock_k(_Nk/_Nu), + //eresid(_eresid), MaxIter(10), + eresid(_eresid), MaxIter(_MaxIter), + diagonalisation(_diagonalisation),split_test(0), + Nevec_acc(_Nu) + { assert( (Nk%Nu==0) && (Nm%Nu==0) ); }; + + //////////////////////////////// + // Helpers + //////////////////////////////// + static RealD normalize(Field& v, int if_print=0) + { + RealD nn = norm2(v); + nn = sqrt(nn); + v = v * (1.0/nn); + return nn; + } + + void orthogonalize(Field& w, std::vector& evec, int k, int if_print=0) + { + typedef typename Field::scalar_type MyComplex; +// MyComplex ip; + ComplexD ip; + + for(int j=0; j 1e-14) + Glog<<"orthogonalize before: "< 1e-14) + Glog<<"orthogonalize after: "<& evec, int k) + { + orthogonalize(w, evec, k,1); + } + + void orthogonalize(std::vector& w, int _Nu, std::vector& evec, int k, int if_print=0) + { + typedef typename Field::scalar_type MyComplex; + MyComplex ip; +// ComplexD ip; + + for(int j=0; j& w, std::vector& evec, int R, int do_print=0) + { +#ifdef GRID_CUDA + Glog << "cuBLAS orthogonalize" << std::endl; + + typedef typename Field::vector_object vobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + typedef typename Field::scalar_type MyComplex; + + GridBase *grid = w[0].Grid(); + const uint64_t sites = grid->lSites(); + + int Nbatch = R/Nevec_acc; + assert( R%Nevec_acc == 0 ); +// Glog << "nBatch, Nevec_acc, R, Nu = " +// << Nbatch << "," << Nevec_acc << "," << R << "," << Nu << std::endl; + + for (int col=0; col(&w_v[0]); +// Glog << "col= "<(&evec_v[0]); +// Glog << "col= "<& evec, int k, int Nu) + { + typedef typename Field::scalar_type MyComplex; + MyComplex ip; + + for(int j=0; j& eval, + std::vector& evec, + const std::vector& src, int& Nconv, LanczosType Impl) + { +#ifdef GRID_CUDA + GridBase *grid = src[0].Grid(); + grid->show_decomposition(); + +// printf("GRID_CUDA\n"); + + // set eigenvector buffers for the cuBLAS calls + //const uint64_t nsimd = grid->Nsimd(); + const uint64_t sites = grid->lSites(); + + cudaStat = cudaMallocManaged((void **)&w_acc, Nu*sites*12*sizeof(CUDA_COMPLEX)); +// Glog << "w_acc= "<& eval, + std::vector& evec, + const std::vector& src, int& Nconv) + { + std::string fname = std::string(cname+"::calc_irbl()"); + GridBase *grid = evec[0].Grid(); + assert(grid == src[0].Grid()); + assert( Nu = src.size() ); + + Glog << std::string(74,'*') << std::endl; + Glog << fname + " starting iteration 0 / "<< MaxIter<< std::endl; + Glog << std::string(74,'*') << std::endl; + Glog <<" -- seek Nk = "<< Nk <<" vectors"<< std::endl; + Glog <<" -- accept Nstop = "<< Nstop <<" vectors"<< std::endl; + Glog <<" -- total Nm = "<< Nm <<" vectors"<< std::endl; + Glog <<" -- size 
of eval = "<< eval.size() << std::endl; + Glog <<" -- size of evec = "<< evec.size() << std::endl; + if ( diagonalisation == IRBLdiagonaliseWithEigen ) { + Glog << "Diagonalisation is Eigen "<< std::endl; +#ifdef USE_LAPACK + } else if ( diagonalisation == IRBLdiagonaliseWithLAPACK ) { + Glog << "Diagonalisation is LAPACK "<< std::endl; +#endif + } else { + abort(); + } + Glog << std::string(74,'*') << std::endl; + + assert(Nm == evec.size() && Nm == eval.size()); + + std::vector> lmd(Nu,std::vector(Nm,0.0)); + std::vector> lme(Nu,std::vector(Nm,0.0)); + std::vector> lmd2(Nu,std::vector(Nm,0.0)); + std::vector> lme2(Nu,std::vector(Nm,0.0)); + std::vector eval2(Nm); + std::vector resid(Nk); + + Eigen::MatrixXcd Qt = Eigen::MatrixXcd::Zero(Nm,Nm); + Eigen::MatrixXcd Q = Eigen::MatrixXcd::Zero(Nm,Nm); + + std::vector Iconv(Nm); + std::vector B(Nm,grid); // waste of space replicating + + std::vector f(Nu,grid); + std::vector f_copy(Nu,grid); + Field v(grid); + + Nconv = 0; + + RealD beta_k; + + // set initial vector + for (int i=0; i& eval, + std::vector& evec, + const std::vector& src, int& Nconv) + { + std::string fname = std::string(cname+"::calc_rbl()"); + GridBase *grid = evec[0].Grid(); + assert(grid == src[0].Grid()); + assert( Nu = src.size() ); + + int Np = (Nm-Nk); + if (Np > 0 && MaxIter > 1) Np /= MaxIter; + int Nblock_p = Np/Nu; + for(int i=0;i< evec.size();i++) evec[0].Advise()=AdviseInfrequentUse; + + Glog << std::string(74,'*') << std::endl; + Glog << fname + " starting iteration 0 / "<< MaxIter<< std::endl; + Glog << std::string(74,'*') << std::endl; + Glog <<" -- seek (min) Nk = "<< Nk <<" vectors"<< std::endl; + Glog <<" -- seek (inc) Np = "<< Np <<" vectors"<< std::endl; + Glog <<" -- seek (max) Nm = "<< Nm <<" vectors"<< std::endl; + Glog <<" -- accept Nstop = "<< Nstop <<" vectors"<< std::endl; + Glog <<" -- size of eval = "<< eval.size() << std::endl; + Glog <<" -- size of evec = "<< evec.size() << std::endl; + if ( diagonalisation == IRBLdiagonaliseWithEigen ) { + Glog << "Diagonalisation is Eigen "<< std::endl; +#ifdef USE_LAPACK + } else if ( diagonalisation == IRBLdiagonaliseWithLAPACK ) { + Glog << "Diagonalisation is LAPACK "<< std::endl; +#endif + } else { + abort(); + } + Glog << std::string(74,'*') << std::endl; + + assert(Nm == evec.size() && Nm == eval.size()); + + std::vector> lmd(Nu,std::vector(Nm,0.0)); + std::vector> lme(Nu,std::vector(Nm,0.0)); + std::vector> lmd2(Nu,std::vector(Nm,0.0)); + std::vector> lme2(Nu,std::vector(Nm,0.0)); + std::vector eval2(Nk); + std::vector resid(Nm); + + Eigen::MatrixXcd Qt = Eigen::MatrixXcd::Zero(Nm,Nm); + Eigen::MatrixXcd Q = Eigen::MatrixXcd::Zero(Nm,Nm); + + std::vector Iconv(Nm); +// int Ntest=Nu; +// std::vector B(Nm,grid); // waste of space replicating + std::vector B(1,grid); // waste of space replicating + + std::vector f(Nu,grid); + std::vector f_copy(Nu,grid); + Field v(grid); + + Nconv = 0; + +// RealD beta_k; + + // set initial vector + for (int i=0; i Btmp(Nstop,grid); // waste of space replicating + + for(int i=0; i>& lmd, + std::vector>& lme, + std::vector& evec, + std::vector& w, + std::vector& w_copy, + int b) + { + const RealD tiny = 1.0e-20; + + int Nu = w.size(); + int Nm = evec.size(); + assert( b < Nm/Nu ); +// GridCartesian *grid = evec[0]._grid; + + // converts block index to full indicies for an interval [L,R) + int L = Nu*b; + int R = Nu*(b+1); + + Real beta; + + Glog << "Using split grid"<< std::endl; +// LatticeGaugeField s_Umu(SGrid); + assert((Nu%mrhs)==0); + std::vector in(mrhs,f_grid); + + 
Field s_in(sf_grid); + Field s_out(sf_grid); + // unnecessary copy. Can or should it be avoided? + int k_start = 0; +while ( k_start < Nu) { + Glog << "k_start= "<0) { + for (int u=0; u& eval, + std::vector>& lmd, + std::vector>& lme, + int Nu, int Nk, int Nm, + Eigen::MatrixXcd & Qt, // Nm x Nm + GridBase *grid) + { + assert( Nk%Nu == 0 && Nm%Nu == 0 ); + assert( Nk <= Nm ); + Eigen::MatrixXcd BlockTriDiag = Eigen::MatrixXcd::Zero(Nk,Nk); + + for ( int u=0; u eigensolver(BlockTriDiag); + + for (int i = 0; i < Nk; i++) { + eval[Nk-1-i] = eigensolver.eigenvalues()(i); + } + for (int i = 0; i < Nk; i++) { + for (int j = 0; j < Nk; j++) { + Qt(j,Nk-1-i) = eigensolver.eigenvectors()(j,i); + //Qt(Nk-1-i,j) = eigensolver.eigenvectors()(i,j); + //Qt(i,j) = eigensolver.eigenvectors()(i,j); + } + } + } + +#ifdef USE_LAPACK + void diagonalize_lapack(std::vector& eval, + std::vector>& lmd, + std::vector>& lme, + int Nu, int Nk, int Nm, + Eigen::MatrixXcd & Qt, // Nm x Nm + GridBase *grid) + { + Glog << "diagonalize_lapack: Nu= "<_Nprocessors; + int node = grid->_processor; + int interval = (NN/total)+1; + double vl = 0.0, vu = 0.0; + MKL_INT il = interval*node+1 , iu = interval*(node+1); + if (iu > NN) iu=NN; + Glog << "node "<= il-1; i--){ + evals_tmp[i] = evals_tmp[i - (il-1)]; + if (il>1) evals_tmp[i-(il-1)]=0.; + for (int j = 0; j< NN; j++){ + evec_tmp[i*NN+j] = evec_tmp[(i - (il-1))*NN+j]; + if (il>1) { + evec_tmp[(i-(il-1))*NN+j].imag=0.; + evec_tmp[(i-(il-1))*NN+j].real=0.; + } + } + } + } + { + grid->GlobalSumVector(evals_tmp,NN); + grid->GlobalSumVector((double*)evec_tmp,2*NN*NN); + } + } + for (int i = 0; i < Nk; i++) + eval[Nk-1-i] = evals_tmp[i]; + for (int i = 0; i < Nk; i++) { + for (int j = 0; j < Nk; j++) { +// Qt(j,Nk-1-i) = eigensolver.eigenvectors()(j,i); + Qt(j,Nk-1-i)=std::complex + ( evec_tmp[i*Nk+j].real, + evec_tmp[i*Nk+j].imag); +// ( evec_tmp[(Nk-1-j)*Nk+Nk-1-i].real, +// evec_tmp[(Nk-1-j)*Nk+Nk-1-i].imag); + + } + } + +if (1){ + Eigen::SelfAdjointEigenSolver eigensolver(BlockTriDiag); + + for (int i = 0; i < Nk; i++) { + Glog << "eval = "<& eval, + std::vector>& lmd, + std::vector>& lme, + int Nu, int Nk, int Nm, + Eigen::MatrixXcd & Qt, + GridBase *grid) + { + Qt = Eigen::MatrixXcd::Identity(Nm,Nm); + if ( diagonalisation == IRBLdiagonaliseWithEigen ) { + diagonalize_Eigen(eval,lmd,lme,Nu,Nk,Nm,Qt,grid); +#ifdef USE_LAPACK + } else if ( diagonalisation == IRBLdiagonaliseWithLAPACK ) { + diagonalize_lapack(eval,lmd,lme,Nu,Nk,Nm,Qt,grid); +#endif + } else { + assert(0); + } + } + + + void unpackHermitBlockTriDiagMatToEigen( + std::vector>& lmd, + std::vector>& lme, + int Nu, int Nb, int Nk, int Nm, + Eigen::MatrixXcd& M) + { + //Glog << "unpackHermitBlockTriDiagMatToEigen() begin" << '\n'; + assert( Nk%Nu == 0 && Nm%Nu == 0 ); + assert( Nk <= Nm ); + M = Eigen::MatrixXcd::Zero(Nk,Nk); + + // rearrange + for ( int u=0; u>& lmd, + std::vector>& lme, + int Nu, int Nb, int Nk, int Nm, + Eigen::MatrixXcd& M) + { + //Glog << "packHermitBlockTriDiagMatfromEigen() begin" << '\n'; + assert( Nk%Nu == 0 && Nm%Nu == 0 ); + assert( Nk <= Nm ); + + // rearrange + for ( int u=0; u QRD(Mtmp); + Q = QRD.householderQ(); + R = QRD.matrixQR(); // upper triangular part is the R matrix. + // lower triangular part used to represent series + // of Q sequence. 
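// (Eigen detail: HouseholderQR::matrixQR() stores R in the upper triangle and the Householder vectors that implicitly define the Q sequence in the strict lower triangle, which is what the comment above refers to.)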
+ + // equivalent operation of Qprod *= Q + //M = Eigen::MatrixXcd::Zero(Nm,Nm); + + //for (int i=0; i Nm) kmax = Nm; + for (int k=i; ki) M(i,j) = conj(M(j,i)); + // if (i-j > Nu || j-i > Nu) M(i,j) = 0.; + // } + //} + + //Glog << "shiftedQRDecompEigen() end" << endl; + } + + void exampleQRDecompEigen(void) + { + Eigen::MatrixXd A = Eigen::MatrixXd::Zero(3,3); + Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(3,3); + Eigen::MatrixXd R = Eigen::MatrixXd::Zero(3,3); + Eigen::MatrixXd P = Eigen::MatrixXd::Zero(3,3); + + A(0,0) = 12.0; + A(0,1) = -51.0; + A(0,2) = 4.0; + A(1,0) = 6.0; + A(1,1) = 167.0; + A(1,2) = -68.0; + A(2,0) = -4.0; + A(2,1) = 24.0; + A(2,2) = -41.0; + + Glog << "matrix A before ColPivHouseholder" << std::endl; + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "A[" << i << "," << j << "] = " << A(i,j) << '\n'; + } + } + Glog << std::endl; + + Eigen::ColPivHouseholderQR QRD(A); + + Glog << "matrix A after ColPivHouseholder" << std::endl; + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "A[" << i << "," << j << "] = " << A(i,j) << '\n'; + } + } + Glog << std::endl; + + Glog << "HouseholderQ with sequence lenth = nonzeroPiviots" << std::endl; + Q = QRD.householderQ().setLength(QRD.nonzeroPivots()); + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "Q[" << i << "," << j << "] = " << Q(i,j) << '\n'; + } + } + Glog << std::endl; + + Glog << "HouseholderQ with sequence lenth = 1" << std::endl; + Q = QRD.householderQ().setLength(1); + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "Q[" << i << "," << j << "] = " << Q(i,j) << '\n'; + } + } + Glog << std::endl; + + Glog << "HouseholderQ with sequence lenth = 2" << std::endl; + Q = QRD.householderQ().setLength(2); + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "Q[" << i << "," << j << "] = " << Q(i,j) << '\n'; + } + } + Glog << std::endl; + + Glog << "matrixR" << std::endl; + R = QRD.matrixR(); + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "R[" << i << "," << j << "] = " << R(i,j) << '\n'; + } + } + Glog << std::endl; + + Glog << "rank = " << QRD.rank() << std::endl; + Glog << "threshold = " << QRD.threshold() << std::endl; + + Glog << "matrixP" << std::endl; + P = QRD.colsPermutation(); + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "P[" << i << "," << j << "] = " << P(i,j) << '\n'; + } + } + Glog << std::endl; + + + Glog << "QR decomposition without column pivoting" << std::endl; + + A(0,0) = 12.0; + A(0,1) = -51.0; + A(0,2) = 4.0; + A(1,0) = 6.0; + A(1,1) = 167.0; + A(1,2) = -68.0; + A(2,0) = -4.0; + A(2,1) = 24.0; + A(2,2) = -41.0; + + Glog << "matrix A before Householder" << std::endl; + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "A[" << i << "," << j << "] = " << A(i,j) << '\n'; + } + } + Glog << std::endl; + + Eigen::HouseholderQR QRDplain(A); + + Glog << "HouseholderQ" << std::endl; + Q = QRDplain.householderQ(); + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "Q[" << i << "," << j << "] = " << Q(i,j) << '\n'; + } + } + Glog << std::endl; + + Glog << "matrix A after Householder" << std::endl; + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "A[" << i << "," << j << "] = " << A(i,j) << '\n'; + } + } + Glog << std::endl; + } + + }; +} +#undef Glog +#undef USE_LAPACK +#undef CUDA_COMPLEX +#undef CUDA_FLOAT +#undef MAKE_CUDA_COMPLEX +#undef CUDA_GEMM +#endif diff --git 
a/Grid/algorithms/iterative/LocalCoherenceLanczos.h b/Grid/algorithms/iterative/LocalCoherenceLanczos.h index dc82134a..344a785a 100644 --- a/Grid/algorithms/iterative/LocalCoherenceLanczos.h +++ b/Grid/algorithms/iterative/LocalCoherenceLanczos.h @@ -44,6 +44,7 @@ public: int, MinRes); // Must restart }; +//This class is the input parameter class for some testing programs struct LocalCoherenceLanczosParams : Serializable { public: GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams, @@ -145,16 +146,24 @@ public: LinearOperatorBase &_Linop; RealD _coarse_relax_tol; std::vector &_subspace; + + int _largestEvalIdxForReport; //The convergence of the LCL is based on the evals of the coarse grid operator, not those of the underlying fine grid operator + //As a result we do not know what the eval range of the fine operator is until the very end, making tuning the Cheby bounds very difficult + //To work around this issue, every restart we separately reconstruct the fine operator eval for the lowest and highest evec and print these + //out alongside the evals of the coarse operator. To do so we need to know the index of the largest eval (i.e. Nstop-1) + //NOTE: If largestEvalIdxForReport=-1 (default) then this is not performed ImplicitlyRestartedLanczosSmoothedTester(LinearFunction &Poly, OperatorFunction &smoother, LinearOperatorBase &Linop, std::vector &subspace, - RealD coarse_relax_tol=5.0e3) + RealD coarse_relax_tol=5.0e3, + int largestEvalIdxForReport=-1) : _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace), - _coarse_relax_tol(coarse_relax_tol) + _coarse_relax_tol(coarse_relax_tol), _largestEvalIdxForReport(largestEvalIdxForReport) { }; + //evalMaxApprox: approximation of largest eval of the fine Chebyshev operator (suitably wrapped by block projection) int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) { CoarseField v(B); @@ -177,12 +186,26 @@ public: <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv < nbasis ) eresid = eresid*_coarse_relax_tol; + std::cout.precision(13); std::cout< nbasis ) eresid = eresid*_coarse_relax_tol; if( (vv on the coarse grid. This function orthnormalizes the fine-grid subspace + //vectors under the block inner product. This step must be performed after computing the fine grid + //eigenvectors and before computing the coarse grid eigenvectors. 
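//(So the expected call order, reading this file, is: generate the fine-grid subspace, Orthogonalise(), then run the coarse-grid Lanczos further down.)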
void Orthogonalise(void ) { CoarseScalar InnerProd(_CoarseGrid); std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"< Cheby(cheby_op); - ProjectedHermOp Op(_FineOp,subspace); - ProjectedFunctionHermOp ChebyOp (Cheby,_FineOp,subspace); + Chebyshev Cheby(cheby_op); //Chebyshev of fine operator on fine grid + ProjectedHermOp Op(_FineOp,subspace); //Fine operator on coarse grid with intermediate fine grid conversion + ProjectedFunctionHermOp ChebyOp (Cheby,_FineOp,subspace); //Chebyshev of fine operator on coarse grid with intermediate fine grid conversion ////////////////////////////////////////////////////////////////////////////////////////////////// // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL ////////////////////////////////////////////////////////////////////////////////////////////////// - Chebyshev ChebySmooth(cheby_smooth); - ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax); + Chebyshev ChebySmooth(cheby_smooth); //lower order Chebyshev of fine operator on fine grid used to smooth regenerated eigenvectors + ImplicitlyRestartedLanczosSmoothedTester ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax,Nstop-1); evals_coarse.resize(Nm); evec_coarse.resize(Nm,_CoarseGrid); CoarseField src(_CoarseGrid); src=1.0; + //Note the "tester" here is also responsible for generating the fine grid eigenvalues which are output into the "evals_coarse" array ImplicitlyRestartedLanczos IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); int Nconv=0; IRL.calc(evals_coarse,evec_coarse,src,Nconv,false); @@ -405,6 +440,14 @@ public: std::cout << i << " Coarse eval = " << evals_coarse[i] << std::endl; } } + + //Get the fine eigenvector 'i' by reconstruction + void getFineEvecEval(FineField &evec, RealD &eval, const int i) const{ + blockPromote(evec_coarse[i],evec,subspace); + eval = evals_coarse[i]; + } + + }; NAMESPACE_END(Grid); diff --git a/Grid/algorithms/iterative/PowerMethod.h b/Grid/algorithms/iterative/PowerMethod.h index 6aa8e923..027ea68c 100644 --- a/Grid/algorithms/iterative/PowerMethod.h +++ b/Grid/algorithms/iterative/PowerMethod.h @@ -29,6 +29,8 @@ template class PowerMethod RealD vnum = real(innerProduct(src_n,tmp)); // HermOp. 
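// Rayleigh quotient <v,Hv>/<v,v>: na below estimates the largest eigenvalue; the test that follows exits once successive estimates agree to 0.1% or the iteration cap _MAX_ITER_EST_ is reached.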
RealD vden = norm2(src_n); RealD na = vnum/vden; + + std::cout << GridLogIterative << "PowerMethod: Current approximation of largest eigenvalue " << na << std::endl; if ( (fabs(evalMaxApprox/na - 1.0) < 0.001) || (i==_MAX_ITER_EST_-1) ) { evalMaxApprox = na; diff --git a/Grid/allocator/MemoryManager.cc b/Grid/allocator/MemoryManager.cc index d055898f..a9e5c9b4 100644 --- a/Grid/allocator/MemoryManager.cc +++ b/Grid/allocator/MemoryManager.cc @@ -4,11 +4,14 @@ NAMESPACE_BEGIN(Grid); /*Allocation types, saying which pointer cache should be used*/ #define Cpu (0) -#define CpuSmall (1) -#define Acc (2) -#define AccSmall (3) -#define Shared (4) -#define SharedSmall (5) +#define CpuHuge (1) +#define CpuSmall (2) +#define Acc (3) +#define AccHuge (4) +#define AccSmall (5) +#define Shared (6) +#define SharedHuge (7) +#define SharedSmall (8) #undef GRID_MM_VERBOSE uint64_t total_shared; uint64_t total_device; @@ -35,12 +38,15 @@ void MemoryManager::PrintBytes(void) } +uint64_t MemoryManager::DeviceCacheBytes() { return CacheBytes[Acc] + CacheBytes[AccHuge] + CacheBytes[AccSmall]; } +uint64_t MemoryManager::HostCacheBytes() { return CacheBytes[Cpu] + CacheBytes[CpuHuge] + CacheBytes[CpuSmall]; } + ////////////////////////////////////////////////////////////////////// // Data tables for recently freed pooiniter caches ////////////////////////////////////////////////////////////////////// MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax]; int MemoryManager::Victim[MemoryManager::NallocType]; -int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 8, 8, 16, 8, 16 }; +int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 0, 8, 8, 0, 16, 8, 0, 16 }; uint64_t MemoryManager::CacheBytes[MemoryManager::NallocType]; ////////////////////////////////////////////////////////////////////// // Actual allocation and deallocation utils @@ -170,6 +176,16 @@ void MemoryManager::Init(void) } } + str= getenv("GRID_ALLOC_NCACHE_HUGE"); + if ( str ) { + Nc = atoi(str); + if ( (Nc>=0) && (Nc < NallocCacheMax)) { + Ncache[CpuHuge]=Nc; + Ncache[AccHuge]=Nc; + Ncache[SharedHuge]=Nc; + } + } + str= getenv("GRID_ALLOC_NCACHE_SMALL"); if ( str ) { Nc = atoi(str); @@ -190,7 +206,9 @@ void MemoryManager::InitMessage(void) { std::cout << GridLogMessage<< "MemoryManager::Init() setting up"<= GRID_ALLOC_HUGE_LIMIT) cache = type + 1; + else cache = type; + return Insert(ptr,bytes,Entries[cache],Ncache[cache],Victim[cache],CacheBytes[cache]); #else return ptr; @@ -232,11 +253,12 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,int type) void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim, uint64_t &cacheBytes) { - assert(ncache>0); #ifdef GRID_OMP assert(omp_in_parallel()==0); #endif + if (ncache == 0) return ptr; + void * ret = NULL; int v = -1; @@ -271,8 +293,11 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries void *MemoryManager::Lookup(size_t bytes,int type) { #ifdef ALLOCATION_CACHE - bool small = (bytes < GRID_ALLOC_SMALL_LIMIT); - int cache = type+small; + int cache; + if (bytes < GRID_ALLOC_SMALL_LIMIT) cache = type + 2; + else if (bytes >= GRID_ALLOC_HUGE_LIMIT) cache = type + 1; + else cache = type; + return Lookup(bytes,Entries[cache],Ncache[cache],CacheBytes[cache]); #else return NULL; @@ -281,7 +306,6 @@ void *MemoryManager::Lookup(size_t bytes,int type) void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache,uint64_t & 
cacheBytes) { - assert(ncache>0); #ifdef GRID_OMP assert(omp_in_parallel()==0); #endif diff --git a/Grid/allocator/MemoryManager.h b/Grid/allocator/MemoryManager.h index c22a54f3..0dc78f04 100644 --- a/Grid/allocator/MemoryManager.h +++ b/Grid/allocator/MemoryManager.h @@ -35,6 +35,7 @@ NAMESPACE_BEGIN(Grid); // Move control to configure.ac and Config.h? #define GRID_ALLOC_SMALL_LIMIT (4096) +#define GRID_ALLOC_HUGE_LIMIT (2147483648) #define STRINGIFY(x) #x #define TOSTRING(x) STRINGIFY(x) @@ -70,6 +71,21 @@ enum ViewMode { CpuWriteDiscard = 0x10 // same for now }; +struct MemoryStatus { + uint64_t DeviceBytes; + uint64_t DeviceLRUBytes; + uint64_t DeviceMaxBytes; + uint64_t HostToDeviceBytes; + uint64_t DeviceToHostBytes; + uint64_t HostToDeviceXfer; + uint64_t DeviceToHostXfer; + uint64_t DeviceEvictions; + uint64_t DeviceDestroy; + uint64_t DeviceAllocCacheBytes; + uint64_t HostAllocCacheBytes; +}; + + class MemoryManager { private: @@ -83,7 +99,7 @@ private: } AllocationCacheEntry; static const int NallocCacheMax=128; - static const int NallocType=6; + static const int NallocType=9; static AllocationCacheEntry Entries[NallocType][NallocCacheMax]; static int Victim[NallocType]; static int Ncache[NallocType]; @@ -121,7 +137,26 @@ private: static uint64_t DeviceToHostXfer; static uint64_t DeviceEvictions; static uint64_t DeviceDestroy; - + + static uint64_t DeviceCacheBytes(); + static uint64_t HostCacheBytes(); + + static MemoryStatus GetFootprint(void) { + MemoryStatus stat; + stat.DeviceBytes = DeviceBytes; + stat.DeviceLRUBytes = DeviceLRUBytes; + stat.DeviceMaxBytes = DeviceMaxBytes; + stat.HostToDeviceBytes = HostToDeviceBytes; + stat.DeviceToHostBytes = DeviceToHostBytes; + stat.HostToDeviceXfer = HostToDeviceXfer; + stat.DeviceToHostXfer = DeviceToHostXfer; + stat.DeviceEvictions = DeviceEvictions; + stat.DeviceDestroy = DeviceDestroy; + stat.DeviceAllocCacheBytes = DeviceCacheBytes(); + stat.HostAllocCacheBytes = HostCacheBytes(); + return stat; + }; + private: #ifndef GRID_UVM ////////////////////////////////////////////////////////////////////// diff --git a/Grid/allocator/MemoryManagerCache.cc b/Grid/allocator/MemoryManagerCache.cc index bae184ec..e758ac2f 100644 --- a/Grid/allocator/MemoryManagerCache.cc +++ b/Grid/allocator/MemoryManagerCache.cc @@ -144,8 +144,8 @@ void MemoryManager::Evict(AcceleratorViewEntry &AccCache) mprintf("MemoryManager: Evict cpu %lx acc %lx cpuLock %ld accLock %ld\n", (uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr, (uint64_t)AccCache.cpuLock,(uint64_t)AccCache.accLock); - assert(AccCache.accLock==0); // Cannot evict so logic bomb - assert(AccCache.CpuPtr!=(uint64_t)NULL); + if (AccCache.accLock!=0) return; + if (AccCache.cpuLock!=0) return; if(AccCache.state==AccDirty) { Flush(AccCache); } @@ -519,7 +519,6 @@ void MemoryManager::Audit(std::string s) uint64_t LruBytes1=0; uint64_t LruBytes2=0; uint64_t LruCnt=0; - uint64_t LockedBytes=0; std::cout << " Memory Manager::Audit() from "<second; @@ -548,6 +548,7 @@ void MemoryManager::Audit(std::string s) if ( AccCache.cpuLock || AccCache.accLock ) { assert(AccCache.LRU_valid==0); + std::cout << GridLogError << s<< "\n\t 0x"< communicator_halo; @@ -97,14 +98,16 @@ public: int BossRank(void) ; int ThisRank(void) ; const Coordinate & ThisProcessorCoor(void) ; + const Coordinate & ShmGrid(void) { return _shm_processors; } ; const Coordinate & ProcessorGrid(void) ; - int ProcessorCount(void) ; + int ProcessorCount(void) ; //////////////////////////////////////////////////////////////////////////////// 
// very VERY rarely (Log, serial RNG) we need world without a grid //////////////////////////////////////////////////////////////////////////////// static int RankWorld(void) ; static void BroadcastWorld(int root,void* data, int bytes); + static void BarrierWorld(void); //////////////////////////////////////////////////////////// // Reduction @@ -128,7 +131,7 @@ public: template void GlobalSum(obj &o){ typedef typename obj::scalar_type scalar_type; int words = sizeof(obj)/sizeof(scalar_type); - scalar_type * ptr = (scalar_type *)& o; + scalar_type * ptr = (scalar_type *)& o; // Safe alias GlobalSumVector(ptr,words); } @@ -142,17 +145,17 @@ public: int bytes); double StencilSendToRecvFrom(void *xmit, - int xmit_to_rank, + int xmit_to_rank,int do_xmit, void *recv, - int recv_from_rank, + int recv_from_rank,int do_recv, int bytes,int dir); double StencilSendToRecvFromBegin(std::vector &list, void *xmit, - int xmit_to_rank, + int xmit_to_rank,int do_xmit, void *recv, - int recv_from_rank, - int bytes,int dir); + int recv_from_rank,int do_recv, + int xbytes,int rbytes,int dir); void StencilSendToRecvFromComplete(std::vector &waitall,int i); diff --git a/Grid/communicator/Communicator_mpi3.cc b/Grid/communicator/Communicator_mpi3.cc index b8ce7bca..e7d7a96d 100644 --- a/Grid/communicator/Communicator_mpi3.cc +++ b/Grid/communicator/Communicator_mpi3.cc @@ -106,7 +106,7 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors) // Remap using the shared memory optimising routine // The remap creates a comm which must be freed //////////////////////////////////////////////////// - GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm); + GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm,_shm_processors); InitFromMPICommunicator(processors,optimal_comm); SetCommunicator(optimal_comm); /////////////////////////////////////////////////// @@ -124,12 +124,13 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension); Coordinate parent_processor_coor(_ndimension,0); Coordinate parent_processors (_ndimension,1); - + Coordinate shm_processors (_ndimension,1); // Can make 5d grid from 4d etc... 
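// The do_xmit/do_recv flags and split xbytes/rbytes in the stencil interface
// above let a caller suppress either leg of a halo exchange, e.g. across a
// Dirichlet cut. A hedged sketch of the new calling convention (buffer and
// communicator names illustrative):
//   std::vector<CommsRequest_t> reqs;
//   int dox = 1, dor = 0;  // send in this direction, but skip the receive
//   comm.StencilSendToRecvFromBegin(reqs, xmit_buf, dest, dox,
//                                   recv_buf, from, dor, xbytes, rbytes, dir);
//   comm.StencilSendToRecvFromComplete(reqs, dir);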
int pad = _ndimension-parent_ndimension; for(int d=0;d list; - double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir); + double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,dox,recv,from,dor,bytes,bytes,dir); StencilSendToRecvFromComplete(list,dir); return offbytes; } double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, void *xmit, - int dest, + int dest,int dox, void *recv, - int from, - int bytes,int dir) + int from,int dor, + int xbytes,int rbytes,int dir) { int ncomm =communicator_halo.size(); int commdir=dir%ncomm; @@ -370,39 +372,34 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vectorShmBufferTranslate(dest,recv); - assert(shm!=NULL); - // std::cout <<"acceleratorCopyDeviceToDeviceAsynch"<< std::endl; - acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes); + + if (dox) { + if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) { + tag= dir+_processor*32; + ierr =MPI_Isend(xmit, xbytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq); + assert(ierr==0); + list.push_back(xrq); + off_node_bytes+=xbytes; + } else { + void *shm = (void *) this->ShmBufferTranslate(dest,recv); + assert(shm!=NULL); + acceleratorCopyDeviceToDeviceAsynch(xmit,shm,xbytes); + } } - // if ( CommunicatorPolicy == CommunicatorPolicySequential ) { - // this->StencilSendToRecvFromComplete(list,dir); - // } - return off_node_bytes; } void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &list,int dir) { - // std::cout << "Copy Synchronised\n"<=1); _processor_coor.resize(_ndimension); @@ -102,6 +104,7 @@ int CartesianCommunicator::RankWorld(void){return 0;} void CartesianCommunicator::Barrier(void){} void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {} void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { } +void CartesianCommunicator::BarrierWorld(void) { } int CartesianCommunicator::RankFromProcessorCoor(Coordinate &coor) { return 0;} void CartesianCommunicator::ProcessorCoorFromRank(int rank, Coordinate &coor){ coor = _processor_coor; } void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) @@ -111,21 +114,21 @@ void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest } double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, - int xmit_to_rank, + int xmit_to_rank,int dox, void *recv, - int recv_from_rank, + int recv_from_rank,int dor, int bytes, int dir) { return 2.0*bytes; } double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, void *xmit, - int xmit_to_rank, + int xmit_to_rank,int dox, void *recv, - int recv_from_rank, - int bytes, int dir) + int recv_from_rank,int dor, + int xbytes,int rbytes, int dir) { - return 2.0*bytes; + return xbytes+rbytes; } void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &waitall,int dir) { diff --git a/Grid/communicator/SharedMemory.cc b/Grid/communicator/SharedMemory.cc index de10da3d..ec42dd87 100644 --- a/Grid/communicator/SharedMemory.cc +++ b/Grid/communicator/SharedMemory.cc @@ -91,6 +91,59 @@ void *SharedMemory::ShmBufferSelf(void) //std::cerr << "ShmBufferSelf "< IntShmDims; + GridCmdOptionIntVector(std::string(str),IntShmDims); + assert(IntShmDims.size() == WorldDims.size()); + long ShmSize = 1; + for (int dim=0;dim primes({2,3,5}); + + int dim = 0; + int last_dim = ndimension - 1; + int AutoShmSize = 1; + while(AutoShmSize != WorldShmSize) { + int p; + for(p=0;p 
*************************************************************************************/ /* END LEGAL */ +#define header "SharedMemoryMpi: " + #include #include @@ -36,12 +38,120 @@ Author: Christoph Lehner #ifdef GRID_HIP #include #endif -#ifdef GRID_SYCl - +#ifdef GRID_SYCL +#define GRID_SYCL_LEVEL_ZERO_IPC +#include +#define SHM_SOCKETS #endif +#include +#include + NAMESPACE_BEGIN(Grid); -#define header "SharedMemoryMpi: " + +#ifdef SHM_SOCKETS + +/* + * Barbaric extra intranode communication route in case we need sockets to pass FDs + * Forced by level_zero not being nicely designed + */ +static int sock; +static const char *sock_path_fmt = "/tmp/GridUnixSocket.%d"; +static char sock_path[256]; +class UnixSockets { +public: + static void Open(int rank) + { + int errnum; + + sock = socket(AF_UNIX, SOCK_DGRAM, 0); assert(sock>0); + + struct sockaddr_un sa_un = { 0 }; + sa_un.sun_family = AF_UNIX; + snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,rank); + unlink(sa_un.sun_path); + if (bind(sock, (struct sockaddr *)&sa_un, sizeof(sa_un))) { + perror("bind failure"); + exit(EXIT_FAILURE); + } + } + + static int RecvFileDescriptor(void) + { + int n; + int fd; + char buf[1]; + struct iovec iov; + struct msghdr msg; + struct cmsghdr *cmsg; + char cms[CMSG_SPACE(sizeof(int))]; + + iov.iov_base = buf; + iov.iov_len = 1; + + memset(&msg, 0, sizeof msg); + msg.msg_name = 0; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_control = (caddr_t)cms; + msg.msg_controllen = sizeof cms; + + if((n=recvmsg(sock, &msg, 0)) < 0) { + perror("recvmsg failed"); + return -1; + } + if(n == 0){ + perror("recvmsg returned 0"); + return -1; + } + cmsg = CMSG_FIRSTHDR(&msg); + + memmove(&fd, CMSG_DATA(cmsg), sizeof(int)); + + return fd; + } + + static void SendFileDescriptor(int fildes,int xmit_to_rank) + { + struct msghdr msg; + struct iovec iov; + struct cmsghdr *cmsg = NULL; + char ctrl[CMSG_SPACE(sizeof(int))]; + char data = ' '; + + memset(&msg, 0, sizeof(struct msghdr)); + memset(ctrl, 0, CMSG_SPACE(sizeof(int))); + iov.iov_base = &data; + iov.iov_len = sizeof(data); + + sprintf(sock_path,sock_path_fmt,xmit_to_rank); + + struct sockaddr_un sa_un = { 0 }; + sa_un.sun_family = AF_UNIX; + snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,xmit_to_rank); + + msg.msg_name = (void *)&sa_un; + msg.msg_namelen = sizeof(sa_un); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_controllen = CMSG_SPACE(sizeof(int)); + msg.msg_control = ctrl; + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + + *((int *) CMSG_DATA(cmsg)) = fildes; + + sendmsg(sock, &msg, 0); + }; +}; +#endif + + /*Construct from an MPI communicator*/ void GlobalSharedMemory::Init(Grid_MPI_Comm comm) { @@ -152,7 +262,7 @@ int Log2Size(int TwoToPower,int MAXLOG2) } return log2size; } -void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_MPI_Comm & optimal_comm) +void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &SHM) { ////////////////////////////////////////////////////////////////////////////// // Look and see if it looks like an HPE 8600 based on hostname conventions @@ -165,63 +275,11 @@ void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_M gethostname(name,namelen); int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ; - if(nscan==3 && HPEhypercube ) 
OptimalCommunicatorHypercube(processors,optimal_comm); - else OptimalCommunicatorSharedMemory(processors,optimal_comm); + if(nscan==3 && HPEhypercube ) OptimalCommunicatorHypercube(processors,optimal_comm,SHM); + else OptimalCommunicatorSharedMemory(processors,optimal_comm,SHM); } -static inline int divides(int a,int b) -{ - return ( b == ( (b/a)*a ) ); -} -void GlobalSharedMemory::GetShmDims(const Coordinate &WorldDims,Coordinate &ShmDims) -{ - //////////////////////////////////////////////////////////////// - // Allow user to configure through environment variable - //////////////////////////////////////////////////////////////// - char* str = getenv(("GRID_SHM_DIMS_" + std::to_string(ShmDims.size())).c_str()); - if ( str ) { - std::vector IntShmDims; - GridCmdOptionIntVector(std::string(str),IntShmDims); - assert(IntShmDims.size() == WorldDims.size()); - long ShmSize = 1; - for (int dim=0;dim primes({2,3,5}); - - int dim = 0; - int last_dim = ndimension - 1; - int AutoShmSize = 1; - while(AutoShmSize != WorldShmSize) { - int p; - for(p=0;p(theGridAccelerator->get_device()); auto zeContext = cl::sycl::get_native(theGridAccelerator->get_context()); ze_ipc_mem_handle_t ihandle; clone_mem_t handle; - + if ( r==WorldShmRank ) { auto err = zeMemGetIpcHandle(zeContext,ShmCommBuf,&ihandle); if ( err != ZE_RESULT_SUCCESS ) { - std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "< void Scatter_plane_merge(Lattice &rhs,ExtractPointerA } } +#if (defined(GRID_CUDA) || defined(GRID_HIP)) && defined(ACCELERATOR_CSHIFT) + +template +T iDivUp(T a, T b) // Round a / b to nearest higher integer value +{ return (a % b != 0) ? (a / b + 1) : (a / b); } + +template +__global__ void populate_Cshift_table(T* vector, T lo, T ro, T e1, T e2, T stride) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx >= e1*e2) return; + + int n, b, o; + + n = idx / e2; + b = idx % e2; + o = n*stride + b; + + vector[2*idx + 0] = lo + o; + vector[2*idx + 1] = ro + o; +} + +#endif + ////////////////////////////////////////////////////// // local to node block strided copies ////////////////////////////////////////////////////// @@ -321,12 +345,20 @@ template void Copy_plane(Lattice& lhs,const Lattice &rhs int ent=0; if(cbmask == 0x3 ){ +#if (defined(GRID_CUDA) || defined(GRID_HIP)) && defined(ACCELERATOR_CSHIFT) + ent = e1*e2; + dim3 blockSize(acceleratorThreads()); + dim3 gridSize(iDivUp((unsigned int)ent, blockSize.x)); + populate_Cshift_table<<>>(&Cshift_table[0].first, lo, ro, e1, e2, stride); + accelerator_barrier(); +#else for(int n=0;n(lo+o,ro+o); } } +#endif } else { for(int n=0;n void Copy_plane_permute(Lattice& lhs,const Lattice>>(&Cshift_table[0].first, lo, ro, e1, e2, stride); + accelerator_barrier(); +#else for(int n=0;n(lo+o+b,ro+o+b); }} +#endif } else { for(int n=0;n::type ret; typedef typename vobj::scalar_object scalar_object; - typedef typename vobj::scalar_type scalar_type; + // typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; const int Nsimd = vobj::vector_type::Nsimd(); diff --git a/Grid/lattice/Lattice_arith.h b/Grid/lattice/Lattice_arith.h index b39a475d..aebc093a 100644 --- a/Grid/lattice/Lattice_arith.h +++ b/Grid/lattice/Lattice_arith.h @@ -36,6 +36,7 @@ NAMESPACE_BEGIN(Grid); ////////////////////////////////////////////////////////////////////////////////////////////////////// template inline void mult(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ + GRID_TRACE("mult"); ret.Checkerboard() = lhs.Checkerboard(); autoView( ret_v 
, ret, AcceleratorWrite); autoView( lhs_v , lhs, AcceleratorRead); @@ -53,6 +54,7 @@ void mult(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ template inline void mac(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ + GRID_TRACE("mac"); ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,rhs); conformable(lhs,rhs); @@ -70,6 +72,7 @@ void mac(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ template inline void sub(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ + GRID_TRACE("sub"); ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,rhs); conformable(lhs,rhs); @@ -86,6 +89,7 @@ void sub(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ } template inline void add(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ + GRID_TRACE("add"); ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,rhs); conformable(lhs,rhs); @@ -106,6 +110,7 @@ void add(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ ////////////////////////////////////////////////////////////////////////////////////////////////////// template inline void mult(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ + GRID_TRACE("mult"); ret.Checkerboard() = lhs.Checkerboard(); conformable(lhs,ret); autoView( ret_v , ret, AcceleratorWrite); @@ -119,6 +124,7 @@ void mult(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ template inline void mac(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ + GRID_TRACE("mac"); ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,lhs); autoView( ret_v , ret, AcceleratorWrite); @@ -133,6 +139,7 @@ void mac(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ template inline void sub(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ + GRID_TRACE("sub"); ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,lhs); autoView( ret_v , ret, AcceleratorWrite); @@ -146,6 +153,7 @@ void sub(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ } template inline void add(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ + GRID_TRACE("add"); ret.Checkerboard() = lhs.Checkerboard(); conformable(lhs,ret); autoView( ret_v , ret, AcceleratorWrite); @@ -163,6 +171,7 @@ void add(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ ////////////////////////////////////////////////////////////////////////////////////////////////////// template inline void mult(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ + GRID_TRACE("mult"); ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); autoView( ret_v , ret, AcceleratorWrite); @@ -177,6 +186,7 @@ void mult(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ template inline void mac(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ + GRID_TRACE("mac"); ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); autoView( ret_v , ret, AcceleratorWrite); @@ -191,6 +201,7 @@ void mac(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ template inline void sub(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ + GRID_TRACE("sub"); ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); autoView( ret_v , ret, AcceleratorWrite); @@ -204,6 +215,7 @@ void sub(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ } template inline void add(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ + GRID_TRACE("add"); ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); autoView( ret_v , ret, AcceleratorWrite); @@ -218,6 +230,7 @@ void add(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ template inline void axpy(Lattice &ret,sobj a,const Lattice &x,const Lattice &y){ + GRID_TRACE("axpy"); ret.Checkerboard() = x.Checkerboard(); 
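// GRID_TRACE gives each arithmetic kernel a named range in the tracer output.
// Assuming the macro opens a scoped trace region, as its use here suggests,
// user code can be instrumented the same way (hypothetical label):
//   void myUpdate(LatticeFermion &r, const LatticeFermion &x, const LatticeFermion &y)
//   {
//     GRID_TRACE("myUpdate");  // outer range; mult/add below emit their own
//     r = 2.0*x + y;
//   }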
conformable(ret,x); conformable(x,y); @@ -231,6 +244,7 @@ void axpy(Lattice &ret,sobj a,const Lattice &x,const Lattice & } template inline void axpby(Lattice &ret,sobj a,sobj b,const Lattice &x,const Lattice &y){ + GRID_TRACE("axpby"); ret.Checkerboard() = x.Checkerboard(); conformable(ret,x); conformable(x,y); @@ -246,11 +260,13 @@ void axpby(Lattice &ret,sobj a,sobj b,const Lattice &x,const Lattice template inline RealD axpy_norm(Lattice &ret,sobj a,const Lattice &x,const Lattice &y) { + GRID_TRACE("axpy_norm"); return axpy_norm_fast(ret,a,x,y); } template inline RealD axpby_norm(Lattice &ret,sobj a,sobj b,const Lattice &x,const Lattice &y) { + GRID_TRACE("axpby_norm"); return axpby_norm_fast(ret,a,b,x,y); } diff --git a/Grid/lattice/Lattice_base.h b/Grid/lattice/Lattice_base.h index 34f13fa6..b0b759b5 100644 --- a/Grid/lattice/Lattice_base.h +++ b/Grid/lattice/Lattice_base.h @@ -117,6 +117,7 @@ public: //////////////////////////////////////////////////////////////////////////////// template inline Lattice & operator=(const LatticeUnaryExpression &expr) { + GRID_TRACE("ExpressionTemplateEval"); GridBase *egrid(nullptr); GridFromExpression(egrid,expr); assert(egrid!=nullptr); @@ -129,7 +130,7 @@ public: auto exprCopy = expr; ExpressionViewOpen(exprCopy); - auto me = View(AcceleratorWrite); + auto me = View(AcceleratorWriteDiscard); accelerator_for(ss,me.size(),vobj::Nsimd(),{ auto tmp = eval(ss,exprCopy); coalescedWrite(me[ss],tmp); @@ -140,6 +141,7 @@ public: } template inline Lattice & operator=(const LatticeBinaryExpression &expr) { + GRID_TRACE("ExpressionTemplateEval"); GridBase *egrid(nullptr); GridFromExpression(egrid,expr); assert(egrid!=nullptr); @@ -152,7 +154,7 @@ public: auto exprCopy = expr; ExpressionViewOpen(exprCopy); - auto me = View(AcceleratorWrite); + auto me = View(AcceleratorWriteDiscard); accelerator_for(ss,me.size(),vobj::Nsimd(),{ auto tmp = eval(ss,exprCopy); coalescedWrite(me[ss],tmp); @@ -163,6 +165,7 @@ public: } template inline Lattice & operator=(const LatticeTrinaryExpression &expr) { + GRID_TRACE("ExpressionTemplateEval"); GridBase *egrid(nullptr); GridFromExpression(egrid,expr); assert(egrid!=nullptr); @@ -174,7 +177,7 @@ public: this->checkerboard=cb; auto exprCopy = expr; ExpressionViewOpen(exprCopy); - auto me = View(AcceleratorWrite); + auto me = View(AcceleratorWriteDiscard); accelerator_for(ss,me.size(),vobj::Nsimd(),{ auto tmp = eval(ss,exprCopy); coalescedWrite(me[ss],tmp); @@ -245,7 +248,7 @@ public: /////////////////////////////////////////// // user defined constructor /////////////////////////////////////////// - Lattice(GridBase *grid,ViewMode mode=AcceleratorWrite) { + Lattice(GridBase *grid,ViewMode mode=AcceleratorWriteDiscard) { this->_grid = grid; resize(this->_grid->oSites()); assert((((uint64_t)&this->_odata[0])&0xF) ==0); @@ -288,8 +291,8 @@ public: typename std::enable_if::value,int>::type i=0; conformable(*this,r); this->checkerboard = r.Checkerboard(); - auto me = View(AcceleratorWrite); auto him= r.View(AcceleratorRead); + auto me = View(AcceleratorWriteDiscard); accelerator_for(ss,me.size(),vobj::Nsimd(),{ coalescedWrite(me[ss],him(ss)); }); @@ -303,8 +306,8 @@ public: inline Lattice & operator = (const Lattice & r){ this->checkerboard = r.Checkerboard(); conformable(*this,r); - auto me = View(AcceleratorWrite); auto him= r.View(AcceleratorRead); + auto me = View(AcceleratorWriteDiscard); accelerator_for(ss,me.size(),vobj::Nsimd(),{ coalescedWrite(me[ss],him(ss)); }); diff --git a/Grid/lattice/Lattice_matrix_reduction.h 
b/Grid/lattice/Lattice_matrix_reduction.h index 7c470fef..abebbfd6 100644 --- a/Grid/lattice/Lattice_matrix_reduction.h +++ b/Grid/lattice/Lattice_matrix_reduction.h @@ -32,7 +32,6 @@ template static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice &X,const Lattice &Y,int Orthog,RealD scale=1.0) { typedef typename vobj::scalar_object sobj; - typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; int Nblock = X.Grid()->GlobalDimensions()[Orthog]; @@ -82,7 +81,6 @@ template static void sliceMulMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice &X,int Orthog,RealD scale=1.0) { typedef typename vobj::scalar_object sobj; - typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; int Nblock = X.Grid()->GlobalDimensions()[Orthog]; @@ -130,7 +128,6 @@ template static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice &lhs,const Lattice &rhs,int Orthog) { typedef typename vobj::scalar_object sobj; - typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; GridBase *FullGrid = lhs.Grid(); diff --git a/Grid/lattice/Lattice_peekpoke.h b/Grid/lattice/Lattice_peekpoke.h index 5caab214..b6a36b11 100644 --- a/Grid/lattice/Lattice_peekpoke.h +++ b/Grid/lattice/Lattice_peekpoke.h @@ -96,9 +96,6 @@ void pokeSite(const sobj &s,Lattice &l,const Coordinate &site){ GridBase *grid=l.Grid(); - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_type vector_type; - int Nsimd = grid->Nsimd(); assert( l.Checkerboard()== l.Grid()->CheckerBoard(site)); @@ -125,14 +122,17 @@ void pokeSite(const sobj &s,Lattice &l,const Coordinate &site){ ////////////////////////////////////////////////////////// // Peek a scalar object from the SIMD array ////////////////////////////////////////////////////////// +template +typename vobj::scalar_object peekSite(const Lattice &l,const Coordinate &site){ + typename vobj::scalar_object s; + peekSite(s,l,site); + return s; +} template void peekSite(sobj &s,const Lattice &l,const Coordinate &site){ GridBase *grid=l.Grid(); - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_type vector_type; - int Nsimd = grid->Nsimd(); assert( l.Checkerboard() == l.Grid()->CheckerBoard(site)); @@ -173,11 +173,11 @@ inline void peekLocalSite(sobj &s,const LatticeView &l,Coordinate &site) idx= grid->iIndex(site); odx= grid->oIndex(site); - scalar_type * vp = (scalar_type *)&l[odx]; + const vector_type *vp = (const vector_type *) &l[odx]; scalar_type * pt = (scalar_type *)&s; for(int w=0;w &l,Coordinate &site) idx= grid->iIndex(site); odx= grid->oIndex(site); - scalar_type * vp = (scalar_type *)&l[odx]; + vector_type * vp = (vector_type *)&l[odx]; scalar_type * pt = (scalar_type *)&s; for(int w=0;w -inline typename vobj::scalar_object sum(const Lattice &arg) +inline typename vobj::scalar_object rankSum(const Lattice &arg) { Integer osites = arg.Grid()->oSites(); #if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL) - typename vobj::scalar_object ssum; autoView( arg_v, arg, AcceleratorRead); - ssum= sum_gpu(&arg_v[0],osites); + return sum_gpu(&arg_v[0],osites); #else autoView(arg_v, arg, CpuRead); - auto ssum= sum_cpu(&arg_v[0],osites); + return sum_cpu(&arg_v[0],osites); #endif +} + +template +inline typename vobj::scalar_object sum(const Lattice &arg) +{ + auto ssum = rankSum(arg); arg.Grid()->GlobalSum(ssum); return ssum; } template -inline typename vobj::scalar_object sum_large(const 
Lattice &arg) +inline typename vobj::scalar_object rankSumLarge(const Lattice &arg) { #if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL) autoView( arg_v, arg, AcceleratorRead); Integer osites = arg.Grid()->oSites(); - auto ssum= sum_gpu_large(&arg_v[0],osites); + return sum_gpu_large(&arg_v[0],osites); #else autoView(arg_v, arg, CpuRead); Integer osites = arg.Grid()->oSites(); - auto ssum= sum_cpu(&arg_v[0],osites); + return sum_cpu(&arg_v[0],osites); #endif +} + +template +inline typename vobj::scalar_object sum_large(const Lattice &arg) +{ + auto ssum = rankSumLarge(arg); arg.Grid()->GlobalSum(ssum); return ssum; } @@ -225,7 +233,6 @@ template inline RealD maxLocalNorm2(const Lattice &arg) template inline ComplexD rankInnerProduct(const Lattice &left,const Lattice &right) { - typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_typeD vector_type; ComplexD nrm; @@ -235,6 +242,7 @@ inline ComplexD rankInnerProduct(const Lattice &left,const Lattice & const uint64_t sites = grid->oSites(); // Might make all code paths go this way. +#if 0 typedef decltype(innerProductD(vobj(),vobj())) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; @@ -243,15 +251,31 @@ inline ComplexD rankInnerProduct(const Lattice &left,const Lattice & autoView( right_v,right, AcceleratorRead); // This code could read coalesce // GPU - SIMT lane compliance... - accelerator_for( ss, sites, 1,{ - auto x_l = left_v[ss]; - auto y_l = right_v[ss]; - inner_tmp_v[ss]=innerProductD(x_l,y_l); + accelerator_for( ss, sites, nsimd,{ + auto x_l = left_v(ss); + auto y_l = right_v(ss); + coalescedWrite(inner_tmp_v[ss],innerProductD(x_l,y_l)); }); } +#else + typedef decltype(innerProduct(vobj(),vobj())) inner_t; + Vector inner_tmp(sites); + auto inner_tmp_v = &inner_tmp[0]; + + { + autoView( left_v , left, AcceleratorRead); + autoView( right_v,right, AcceleratorRead); + // GPU - SIMT lane compliance... 
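// The rankSum/sum split above exposes the node-local partial reduction; sum is
// rankSum followed by a GlobalSum, so the two forms below agree by construction:
//   auto partial = rankSum(field);       // this rank's contribution only
//   field.Grid()->GlobalSum(partial);    // now identical to sum(field)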
+ accelerator_for( ss, sites, nsimd,{ + auto x_l = left_v(ss); + auto y_l = right_v(ss); + coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l)); + }); + } +#endif // This is in single precision and fails some tests - auto anrm = sum(inner_tmp_v,sites); + auto anrm = sumD(inner_tmp_v,sites); nrm = anrm; return nrm; } @@ -284,8 +308,7 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt conformable(z,x); conformable(x,y); - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_typeD vector_type; + // typedef typename vobj::vector_typeD vector_type; RealD nrm; GridBase *grid = x.Grid(); @@ -297,17 +320,29 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt autoView( x_v, x, AcceleratorRead); autoView( y_v, y, AcceleratorRead); autoView( z_v, z, AcceleratorWrite); - +#if 0 typedef decltype(innerProductD(x_v[0],y_v[0])) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; - accelerator_for( ss, sites, 1,{ - auto tmp = a*x_v[ss]+b*y_v[ss]; - inner_tmp_v[ss]=innerProductD(tmp,tmp); - z_v[ss]=tmp; + accelerator_for( ss, sites, nsimd,{ + auto tmp = a*x_v(ss)+b*y_v(ss); + coalescedWrite(inner_tmp_v[ss],innerProductD(tmp,tmp)); + coalescedWrite(z_v[ss],tmp); }); nrm = real(TensorRemove(sum(inner_tmp_v,sites))); +#else + typedef decltype(innerProduct(x_v[0],y_v[0])) inner_t; + Vector inner_tmp(sites); + auto inner_tmp_v = &inner_tmp[0]; + + accelerator_for( ss, sites, nsimd,{ + auto tmp = a*x_v(ss)+b*y_v(ss); + coalescedWrite(inner_tmp_v[ss],innerProduct(tmp,tmp)); + coalescedWrite(z_v[ss],tmp); + }); + nrm = real(TensorRemove(sumD(inner_tmp_v,sites))); +#endif grid->GlobalSum(nrm); return nrm; } @@ -317,7 +352,6 @@ innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice &left,const Latti { conformable(left,right); - typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_typeD vector_type; Vector tmp(2); @@ -461,6 +495,14 @@ template inline void sliceSum(const Lattice &Data,std::vector< int words = fd*sizeof(sobj)/sizeof(scalar_type); grid->GlobalSumVector(ptr, words); } +template inline +std::vector +sliceSum(const Lattice &Data,int orthogdim) +{ + std::vector result; + sliceSum(Data,result,orthogdim); + return result; +} template static void sliceInnerProductVector( std::vector & result, const Lattice &lhs,const Lattice &rhs,int orthogdim) @@ -565,7 +607,8 @@ static void sliceNorm (std::vector &sn,const Lattice &rhs,int Ortho template static void sliceMaddVector(Lattice &R,std::vector &a,const Lattice &X,const Lattice &Y, int orthogdim,RealD scale=1.0) -{ +{ + // perhaps easier to just promote A to a field and use regular madd typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; @@ -596,8 +639,7 @@ static void sliceMaddVector(Lattice &R,std::vector &a,const Lattice for(int l=0;liCoorFromIindex(icoor,l); int ldx =r+icoor[orthogdim]*rd; - scalar_type *as =(scalar_type *)&av; - as[l] = scalar_type(a[ldx])*zscale; + av.putlane(scalar_type(a[ldx])*zscale,l); } tensor_reduced at; at=av; @@ -637,7 +679,6 @@ template static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice &X,const Lattice &Y,int Orthog,RealD scale=1.0) { typedef typename vobj::scalar_object sobj; - typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; int Nblock = X.Grid()->GlobalDimensions()[Orthog]; @@ -691,7 +732,6 @@ template static void sliceMulMatrix (Lattice &R,Eigen::MatrixXcd 
&aa,const Lattice &X,int Orthog,RealD scale=1.0) { typedef typename vobj::scalar_object sobj; - typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; int Nblock = X.Grid()->GlobalDimensions()[Orthog]; @@ -745,7 +785,6 @@ template static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice &lhs,const Lattice &rhs,int Orthog) { typedef typename vobj::scalar_object sobj; - typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; GridBase *FullGrid = lhs.Grid(); diff --git a/Grid/lattice/Lattice_reduction_gpu.h b/Grid/lattice/Lattice_reduction_gpu.h index bad86d2a..ecf90d19 100644 --- a/Grid/lattice/Lattice_reduction_gpu.h +++ b/Grid/lattice/Lattice_reduction_gpu.h @@ -211,13 +211,25 @@ inline typename vobj::scalar_objectD sumD_gpu_small(const vobj *lat, Integer osi assert(ok); Integer smemSize = numThreads * sizeof(sobj); - + // Move out of UVM + // Turns out I had messed up the synchronise after move to compute stream + // as running this on the default stream fools the synchronise +#undef UVM_BLOCK_BUFFER +#ifndef UVM_BLOCK_BUFFER + commVector buffer(numBlocks); + sobj *buffer_v = &buffer[0]; + sobj result; + reduceKernel<<< numBlocks, numThreads, smemSize, computeStream >>>(lat, buffer_v, size); + accelerator_barrier(); + acceleratorCopyFromDevice(buffer_v,&result,sizeof(result)); +#else Vector buffer(numBlocks); sobj *buffer_v = &buffer[0]; - - reduceKernel<<< numBlocks, numThreads, smemSize >>>(lat, buffer_v, size); + sobj result; + reduceKernel<<< numBlocks, numThreads, smemSize, computeStream >>>(lat, buffer_v, size); accelerator_barrier(); - auto result = buffer_v[0]; + result = *buffer_v; +#endif return result; } @@ -250,8 +262,6 @@ inline typename vobj::scalar_objectD sumD_gpu_large(const vobj *lat, Integer osi template inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites) { - typedef typename vobj::vector_type vector; - typedef typename vobj::scalar_typeD scalarD; typedef typename vobj::scalar_objectD sobj; sobj ret; diff --git a/Grid/lattice/Lattice_rng.h b/Grid/lattice/Lattice_rng.h index e5e63716..b7ef0e82 100644 --- a/Grid/lattice/Lattice_rng.h +++ b/Grid/lattice/Lattice_rng.h @@ -424,9 +424,33 @@ public: // MT implementation does not implement fast discard even though // in principle this is possible //////////////////////////////////////////////// +#if 1 + thread_for( lidx, _grid->lSites(), { + int gidx; + int o_idx; + int i_idx; + int rank; + Coordinate pcoor; + Coordinate lcoor; + Coordinate gcoor; + _grid->LocalIndexToLocalCoor(lidx,lcoor); + pcoor=_grid->ThisProcessorCoor(); + _grid->ProcessorCoorLocalCoorToGlobalCoor(pcoor,lcoor,gcoor); + _grid->GlobalCoorToGlobalIndex(gcoor,gidx); + + _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); + + assert(rank == _grid->ThisRank() ); + + int l_idx=generator_idx(o_idx,i_idx); + _generators[l_idx] = master_engine; + Skip(_generators[l_idx],gidx); // Skip to next RNG sequence + }); +#else // Everybody loops over global volume. thread_for( gidx, _grid->_gsites, { + // Where is it? 
int rank; int o_idx; @@ -443,6 +467,7 @@ public: Skip(_generators[l_idx],gidx); // Skip to next RNG sequence } }); +#endif #else //////////////////////////////////////////////////////////////// // Machine and thread decomposition dependent seeding is efficient diff --git a/Grid/lattice/Lattice_transfer.h b/Grid/lattice/Lattice_transfer.h index 3895ad46..04540d88 100644 --- a/Grid/lattice/Lattice_transfer.h +++ b/Grid/lattice/Lattice_transfer.h @@ -194,11 +194,11 @@ accelerator_inline void convertType(vComplexD2 & out, const ComplexD & in) { #endif accelerator_inline void convertType(vComplexF & out, const vComplexD2 & in) { - out.v = Optimization::PrecisionChange::DtoS(in._internal[0].v,in._internal[1].v); + precisionChange(out,in); } accelerator_inline void convertType(vComplexD2 & out, const vComplexF & in) { - Optimization::PrecisionChange::StoD(in.v,out._internal[0].v,out._internal[1].v); + precisionChange(out,in); } template @@ -288,7 +288,36 @@ inline void blockProject(Lattice > &coarseData, blockZAXPY(fineDataRed,ip,Basis[v],fineDataRed); } } +template +inline void batchBlockProject(std::vector>> &coarseData, + const std::vector> &fineData, + const VLattice &Basis) +{ + int NBatch = fineData.size(); + assert(coarseData.size() == NBatch); + GridBase * fine = fineData[0].Grid(); + GridBase * coarse= coarseData[0].Grid(); + + Lattice> ip(coarse); + std::vector> fineDataCopy = fineData; + + autoView(ip_, ip, AcceleratorWrite); + for(int v=0;v + accelerator_for( sc, coarse->oSites(), vobj::Nsimd(), { + convertType(coarseData_[sc](v),ip_[sc]); + }); + + // improve numerical stability of projection + // |fine> = |fine> - |basis> + ip=-ip; + blockZAXPY(fineDataCopy[k],ip,Basis[v],fineDataCopy[k]); + } + } +} template inline void blockZAXPY(Lattice &fineZ, @@ -590,6 +619,26 @@ inline void blockPromote(const Lattice > &coarseData, } #endif +template +inline void batchBlockPromote(const std::vector>> &coarseData, + std::vector> &fineData, + const VLattice &Basis) +{ + int NBatch = coarseData.size(); + assert(fineData.size() == NBatch); + + GridBase * fine = fineData[0].Grid(); + GridBase * coarse = coarseData[0].Grid(); + for (int k=0; k> ip = PeekIndex<0>(coarseData[k],i); + blockZAXPY(fineData[k],ip,Basis[i],fineData[k]); + } + } +} + // Useful for precision conversion, or indeed anything where an operator= does a conversion on scalars. // Simd layouts need not match since we use peek/poke Local template @@ -681,7 +730,7 @@ void localCopyRegion(const Lattice &From,Lattice & To,Coordinate Fro scalar_type * fp = (scalar_type *)&f_v[odx_f]; scalar_type * tp = (scalar_type *)&t_v[odx_t]; for(int w=0;w &lowDim,const Lattice & higherDim,int template -void Replicate(Lattice &coarse,Lattice & fine) +void Replicate(const Lattice &coarse,Lattice & fine) { typedef typename vobj::scalar_object sobj; @@ -1085,9 +1134,27 @@ vectorizeFromRevLexOrdArray( std::vector &in, Lattice &out) }); } -//Convert a Lattice from one precision to another +//Very fast precision change. Requires in/out objects to reside on same Grid (e.g. 
by using double2 for the double-precision field) template -void precisionChange(Lattice &out, const Lattice &in) +void precisionChangeFast(Lattice &out, const Lattice &in) +{ + typedef typename VobjOut::vector_type Vout; + typedef typename VobjIn::vector_type Vin; + const int N = sizeof(VobjOut)/sizeof(Vout); + conformable(out.Grid(),in.Grid()); + out.Checkerboard() = in.Checkerboard(); + int nsimd = out.Grid()->Nsimd(); + autoView( out_v , out, AcceleratorWrite); + autoView( in_v , in, AcceleratorRead); + accelerator_for(idx,out.Grid()->oSites(),1,{ + Vout *vout = (Vout *)&out_v[idx]; + Vin *vin = (Vin *)&in_v[idx]; + precisionChange(vout,vin,N); + }); +} +//Convert a Lattice from one precision to another (original, slow implementation) +template +void precisionChangeOrig(Lattice &out, const Lattice &in) { assert(out.Grid()->Nd() == in.Grid()->Nd()); for(int d=0;dNd();d++){ @@ -1102,7 +1169,7 @@ void precisionChange(Lattice &out, const Lattice &in) int ndim = out.Grid()->Nd(); int out_nsimd = out_grid->Nsimd(); - + int in_nsimd = in_grid->Nsimd(); std::vector out_icoor(out_nsimd); for(int lane=0; lane < out_nsimd; lane++){ @@ -1133,6 +1200,128 @@ void precisionChange(Lattice &out, const Lattice &in) }); } +//The workspace for a precision change operation allowing for the reuse of the mapping to save time on subsequent calls +class precisionChangeWorkspace{ + std::pair* fmap_device; //device pointer + //maintain grids for checking + GridBase* _out_grid; + GridBase* _in_grid; +public: + precisionChangeWorkspace(GridBase *out_grid, GridBase *in_grid): _out_grid(out_grid), _in_grid(in_grid){ + //Build a map between the sites and lanes of the output field and the input field as we cannot use the Grids on the device + assert(out_grid->Nd() == in_grid->Nd()); + for(int d=0;dNd();d++){ + assert(out_grid->FullDimensions()[d] == in_grid->FullDimensions()[d]); + } + int Nsimd_out = out_grid->Nsimd(); + + std::vector out_icorrs(out_grid->Nsimd()); //reuse these + for(int lane=0; lane < out_grid->Nsimd(); lane++) + out_grid->iCoorFromIindex(out_icorrs[lane], lane); + + std::vector > fmap_host(out_grid->lSites()); //lsites = osites*Nsimd + thread_for(out_oidx,out_grid->oSites(),{ + Coordinate out_ocorr; + out_grid->oCoorFromOindex(out_ocorr, out_oidx); + + Coordinate lcorr; //the local coordinate (common to both in and out as full coordinate) + for(int out_lane=0; out_lane < Nsimd_out; out_lane++){ + out_grid->InOutCoorToLocalCoor(out_ocorr, out_icorrs[out_lane], lcorr); + + //int in_oidx = in_grid->oIndex(lcorr), in_lane = in_grid->iIndex(lcorr); + //Note oIndex and OcorrFromOindex (and same for iIndex) are not inverse for checkerboarded lattice, the former coordinates being defined on the full lattice and the latter on the reduced lattice + //Until this is fixed we need to circumvent the problem locally. 
Here I will use the coordinates defined on the reduced lattice for simplicity + int in_oidx = 0, in_lane = 0; + for(int d=0;d_ndimension;d++){ + in_oidx += in_grid->_ostride[d] * ( lcorr[d] % in_grid->_rdimensions[d] ); + in_lane += in_grid->_istride[d] * ( lcorr[d] / in_grid->_rdimensions[d] ); + } + fmap_host[out_lane + Nsimd_out*out_oidx] = std::pair( in_oidx, in_lane ); + } + }); + + //Copy the map to the device (if we had a way to tell if an accelerator is in use we could avoid this copy for CPU-only machines) + size_t fmap_bytes = out_grid->lSites() * sizeof(std::pair); + fmap_device = (std::pair*)acceleratorAllocDevice(fmap_bytes); + acceleratorCopyToDevice(fmap_host.data(), fmap_device, fmap_bytes); + } + + //Prevent moving or copying + precisionChangeWorkspace(const precisionChangeWorkspace &r) = delete; + precisionChangeWorkspace(precisionChangeWorkspace &&r) = delete; + precisionChangeWorkspace &operator=(const precisionChangeWorkspace &r) = delete; + precisionChangeWorkspace &operator=(precisionChangeWorkspace &&r) = delete; + + std::pair const* getMap() const{ return fmap_device; } + + void checkGrids(GridBase* out, GridBase* in) const{ + conformable(out, _out_grid); + conformable(in, _in_grid); + } + + ~precisionChangeWorkspace(){ + acceleratorFreeDevice(fmap_device); + } +}; + + +//We would like to use precisionChangeFast when possible. However usage of this requires the Grids to be the same (runtime check) +//*and* the precisionChange(VobjOut::vector_type, VobjIn, int) function to be defined for the types; this requires an extra compile-time check which we do using some SFINAE trickery +template +auto _precisionChangeFastWrap(Lattice &out, const Lattice &in, int dummy)->decltype( precisionChange( ((typename VobjOut::vector_type*)0), ((typename VobjIn::vector_type*)0), 1), int()){ + if(out.Grid() == in.Grid()){ + precisionChangeFast(out,in); + return 1; + }else{ + return 0; + } +} +template +int _precisionChangeFastWrap(Lattice &out, const Lattice &in, long dummy){ //note long here is intentional; it means the above is preferred if available + return 0; +} + + +//Convert a lattice of one precision to another. Much faster than original implementation but requires a pregenerated workspace +//which contains the mapping data. +template +void precisionChange(Lattice &out, const Lattice &in, const precisionChangeWorkspace &workspace){ + if(_precisionChangeFastWrap(out,in,0)) return; + + static_assert( std::is_same::value == 1, "precisionChange: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same + + out.Checkerboard() = in.Checkerboard(); + constexpr int Nsimd_out = VobjOut::Nsimd(); + + workspace.checkGrids(out.Grid(),in.Grid()); + std::pair const* fmap_device = workspace.getMap(); + + //Do the copy/precision change + autoView( out_v , out, AcceleratorWrite); + autoView( in_v , in, AcceleratorRead); + + accelerator_for(out_oidx, out.Grid()->oSites(), 1,{ + std::pair const* fmap_osite = fmap_device + out_oidx*Nsimd_out; + for(int out_lane=0; out_lane < Nsimd_out; out_lane++){ + int in_oidx = fmap_osite[out_lane].first; + int in_lane = fmap_osite[out_lane].second; + copyLane(out_v[out_oidx], out_lane, in_v[in_oidx], in_lane); + } + }); +} + +//Convert a Lattice from one precision to another. 
Much faster than original implementation but slower than precisionChangeFast +//or precisionChange called with pregenerated workspace, as it needs to internally generate the workspace on the host and copy to device +template +void precisionChange(Lattice &out, const Lattice &in){ + if(_precisionChangeFastWrap(out,in,0)) return; + precisionChangeWorkspace workspace(out.Grid(), in.Grid()); + precisionChange(out, in, workspace); +} + + + + //////////////////////////////////////////////////////////////////////////////// // Communicate between grids //////////////////////////////////////////////////////////////////////////////// diff --git a/Grid/parallelIO/NerscIO.h b/Grid/parallelIO/NerscIO.h index b5a7a8df..0f4a64e9 100644 --- a/Grid/parallelIO/NerscIO.h +++ b/Grid/parallelIO/NerscIO.h @@ -42,9 +42,11 @@ using namespace Grid; //////////////////////////////////////////////////////////////////////////////// class NerscIO : public BinaryIO { public: - typedef Lattice GaugeField; + // Enable/disable exiting if the plaquette in the header does not match the value computed (default true) + static bool & exitOnReadPlaquetteMismatch(){ static bool v=true; return v; } + static inline void truncate(std::string file){ std::ofstream fout(file,std::ios::out); } @@ -203,7 +205,7 @@ public: std::cerr << " nersc_csum " < struct isCoarsened { template using IfCoarsened = Invoke::value,int> > ; template using IfNotCoarsened = Invoke::value,int> > ; +const int GparityFlavourTensorIndex = 3; //TensorLevel counts from the bottom! + // ChrisK very keen to add extra space for Gparity doubling. // // Also add domain wall index, in a way where Wilson operator @@ -111,8 +114,10 @@ template using iHalfSpinColourVector = iScalar using iSpinColourSpinColourMatrix = iScalar, Ns>, Nc>, Ns> >; +template using iGparityFlavourVector = iVector >, Ngp>; template using iGparitySpinColourVector = iVector, Ns>, Ngp >; template using iGparityHalfSpinColourVector = iVector, Nhs>, Ngp >; +template using iGparityFlavourMatrix = iMatrix >, Ngp>; // Spin matrix typedef iSpinMatrix SpinMatrix; @@ -122,6 +127,7 @@ typedef iSpinMatrix SpinMatrixD; typedef iSpinMatrix vSpinMatrix; typedef iSpinMatrix vSpinMatrixF; typedef iSpinMatrix vSpinMatrixD; +typedef iSpinMatrix vSpinMatrixD2; // Colour Matrix typedef iColourMatrix ColourMatrix; @@ -131,6 +137,7 @@ typedef iColourMatrix ColourMatrixD; typedef iColourMatrix vColourMatrix; typedef iColourMatrix vColourMatrixF; typedef iColourMatrix vColourMatrixD; +typedef iColourMatrix vColourMatrixD2; // SpinColour matrix typedef iSpinColourMatrix SpinColourMatrix; @@ -140,6 +147,7 @@ typedef iSpinColourMatrix SpinColourMatrixD; typedef iSpinColourMatrix vSpinColourMatrix; typedef iSpinColourMatrix vSpinColourMatrixF; typedef iSpinColourMatrix vSpinColourMatrixD; +typedef iSpinColourMatrix vSpinColourMatrixD2; // SpinColourSpinColour matrix typedef iSpinColourSpinColourMatrix SpinColourSpinColourMatrix; @@ -149,6 +157,7 @@ typedef iSpinColourSpinColourMatrix SpinColourSpinColourMatrixD; typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrix; typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrixF; typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrixD; +typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrixD2; // SpinColourSpinColour matrix typedef iSpinColourSpinColourMatrix SpinColourSpinColourMatrix; @@ -158,15 +167,17 @@ typedef iSpinColourSpinColourMatrix SpinColourSpinColourMatrixD; typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrix; typedef 
iSpinColourSpinColourMatrix vSpinColourSpinColourMatrixF; typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrixD; +typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrixD2; // LorentzColour typedef iLorentzColourMatrix LorentzColourMatrix; typedef iLorentzColourMatrix LorentzColourMatrixF; typedef iLorentzColourMatrix LorentzColourMatrixD; -typedef iLorentzColourMatrix vLorentzColourMatrix; -typedef iLorentzColourMatrix vLorentzColourMatrixF; -typedef iLorentzColourMatrix vLorentzColourMatrixD; +typedef iLorentzColourMatrix vLorentzColourMatrix; +typedef iLorentzColourMatrix vLorentzColourMatrixF; +typedef iLorentzColourMatrix vLorentzColourMatrixD; +typedef iLorentzColourMatrix vLorentzColourMatrixD2; // LorentzComplex typedef iLorentzComplex LorentzComplex; @@ -182,9 +193,21 @@ typedef iDoubleStoredColourMatrix DoubleStoredColourMatrix; typedef iDoubleStoredColourMatrix DoubleStoredColourMatrixF; typedef iDoubleStoredColourMatrix DoubleStoredColourMatrixD; -typedef iDoubleStoredColourMatrix vDoubleStoredColourMatrix; -typedef iDoubleStoredColourMatrix vDoubleStoredColourMatrixF; -typedef iDoubleStoredColourMatrix vDoubleStoredColourMatrixD; +typedef iDoubleStoredColourMatrix vDoubleStoredColourMatrix; +typedef iDoubleStoredColourMatrix vDoubleStoredColourMatrixF; +typedef iDoubleStoredColourMatrix vDoubleStoredColourMatrixD; +typedef iDoubleStoredColourMatrix vDoubleStoredColourMatrixD2; + +//G-parity flavour matrix +typedef iGparityFlavourMatrix GparityFlavourMatrix; +typedef iGparityFlavourMatrix GparityFlavourMatrixF; +typedef iGparityFlavourMatrix GparityFlavourMatrixD; + +typedef iGparityFlavourMatrix vGparityFlavourMatrix; +typedef iGparityFlavourMatrix vGparityFlavourMatrixF; +typedef iGparityFlavourMatrix vGparityFlavourMatrixD; +typedef iGparityFlavourMatrix vGparityFlavourMatrixD2; + // Spin vector typedef iSpinVector SpinVector; @@ -194,6 +217,7 @@ typedef iSpinVector SpinVectorD; typedef iSpinVector vSpinVector; typedef iSpinVector vSpinVectorF; typedef iSpinVector vSpinVectorD; +typedef iSpinVector vSpinVectorD2; // Colour vector typedef iColourVector ColourVector; @@ -203,6 +227,7 @@ typedef iColourVector ColourVectorD; typedef iColourVector vColourVector; typedef iColourVector vColourVectorF; typedef iColourVector vColourVectorD; +typedef iColourVector vColourVectorD2; // SpinColourVector typedef iSpinColourVector SpinColourVector; @@ -212,6 +237,7 @@ typedef iSpinColourVector SpinColourVectorD; typedef iSpinColourVector vSpinColourVector; typedef iSpinColourVector vSpinColourVectorF; typedef iSpinColourVector vSpinColourVectorD; +typedef iSpinColourVector vSpinColourVectorD2; // HalfSpin vector typedef iHalfSpinVector HalfSpinVector; @@ -221,15 +247,27 @@ typedef iHalfSpinVector HalfSpinVectorD; typedef iHalfSpinVector vHalfSpinVector; typedef iHalfSpinVector vHalfSpinVectorF; typedef iHalfSpinVector vHalfSpinVectorD; +typedef iHalfSpinVector vHalfSpinVectorD2; // HalfSpinColour vector typedef iHalfSpinColourVector HalfSpinColourVector; typedef iHalfSpinColourVector HalfSpinColourVectorF; typedef iHalfSpinColourVector HalfSpinColourVectorD; -typedef iHalfSpinColourVector vHalfSpinColourVector; -typedef iHalfSpinColourVector vHalfSpinColourVectorF; -typedef iHalfSpinColourVector vHalfSpinColourVectorD; +typedef iHalfSpinColourVector vHalfSpinColourVector; +typedef iHalfSpinColourVector vHalfSpinColourVectorF; +typedef iHalfSpinColourVector vHalfSpinColourVectorD; +typedef iHalfSpinColourVector vHalfSpinColourVectorD2; + +//G-parity flavour vector 
+typedef iGparityFlavourVector GparityFlavourVector; +typedef iGparityFlavourVector GparityFlavourVectorF; +typedef iGparityFlavourVector GparityFlavourVectorD; + +typedef iGparityFlavourVector vGparityFlavourVector; +typedef iGparityFlavourVector vGparityFlavourVectorF; +typedef iGparityFlavourVector vGparityFlavourVectorD; +typedef iGparityFlavourVector vGparityFlavourVectorD2; // singlets typedef iSinglet TComplex; // FIXME This is painful. Tensor singlet complex type. @@ -239,6 +277,7 @@ typedef iSinglet TComplexD; // FIXME This is painful. Tenso typedef iSinglet vTComplex ; // what if we don't know the tensor structure typedef iSinglet vTComplexF; // what if we don't know the tensor structure typedef iSinglet vTComplexD; // what if we don't know the tensor structure +typedef iSinglet vTComplexD2; // what if we don't know the tensor structure typedef iSinglet TReal; // Shouldn't need these; can I make it work without? typedef iSinglet TRealF; // Shouldn't need these; can I make it work without? @@ -256,51 +295,62 @@ typedef iSinglet TInteger; typedef Lattice LatticeColourMatrix; typedef Lattice LatticeColourMatrixF; typedef Lattice LatticeColourMatrixD; +typedef Lattice LatticeColourMatrixD2; typedef Lattice LatticeSpinMatrix; typedef Lattice LatticeSpinMatrixF; typedef Lattice LatticeSpinMatrixD; +typedef Lattice LatticeSpinMatrixD2; typedef Lattice LatticeSpinColourMatrix; typedef Lattice LatticeSpinColourMatrixF; typedef Lattice LatticeSpinColourMatrixD; +typedef Lattice LatticeSpinColourMatrixD2; typedef Lattice LatticeSpinColourSpinColourMatrix; typedef Lattice LatticeSpinColourSpinColourMatrixF; typedef Lattice LatticeSpinColourSpinColourMatrixD; +typedef Lattice LatticeSpinColourSpinColourMatrixD2; -typedef Lattice LatticeLorentzColourMatrix; -typedef Lattice LatticeLorentzColourMatrixF; -typedef Lattice LatticeLorentzColourMatrixD; +typedef Lattice LatticeLorentzColourMatrix; +typedef Lattice LatticeLorentzColourMatrixF; +typedef Lattice LatticeLorentzColourMatrixD; +typedef Lattice LatticeLorentzColourMatrixD2; typedef Lattice LatticeLorentzComplex; typedef Lattice LatticeLorentzComplexF; typedef Lattice LatticeLorentzComplexD; // DoubleStored gauge field -typedef Lattice LatticeDoubleStoredColourMatrix; -typedef Lattice LatticeDoubleStoredColourMatrixF; -typedef Lattice LatticeDoubleStoredColourMatrixD; +typedef Lattice LatticeDoubleStoredColourMatrix; +typedef Lattice LatticeDoubleStoredColourMatrixF; +typedef Lattice LatticeDoubleStoredColourMatrixD; +typedef Lattice LatticeDoubleStoredColourMatrixD2; typedef Lattice LatticeSpinVector; typedef Lattice LatticeSpinVectorF; typedef Lattice LatticeSpinVectorD; +typedef Lattice LatticeSpinVectorD2; typedef Lattice LatticeColourVector; typedef Lattice LatticeColourVectorF; typedef Lattice LatticeColourVectorD; +typedef Lattice LatticeColourVectorD2; typedef Lattice LatticeSpinColourVector; typedef Lattice LatticeSpinColourVectorF; typedef Lattice LatticeSpinColourVectorD; +typedef Lattice LatticeSpinColourVectorD2; typedef Lattice LatticeHalfSpinVector; typedef Lattice LatticeHalfSpinVectorF; typedef Lattice LatticeHalfSpinVectorD; +typedef Lattice LatticeHalfSpinVectorD2; -typedef Lattice LatticeHalfSpinColourVector; -typedef Lattice LatticeHalfSpinColourVectorF; -typedef Lattice LatticeHalfSpinColourVectorD; +typedef Lattice LatticeHalfSpinColourVector; +typedef Lattice LatticeHalfSpinColourVectorF; +typedef Lattice LatticeHalfSpinColourVectorD; +typedef Lattice LatticeHalfSpinColourVectorD2; typedef Lattice LatticeReal; 
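// Relating to the precisionChange rework earlier in this diff: the workspace
// exists so the site/lane map is built (and copied to device) once, then
// amortised over many conversions. A hedged usage sketch with illustrative
// grid/field names:
//   LatticeFermionD src_d(UGrid_d);
//   LatticeFermionF dst_f(UGrid_f);
//   precisionChangeWorkspace wk(dst_f.Grid(), src_d.Grid()); // map built once
//   for(int k=0;k<nconv;k++) precisionChange(dst_f, src_d, wk); // cheap per call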
typedef Lattice LatticeRealF; @@ -309,6 +359,7 @@ typedef Lattice LatticeRealD; typedef Lattice LatticeComplex; typedef Lattice LatticeComplexF; typedef Lattice LatticeComplexD; +typedef Lattice LatticeComplexD2; typedef Lattice LatticeInteger; // Predicates for "where" @@ -316,37 +367,42 @@ typedef Lattice LatticeInteger; // Predicates for "where" /////////////////////////////////////////// // Physical names for things /////////////////////////////////////////// -typedef LatticeHalfSpinColourVector LatticeHalfFermion; -typedef LatticeHalfSpinColourVectorF LatticeHalfFermionF; -typedef LatticeHalfSpinColourVectorF LatticeHalfFermionD; +typedef LatticeHalfSpinColourVector LatticeHalfFermion; +typedef LatticeHalfSpinColourVectorF LatticeHalfFermionF; +typedef LatticeHalfSpinColourVectorD LatticeHalfFermionD; +typedef LatticeHalfSpinColourVectorD2 LatticeHalfFermionD2; typedef LatticeSpinColourVector LatticeFermion; typedef LatticeSpinColourVectorF LatticeFermionF; typedef LatticeSpinColourVectorD LatticeFermionD; +typedef LatticeSpinColourVectorD2 LatticeFermionD2; typedef LatticeSpinColourMatrix LatticePropagator; typedef LatticeSpinColourMatrixF LatticePropagatorF; typedef LatticeSpinColourMatrixD LatticePropagatorD; +typedef LatticeSpinColourMatrixD2 LatticePropagatorD2; typedef LatticeLorentzColourMatrix LatticeGaugeField; typedef LatticeLorentzColourMatrixF LatticeGaugeFieldF; typedef LatticeLorentzColourMatrixD LatticeGaugeFieldD; +typedef LatticeLorentzColourMatrixD2 LatticeGaugeFieldD2; typedef LatticeDoubleStoredColourMatrix LatticeDoubledGaugeField; typedef LatticeDoubleStoredColourMatrixF LatticeDoubledGaugeFieldF; typedef LatticeDoubleStoredColourMatrixD LatticeDoubledGaugeFieldD; +typedef LatticeDoubleStoredColourMatrixD2 LatticeDoubledGaugeFieldD2; template using LorentzScalar = Lattice >; -// Uhgg... 
typing this hurt ;) -// (my keyboard got burning hot when I typed this, must be the anti-Fermion) typedef Lattice LatticeStaggeredFermion; typedef Lattice LatticeStaggeredFermionF; typedef Lattice LatticeStaggeredFermionD; +typedef Lattice LatticeStaggeredFermionD2; typedef Lattice LatticeStaggeredPropagator; typedef Lattice LatticeStaggeredPropagatorF; typedef Lattice LatticeStaggeredPropagatorD; +typedef Lattice LatticeStaggeredPropagatorD2; ////////////////////////////////////////////////////////////////////////////// // Peek and Poke named after physics attributes diff --git a/Grid/qcd/action/ActionBase.h b/Grid/qcd/action/ActionBase.h index 17980ee0..1e8d6d7a 100644 --- a/Grid/qcd/action/ActionBase.h +++ b/Grid/qcd/action/ActionBase.h @@ -40,9 +40,47 @@ class Action public: bool is_smeared = false; + RealD deriv_norm_sum; + RealD deriv_max_sum; + RealD Fdt_norm_sum; + RealD Fdt_max_sum; + int deriv_num; + RealD deriv_us; + RealD S_us; + RealD refresh_us; + void reset_timer(void) { + deriv_us = S_us = refresh_us = 0.0; + deriv_norm_sum = deriv_max_sum=0.0; + Fdt_max_sum = Fdt_norm_sum = 0.0; + deriv_num=0; + } + void deriv_log(RealD nrm, RealD max,RealD Fdt_nrm,RealD Fdt_max) { + if ( max > deriv_max_sum ) { + deriv_max_sum=max; + } + deriv_norm_sum+=nrm; + if ( Fdt_max > Fdt_max_sum ) { + Fdt_max_sum=Fdt_max; + } + Fdt_norm_sum+=Fdt_nrm; deriv_num++; + } + RealD deriv_max_average(void) { return deriv_max_sum; }; + RealD deriv_norm_average(void) { return deriv_norm_sum/deriv_num; }; + RealD Fdt_max_average(void) { return Fdt_max_sum; }; + RealD Fdt_norm_average(void) { return Fdt_norm_sum/deriv_num; }; + RealD deriv_timer(void) { return deriv_us; }; + RealD S_timer(void) { return S_us; }; + RealD refresh_timer(void) { return refresh_us; }; + void deriv_timer_start(void) { deriv_us-=usecond(); } + void deriv_timer_stop(void) { deriv_us+=usecond(); } + void refresh_timer_start(void) { refresh_us-=usecond(); } + void refresh_timer_stop(void) { refresh_us+=usecond(); } + void S_timer_start(void) { S_us-=usecond(); } + void S_timer_stop(void) { S_us+=usecond(); } // Heatbath? virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) = 0; // refresh pseudofermions virtual RealD S(const GaugeField& U) = 0; // evaluate the action + virtual RealD Sinitial(const GaugeField& U) { return this->S(U); } ; // if the refresh computes the action, can cache it. Alternately refreshAndAction() ? 
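// The instrumentation above makes per-action force norms and timings
// queryable. A hedged readout sketch after a trajectory, for any
// Action<GaugeField> &act:
//   std::cout << act.action_name()
//             << " max|F| "   << act.deriv_max_average()
//             << " avg|F| "   << act.deriv_norm_average()
//             << " deriv(s) " << act.deriv_timer()/1.0e6
//             << " S(s) "     << act.S_timer()/1.0e6 << std::endl;
//   act.reset_timer();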
virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0; // evaluate the action derivative virtual std::string action_name() = 0; // return the action name virtual std::string LogParameters() = 0; // prints action parameters diff --git a/Grid/qcd/action/ActionCore.h b/Grid/qcd/action/ActionCore.h index 6544318d..eb77236a 100644 --- a/Grid/qcd/action/ActionCore.h +++ b/Grid/qcd/action/ActionCore.h @@ -37,6 +37,10 @@ NAMESPACE_CHECK(ActionSet); #include NAMESPACE_CHECK(ActionParams); +#include +#include +#include + //////////////////////////////////////////// // Gauge Actions //////////////////////////////////////////// diff --git a/Grid/qcd/action/ActionParams.h b/Grid/qcd/action/ActionParams.h index 0e6a11c6..122dfb9c 100644 --- a/Grid/qcd/action/ActionParams.h +++ b/Grid/qcd/action/ActionParams.h @@ -34,27 +34,45 @@ directory NAMESPACE_BEGIN(Grid); -// These can move into a params header and be given MacroMagic serialisation + struct GparityWilsonImplParams { Coordinate twists; - GparityWilsonImplParams() : twists(Nd, 0) {}; + //mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs + Coordinate dirichlet; // Blocksize of dirichlet BCs + int partialDirichlet; + GparityWilsonImplParams() : twists(Nd, 0) { + dirichlet.resize(0); + partialDirichlet=0; + }; }; struct WilsonImplParams { bool overlapCommsCompute; + Coordinate dirichlet; // Blocksize of dirichlet BCs + int partialDirichlet; AcceleratorVector twist_n_2pi_L; AcceleratorVector boundary_phases; WilsonImplParams() { + dirichlet.resize(0); + partialDirichlet=0; boundary_phases.resize(Nd, 1.0); twist_n_2pi_L.resize(Nd, 0.0); }; WilsonImplParams(const AcceleratorVector phi) : boundary_phases(phi), overlapCommsCompute(false) { twist_n_2pi_L.resize(Nd, 0.0); + partialDirichlet=0; + dirichlet.resize(0); } }; struct StaggeredImplParams { - StaggeredImplParams() {}; + Coordinate dirichlet; // Blocksize of dirichlet BCs + int partialDirichlet; + StaggeredImplParams() + { + partialDirichlet=0; + dirichlet.resize(0); + }; }; struct OneFlavourRationalParams : Serializable { @@ -63,9 +81,11 @@ struct StaggeredImplParams { RealD, hi, int, MaxIter, RealD, tolerance, + RealD, mdtolerance, int, degree, int, precision, - int, BoundsCheckFreq); + int, BoundsCheckFreq, + RealD, BoundsCheckTol); // MaxIter and tolerance, vectors?? @@ -76,16 +96,62 @@ struct StaggeredImplParams { RealD tol = 1.0e-8, int _degree = 10, int _precision = 64, - int _BoundsCheckFreq=20) + int _BoundsCheckFreq=20, + RealD mdtol = 1.0e-6, + double _BoundsCheckTol=1e-6) : lo(_lo), hi(_hi), MaxIter(_maxit), tolerance(tol), + mdtolerance(mdtol), degree(_degree), precision(_precision), - BoundsCheckFreq(_BoundsCheckFreq){}; + BoundsCheckFreq(_BoundsCheckFreq), + BoundsCheckTol(_BoundsCheckTol){}; }; + /*Action parameters for the generalized rational action + The approximation is for (M^dag M)^{1/inv_pow} + where inv_pow is the denominator of the fractional power. 
+ Default inv_pow=2 for square root, making this equivalent to + the OneFlavourRational action + */ + struct RationalActionParams : Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(RationalActionParams, + int, inv_pow, + RealD, lo, //low eigenvalue bound of rational approx + RealD, hi, //high eigenvalue bound of rational approx + int, MaxIter, //maximum iterations in msCG + RealD, action_tolerance, //msCG tolerance in action evaluation + int, action_degree, //rational approx degree in action evaluation + RealD, md_tolerance, //msCG tolerance in MD integration + int, md_degree, //rational approx degree in MD integration + int, precision, //precision of floating point arithmetic + int, BoundsCheckFreq); //frequency the approximation is tested (with Metropolis degree/tolerance); 0 disables the check + // constructor + RationalActionParams(int _inv_pow = 2, + RealD _lo = 0.0, + RealD _hi = 1.0, + int _maxit = 1000, + RealD _action_tolerance = 1.0e-8, + int _action_degree = 10, + RealD _md_tolerance = 1.0e-8, + int _md_degree = 10, + int _precision = 64, + int _BoundsCheckFreq=20) + : inv_pow(_inv_pow), + lo(_lo), + hi(_hi), + MaxIter(_maxit), + action_tolerance(_action_tolerance), + action_degree(_action_degree), + md_tolerance(_md_tolerance), + md_degree(_md_degree), + precision(_precision), + BoundsCheckFreq(_BoundsCheckFreq){}; + }; + + NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/fermion/CayleyFermion5D.h b/Grid/qcd/action/fermion/CayleyFermion5D.h index 3e06aa26..cf39ec99 100644 --- a/Grid/qcd/action/fermion/CayleyFermion5D.h +++ b/Grid/qcd/action/fermion/CayleyFermion5D.h @@ -71,6 +71,7 @@ public: RealD Mass(void) { return (mass_plus + mass_minus) / 2.0; }; RealD MassPlus(void) { return mass_plus; }; RealD MassMinus(void) { return mass_minus; }; + void SetMass(RealD _mass) { mass_plus=mass_minus=_mass; SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs @@ -182,16 +183,6 @@ public: GridRedBlackCartesian &FourDimRedBlackGrid, RealD _mass,RealD _M5,const ImplParams &p= ImplParams()); - void CayleyReport(void); - void CayleyZeroCounters(void); - - double M5Dflops; - double M5Dcalls; - double M5Dtime; - - double MooeeInvFlops; - double MooeeInvCalls; - double MooeeInvTime; protected: virtual void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c); diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index cd469ea7..d94f31d4 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -140,6 +140,7 @@ public: return NMAX; } + static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-12,R);} static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-12,R);} static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-6,R);} diff --git a/Grid/qcd/action/fermion/DWFSlow.h b/Grid/qcd/action/fermion/DWFSlow.h new file mode 100644 index 00000000..61298504 --- /dev/null +++ b/Grid/qcd/action/fermion/DWFSlow.h @@ -0,0 +1,291 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DWFSlow.h + +Copyright (C) 2022 + +Author: Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version.
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ + /* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + +template +class DWFSlowFermion : public FermionOperator +{ +public: + INHERIT_IMPL_TYPES(Impl); + + /////////////////////////////////////////////////////////////// + // Implement the abstract base + /////////////////////////////////////////////////////////////// + GridBase *GaugeGrid(void) { return _grid4; } + GridBase *GaugeRedBlackGrid(void) { return _cbgrid4; } + GridBase *FermionGrid(void) { return _grid; } + GridBase *FermionRedBlackGrid(void) { return _cbgrid; } + + FermionField _tmp; + FermionField &tmp(void) { return _tmp; } + + ////////////////////////////////////////////////////////////////// + // override multiply; cut number routines if pass dagger argument + // and also make interface more uniformly consistent + ////////////////////////////////////////////////////////////////// + virtual void M(const FermionField &in, FermionField &out) + { + FermionField tmp(_grid); + out = (5.0 - M5) * in; + Dhop(in,tmp,DaggerNo); + out = out + tmp; + } + virtual void Mdag(const FermionField &in, FermionField &out) + { + FermionField tmp(_grid); + out = (5.0 - M5) * in; + Dhop(in,tmp,DaggerYes); + out = out + tmp; + }; + + ///////////////////////////////////////////////////////// + // half checkerboard operations: 5D redblack so just site identity + ///////////////////////////////////////////////////////// + void Meooe(const FermionField &in, FermionField &out) + { + if ( in.Checkerboard() == Odd ) { + this->DhopEO(in,out,DaggerNo); + } else { + this->DhopOE(in,out,DaggerNo); + } + } + void MeooeDag(const FermionField &in, FermionField &out) + { + if ( in.Checkerboard() == Odd ) { + this->DhopEO(in,out,DaggerYes); + } else { + this->DhopOE(in,out,DaggerYes); + } + }; + + // allow override for twisted mass and clover + virtual void Mooee(const FermionField &in, FermionField &out) + { + out = (5.0 - M5) * in; + } + virtual void MooeeDag(const FermionField &in, FermionField &out) + { + out = (5.0 - M5) * in; + } + virtual void MooeeInv(const FermionField &in, FermionField &out) + { + out = (1.0/(5.0 - M5)) * in; + }; + virtual void MooeeInvDag(const FermionField &in, FermionField &out) + { + out = (1.0/(5.0 - M5)) * in; + }; + + virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _mass,std::vector twist) {} ; + + //////////////////////// + // Derivative interface + //////////////////////// + // Interface calls an internal routine + void DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) { assert(0);}; + void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){ assert(0);}; + void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){ assert(0);}; + + /////////////////////////////////////////////////////////////// + // non-hermitian hopping term; half cb or both + 
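// Usage sketch, ahead of the rest of the class below: a hypothetical driver
// in the style of Grid's test programs, constructing DWFSlowFermionD (the
// typedef appears at the end of this file) and applying M = (5 - M5) + Dhop.
// Seeds, lattice size, mass and M5 are illustrative values, not from the patch.
#include <Grid/Grid.h>
using namespace Grid;

int main(int argc, char **argv) {
  Grid_init(&argc, &argv);
  const int Ls = 8;
  GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(
      GridDefaultLatt(), GridDefaultSimd(Nd, vComplexD::Nsimd()), GridDefaultMpi());
  GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
  GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);

  GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(std::vector<int>{1, 2, 3, 4});
  LatticeGaugeFieldD Umu(UGrid);
  SU<Nc>::HotConfiguration(RNG4, Umu);   // random gauge configuration

  DWFSlowFermionD Ddwf(Umu, *FGrid, *FrbGrid, /*mass*/ 0.01, /*M5*/ 1.8);

  GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(std::vector<int>{5, 6, 7, 8});
  LatticeFermionD src(FGrid), res(FGrid);
  gaussian(RNG5, src);
  Ddwf.M(src, res);                      // out = (5 - M5)*in + Dhop(in)
  std::cout << GridLogMessage << "|M src|^2 = " << norm2(res) << std::endl;
  Grid_finalize();
  return 0;
}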
/////////////////////////////////////////////////////////////// + void Dhop(const FermionField &in, FermionField &out, int dag) + { + FermionField tmp(in.Grid()); + Dhop5(in,out,MassField,MassField,dag ); + for(int mu=0;mu<4;mu++){ + DhopDirU(in,Umu[mu],Umu[mu],tmp,mu,dag ); out = out + tmp; + } + }; + void DhopOE(const FermionField &in, FermionField &out, int dag) + { + FermionField tmp(in.Grid()); + assert(in.Checkerboard()==Even); + Dhop5(in,out,MassFieldOdd,MassFieldEven,dag); + for(int mu=0;mu<4;mu++){ + DhopDirU(in,UmuOdd[mu],UmuEven[mu],tmp,mu,dag ); out = out + tmp; + } + }; + void DhopEO(const FermionField &in, FermionField &out, int dag) + { + FermionField tmp(in.Grid()); + assert(in.Checkerboard()==Odd); + Dhop5(in,out, MassFieldEven,MassFieldOdd ,dag ); + for(int mu=0;mu<4;mu++){ + DhopDirU(in,UmuEven[mu],UmuOdd[mu],tmp,mu,dag ); out = out + tmp; + } + }; + + /////////////////////////////////////////////////////////////// + // Multigrid assistance; the force term uses this too + /////////////////////////////////////////////////////////////// + void Mdir(const FermionField &in, FermionField &out, int dir, int disp){ assert(0);}; + void MdirAll(const FermionField &in, std::vector &out) { assert(0);}; + void DhopDir(const FermionField &in, FermionField &out, int dir, int disp) { assert(0);}; + void DhopDirAll(const FermionField &in, std::vector &out) { assert(0);}; + void DhopDirCalc(const FermionField &in, FermionField &out, int dirdisp,int gamma, int dag) { assert(0);}; + + void DhopDirU(const FermionField &in, const GaugeLinkField &U5e, const GaugeLinkField &U5o, FermionField &out, int mu, int dag) + { + RealD sgn= 1.0; + if (dag ) sgn=-1.0; + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT + }; + + // mass is 1,1,1,1,-m and has to multiply the round-the-world term + FermionField tmp (in.Grid()); + tmp = U5e * Cshift(in,mu+1,1); + out = tmp - Gamma(Gmu[mu])*tmp*sgn; + + tmp = Cshift(adj(U5o)*in,mu+1,-1); + out = out + tmp + Gamma(Gmu[mu])*tmp*sgn; + + out = -0.5*out; + }; + + void Dhop5(const FermionField &in, FermionField &out, ComplexField &massE, ComplexField &massO, int dag) + { + // Mass term....
must multiply the round-the-world term with mass = 1,1,1,1,-m + RealD sgn= 1.0; + if (dag ) sgn=-1.0; + + Gamma G5(Gamma::Algebra::Gamma5); + + FermionField tmp (in.Grid()); + tmp = massE*Cshift(in,0,1); + out = tmp - G5*tmp*sgn; + + tmp = Cshift(massO*in,0,-1); + out = out + tmp + G5*tmp*sgn; + out = -0.5*out; + }; + + // Constructor + DWFSlowFermion(GaugeField &_Umu, GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, RealD _mass, RealD _M5) + : + _grid(&Fgrid), + _cbgrid(&Hgrid), + _grid4(_Umu.Grid()), + Umu(Nd,&Fgrid), + UmuEven(Nd,&Hgrid), + UmuOdd(Nd,&Hgrid), + MassField(&Fgrid), + MassFieldEven(&Hgrid), + MassFieldOdd(&Hgrid), + M5(_M5), + mass(_mass), + _tmp(&Hgrid) + { + Ls=Fgrid._fdimensions[0]; + ImportGauge(_Umu); + + typedef typename FermionField::scalar_type scalar; + + Lattice > coor(&Fgrid); + LatticeCoordinate(coor, 0); // Scoor + ComplexField one(&Fgrid); + MassField =scalar(-mass); + one =scalar(1.0); + MassField =where(coor==Integer(Ls-1),MassField,one); + for(int mu=0;mu(_Umu4, mu); + for(int s=0;sLs;s++){ + InsertSlice(U4,Umu[mu],s,0); + } + } + } + + /////////////////////////////////////////////////////////////// + // Data members required to support the functionality + /////////////////////////////////////////////////////////////// + +public: + virtual RealD Mass(void) { return mass; } + virtual int isTrivialEE(void) { return 1; }; + RealD mass; + RealD M5; + int Ls; + + GridBase *_grid4; + GridBase *_grid; + GridBase *_cbgrid4; + GridBase *_cbgrid; + + // Copy of the gauge field , with even and odd subsets + std::vector Umu; + std::vector UmuEven; + std::vector UmuOdd; + ComplexField MassField; + ComplexField MassFieldEven; + ComplexField MassFieldOdd; + + /////////////////////////////////////////////////////////////// + // Conserved current utilities + /////////////////////////////////////////////////////////////// + void ContractConservedCurrent(PropagatorField &q_in_1, + PropagatorField &q_in_2, + PropagatorField &q_out, + PropagatorField &phys_src, + Current curr_type, + unsigned int mu){} + void SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + PropagatorField &phys_src, + Current curr_type, + unsigned int mu, + unsigned int tmin, + unsigned int tmax, + ComplexField &lattice_cmplx){} +}; + +typedef DWFSlowFermion DWFSlowFermionF; +typedef DWFSlowFermion DWFSlowFermionD; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/Fermion.h b/Grid/qcd/action/fermion/Fermion.h index 223ff9dd..bad736cf 100644 --- a/Grid/qcd/action/fermion/Fermion.h +++ b/Grid/qcd/action/fermion/Fermion.h @@ -47,6 +47,7 @@ Author: Peter Boyle //////////////////////////////////////////// // Fermion operators / actions //////////////////////////////////////////// +#include // Slow DWF #include // 4d wilson like NAMESPACE_CHECK(Wilson); @@ -112,28 +113,21 @@ NAMESPACE_CHECK(DWFutils); // Cayley 5d NAMESPACE_BEGIN(Grid); -typedef WilsonFermion WilsonFermionR; +typedef WilsonFermion WilsonFermionD2; typedef WilsonFermion WilsonFermionF; typedef WilsonFermion WilsonFermionD; -//typedef WilsonFermion WilsonFermionRL; -//typedef WilsonFermion WilsonFermionFH; -//typedef WilsonFermion WilsonFermionDF; - -typedef WilsonFermion WilsonAdjFermionR; typedef WilsonFermion WilsonAdjFermionF; typedef WilsonFermion WilsonAdjFermionD; -typedef WilsonFermion WilsonTwoIndexSymmetricFermionR; typedef WilsonFermion WilsonTwoIndexSymmetricFermionF; typedef WilsonFermion WilsonTwoIndexSymmetricFermionD; -typedef WilsonFermion WilsonTwoIndexAntiSymmetricFermionR; typedef WilsonFermion
WilsonTwoIndexAntiSymmetricFermionF; typedef WilsonFermion WilsonTwoIndexAntiSymmetricFermionD; // Twisted mass fermion -typedef WilsonTMFermion WilsonTMFermionR; +typedef WilsonTMFermion WilsonTMFermionD2; typedef WilsonTMFermion WilsonTMFermionF; typedef WilsonTMFermion WilsonTMFermionD; @@ -141,23 +135,20 @@ typedef WilsonTMFermion WilsonTMFermionD; template using WilsonClover = WilsonCloverFermion>; template using WilsonExpClover = WilsonCloverFermion>; -typedef WilsonClover WilsonCloverFermionR; +typedef WilsonClover WilsonCloverFermionD2; typedef WilsonClover WilsonCloverFermionF; typedef WilsonClover WilsonCloverFermionD; -typedef WilsonExpClover WilsonExpCloverFermionR; +typedef WilsonExpClover WilsonExpCloverFermionD2; typedef WilsonExpClover WilsonExpCloverFermionF; typedef WilsonExpClover WilsonExpCloverFermionD; -typedef WilsonClover WilsonCloverAdjFermionR; typedef WilsonClover WilsonCloverAdjFermionF; typedef WilsonClover WilsonCloverAdjFermionD; -typedef WilsonClover WilsonCloverTwoIndexSymmetricFermionR; typedef WilsonClover WilsonCloverTwoIndexSymmetricFermionF; typedef WilsonClover WilsonCloverTwoIndexSymmetricFermionD; -typedef WilsonClover WilsonCloverTwoIndexAntiSymmetricFermionR; typedef WilsonClover WilsonCloverTwoIndexAntiSymmetricFermionF; typedef WilsonClover WilsonCloverTwoIndexAntiSymmetricFermionD; @@ -165,161 +156,108 @@ typedef WilsonClover WilsonCloverTwoIndexAntiS template using CompactWilsonClover = CompactWilsonCloverFermion>; template using CompactWilsonExpClover = CompactWilsonCloverFermion>; -typedef CompactWilsonClover CompactWilsonCloverFermionR; +typedef CompactWilsonClover CompactWilsonCloverFermionD2; typedef CompactWilsonClover CompactWilsonCloverFermionF; typedef CompactWilsonClover CompactWilsonCloverFermionD; -typedef CompactWilsonExpClover CompactWilsonExpCloverFermionR; +typedef CompactWilsonExpClover CompactWilsonExpCloverFermionD2; typedef CompactWilsonExpClover CompactWilsonExpCloverFermionF; typedef CompactWilsonExpClover CompactWilsonExpCloverFermionD; -typedef CompactWilsonClover CompactWilsonCloverAdjFermionR; typedef CompactWilsonClover CompactWilsonCloverAdjFermionF; typedef CompactWilsonClover CompactWilsonCloverAdjFermionD; -typedef CompactWilsonClover CompactWilsonCloverTwoIndexSymmetricFermionR; typedef CompactWilsonClover CompactWilsonCloverTwoIndexSymmetricFermionF; typedef CompactWilsonClover CompactWilsonCloverTwoIndexSymmetricFermionD; -typedef CompactWilsonClover CompactWilsonCloverTwoIndexAntiSymmetricFermionR; typedef CompactWilsonClover CompactWilsonCloverTwoIndexAntiSymmetricFermionF; typedef CompactWilsonClover CompactWilsonCloverTwoIndexAntiSymmetricFermionD; // Domain Wall fermions -typedef DomainWallFermion DomainWallFermionR; typedef DomainWallFermion DomainWallFermionF; typedef DomainWallFermion DomainWallFermionD; +typedef DomainWallFermion DomainWallFermionD2; -//typedef DomainWallFermion DomainWallFermionRL; -//typedef DomainWallFermion DomainWallFermionFH; -//typedef DomainWallFermion DomainWallFermionDF; - -typedef DomainWallEOFAFermion DomainWallEOFAFermionR; +typedef DomainWallEOFAFermion DomainWallEOFAFermionD2; typedef DomainWallEOFAFermion DomainWallEOFAFermionF; typedef DomainWallEOFAFermion DomainWallEOFAFermionD; -//typedef DomainWallEOFAFermion DomainWallEOFAFermionRL; -//typedef DomainWallEOFAFermion DomainWallEOFAFermionFH; -//typedef DomainWallEOFAFermion DomainWallEOFAFermionDF; - -typedef MobiusFermion MobiusFermionR; +typedef MobiusFermion MobiusFermionD2; typedef MobiusFermion MobiusFermionF; 
typedef MobiusFermion MobiusFermionD; -//typedef MobiusFermion MobiusFermionRL; -//typedef MobiusFermion MobiusFermionFH; -//typedef MobiusFermion MobiusFermionDF; - -typedef MobiusEOFAFermion MobiusEOFAFermionR; +typedef MobiusEOFAFermion MobiusEOFAFermionD2; typedef MobiusEOFAFermion MobiusEOFAFermionF; typedef MobiusEOFAFermion MobiusEOFAFermionD; -//typedef MobiusEOFAFermion MobiusEOFAFermionRL; -//typedef MobiusEOFAFermion MobiusEOFAFermionFH; -//typedef MobiusEOFAFermion MobiusEOFAFermionDF; - -typedef ZMobiusFermion ZMobiusFermionR; +typedef ZMobiusFermion ZMobiusFermionD2; typedef ZMobiusFermion ZMobiusFermionF; typedef ZMobiusFermion ZMobiusFermionD; -//typedef ZMobiusFermion ZMobiusFermionRL; -//typedef ZMobiusFermion ZMobiusFermionFH; -//typedef ZMobiusFermion ZMobiusFermionDF; - -// Ls vectorised -typedef ScaledShamirFermion ScaledShamirFermionR; +typedef ScaledShamirFermion ScaledShamirFermionD2; typedef ScaledShamirFermion ScaledShamirFermionF; typedef ScaledShamirFermion ScaledShamirFermionD; -typedef MobiusZolotarevFermion MobiusZolotarevFermionR; +typedef MobiusZolotarevFermion MobiusZolotarevFermionD2; typedef MobiusZolotarevFermion MobiusZolotarevFermionF; typedef MobiusZolotarevFermion MobiusZolotarevFermionD; -typedef ShamirZolotarevFermion ShamirZolotarevFermionR; +typedef ShamirZolotarevFermion ShamirZolotarevFermionD2; typedef ShamirZolotarevFermion ShamirZolotarevFermionF; typedef ShamirZolotarevFermion ShamirZolotarevFermionD; -typedef OverlapWilsonCayleyTanhFermion OverlapWilsonCayleyTanhFermionR; +typedef OverlapWilsonCayleyTanhFermion OverlapWilsonCayleyTanhFermionD2; typedef OverlapWilsonCayleyTanhFermion OverlapWilsonCayleyTanhFermionF; typedef OverlapWilsonCayleyTanhFermion OverlapWilsonCayleyTanhFermionD; -typedef OverlapWilsonCayleyZolotarevFermion OverlapWilsonCayleyZolotarevFermionR; +typedef OverlapWilsonCayleyZolotarevFermion OverlapWilsonCayleyZolotarevFermionD2; typedef OverlapWilsonCayleyZolotarevFermion OverlapWilsonCayleyZolotarevFermionF; typedef OverlapWilsonCayleyZolotarevFermion OverlapWilsonCayleyZolotarevFermionD; // Continued fraction -typedef OverlapWilsonContFracTanhFermion OverlapWilsonContFracTanhFermionR; +typedef OverlapWilsonContFracTanhFermion OverlapWilsonContFracTanhFermionD2; typedef OverlapWilsonContFracTanhFermion OverlapWilsonContFracTanhFermionF; typedef OverlapWilsonContFracTanhFermion OverlapWilsonContFracTanhFermionD; -typedef OverlapWilsonContFracZolotarevFermion OverlapWilsonContFracZolotarevFermionR; +typedef OverlapWilsonContFracZolotarevFermion OverlapWilsonContFracZolotarevFermionD2; typedef OverlapWilsonContFracZolotarevFermion OverlapWilsonContFracZolotarevFermionF; typedef OverlapWilsonContFracZolotarevFermion OverlapWilsonContFracZolotarevFermionD; // Partial fraction -typedef OverlapWilsonPartialFractionTanhFermion OverlapWilsonPartialFractionTanhFermionR; +typedef OverlapWilsonPartialFractionTanhFermion OverlapWilsonPartialFractionTanhFermionD2; typedef OverlapWilsonPartialFractionTanhFermion OverlapWilsonPartialFractionTanhFermionF; typedef OverlapWilsonPartialFractionTanhFermion OverlapWilsonPartialFractionTanhFermionD; -typedef OverlapWilsonPartialFractionZolotarevFermion OverlapWilsonPartialFractionZolotarevFermionR; +typedef OverlapWilsonPartialFractionZolotarevFermion OverlapWilsonPartialFractionZolotarevFermionD2; typedef OverlapWilsonPartialFractionZolotarevFermion OverlapWilsonPartialFractionZolotarevFermionF; typedef OverlapWilsonPartialFractionZolotarevFermion 
OverlapWilsonPartialFractionZolotarevFermionD; // Gparity cases; partial list until tested -typedef WilsonFermion GparityWilsonFermionR; typedef WilsonFermion GparityWilsonFermionF; typedef WilsonFermion GparityWilsonFermionD; -//typedef WilsonFermion GparityWilsonFermionRL; -//typedef WilsonFermion GparityWilsonFermionFH; -//typedef WilsonFermion GparityWilsonFermionDF; - -typedef DomainWallFermion GparityDomainWallFermionR; typedef DomainWallFermion GparityDomainWallFermionF; typedef DomainWallFermion GparityDomainWallFermionD; -//typedef DomainWallFermion GparityDomainWallFermionRL; -//typedef DomainWallFermion GparityDomainWallFermionFH; -//typedef DomainWallFermion GparityDomainWallFermionDF; - -typedef DomainWallEOFAFermion GparityDomainWallEOFAFermionR; +typedef DomainWallEOFAFermion GparityDomainWallEOFAFermionD2; typedef DomainWallEOFAFermion GparityDomainWallEOFAFermionF; typedef DomainWallEOFAFermion GparityDomainWallEOFAFermionD; -//typedef DomainWallEOFAFermion GparityDomainWallEOFAFermionRL; -//typedef DomainWallEOFAFermion GparityDomainWallEOFAFermionFH; -//typedef DomainWallEOFAFermion GparityDomainWallEOFAFermionDF; - -typedef WilsonTMFermion GparityWilsonTMFermionR; +typedef WilsonTMFermion GparityWilsonTMFermionD2; typedef WilsonTMFermion GparityWilsonTMFermionF; typedef WilsonTMFermion GparityWilsonTMFermionD; -//typedef WilsonTMFermion GparityWilsonTMFermionRL; -//typedef WilsonTMFermion GparityWilsonTMFermionFH; -//typedef WilsonTMFermion GparityWilsonTMFermionDF; - -typedef MobiusFermion GparityMobiusFermionR; +typedef MobiusFermion GparityMobiusFermionD2; typedef MobiusFermion GparityMobiusFermionF; typedef MobiusFermion GparityMobiusFermionD; -//typedef MobiusFermion GparityMobiusFermionRL; -//typedef MobiusFermion GparityMobiusFermionFH; -//typedef MobiusFermion GparityMobiusFermionDF; - -typedef MobiusEOFAFermion GparityMobiusEOFAFermionR; +typedef MobiusEOFAFermion GparityMobiusEOFAFermionD2; typedef MobiusEOFAFermion GparityMobiusEOFAFermionF; typedef MobiusEOFAFermion GparityMobiusEOFAFermionD; -//typedef MobiusEOFAFermion GparityMobiusEOFAFermionRL; -//typedef MobiusEOFAFermion GparityMobiusEOFAFermionFH; -//typedef MobiusEOFAFermion GparityMobiusEOFAFermionDF; - -typedef ImprovedStaggeredFermion ImprovedStaggeredFermionR; typedef ImprovedStaggeredFermion ImprovedStaggeredFermionF; typedef ImprovedStaggeredFermion ImprovedStaggeredFermionD; -typedef NaiveStaggeredFermion NaiveStaggeredFermionR; typedef NaiveStaggeredFermion NaiveStaggeredFermionF; typedef NaiveStaggeredFermion NaiveStaggeredFermionD; -typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermion5DR; typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermion5DF; typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermion5DD; diff --git a/Grid/qcd/action/fermion/FermionOperator.h b/Grid/qcd/action/fermion/FermionOperator.h index 570e350d..66644d7f 100644 --- a/Grid/qcd/action/fermion/FermionOperator.h +++ b/Grid/qcd/action/fermion/FermionOperator.h @@ -49,6 +49,8 @@ public: virtual FermionField &tmp(void) = 0; + virtual void DirichletBlock(const Coordinate & _Block) { assert(0); }; + GridBase * Grid(void) { return FermionGrid(); }; // this is all the linalg routines need to know GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); }; diff --git a/Grid/qcd/action/fermion/GparityWilsonImpl.h b/Grid/qcd/action/fermion/GparityWilsonImpl.h index fd627aed..8017bc76 100644 --- a/Grid/qcd/action/fermion/GparityWilsonImpl.h +++ b/Grid/qcd/action/fermion/GparityWilsonImpl.h @@ -30,6 +30,18 @@ 
directory NAMESPACE_BEGIN(Grid); +/* + Policy implementation for G-parity boundary conditions + + Rather than treating the gauge field as a flavored field, the Grid implementation of G-parity treats the gauge field as a regular + field with complex conjugate boundary conditions. In order to ensure the second flavor interacts with the conjugate links and the first + with the regular links, we overload the functionality of doubleStore, whose purpose is to store the gauge field and the barrel-shifted gauge field + to avoid communicating links when applying the Dirac operator, such that the double-stored field also contains a flavor index which maps to + either the link or the conjugate link. This flavored field is then used by multLink to apply the correct link to a spinor. + + Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction. + mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs + */ template class GparityWilsonImpl : public ConjugateGaugeImpl > { public: @@ -113,7 +125,7 @@ public: || ((distance== 1)&&(icoor[direction]==1)) || ((distance==-1)&&(icoor[direction]==0)); - permute_lane = permute_lane && SE->_around_the_world && St.parameters.twists[mmu]; //only if we are going around the world + permute_lane = permute_lane && SE->_around_the_world && St.parameters.twists[mmu] && mmu < Nd-1; //only if we are going around the world in a spatial direction //Apply the links int f_upper = permute_lane ? 1 : 0; @@ -139,10 +151,10 @@ public: assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code assert((sl == 1) || (sl == 2)); - if ( SE->_around_the_world && St.parameters.twists[mmu] ) { - + //If this site is a global boundary site, perform the G-parity flavor twist + if ( mmu < Nd-1 && SE->_around_the_world && St.parameters.twists[mmu] ) { if ( sl == 2 ) { - + //Only do the twist for lanes on the edge of the physical node ExtractBuffer vals(Nsimd); extract(chi,vals); @@ -197,6 +209,19 @@ public: reg = memory; } + + //Poke 'poke_f0' onto flavor 0 and 'poke_f1' onto flavor 1 in direction mu of the doubled gauge field Uds + inline void pokeGparityDoubledGaugeField(DoubledGaugeField &Uds, const GaugeLinkField &poke_f0, const GaugeLinkField &poke_f1, const int mu){ + autoView(poke_f0_v, poke_f0, CpuRead); + autoView(poke_f1_v, poke_f1, CpuRead); + autoView(Uds_v, Uds, CpuWrite); + thread_foreach(ss,poke_f0_v,{ + Uds_v[ss](0)(mu) = poke_f0_v[ss](); + Uds_v[ss](1)(mu) = poke_f1_v[ss](); + }); + } + + inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) { conformable(Uds.Grid(),GaugeGrid); @@ -207,14 +232,19 @@ public: GaugeLinkField Uconj(GaugeGrid); Lattice > coor(GaugeGrid); - - for(int mu=0;mu(Umu,mu); Uconj = conjugate(U); + // Implement the isospin rotation sign on the boundary between f=1 and f=0 // This phase could come from a simple bc 1,1,-1,1 ..
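// Aside: a standalone 1-d U(1) toy of the flavour structure described in the
// comment block above (illustrative only, not part of the patch; the
// boundary-sign code of DoubleStore resumes right after this sketch, and the
// extra -1 isospin sign applied via neglink below is omitted here).
// Flavour 0 hops with the link U, flavour 1 with conj(U); crossing a
// G-parity-twisted boundary exchanges the two flavours.
#include <array>
#include <complex>
#include <cstdio>
#include <vector>

using Cplx = std::complex<double>;

int main(void) {
  const int L = 8;
  std::vector<Cplx> U(L, Cplx(0.6, 0.8));   // unit-modulus toy links
  // Double store: Uds[x][f] is the link flavour f multiplies at site x.
  std::vector<std::array<Cplx, 2>> Uds(L);
  for (int x = 0; x < L; x++) Uds[x] = {U[x], std::conj(U[x])};

  // Two-flavour field and a forward hop with a twist in this direction.
  std::vector<std::array<Cplx, 2>> psi(L, {Cplx(1, 0), Cplx(0, 1)}), chi(L);
  for (int x = 0; x < L; x++) {
    int xp = (x + 1) % L;
    bool wrap = (xp == 0);                  // did the hop cross the boundary?
    for (int f = 0; f < 2; f++) {
      int fp = wrap ? 1 - f : f;            // flavour exchange on the wrap
      chi[x][f] = Uds[x][f] * psi[xp][fp];
    }
  }
  std::printf("chi[%d][0] = (%g,%g)\n", L - 1, chi[L-1][0].real(), chi[L-1][0].imag());
  return 0;
}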
int neglink = GaugeGrid->GlobalDimensions()[mu]-1; if ( Params.twists[mu] ) { @@ -229,7 +259,7 @@ public: thread_foreach(ss,U_v,{ Uds_v[ss](0)(mu) = U_v[ss](); Uds_v[ss](1)(mu) = Uconj_v[ss](); - }); + }); } U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary @@ -260,6 +290,38 @@ public: }); } } + + { //periodic / antiperiodic temporal BCs + int mu = Nd-1; + int L = GaugeGrid->GlobalDimensions()[mu]; + int Lmu = L - 1; + + LatticeCoordinate(coor, mu); + + U = PeekIndex(Umu, mu); //Get t-directed links + + GaugeLinkField *Upoke = &U; + + if(Params.twists[mu]){ //antiperiodic + Utmp = where(coor == Lmu, -U, U); + Upoke = &Utmp; + } + + Uconj = conjugate(*Upoke); //second flavor interacts with conjugate links + pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu); + + //Get the barrel-shifted field + Utmp = adj(Cshift(U, mu, -1)); //is a forward shift! + Upoke = &Utmp; + + if(Params.twists[mu]){ + U = where(coor == 0, -Utmp, Utmp); //boundary phase + Upoke = &U; + } + + Uconj = conjugate(*Upoke); + pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu + 4); + } } inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) { @@ -298,28 +360,48 @@ public: inline void extractLinkField(std::vector &mat, DoubledGaugeField &Uds){ assert(0); } - + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) { - - int Ls = Btilde.Grid()->_fdimensions[0]; - - GaugeLinkField tmp(mat.Grid()); - tmp = Zero(); + int Ls=Btilde.Grid()->_fdimensions[0]; + { - autoView( tmp_v , tmp, CpuWrite); - autoView( Atilde_v , Atilde, CpuRead); - autoView( Btilde_v , Btilde, CpuRead); - thread_for(ss,tmp.Grid()->oSites(),{ - for (int s = 0; s < Ls; s++) { - int sF = s + Ls * ss; - auto ttmp = traceIndex(outerProduct(Btilde_v[sF], Atilde_v[sF])); - tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1)); - } - }); + GridBase *GaugeGrid = mat.Grid(); + Lattice > coor(GaugeGrid); + + if( Params.twists[mu] ){ + LatticeCoordinate(coor,mu); + } + + autoView( mat_v , mat, AcceleratorWrite); + autoView( Btilde_v , Btilde, AcceleratorRead); + autoView( Atilde_v , Atilde, AcceleratorRead); + accelerator_for(sss,mat.Grid()->oSites(), FermionField::vector_type::Nsimd(),{ + int sU=sss; + typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType; + ColorMatrixType sum; + zeroit(sum); + for(int s=0;s(mat, tmp, mu); - return; } + + + + }; diff --git a/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h b/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h index ecf44ed7..60cfc727 100644 --- a/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h +++ b/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h @@ -47,18 +47,6 @@ public: FermionField _tmp; FermionField &tmp(void) { return _tmp; } - //////////////////////////////////////// - // Performance monitoring - //////////////////////////////////////// - void Report(void); - void ZeroCounters(void); - double DhopTotalTime; - double DhopCalls; - double DhopCommTime; - double DhopComputeTime; - double DhopComputeTime2; - double DhopFaceTime; - /////////////////////////////////////////////////////////////// // Implement the abstract base /////////////////////////////////////////////////////////////// diff --git a/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h b/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h index ca660610..5b26b35c 100644 --- a/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h +++ b/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h @@ -52,18 +52,6 @@ public: FermionField 
_tmp; FermionField &tmp(void) { return _tmp; } - //////////////////////////////////////// - // Performance monitoring - //////////////////////////////////////// - void Report(void); - void ZeroCounters(void); - double DhopTotalTime; - double DhopCalls; - double DhopCommTime; - double DhopComputeTime; - double DhopComputeTime2; - double DhopFaceTime; - /////////////////////////////////////////////////////////////// // Implement the abstract base /////////////////////////////////////////////////////////////// diff --git a/Grid/qcd/action/fermion/NaiveStaggeredFermion.h b/Grid/qcd/action/fermion/NaiveStaggeredFermion.h index ca38a64f..5f69c2b1 100644 --- a/Grid/qcd/action/fermion/NaiveStaggeredFermion.h +++ b/Grid/qcd/action/fermion/NaiveStaggeredFermion.h @@ -47,18 +47,6 @@ public: FermionField _tmp; FermionField &tmp(void) { return _tmp; } - //////////////////////////////////////// - // Performance monitoring - //////////////////////////////////////// - void Report(void); - void ZeroCounters(void); - double DhopTotalTime; - double DhopCalls; - double DhopCommTime; - double DhopComputeTime; - double DhopComputeTime2; - double DhopFaceTime; - /////////////////////////////////////////////////////////////// // Implement the abstract base /////////////////////////////////////////////////////////////// diff --git a/Grid/qcd/action/fermion/WilsonCompressor.h b/Grid/qcd/action/fermion/WilsonCompressor.h index 5c3351a6..186fa278 100644 --- a/Grid/qcd/action/fermion/WilsonCompressor.h +++ b/Grid/qcd/action/fermion/WilsonCompressor.h @@ -32,17 +32,218 @@ Author: paboyle NAMESPACE_BEGIN(Grid); +/////////////////////////////////////////////////////////////// +// Wilson compressor will need FaceGather policies for: +// Periodic, Dirichlet, and partial Dirichlet for DWF +/////////////////////////////////////////////////////////////// +const int dwf_compressor_depth=2; +#define DWF_COMPRESS +class FaceGatherPartialDWF +{ +public: +#ifdef DWF_COMPRESS + static int PartialCompressionFactor(GridBase *grid) {return grid->_fdimensions[0]/(2*dwf_compressor_depth);}; +#else + static int PartialCompressionFactor(GridBase *grid) { return 1;} +#endif + template + static void Gather_plane_simple (commVector >& table, + const Lattice &rhs, + cobj *buffer, + compressor &compress, + int off,int so,int partial) + { + //DWF-only hack: if a direction is off-node we use partial Dirichlet + // Shrinks local and remote comms buffers + GridBase *Grid = rhs.Grid(); + int Ls = Grid->_rdimensions[0]; +#ifdef DWF_COMPRESS + int depth=dwf_compressor_depth; +#else + int depth=Ls/2; +#endif + std::pair *table_v = & table[0]; + auto rhs_v = rhs.View(AcceleratorRead); + int vol=table.size()/Ls; + accelerator_forNB( idx,table.size(), vobj::Nsimd(), { + Integer i=idx/Ls; + Integer s=idx%Ls; + Integer sc=depth+s-(Ls-depth); + if(s=Ls-depth) compress.Compress(buffer[off+i+sc*vol],rhs_v[so+table_v[idx].second]); + }); + rhs_v.ViewClose(); + } + template + static void DecompressFace(decompressor decompress,Decompression &dd) + { + auto Ls = dd.dims[0]; +#ifdef DWF_COMPRESS + int depth=dwf_compressor_depth; +#else + int depth=Ls/2; +#endif + // Just pass in the Grid + auto kp = dd.kernel_p; + auto mp = dd.mpi_p; + int size= dd.buffer_size; + int vol= size/Ls; + accelerator_forNB(o,size,1,{ + int idx=o/Ls; + int s=o%Ls; + if ( s < depth ) { + int oo=s*vol+idx; + kp[o]=mp[oo]; + } else if ( s >= Ls-depth ) { + int sc = depth + s - (Ls-depth); + int oo=sc*vol+idx; + kp[o]=mp[oo]; + } else { + kp[o] = Zero();//fill rest with zero if partial 
dirichlet + } + }); + } + //////////////////////////////////////////////////////////////////////////////////////////// + // Need to gather *interior portions* for ALL s-slices in simd directions + // Do the gather as need to treat SIMD lanes differently, and insert zeroes on receive side + // Reorder the fifth dim to be s=Ls-1 , s=0, s=1,...,Ls-2. + //////////////////////////////////////////////////////////////////////////////////////////// + template + static void Gather_plane_exchange(commVector >& table,const Lattice &rhs, + std::vector pointers,int dimension,int plane,int cbmask, + compressor &compress,int type,int partial) + { + GridBase *Grid = rhs.Grid(); + int Ls = Grid->_rdimensions[0]; +#ifdef DWF_COMPRESS + int depth=dwf_compressor_depth; +#else + int depth = Ls/2; +#endif + + // insertion of zeroes... + assert( (table.size()&0x1)==0); + int num=table.size()/2; + int so = plane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane + + auto rhs_v = rhs.View(AcceleratorRead); + auto p0=&pointers[0][0]; + auto p1=&pointers[1][0]; + auto tp=&table[0]; + int nnum=num/Ls; + accelerator_forNB(j, num, vobj::Nsimd(), { + // Reorders both local and remote comms buffers + // + int s = j % Ls; + int sp1 = (s+depth)%Ls; // peri incremented s slice + + int hxyz= j/Ls; + + int xyz0= hxyz*2; // xyzt part of coor + int xyz1= hxyz*2+1; + + int jj= hxyz + sp1*nnum ; // 0,1,2,3 -> Ls-1 slice , 0-slice, 1-slice .... + + int kk0= xyz0*Ls + s ; // s=0 goes to s=1 + int kk1= xyz1*Ls + s ; // s=Ls-1 -> s=0 + compress.CompressExchange(p0[jj],p1[jj], + rhs_v[so+tp[kk0 ].second], // Same s, consecutive xyz sites + rhs_v[so+tp[kk1 ].second], + type); + }); + rhs_v.ViewClose(); + } + // Merge routine is for SIMD faces + template + static void MergeFace(decompressor decompress,Merger &mm) + { + auto Ls = mm.dims[0]; +#ifdef DWF_COMPRESS + int depth=dwf_compressor_depth; +#else + int depth = Ls/2; +#endif + int num= mm.buffer_size/2; // relate vol and Ls to buffer size + auto mp = &mm.mpointer[0]; + auto vp0= &mm.vpointers[0][0]; // First arg is exchange first + auto vp1= &mm.vpointers[1][0]; + auto type= mm.type; + int nnum = num/Ls; + accelerator_forNB(o,num,Merger::Nsimd,{ + + int s=o%Ls; + int hxyz=o/Ls; // xyzt related component + int xyz0=hxyz*2; + int xyz1=hxyz*2+1; + + int sp = (s+depth)%Ls; + int jj= hxyz + sp*nnum ; // 0,1,2,3 -> Ls-1 slice , 0-slice, 1-slice .... 
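// Aside: a standalone sketch (not part of the patch) of the partial-Dirichlet
// buffer layout implemented by Gather_plane_simple/DecompressFace above.
// Only the outer depth s-slices at each end of the fifth dimension are
// communicated, an Ls/(2*depth) reduction matching PartialCompressionFactor;
// the interior slices are zero-filled on receive. The layout is simplified to
// slice-major [s*vol+i] on both sides (the kernels above also reorder lanes).
#include <cassert>
#include <cstdio>
#include <vector>

int main(void) {
  const int Ls = 12, depth = 2, vol = 4;  // depth plays the role of dwf_compressor_depth
  std::vector<double> face(Ls * vol), comms(2 * depth * vol), recv(Ls * vol);
  for (int i = 0; i < Ls * vol; i++) face[i] = i;  // fake surface data

  // Compress: keep s < depth at slot s, and s >= Ls-depth at slot depth+s-(Ls-depth).
  for (int s = 0; s < Ls; s++)
    for (int i = 0; i < vol; i++) {
      if (s < depth)            comms[s * vol + i] = face[s * vol + i];
      else if (s >= Ls - depth) comms[(depth + s - (Ls - depth)) * vol + i] = face[s * vol + i];
      // interior slices are simply not sent
    }
  // Decompress: restore the boundary slices, zero the (Dirichlet) interior.
  for (int s = 0; s < Ls; s++)
    for (int i = 0; i < vol; i++) {
      if (s < depth)            recv[s * vol + i] = comms[s * vol + i];
      else if (s >= Ls - depth) recv[s * vol + i] = comms[(depth + s - (Ls - depth)) * vol + i];
      else                      recv[s * vol + i] = 0.0;
    }
  for (int s = 0; s < Ls; s++)
    assert(recv[s * vol] == ((s < depth || s >= Ls - depth) ? face[s * vol] : 0.0));
  std::printf("sent %d of %d slices: compression factor %d\n", 2 * depth, Ls, Ls / (2 * depth));
  return 0;
}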
+ + int oo0= s+xyz0*Ls; + int oo1= s+xyz1*Ls; + + // same ss0, ss1 pair goes to new layout + decompress.Exchange(mp[oo0],mp[oo1],vp0[jj],vp1[jj],type); + }); + } +}; +class FaceGatherDWFMixedBCs +{ +public: +#ifdef DWF_COMPRESS + static int PartialCompressionFactor(GridBase *grid) {return grid->_fdimensions[0]/(2*dwf_compressor_depth);}; +#else + static int PartialCompressionFactor(GridBase *grid) {return 1;} +#endif + + template + static void Gather_plane_simple (commVector >& table, + const Lattice &rhs, + cobj *buffer, + compressor &compress, + int off,int so,int partial) + { + // std::cout << " face gather simple DWF partial "< + static void Gather_plane_exchange(commVector >& table,const Lattice &rhs, + std::vector pointers,int dimension,int plane,int cbmask, + compressor &compress,int type,int partial) + { + // std::cout << " face gather exch DWF partial "< + static void MergeFace(decompressor decompress,Merger &mm) + { + int partial = mm.partial; + // std::cout << " merge DWF partial "< + static void DecompressFace(decompressor decompress,Decompression &dd) + { + int partial = dd.partial; + // std::cout << " decompress DWF partial "< -class WilsonCompressorTemplate; - +//Could make FaceGather a template param, but then behaviour is runtime not compile time template -class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, - typename std::enable_if::value>::type > +class WilsonCompressorTemplate : public FaceGatherDWFMixedBCs +// : public FaceGatherSimple { public: @@ -79,172 +280,81 @@ public: /*****************************************************/ /* Exchange includes precision change if mpi data is not same */ /*****************************************************/ - accelerator_inline void Exchange(SiteHalfSpinor *mp, - const SiteHalfSpinor * __restrict__ vp0, - const SiteHalfSpinor * __restrict__ vp1, - Integer type,Integer o) const { + accelerator_inline void Exchange(SiteHalfSpinor &mp0, + SiteHalfSpinor &mp1, + const SiteHalfSpinor & vp0, + const SiteHalfSpinor & vp1, + Integer type) const { #ifdef GRID_SIMT - exchangeSIMT(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type); + exchangeSIMT(mp0,mp1,vp0,vp1,type); #else SiteHalfSpinor tmp1; SiteHalfSpinor tmp2; - exchange(tmp1,tmp2,vp0[o],vp1[o],type); - vstream(mp[2*o ],tmp1); - vstream(mp[2*o+1],tmp2); + exchange(tmp1,tmp2,vp0,vp1,type); + vstream(mp0,tmp1); + vstream(mp1,tmp2); #endif } - + /*****************************************************/ /* Have a decompression step if mpi data is not same */ /*****************************************************/ - accelerator_inline void Decompress(SiteHalfSpinor * __restrict__ out, - SiteHalfSpinor * __restrict__ in, Integer o) const { - assert(0); + accelerator_inline void Decompress(SiteHalfSpinor &out, + SiteHalfSpinor &in) const { + out = in; } /*****************************************************/ /* Compress Exchange */ /*****************************************************/ - accelerator_inline void CompressExchange(SiteHalfSpinor * __restrict__ out0, - SiteHalfSpinor * __restrict__ out1, - const SiteSpinor * __restrict__ in, - Integer j,Integer k, Integer m,Integer type) const + accelerator_inline void CompressExchange(SiteHalfSpinor &out0, + SiteHalfSpinor &out1, + const SiteSpinor &in0, + const SiteSpinor &in1, + Integer type) const { #ifdef GRID_SIMT typedef SiteSpinor vobj; typedef SiteHalfSpinor hvobj; - typedef decltype(coalescedRead(*in)) sobj; - typedef decltype(coalescedRead(*out0)) hsobj; + typedef decltype(coalescedRead(in0)) sobj; + typedef 
decltype(coalescedRead(out0)) hsobj; constexpr unsigned int Nsimd = vobj::Nsimd(); unsigned int mask = Nsimd >> (type + 1); int lane = acceleratorSIMTlane(Nsimd); int j0 = lane &(~mask); // inner coor zero int j1 = lane |(mask) ; // inner coor one - const vobj *vp0 = &in[k]; // out0[j] = merge low bit of type from in[k] and in[m] - const vobj *vp1 = &in[m]; // out1[j] = merge hi bit of type from in[k] and in[m] - const vobj *vp = (lane&mask) ? vp1:vp0;// if my lane has high bit take vp1, low bit take vp0 - auto sa = coalescedRead(*vp,j0); // lane to read for out 0, NB 50% read coalescing - auto sb = coalescedRead(*vp,j1); // lane to read for out 1 + const vobj *vp0 = &in0; + const vobj *vp1 = &in1; + const vobj *vp = (lane&mask) ? vp1:vp0; + auto sa = coalescedRead(*vp,j0); + auto sb = coalescedRead(*vp,j1); hsobj psa, psb; - projector::Proj(psa,sa,mu,dag); // spin project the result0 - projector::Proj(psb,sb,mu,dag); // spin project the result1 - coalescedWrite(out0[j],psa); - coalescedWrite(out1[j],psb); + projector::Proj(psa,sa,mu,dag); + projector::Proj(psb,sb,mu,dag); + coalescedWrite(out0,psa); + coalescedWrite(out1,psb); #else SiteHalfSpinor temp1, temp2; SiteHalfSpinor temp3, temp4; - projector::Proj(temp1,in[k],mu,dag); - projector::Proj(temp2,in[m],mu,dag); + projector::Proj(temp1,in0,mu,dag); + projector::Proj(temp2,in1,mu,dag); exchange(temp3,temp4,temp1,temp2,type); - vstream(out0[j],temp3); - vstream(out1[j],temp4); + vstream(out0,temp3); + vstream(out1,temp4); #endif } /*****************************************************/ /* Pass the info to the stencil */ /*****************************************************/ - accelerator_inline bool DecompressionStep(void) const { return false; } + accelerator_inline bool DecompressionStep(void) const { + return false; + } }; -#if 0 -template -class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, - typename std::enable_if::value>::type > -{ -public: - - int mu,dag; - - void Point(int p) { mu=p; }; - - WilsonCompressorTemplate(int _dag=0){ - dag = _dag; - } - - typedef _Spinor SiteSpinor; - typedef _Hspinor SiteHalfSpinor; - typedef _HCspinor SiteHalfCommSpinor; - typedef typename SiteHalfCommSpinor::vector_type vComplexLow; - typedef typename SiteHalfSpinor::vector_type vComplexHigh; - constexpr static int Nw=sizeof(SiteHalfSpinor)/sizeof(vComplexHigh); - - accelerator_inline int CommDatumSize(void) const { - return sizeof(SiteHalfCommSpinor); - } - - /*****************************************************/ - /* Compress includes precision change if mpi data is not same */ - /*****************************************************/ - accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const { - SiteHalfSpinor hsp; - SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf; - projector::Proj(hsp,in,mu,dag); - precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw); - } - accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const { -#ifdef GRID_SIMT - typedef decltype(coalescedRead(buf)) sobj; - sobj sp; - auto sin = coalescedRead(in); - projector::Proj(sp,sin,mu,dag); - coalescedWrite(buf,sp); -#else - projector::Proj(buf,in,mu,dag); -#endif - } - - /*****************************************************/ - /* Exchange includes precision change if mpi data is not same */ - /*****************************************************/ - accelerator_inline void Exchange(SiteHalfSpinor *mp, - SiteHalfSpinor *vp0, - SiteHalfSpinor *vp1, - Integer type,Integer o) const { - 
SiteHalfSpinor vt0,vt1; - SiteHalfCommSpinor *vpp0 = (SiteHalfCommSpinor *)vp0; - SiteHalfCommSpinor *vpp1 = (SiteHalfCommSpinor *)vp1; - precisionChange((vComplexHigh *)&vt0,(vComplexLow *)&vpp0[o],Nw); - precisionChange((vComplexHigh *)&vt1,(vComplexLow *)&vpp1[o],Nw); - exchange(mp[2*o],mp[2*o+1],vt0,vt1,type); - } - - /*****************************************************/ - /* Have a decompression step if mpi data is not same */ - /*****************************************************/ - accelerator_inline void Decompress(SiteHalfSpinor *out, SiteHalfSpinor *in, Integer o) const { - SiteHalfCommSpinor *hin=(SiteHalfCommSpinor *)in; - precisionChange((vComplexHigh *)&out[o],(vComplexLow *)&hin[o],Nw); - } - - /*****************************************************/ - /* Compress Exchange */ - /*****************************************************/ - accelerator_inline void CompressExchange(SiteHalfSpinor *out0, - SiteHalfSpinor *out1, - const SiteSpinor *in, - Integer j,Integer k, Integer m,Integer type) const { - SiteHalfSpinor temp1, temp2,temp3,temp4; - SiteHalfCommSpinor *hout0 = (SiteHalfCommSpinor *)out0; - SiteHalfCommSpinor *hout1 = (SiteHalfCommSpinor *)out1; - projector::Proj(temp1,in[k],mu,dag); - projector::Proj(temp2,in[m],mu,dag); - exchange(temp3,temp4,temp1,temp2,type); - precisionChange((vComplexLow *)&hout0[j],(vComplexHigh *)&temp3,Nw); - precisionChange((vComplexLow *)&hout1[j],(vComplexHigh *)&temp4,Nw); - } - - /*****************************************************/ - /* Pass the info to the stencil */ - /*****************************************************/ - accelerator_inline bool DecompressionStep(void) const { return true; } - -}; -#endif - #define DECLARE_PROJ(Projector,Compressor,spProj) \ class Projector { \ public: \ @@ -294,11 +404,7 @@ public: typedef typename Base::View_type View_type; typedef typename Base::StencilVector StencilVector; - void ZeroCountersi(void) { } - void Reporti(int calls) { } - - std::vector surface_list; - + // Vector surface_list; WilsonStencil(GridBase *grid, int npoints, int checkerboard, @@ -306,11 +412,11 @@ public: const std::vector &distances,Parameters p) : CartesianStencil (grid,npoints,checkerboard,directions,distances,p) { - ZeroCountersi(); - surface_list.resize(0); + // surface_list.resize(0); this->same_node.resize(npoints); }; + /* void BuildSurfaceList(int Ls,int vol4){ // find same node for SHM @@ -331,7 +437,8 @@ public: } } } - + */ + template < class compressor> void HaloExchangeOpt(const Lattice &source,compressor &compress) { @@ -377,24 +484,26 @@ public: int dag = compress.dag; int face_idx=0; +#define vet_same_node(a,b) \ + { auto tmp = b; } if ( dag ) { - assert(this->same_node[Xp]==this->HaloGatherDir(source,XpCompress,Xp,face_idx)); - assert(this->same_node[Yp]==this->HaloGatherDir(source,YpCompress,Yp,face_idx)); - assert(this->same_node[Zp]==this->HaloGatherDir(source,ZpCompress,Zp,face_idx)); - assert(this->same_node[Tp]==this->HaloGatherDir(source,TpCompress,Tp,face_idx)); - assert(this->same_node[Xm]==this->HaloGatherDir(source,XmCompress,Xm,face_idx)); - assert(this->same_node[Ym]==this->HaloGatherDir(source,YmCompress,Ym,face_idx)); - assert(this->same_node[Zm]==this->HaloGatherDir(source,ZmCompress,Zm,face_idx)); - assert(this->same_node[Tm]==this->HaloGatherDir(source,TmCompress,Tm,face_idx)); + vet_same_node(this->same_node[Xp],this->HaloGatherDir(source,XpCompress,Xp,face_idx)); + vet_same_node(this->same_node[Yp],this->HaloGatherDir(source,YpCompress,Yp,face_idx)); + 
vet_same_node(this->same_node[Zp],this->HaloGatherDir(source,ZpCompress,Zp,face_idx)); + vet_same_node(this->same_node[Tp],this->HaloGatherDir(source,TpCompress,Tp,face_idx)); + vet_same_node(this->same_node[Xm],this->HaloGatherDir(source,XmCompress,Xm,face_idx)); + vet_same_node(this->same_node[Ym],this->HaloGatherDir(source,YmCompress,Ym,face_idx)); + vet_same_node(this->same_node[Zm],this->HaloGatherDir(source,ZmCompress,Zm,face_idx)); + vet_same_node(this->same_node[Tm],this->HaloGatherDir(source,TmCompress,Tm,face_idx)); } else { - assert(this->same_node[Xp]==this->HaloGatherDir(source,XmCompress,Xp,face_idx)); - assert(this->same_node[Yp]==this->HaloGatherDir(source,YmCompress,Yp,face_idx)); - assert(this->same_node[Zp]==this->HaloGatherDir(source,ZmCompress,Zp,face_idx)); - assert(this->same_node[Tp]==this->HaloGatherDir(source,TmCompress,Tp,face_idx)); - assert(this->same_node[Xm]==this->HaloGatherDir(source,XpCompress,Xm,face_idx)); - assert(this->same_node[Ym]==this->HaloGatherDir(source,YpCompress,Ym,face_idx)); - assert(this->same_node[Zm]==this->HaloGatherDir(source,ZpCompress,Zm,face_idx)); - assert(this->same_node[Tm]==this->HaloGatherDir(source,TpCompress,Tm,face_idx)); + vet_same_node(this->same_node[Xp],this->HaloGatherDir(source,XmCompress,Xp,face_idx)); + vet_same_node(this->same_node[Yp],this->HaloGatherDir(source,YmCompress,Yp,face_idx)); + vet_same_node(this->same_node[Zp],this->HaloGatherDir(source,ZmCompress,Zp,face_idx)); + vet_same_node(this->same_node[Tp],this->HaloGatherDir(source,TmCompress,Tp,face_idx)); + vet_same_node(this->same_node[Xm],this->HaloGatherDir(source,XpCompress,Xm,face_idx)); + vet_same_node(this->same_node[Ym],this->HaloGatherDir(source,YpCompress,Ym,face_idx)); + vet_same_node(this->same_node[Zm],this->HaloGatherDir(source,ZpCompress,Zm,face_idx)); + vet_same_node(this->same_node[Tm],this->HaloGatherDir(source,TpCompress,Tm,face_idx)); } this->face_table_computed=1; assert(this->u_comm_offset==this->_unified_buffer_size); diff --git a/Grid/qcd/action/fermion/WilsonFermion.h b/Grid/qcd/action/fermion/WilsonFermion.h index bf8926d0..a7a1bb69 100644 --- a/Grid/qcd/action/fermion/WilsonFermion.h +++ b/Grid/qcd/action/fermion/WilsonFermion.h @@ -74,20 +74,6 @@ public: FermionField _tmp; FermionField &tmp(void) { return _tmp; } - void Report(void); - void ZeroCounters(void); - double DhopCalls; - double DhopCommTime; - double DhopComputeTime; - double DhopComputeTime2; - double DhopFaceTime; - double DhopTotalTime; - - double DerivCalls; - double DerivCommTime; - double DerivComputeTime; - double DerivDhopComputeTime; - ////////////////////////////////////////////////////////////////// // override multiply; cut number routines if pass dagger argument // and also make interface more uniformly consistent diff --git a/Grid/qcd/action/fermion/WilsonFermion5D.h b/Grid/qcd/action/fermion/WilsonFermion5D.h index 80231bb4..0b07d320 100644 --- a/Grid/qcd/action/fermion/WilsonFermion5D.h +++ b/Grid/qcd/action/fermion/WilsonFermion5D.h @@ -75,19 +75,8 @@ public: FermionField _tmp; FermionField &tmp(void) { return _tmp; } - void Report(void); - void ZeroCounters(void); - double DhopCalls; - double DhopCommTime; - double DhopComputeTime; - double DhopComputeTime2; - double DhopFaceTime; - double DhopTotalTime; - - double DerivCalls; - double DerivCommTime; - double DerivComputeTime; - double DerivDhopComputeTime; + int Dirichlet; + Coordinate Block; /////////////////////////////////////////////////////////////// // Implement the abstract base @@ -173,7 +162,10 
@@ public: GridCartesian &FourDimGrid, GridRedBlackCartesian &FourDimRedBlackGrid, double _M5,const ImplParams &p= ImplParams()); - + + virtual void DirichletBlock(const Coordinate & block) + { + } // Constructors /* WilsonFermion5D(int simd, diff --git a/Grid/qcd/action/fermion/WilsonImpl.h b/Grid/qcd/action/fermion/WilsonImpl.h index 2685796d..c7180115 100644 --- a/Grid/qcd/action/fermion/WilsonImpl.h +++ b/Grid/qcd/action/fermion/WilsonImpl.h @@ -37,7 +37,7 @@ NAMESPACE_BEGIN(Grid); template class WilsonImpl : public PeriodicGaugeImpl > { public: - + static const int Dimension = Representation::Dimension; static const bool isFundamental = Representation::isFundamental; static const bool LsVectorised=false; @@ -242,19 +242,13 @@ public: typedef WilsonImpl WilsonImplR; // Real.. whichever prec typedef WilsonImpl WilsonImplF; // Float typedef WilsonImpl WilsonImplD; // Double - -//typedef WilsonImpl WilsonImplRL; // Real.. whichever prec -//typedef WilsonImpl WilsonImplFH; // Float -//typedef WilsonImpl WilsonImplDF; // Double +typedef WilsonImpl WilsonImplD2; // Double typedef WilsonImpl ZWilsonImplR; // Real.. whichever prec typedef WilsonImpl ZWilsonImplF; // Float typedef WilsonImpl ZWilsonImplD; // Double +typedef WilsonImpl ZWilsonImplD2; // Double -//typedef WilsonImpl ZWilsonImplRL; // Real.. whichever prec -//typedef WilsonImpl ZWilsonImplFH; // Float -//typedef WilsonImpl ZWilsonImplDF; // Double - typedef WilsonImpl WilsonAdjImplR; // Real.. whichever prec typedef WilsonImpl WilsonAdjImplF; // Float typedef WilsonImpl WilsonAdjImplD; // Double diff --git a/Grid/qcd/action/fermion/WilsonKernels.h b/Grid/qcd/action/fermion/WilsonKernels.h index 68422f28..2d868c27 100644 --- a/Grid/qcd/action/fermion/WilsonKernels.h +++ b/Grid/qcd/action/fermion/WilsonKernels.h @@ -52,13 +52,6 @@ public: typedef AcceleratorVector StencilVector; public: -#ifdef GRID_SYCL -#define SYCL_HACK -#endif -#ifdef SYCL_HACK - static void HandDhopSiteSycl(StencilVector st_perm,StencilEntry *st_p, SiteDoubledGaugeField *U,SiteHalfSpinor *buf, - int ss,int sU,const SiteSpinor *in, SiteSpinor *out); -#endif static void DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf, int Ls, int Nsite, const FermionField &in, FermionField &out, diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h index 51a7990c..2b8a3a18 100644 --- a/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h @@ -152,58 +152,6 @@ void CayleyFermion5D::DminusDag(const FermionField &psi, FermionField &chi } } -template void CayleyFermion5D::CayleyReport(void) -{ - this->Report(); - Coordinate latt = GridDefaultLatt(); - RealD volume = this->Ls; for(int mu=0;mu_FourDimGrid->_Nprocessors; - if ( M5Dcalls > 0 ) { - std::cout << GridLogMessage << "#### M5D calls report " << std::endl; - std::cout << GridLogMessage << "CayleyFermion5D Number of M5D Calls : " << M5Dcalls << std::endl; - std::cout << GridLogMessage << "CayleyFermion5D ComputeTime/Calls : " << M5Dtime / M5Dcalls << " us" << std::endl; - - // Flops = 10.0*(Nc*Ns) *Ls*vol - RealD mflops = 10.0*(Nc*Ns)*volume*M5Dcalls/M5Dtime/2; // 2 for red black counting - std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; - - // Bytes = sizeof(Real) * 
(Nc*Ns*Nreim) * Ls * vol * (read+write) (/2 for red black counting) - // read = 2 ( psi[ss+s+1] and psi[ss+s-1] count as 1 ) - // write = 1 - RealD Gbytes = sizeof(Real) * (Nc*Ns*2) * volume * 3 /2. * 1.e-9; - std::cout << GridLogMessage << "Average bandwidth (GB/s) : " << Gbytes/M5Dtime*M5Dcalls*1.e6 << std::endl; - } - - if ( MooeeInvCalls > 0 ) { - - std::cout << GridLogMessage << "#### MooeeInv calls report " << std::endl; - std::cout << GridLogMessage << "CayleyFermion5D Number of MooeeInv Calls : " << MooeeInvCalls << std::endl; - std::cout << GridLogMessage << "CayleyFermion5D ComputeTime/Calls : " << MooeeInvTime / MooeeInvCalls << " us" << std::endl; -#ifdef GRID_CUDA - RealD mflops = ( -16.*Nc*Ns+this->Ls*(1.+18.*Nc*Ns) )*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black counting - std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; -#else - // Flops = MADD * Ls *Ls *4dvol * spin/colour/complex - RealD mflops = 2.0*24*this->Ls*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black counting - std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; -#endif - } - -} -template void CayleyFermion5D::CayleyZeroCounters(void) -{ - this->ZeroCounters(); - M5Dflops=0; - M5Dcalls=0; - M5Dtime=0; - MooeeInvFlops=0; - MooeeInvCalls=0; - MooeeInvTime=0; -} - template void CayleyFermion5D::M5D (const FermionField &psi, FermionField &chi) { @@ -646,7 +594,6 @@ void CayleyFermion5D::ContractConservedCurrent( PropagatorField &q_in_1, assert(mass_plus == mass_minus); RealD mass = mass_plus; -#if (!defined(GRID_HIP)) Gamma::Algebra Gmu [] = { Gamma::Algebra::GammaX, Gamma::Algebra::GammaY, @@ -765,7 +712,7 @@ void CayleyFermion5D::ContractConservedCurrent( PropagatorField &q_in_1, else q_out += C; } -#endif + } template @@ -832,7 +779,6 @@ void CayleyFermion5D::SeqConservedCurrent(PropagatorField &q_in, } #endif -#if (!defined(GRID_HIP)) int tshift = (mu == Nd-1) ? 
1 : 0; unsigned int LLt = GridDefaultLatt()[Tp]; //////////////////////////////////////////////// @@ -952,7 +898,6 @@ void CayleyFermion5D::SeqConservedCurrent(PropagatorField &q_in, InsertSlice(L_Q, q_out, s , 0); } -#endif } #undef Pp #undef Pm @@ -960,88 +905,6 @@ void CayleyFermion5D::SeqConservedCurrent(PropagatorField &q_in, #undef TopRowWithSource - -#if 0 -template -void CayleyFermion5D::MooeeInternalCompute(int dag, int inv, - Vector > & Matp, - Vector > & Matm) -{ - int Ls=this->Ls; - - GridBase *grid = this->FermionRedBlackGrid(); - int LLs = grid->_rdimensions[0]; - - if ( LLs == Ls ) { - return; // Not vectorised in 5th direction - } - - Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls); - Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls); - - for(int s=0;s::iscomplex() ) { - sp[l] = PplusMat (l*istride+s1*ostride,s2); - sm[l] = PminusMat(l*istride+s1*ostride,s2); - } else { - // if real - scalar_type tmp; - tmp = PplusMat (l*istride+s1*ostride,s2); - sp[l] = scalar_type(tmp.real(),tmp.real()); - tmp = PminusMat(l*istride+s1*ostride,s2); - sm[l] = scalar_type(tmp.real(),tmp.real()); - } - } - Matp[LLs*s2+s1] = Vp; - Matm[LLs*s2+s1] = Vm; - }} -} -#endif - NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h index d2537ccf..0d2516c4 100644 --- a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h @@ -63,23 +63,18 @@ CayleyFermion5D::M5D(const FermionField &psi_i, // 10 = 3 complex mult + 2 complex add // Flops = 10.0*(Nc*Ns) *Ls*vol (/2 for red black counting) - M5Dcalls++; - M5Dtime-=usecond(); - - uint64_t nloop = grid->oSites()/Ls; + uint64_t nloop = grid->oSites(); accelerator_for(sss,nloop,Simd::Nsimd(),{ - uint64_t ss= sss*Ls; + uint64_t s = sss%Ls; + uint64_t ss= sss-s; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp1, tmp2; - for(int s=0;s @@ -105,23 +100,18 @@ CayleyFermion5D::M5Ddag(const FermionField &psi_i, int Ls=this->Ls; // Flops = 6.0*(Nc*Ns) *Ls*vol - M5Dcalls++; - M5Dtime-=usecond(); - - uint64_t nloop = grid->oSites()/Ls; + uint64_t nloop = grid->oSites(); accelerator_for(sss,nloop,Simd::Nsimd(),{ - uint64_t ss=sss*Ls; + uint64_t s = sss%Ls; + uint64_t ss= sss-s; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp1,tmp2; - for(int s=0;s @@ -142,8 +132,6 @@ CayleyFermion5D::MooeeInv (const FermionField &psi_i, FermionField &chi auto pleem = & leem[0]; auto pueem = & ueem[0]; - MooeeInvCalls++; - MooeeInvTime-=usecond(); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss=sss*Ls; @@ -180,8 +168,6 @@ CayleyFermion5D::MooeeInv (const FermionField &psi_i, FermionField &chi coalescedWrite(chi[ss+s],res); } }); - - MooeeInvTime+=usecond(); } @@ -204,10 +190,6 @@ CayleyFermion5D::MooeeInvDag (const FermionField &psi_i, FermionField &chi assert(psi.Checkerboard() == psi.Checkerboard()); - MooeeInvCalls++; - MooeeInvTime-=usecond(); - - uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss=sss*Ls; @@ -244,7 +226,6 @@ CayleyFermion5D::MooeeInvDag (const FermionField &psi_i, FermionField &chi coalescedWrite(chi[ss+s],res); } }); - MooeeInvTime+=usecond(); } diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h index b54f63ad..e3bf67db 100644 --- a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h 
+++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h @@ -94,10 +94,6 @@ CayleyFermion5D::M5D(const FermionField &psi_i, d_p[ss] = diag[s]; }} - - M5Dcalls++; - M5Dtime-=usecond(); - assert(Nc==3); thread_loop( (int ss=0;ssoSites();ss+=LLs),{ // adds LLs @@ -198,7 +194,6 @@ CayleyFermion5D::M5D(const FermionField &psi_i, } #endif }); - M5Dtime+=usecond(); } template @@ -242,8 +237,6 @@ CayleyFermion5D::M5Ddag(const FermionField &psi_i, d_p[ss] = diag[s]; }} - M5Dcalls++; - M5Dtime-=usecond(); thread_loop( (int ss=0;ssoSites();ss+=LLs),{ // adds LLs #if 0 alignas(64) SiteHalfSpinor hp; @@ -339,7 +332,6 @@ CayleyFermion5D::M5Ddag(const FermionField &psi_i, } #endif }); - M5Dtime+=usecond(); } @@ -813,9 +805,6 @@ CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi, } assert(_Matp->size()==Ls*LLs); - MooeeInvCalls++; - MooeeInvTime-=usecond(); - if ( switcheroo::iscomplex() ) { thread_loop( (auto site=0;site::MooeeInternal(const FermionField &psi, FermionField &chi, MooeeInternalAsm(psi,chi,LLs,site,*_Matp,*_Matm); }); } - MooeeInvTime+=usecond(); + } NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h b/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h index 9a8454ef..6b8336cc 100644 --- a/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h +++ b/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h @@ -54,8 +54,6 @@ void DomainWallEOFAFermion::M5D(const FermionField& psi_i, const FermionFi auto pupper = &upper[0]; auto plower = &lower[0]; // Flops = 6.0*(Nc*Ns) *Ls*vol - this->M5Dcalls++; - this->M5Dtime -= usecond(); auto nloop=grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ @@ -71,7 +69,6 @@ void DomainWallEOFAFermion::M5D(const FermionField& psi_i, const FermionFi } }); - this->M5Dtime += usecond(); } template @@ -91,8 +88,6 @@ void DomainWallEOFAFermion::M5Ddag(const FermionField& psi_i, const Fermio auto plower = &lower[0]; // Flops = 6.0*(Nc*Ns) *Ls*vol - this->M5Dcalls++; - this->M5Dtime -= usecond(); auto nloop=grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ @@ -108,7 +103,6 @@ void DomainWallEOFAFermion::M5Ddag(const FermionField& psi_i, const Fermio } }); - this->M5Dtime += usecond(); } template @@ -127,8 +121,6 @@ void DomainWallEOFAFermion::MooeeInv(const FermionField& psi_i, FermionFie auto pleem = & this->leem[0]; auto pueem = & this->ueem[0]; - this->MooeeInvCalls++; - this->MooeeInvTime -= usecond(); uint64_t nloop=grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss=sss*Ls; @@ -164,7 +156,6 @@ void DomainWallEOFAFermion::MooeeInv(const FermionField& psi_i, FermionFie coalescedWrite(chi[ss+s],res); } }); - this->MooeeInvTime += usecond(); } template @@ -185,8 +176,6 @@ void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi_i, Fermion assert(psi.Checkerboard() == psi.Checkerboard()); - this->MooeeInvCalls++; - this->MooeeInvTime -= usecond(); auto nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss=sss*Ls; @@ -223,7 +212,6 @@ void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi_i, Fermion } }); - this->MooeeInvTime += usecond(); } NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h index 888691c4..d235abbb 100644 --- a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h +++ 
b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h @@ -298,45 +298,33 @@ void ImprovedStaggeredFermion5D::DhopInternalOverlappedComms(StencilImpl & int LLs = in.Grid()->_rdimensions[0]; int len = U.Grid()->oSites(); - DhopFaceTime-=usecond(); st.Prepare(); st.HaloGather(in,compressor); - DhopFaceTime+=usecond(); - DhopCommTime -=usecond(); std::vector > requests; st.CommunicateBegin(requests); // st.HaloExchangeOptGather(in,compressor); // Wilson compressor - DhopFaceTime-=usecond(); st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms - DhopFaceTime+=usecond(); ////////////////////////////////////////////////////////////////////////////////////////////////////// // Remove explicit thread mapping introduced for OPA reasons. ////////////////////////////////////////////////////////////////////////////////////////////////////// - DhopComputeTime-=usecond(); { int interior=1; int exterior=0; Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior); } - DhopComputeTime+=usecond(); - DhopFaceTime-=usecond(); st.CommsMerge(compressor); - DhopFaceTime+=usecond(); st.CommunicateComplete(requests); - DhopCommTime +=usecond(); - DhopComputeTime2-=usecond(); { int interior=0; int exterior=1; Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior); } - DhopComputeTime2+=usecond(); } template @@ -347,22 +335,14 @@ void ImprovedStaggeredFermion5D::DhopInternalSerialComms(StencilImpl & st, Compressor compressor; int LLs = in.Grid()->_rdimensions[0]; - //double t1=usecond(); - DhopTotalTime -= usecond(); - DhopCommTime -= usecond(); st.HaloExchange(in,compressor); - DhopCommTime += usecond(); - DhopComputeTime -= usecond(); // Dhop takes the 4d grid from U, and makes a 5d index for fermion { int interior=1; int exterior=1; Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior); } - DhopComputeTime += usecond(); - DhopTotalTime += usecond(); - } /*CHANGE END*/ @@ -371,7 +351,6 @@ void ImprovedStaggeredFermion5D::DhopInternalSerialComms(StencilImpl & st, template void ImprovedStaggeredFermion5D::DhopOE(const FermionField &in, FermionField &out,int dag) { - DhopCalls+=1; conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid conformable(in.Grid(),out.Grid()); // drops the cb check @@ -383,7 +362,6 @@ void ImprovedStaggeredFermion5D::DhopOE(const FermionField &in, FermionFie template void ImprovedStaggeredFermion5D::DhopEO(const FermionField &in, FermionField &out,int dag) { - DhopCalls+=1; conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid conformable(in.Grid(),out.Grid()); // drops the cb check @@ -395,7 +373,6 @@ void ImprovedStaggeredFermion5D::DhopEO(const FermionField &in, FermionFie template void ImprovedStaggeredFermion5D::Dhop(const FermionField &in, FermionField &out,int dag) { - DhopCalls+=2; conformable(in.Grid(),FermionGrid()); // verifies full grid conformable(in.Grid(),out.Grid()); @@ -404,58 +381,6 @@ void ImprovedStaggeredFermion5D::Dhop(const FermionField &in, FermionField DhopInternal(Stencil,Lebesgue,Umu,UUUmu,in,out,dag); } -template -void ImprovedStaggeredFermion5D::Report(void) -{ - Coordinate latt = GridDefaultLatt(); - RealD volume = Ls; for(int mu=0;mu_Nprocessors; - RealD NN = _FourDimGrid->NodeCount(); - - std::cout << GridLogMessage << "#### Dhop calls report " << std::endl; - - std::cout << GridLogMessage << "ImprovedStaggeredFermion5D Number of DhopEO Calls : " - << DhopCalls << std::endl; - std::cout << GridLogMessage << "ImprovedStaggeredFermion5D 
TotalTime /Calls : " - << DhopTotalTime / DhopCalls << " us" << std::endl; - std::cout << GridLogMessage << "ImprovedStaggeredFermion5D CommTime /Calls : " - << DhopCommTime / DhopCalls << " us" << std::endl; - std::cout << GridLogMessage << "ImprovedStaggeredFermion5D ComputeTime/Calls : " - << DhopComputeTime / DhopCalls << " us" << std::endl; - - // Average the compute time - _FourDimGrid->GlobalSum(DhopComputeTime); - DhopComputeTime/=NP; - - RealD mflops = 1154*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting - std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl; - - RealD Fullmflops = 1154*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting - std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl; - - std::cout << GridLogMessage << "ImprovedStaggeredFermion5D Stencil" < -void ImprovedStaggeredFermion5D::ZeroCounters(void) -{ - DhopCalls = 0; - DhopTotalTime = 0; - DhopCommTime = 0; - DhopComputeTime = 0; - DhopFaceTime = 0; - - - Stencil.ZeroCounters(); - StencilEven.ZeroCounters(); - StencilOdd.ZeroCounters(); -} - ///////////////////////////////////////////////////////////////////////// // Implement the general interface. Here we use SAME mass on all slices ///////////////////////////////////////////////////////////////////////// diff --git a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h index 05d9a17e..4c80a1d5 100644 --- a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h @@ -334,7 +334,6 @@ void ImprovedStaggeredFermion::DhopDerivEO(GaugeField &mat, const FermionF template void ImprovedStaggeredFermion::Dhop(const FermionField &in, FermionField &out, int dag) { - DhopCalls+=2; conformable(in.Grid(), _grid); // verifies full grid conformable(in.Grid(), out.Grid()); @@ -346,7 +345,6 @@ void ImprovedStaggeredFermion::Dhop(const FermionField &in, FermionField & template void ImprovedStaggeredFermion::DhopOE(const FermionField &in, FermionField &out, int dag) { - DhopCalls+=1; conformable(in.Grid(), _cbgrid); // verifies half grid conformable(in.Grid(), out.Grid()); // drops the cb check @@ -359,7 +357,6 @@ void ImprovedStaggeredFermion::DhopOE(const FermionField &in, FermionField template void ImprovedStaggeredFermion::DhopEO(const FermionField &in, FermionField &out, int dag) { - DhopCalls+=1; conformable(in.Grid(), _cbgrid); // verifies half grid conformable(in.Grid(), out.Grid()); // drops the cb check @@ -418,47 +415,33 @@ void ImprovedStaggeredFermion::DhopInternalOverlappedComms(StencilImpl &st Compressor compressor; int len = U.Grid()->oSites(); - DhopTotalTime -= usecond(); - - DhopFaceTime -= usecond(); st.Prepare(); st.HaloGather(in,compressor); - DhopFaceTime += usecond(); - DhopCommTime -=usecond(); std::vector > requests; st.CommunicateBegin(requests); - DhopFaceTime-=usecond(); st.CommsMergeSHM(compressor); - DhopFaceTime+= 
usecond(); ////////////////////////////////////////////////////////////////////////////////////////////////////// // Removed explicit thread comms ////////////////////////////////////////////////////////////////////////////////////////////////////// - DhopComputeTime -= usecond(); { int interior=1; int exterior=0; Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior); } - DhopComputeTime += usecond(); st.CommunicateComplete(requests); - DhopCommTime +=usecond(); // First to enter, last to leave timing - DhopFaceTime -= usecond(); st.CommsMerge(compressor); - DhopFaceTime -= usecond(); - DhopComputeTime2 -= usecond(); { int interior=0; int exterior=1; Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior); } - DhopComputeTime2 += usecond(); } @@ -471,78 +454,16 @@ void ImprovedStaggeredFermion::DhopInternalSerialComms(StencilImpl &st, Le { assert((dag == DaggerNo) || (dag == DaggerYes)); - DhopTotalTime -= usecond(); - - DhopCommTime -= usecond(); Compressor compressor; st.HaloExchange(in, compressor); - DhopCommTime += usecond(); - DhopComputeTime -= usecond(); { int interior=1; int exterior=1; Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior); } - DhopComputeTime += usecond(); - DhopTotalTime += usecond(); }; - //////////////////////////////////////////////////////////////// - // Reporting - //////////////////////////////////////////////////////////////// -template -void ImprovedStaggeredFermion::Report(void) -{ - Coordinate latt = _grid->GlobalDimensions(); - RealD volume = 1; for(int mu=0;mu_Nprocessors; - RealD NN = _grid->NodeCount(); - - std::cout << GridLogMessage << "#### Dhop calls report " << std::endl; - - std::cout << GridLogMessage << "ImprovedStaggeredFermion Number of DhopEO Calls : " - << DhopCalls << std::endl; - std::cout << GridLogMessage << "ImprovedStaggeredFermion TotalTime /Calls : " - << DhopTotalTime / DhopCalls << " us" << std::endl; - std::cout << GridLogMessage << "ImprovedStaggeredFermion CommTime /Calls : " - << DhopCommTime / DhopCalls << " us" << std::endl; - std::cout << GridLogMessage << "ImprovedStaggeredFermion ComputeTime/Calls : " - << DhopComputeTime / DhopCalls << " us" << std::endl; - - // Average the compute time - _grid->GlobalSum(DhopComputeTime); - DhopComputeTime/=NP; - - RealD mflops = 1154*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting - std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl; - - RealD Fullmflops = 1154*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting - std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl; - - std::cout << GridLogMessage << "ImprovedStaggeredFermion Stencil" < -void ImprovedStaggeredFermion::ZeroCounters(void) -{ - DhopCalls = 0; - DhopTotalTime = 0; - DhopCommTime = 0; - DhopComputeTime = 0; - DhopFaceTime = 0; - - Stencil.ZeroCounters(); - StencilEven.ZeroCounters(); - StencilOdd.ZeroCounters(); -} - - //////////////////////////////////////////////////////// // Conserved current - not yet implemented. 
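// With Report()/ZeroCounters() gone, per-call rates are left to the call
// site. A minimal sketch of equivalent external timing (assumes an existing
// ImprovedStaggeredFermion `Ds`, checkerboarded fields `src`/`res`, and a
// benchmark-defined `ncall` and site count `volume`; the 1154 flops/site and
// the /2 red-black factor are taken from the deleted Report()):
//
//   double t0 = usecond();
//   for (int i = 0; i < ncall; i++) Ds.DhopOE(src, res, DaggerNo);
//   double t1 = usecond();
//   RealD mflops = 1154.0 * volume * ncall / (t1 - t0) / 2.0; // time in us => Mflop/s
//   std::cout << GridLogMessage << "Dhop mflop/s : " << mflops << std::endl;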
//////////////////////////////////////////////////////// diff --git a/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h b/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h index 41b9170d..617a18df 100644 --- a/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h +++ b/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h @@ -55,9 +55,6 @@ void MobiusEOFAFermion::M5D(const FermionField &psi_i, const FermionField auto plower = &lower[0]; // Flops = 6.0*(Nc*Ns) *Ls*vol - this->M5Dcalls++; - this->M5Dtime -= usecond(); - int nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss = sss*Ls; @@ -73,7 +70,6 @@ void MobiusEOFAFermion::M5D(const FermionField &psi_i, const FermionField } }); - this->M5Dtime += usecond(); } template @@ -99,9 +95,6 @@ void MobiusEOFAFermion::M5D_shift(const FermionField &psi_i, const Fermion auto pshift_coeffs = &shift_coeffs[0]; // Flops = 6.0*(Nc*Ns) *Ls*vol - this->M5Dcalls++; - this->M5Dtime -= usecond(); - int nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss = sss*Ls; @@ -122,7 +115,6 @@ void MobiusEOFAFermion::M5D_shift(const FermionField &psi_i, const Fermion } }); - this->M5Dtime += usecond(); } template @@ -143,9 +135,6 @@ void MobiusEOFAFermion::M5Ddag(const FermionField &psi_i, const FermionFie auto plower = &lower[0]; // Flops = 6.0*(Nc*Ns) *Ls*vol - this->M5Dcalls++; - this->M5Dtime -= usecond(); - int nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(), { uint64_t ss = sss*Ls; @@ -161,8 +150,6 @@ void MobiusEOFAFermion::M5Ddag(const FermionField &psi_i, const FermionFie coalescedWrite(chi[ss+s], pdiag[s]*phi(ss+s) + pupper[s]*tmp1 + plower[s]*tmp2); } }); - - this->M5Dtime += usecond(); } template @@ -186,9 +173,6 @@ void MobiusEOFAFermion::M5Ddag_shift(const FermionField &psi_i, const Ferm auto pshift_coeffs = &shift_coeffs[0]; // Flops = 6.0*(Nc*Ns) *Ls*vol - this->M5Dcalls++; - this->M5Dtime -= usecond(); - auto pm = this->pm; int nloop = grid->oSites()/Ls; @@ -217,7 +201,6 @@ void MobiusEOFAFermion::M5Ddag_shift(const FermionField &psi_i, const Ferm } }); - this->M5Dtime += usecond(); } template @@ -237,9 +220,6 @@ void MobiusEOFAFermion::MooeeInv(const FermionField &psi_i, FermionField & if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; } - this->MooeeInvCalls++; - this->MooeeInvTime -= usecond(); - int nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss=sss*Ls; @@ -277,7 +257,6 @@ void MobiusEOFAFermion::MooeeInv(const FermionField &psi_i, FermionField & } }); - this->MooeeInvTime += usecond(); } template @@ -297,8 +276,6 @@ void MobiusEOFAFermion::MooeeInv_shift(const FermionField &psi_i, FermionF auto pueem= & this->ueem[0]; auto pMooeeInv_shift_lc = &MooeeInv_shift_lc[0]; auto pMooeeInv_shift_norm = &MooeeInv_shift_norm[0]; - this->MooeeInvCalls++; - this->MooeeInvTime -= usecond(); int nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ @@ -343,7 +320,6 @@ void MobiusEOFAFermion::MooeeInv_shift(const FermionField &psi_i, FermionF } }); - this->MooeeInvTime += usecond(); } template @@ -363,9 +339,6 @@ void MobiusEOFAFermion::MooeeInvDag(const FermionField &psi_i, FermionFiel auto pleem= & this->leem[0]; auto pueem= & this->ueem[0]; - this->MooeeInvCalls++; - this->MooeeInvTime -= usecond(); - int nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss=sss*Ls; @@ -402,7 +375,6 @@ void MobiusEOFAFermion::MooeeInvDag(const FermionField &psi_i, 
FermionFiel coalescedWrite(chi[ss+s],res); } }); - this->MooeeInvTime += usecond(); } template @@ -423,9 +395,6 @@ void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField &psi_i, Fermi auto pMooeeInvDag_shift_lc = &MooeeInvDag_shift_lc[0]; auto pMooeeInvDag_shift_norm = &MooeeInvDag_shift_norm[0]; - this->MooeeInvCalls++; - this->MooeeInvTime -= usecond(); - int nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss=sss*Ls; @@ -469,7 +438,6 @@ void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField &psi_i, Fermi } }); - this->MooeeInvTime += usecond(); } NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h b/Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h index 788e02cf..bf23d99d 100644 --- a/Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h @@ -263,7 +263,6 @@ void NaiveStaggeredFermion::DhopDerivEO(GaugeField &mat, const FermionFiel template void NaiveStaggeredFermion::Dhop(const FermionField &in, FermionField &out, int dag) { - DhopCalls+=2; conformable(in.Grid(), _grid); // verifies full grid conformable(in.Grid(), out.Grid()); @@ -275,7 +274,6 @@ void NaiveStaggeredFermion::Dhop(const FermionField &in, FermionField &out template void NaiveStaggeredFermion::DhopOE(const FermionField &in, FermionField &out, int dag) { - DhopCalls+=1; conformable(in.Grid(), _cbgrid); // verifies half grid conformable(in.Grid(), out.Grid()); // drops the cb check @@ -288,7 +286,6 @@ void NaiveStaggeredFermion::DhopOE(const FermionField &in, FermionField &o template void NaiveStaggeredFermion::DhopEO(const FermionField &in, FermionField &out, int dag) { - DhopCalls+=1; conformable(in.Grid(), _cbgrid); // verifies half grid conformable(in.Grid(), out.Grid()); // drops the cb check @@ -345,47 +342,33 @@ void NaiveStaggeredFermion::DhopInternalOverlappedComms(StencilImpl &st, L Compressor compressor; int len = U.Grid()->oSites(); - DhopTotalTime -= usecond(); - - DhopFaceTime -= usecond(); st.Prepare(); st.HaloGather(in,compressor); - DhopFaceTime += usecond(); - DhopCommTime -=usecond(); std::vector > requests; st.CommunicateBegin(requests); - DhopFaceTime-=usecond(); st.CommsMergeSHM(compressor); - DhopFaceTime+= usecond(); ////////////////////////////////////////////////////////////////////////////////////////////////////// // Removed explicit thread comms ////////////////////////////////////////////////////////////////////////////////////////////////////// - DhopComputeTime -= usecond(); { int interior=1; int exterior=0; Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior); } - DhopComputeTime += usecond(); st.CommunicateComplete(requests); - DhopCommTime +=usecond(); // First to enter, last to leave timing - DhopFaceTime -= usecond(); st.CommsMerge(compressor); - DhopFaceTime -= usecond(); - DhopComputeTime2 -= usecond(); { int interior=0; int exterior=1; Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior); } - DhopComputeTime2 += usecond(); } template @@ -396,78 +379,16 @@ void NaiveStaggeredFermion::DhopInternalSerialComms(StencilImpl &st, Lebes { assert((dag == DaggerNo) || (dag == DaggerYes)); - DhopTotalTime -= usecond(); - - DhopCommTime -= usecond(); Compressor compressor; st.HaloExchange(in, compressor); - DhopCommTime += usecond(); - DhopComputeTime -= usecond(); { int interior=1; int exterior=1; Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior); } - 
DhopComputeTime += usecond(); - DhopTotalTime += usecond(); }; - //////////////////////////////////////////////////////////////// - // Reporting - //////////////////////////////////////////////////////////////// -template -void NaiveStaggeredFermion::Report(void) -{ - Coordinate latt = _grid->GlobalDimensions(); - RealD volume = 1; for(int mu=0;mu_Nprocessors; - RealD NN = _grid->NodeCount(); - - std::cout << GridLogMessage << "#### Dhop calls report " << std::endl; - - std::cout << GridLogMessage << "NaiveStaggeredFermion Number of DhopEO Calls : " - << DhopCalls << std::endl; - std::cout << GridLogMessage << "NaiveStaggeredFermion TotalTime /Calls : " - << DhopTotalTime / DhopCalls << " us" << std::endl; - std::cout << GridLogMessage << "NaiveStaggeredFermion CommTime /Calls : " - << DhopCommTime / DhopCalls << " us" << std::endl; - std::cout << GridLogMessage << "NaiveStaggeredFermion ComputeTime/Calls : " - << DhopComputeTime / DhopCalls << " us" << std::endl; - - // Average the compute time - _grid->GlobalSum(DhopComputeTime); - DhopComputeTime/=NP; - - RealD mflops = 1154*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting - std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl; - - RealD Fullmflops = 1154*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting - std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl; - - std::cout << GridLogMessage << "NaiveStaggeredFermion Stencil" < -void NaiveStaggeredFermion::ZeroCounters(void) -{ - DhopCalls = 0; - DhopTotalTime = 0; - DhopCommTime = 0; - DhopComputeTime = 0; - DhopFaceTime = 0; - - Stencil.ZeroCounters(); - StencilEven.ZeroCounters(); - StencilOdd.ZeroCounters(); -} - - //////////////////////////////////////////////////////// // Conserved current - not yet implemented. 
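// Stripped of its timers, the overlapped-comms path above reduces to the
// skeleton below (a restatement of the control flow this patch keeps, not a
// new API; st/lo/U/in/out/dag/compressor/requests as in
// DhopInternalOverlappedComms):
//
//   st.Prepare();
//   st.HaloGather(in,compressor);               // pack halo faces
//   st.CommunicateBegin(requests);              // start asynchronous comms
//   st.CommsMergeSHM(compressor);               // intra-node merge, overlapped
//   Kernels::DhopNaive(st,lo,U,in,out,dag,1,0); // interior while comms fly
//   st.CommunicateComplete(requests);
//   st.CommsMerge(compressor);                  // unpack remote faces
//   Kernels::DhopNaive(st,lo,U,in,out,dag,0,1); // exterior sites last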
//////////////////////////////////////////////////////// diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h index 2cc308cc..95af4c38 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h @@ -60,8 +60,13 @@ WilsonFermion5D::WilsonFermion5D(GaugeField &_Umu, UmuOdd (_FourDimRedBlackGrid), Lebesgue(_FourDimGrid), LebesgueEvenOdd(_FourDimRedBlackGrid), - _tmp(&FiveDimRedBlackGrid) + _tmp(&FiveDimRedBlackGrid), + Dirichlet(0) { + Stencil.lo = &Lebesgue; + StencilEven.lo = &LebesgueEvenOdd; + StencilOdd.lo = &LebesgueEvenOdd; + // some assertions assert(FiveDimGrid._ndimension==5); assert(FourDimGrid._ndimension==4); @@ -91,6 +96,19 @@ WilsonFermion5D::WilsonFermion5D(GaugeField &_Umu, assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]); } + if ( p.dirichlet.size() == Nd+1) { + Coordinate block = p.dirichlet; + if ( block[0] || block[1] || block[2] || block[3] || block[4] ){ + Dirichlet = 1; + std::cout << GridLogMessage << " WilsonFermion: non-trivial Dirichlet condition "<< block << std::endl; + std::cout << GridLogMessage << " WilsonFermion: partial Dirichlet "<< p.partialDirichlet << std::endl; + Block = block; + } + } else { + Coordinate block(Nd+1,0); + Block = block; + } + if (Impl::LsVectorised) { int nsimd = Simd::Nsimd(); @@ -125,99 +143,38 @@ WilsonFermion5D::WilsonFermion5D(GaugeField &_Umu, StencilEven.BuildSurfaceList(LLs,vol4); StencilOdd.BuildSurfaceList(LLs,vol4); - // std::cout << GridLogMessage << " SurfaceLists "<< Stencil.surface_list.size() - // <<" " << StencilEven.surface_list.size()< -void WilsonFermion5D::Report(void) -{ - RealD NP = _FourDimGrid->_Nprocessors; - RealD NN = _FourDimGrid->NodeCount(); - RealD volume = Ls; - Coordinate latt = _FourDimGrid->GlobalDimensions(); - for(int mu=0;mu 0 ) { - std::cout << GridLogMessage << "#### Dhop calls report " << std::endl; - std::cout << GridLogMessage << "WilsonFermion5D Number of DhopEO Calls : " << DhopCalls << std::endl; - std::cout << GridLogMessage << "WilsonFermion5D TotalTime /Calls : " << DhopTotalTime / DhopCalls << " us" << std::endl; - std::cout << GridLogMessage << "WilsonFermion5D CommTime /Calls : " << DhopCommTime / DhopCalls << " us" << std::endl; - std::cout << GridLogMessage << "WilsonFermion5D FaceTime /Calls : " << DhopFaceTime / DhopCalls << " us" << std::endl; - std::cout << GridLogMessage << "WilsonFermion5D ComputeTime1/Calls : " << DhopComputeTime / DhopCalls << " us" << std::endl; - std::cout << GridLogMessage << "WilsonFermion5D ComputeTime2/Calls : " << DhopComputeTime2/ DhopCalls << " us" << std::endl; - - // Average the compute time - _FourDimGrid->GlobalSum(DhopComputeTime); - DhopComputeTime/=NP; - RealD mflops = 1344*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting - std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl; - - RealD Fullmflops = 1344*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting - std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl; - 
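// (In the report above, NP is the MPI rank count and NN the node count; the
//  "(full)" rates divide by DhopTotalTime, i.e. comms plus compute, while the
//  plain per-call rates divide by the rank-averaged compute time only.)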
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl;
-
-  }
-
-  if ( DerivCalls > 0 ) {
-    std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl;
-    std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls : " <<DerivCalls<<std::endl;
-  }
-
-  if (DerivCalls > 0 || DhopCalls > 0){
-    std::cout << GridLogMessage << "WilsonFermion5D Stencil" <<std::endl; Stencil.Report();
-  }
-  if ( DhopCalls > 0){
-    std::cout << GridLogMessage << "WilsonFermion5D Stencil Reporti()" <<std::endl; Stencil.Reporti(DhopCalls);
-  }
-}
-
-template<class Impl>
-void WilsonFermion5D<Impl>::ZeroCounters(void) {
-  DhopCalls       = 0;
-  DhopCommTime    = 0;
-  DhopComputeTime = 0;
-  DhopComputeTime2= 0;
-  DhopFaceTime    = 0;
-  DhopTotalTime   = 0;
-
-  DerivCalls       = 0;
-  DerivCommTime    = 0;
-  DerivComputeTime = 0;
-  DerivDhopComputeTime = 0;
-
-  Stencil.ZeroCounters();
-  StencilEven.ZeroCounters();
-  StencilOdd.ZeroCounters();
-  Stencil.ZeroCountersi();
-  StencilEven.ZeroCountersi();
-  StencilOdd.ZeroCountersi();
-}
-
 template<class Impl>
 void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
 {
   GaugeField HUmu(_Umu.Grid());
   HUmu = _Umu*(-0.5);
+  if ( Dirichlet ) {
+
+    if ( this->Params.partialDirichlet ) {
+      std::cout << GridLogMessage << " partialDirichlet BCs " <<Block<<std::endl;
+    }
+    for(int d=0;d<Nd;d++) {
+      int GaugeBlock = Block[d+1];
+      int ldim=GaugeGrid()->LocalDimensions()[d];
+      if (GaugeBlock) assert( (GaugeBlock%ldim)==0);
+    }
+
+    if (!this->Params.partialDirichlet) {
+      std::cout << GridLogMessage << " Dirichlet filtering gauge field BCs block " <<Block<<std::endl;
+      Coordinate GaugeBlock(Nd);
+      for(int d=0;d<Nd;d++) GaugeBlock[d] = Block[d+1];
+      DirichletFilter<GaugeField> Filter(GaugeBlock);
+      Filter.applyFilter(HUmu);
+    } else {
+      std::cout << GridLogMessage << " Dirichlet "<< Dirichlet << " NOT filtered gauge field" <<std::endl;
+    }
+  }
@@ ... @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
   const FermionField &B, int dag)
 {
-  DerivCalls++;
   assert((dag==DaggerNo) ||(dag==DaggerYes));
   conformable(st.Grid(),A.Grid());
@@ -270,15 +226,12 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
   FermionField Btilde(B.Grid());
   FermionField Atilde(B.Grid());
-  DerivCommTime-=usecond();
   st.HaloExchange(B,compressor);
-  DerivCommTime+=usecond();
   Atilde=A;
   int LLs = B.Grid()->_rdimensions[0];
-  DerivComputeTime-=usecond();
   for (int mu = 0; mu < Nd; mu++) {
     ////////////////////////////////////////////////////////////////////////
     // Flip gamma if dag
     ////////////////////////////////////////////////////////////////////////
     int gamma = mu;
@@ -290,8 +243,6 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
     // Call the single hop
     ////////////////////////
-    DerivDhopComputeTime -= usecond();
-
     int Usites = U.Grid()->oSites();
     Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, Usites, B, Btilde, mu,gamma);
@@ -299,10 +250,8 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
     ////////////////////////////
     // spin trace outer product
     ////////////////////////////
-    DerivDhopComputeTime += usecond();
     Impl::InsertForce5D(mat, Btilde, Atilde, mu);
   }
-  DerivComputeTime += usecond();
 }
 
 template<class Impl>
@@ -360,12 +309,10 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
                                          DoubledGaugeField & U,
                                          const FermionField &in, FermionField &out,int dag)
 {
-  DhopTotalTime-=usecond();
   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
     DhopInternalOverlappedComms(st,lo,U,in,out,dag);
   else
     DhopInternalSerialComms(st,lo,U,in,out,dag);
-  DhopTotalTime+=usecond();
 }
 
@@ -374,6 +321,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, LebesgueOrder &lo,
                                                         DoubledGaugeField & U,
                                                         const FermionField &in, FermionField &out,int dag)
 {
+  GRID_TRACE("DhopInternalOverlappedComms");
   Compressor compressor(dag);
   int LLs = in.Grid()->_rdimensions[0];
@@ -382,53 +330,57 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, LebesgueOrder &lo,
   /////////////////////////////
   // Start comms
   // Gather intranode and extra node differentiated??
///////////////////////////// - DhopFaceTime-=usecond(); - st.HaloExchangeOptGather(in,compressor); - DhopFaceTime+=usecond(); - - DhopCommTime -=usecond(); + { + GRID_TRACE("Gather"); + st.HaloExchangeOptGather(in,compressor); // Put the barrier in the routine + } + std::vector > requests; + auto id=traceStart("Communicate overlapped"); st.CommunicateBegin(requests); ///////////////////////////// // Overlap with comms ///////////////////////////// - DhopFaceTime-=usecond(); - st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms - DhopFaceTime+=usecond(); + { + GRID_TRACE("MergeSHM"); + st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms + } ///////////////////////////// // do the compute interior ///////////////////////////// int Opt = WilsonKernelsStatic::Opt; // Why pass this. Kernels should know - DhopComputeTime-=usecond(); if (dag == DaggerYes) { + GRID_TRACE("DhopDagInterior"); Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0); } else { + GRID_TRACE("DhopInterior"); Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0); } - DhopComputeTime+=usecond(); ///////////////////////////// // Complete comms ///////////////////////////// st.CommunicateComplete(requests); - DhopCommTime +=usecond(); + traceStop(id); ///////////////////////////// // do the compute exterior ///////////////////////////// - DhopFaceTime-=usecond(); - st.CommsMerge(compressor); - DhopFaceTime+=usecond(); + { + GRID_TRACE("Merge"); + st.CommsMerge(compressor); + } + - DhopComputeTime2-=usecond(); if (dag == DaggerYes) { + GRID_TRACE("DhopDagExterior"); Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1); } else { + GRID_TRACE("DhopExterior"); Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1); } - DhopComputeTime2+=usecond(); } @@ -438,29 +390,30 @@ void WilsonFermion5D::DhopInternalSerialComms(StencilImpl & st, LebesgueOr const FermionField &in, FermionField &out,int dag) { + GRID_TRACE("DhopInternalSerialComms"); Compressor compressor(dag); int LLs = in.Grid()->_rdimensions[0]; + + { + GRID_TRACE("HaloExchange"); + st.HaloExchangeOpt(in,compressor); + } - DhopCommTime-=usecond(); - st.HaloExchangeOpt(in,compressor); - DhopCommTime+=usecond(); - - DhopComputeTime-=usecond(); int Opt = WilsonKernelsStatic::Opt; if (dag == DaggerYes) { + GRID_TRACE("DhopDag"); Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out); } else { + GRID_TRACE("Dhop"); Kernels::DhopKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out); } - DhopComputeTime+=usecond(); } template void WilsonFermion5D::DhopOE(const FermionField &in, FermionField &out,int dag) { - DhopCalls++; conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid conformable(in.Grid(),out.Grid()); // drops the cb check @@ -472,7 +425,6 @@ void WilsonFermion5D::DhopOE(const FermionField &in, FermionField &out,int template void WilsonFermion5D::DhopEO(const FermionField &in, FermionField &out,int dag) { - DhopCalls++; conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid conformable(in.Grid(),out.Grid()); // drops the cb check @@ -484,7 +436,6 @@ void WilsonFermion5D::DhopEO(const FermionField &in, FermionField &out,int template void WilsonFermion5D::Dhop(const FermionField &in, FermionField &out,int dag) { - DhopCalls+=2; conformable(in.Grid(),FermionGrid()); // verifies full grid conformable(in.Grid(),out.Grid()); @@ -539,12 +490,17 @@ void 
WilsonFermion5D::MomentumSpacePropagatorHt_5d(FermionField &out,const LatComplex sk(_grid); sk = Zero(); LatComplex sk2(_grid); sk2= Zero(); LatComplex W(_grid); W= Zero(); - LatComplex a(_grid); a= Zero(); LatComplex one (_grid); one = ScalComplex(1.0,0.0); LatComplex cosha(_grid); LatComplex kmu(_grid); LatComplex Wea(_grid); LatComplex Wema(_grid); + LatComplex ea(_grid); + LatComplex ema(_grid); + LatComplex eaLs(_grid); + LatComplex emaLs(_grid); + LatComplex ea2Ls(_grid); + LatComplex ema2Ls(_grid); LatComplex sinha(_grid); LatComplex sinhaLs(_grid); LatComplex coshaLs(_grid); @@ -579,39 +535,29 @@ void WilsonFermion5D::MomentumSpacePropagatorHt_5d(FermionField &out,const //////////////////////////////////////////// cosha = (one + W*W + sk) / (abs(W)*2.0); - // FIXME Need a Lattice acosh - - { - autoView(cosha_v,cosha,CpuRead); - autoView(a_v,a,CpuWrite); - for(int idx=0;idx<_grid->lSites();idx++){ - Coordinate lcoor(Nd); - Tcomplex cc; - // RealD sgn; - _grid->LocalIndexToLocalCoor(idx,lcoor); - peekLocalSite(cc,cosha_v,lcoor); - assert((double)real(cc)>=1.0); - assert(fabs((double)imag(cc))<=1.0e-15); - cc = ScalComplex(::acosh(real(cc)),0.0); - pokeLocalSite(cc,a_v,lcoor); - } - } - - Wea = ( exp( a) * abs(W) ); - Wema= ( exp(-a) * abs(W) ); - sinha = 0.5*(exp( a) - exp(-a)); - sinhaLs = 0.5*(exp( a*Ls) - exp(-a*Ls)); - coshaLs = 0.5*(exp( a*Ls) + exp(-a*Ls)); + ea = (cosha + sqrt(cosha*cosha-one)); + ema= (cosha - sqrt(cosha*cosha-one)); + eaLs = pow(ea,Ls); + emaLs= pow(ema,Ls); + ea2Ls = pow(ea,2.0*Ls); + ema2Ls= pow(ema,2.0*Ls); + Wea= abs(W) * ea; + Wema= abs(W) * ema; + // a=log(ea); + + sinha = 0.5*(ea - ema); + sinhaLs = 0.5*(eaLs-emaLs); + coshaLs = 0.5*(eaLs+emaLs); A = one / (abs(W) * sinha * 2.0) * one / (sinhaLs * 2.0); - F = exp( a*Ls) * (one - Wea + (Wema - one) * mass*mass); - F = F + exp(-a*Ls) * (Wema - one + (one - Wea) * mass*mass); + F = eaLs * (one - Wea + (Wema - one) * mass*mass); + F = F + emaLs * (Wema - one + (one - Wea) * mass*mass); F = F - abs(W) * sinha * 4.0 * mass; - Bpp = (A/F) * (exp(-a*Ls*2.0) - one) * (one - Wema) * (one - mass*mass * one); - Bmm = (A/F) * (one - exp(a*Ls*2.0)) * (one - Wea) * (one - mass*mass * one); - App = (A/F) * (exp(-a*Ls*2.0) - one) * exp(-a) * (exp(-a) - abs(W)) * (one - mass*mass * one); - Amm = (A/F) * (one - exp(a*Ls*2.0)) * exp(a) * (exp(a) - abs(W)) * (one - mass*mass * one); + Bpp = (A/F) * (ema2Ls - one) * (one - Wema) * (one - mass*mass * one); + Bmm = (A/F) * (one - ea2Ls) * (one - Wea) * (one - mass*mass * one); + App = (A/F) * (ema2Ls - one) * ema * (ema - abs(W)) * (one - mass*mass * one); + Amm = (A/F) * (one - ea2Ls) * ea * (ea - abs(W)) * (one - mass*mass * one); ABpm = (A/F) * abs(W) * sinha * 2.0 * (one + mass * coshaLs * 2.0 + mass*mass * one); //P+ source, P- source @@ -634,29 +580,29 @@ void WilsonFermion5D::MomentumSpacePropagatorHt_5d(FermionField &out,const buf1_4d = Zero(); ExtractSlice(buf1_4d, PRsource, (tt-1), 0); //G(s,t) - bufR_4d = bufR_4d + A * exp(a*Ls) * exp(-a*f) * signW * buf1_4d + A * exp(-a*Ls) * exp(a*f) * signW * buf1_4d; + bufR_4d = bufR_4d + A * eaLs * pow(ema,f) * signW * buf1_4d + A * emaLs * pow(ea,f) * signW * buf1_4d; //A++*exp(a(s+t)) - bufR_4d = bufR_4d + App * exp(a*ss) * exp(a*tt) * signW * buf1_4d ; + bufR_4d = bufR_4d + App * pow(ea,ss) * pow(ea,tt) * signW * buf1_4d ; //A+-*exp(a(s-t)) - bufR_4d = bufR_4d + ABpm * exp(a*ss) * exp(-a*tt) * signW * buf1_4d ; + bufR_4d = bufR_4d + ABpm * pow(ea,ss) * pow(ema,tt) * signW * buf1_4d ; //A-+*exp(a(-s+t)) - bufR_4d = bufR_4d 
+ ABpm * exp(-a*ss) * exp(a*tt) * signW * buf1_4d ; + bufR_4d = bufR_4d + ABpm * pow(ema,ss) * pow(ea,tt) * signW * buf1_4d ; //A--*exp(a(-s-t)) - bufR_4d = bufR_4d + Amm * exp(-a*ss) * exp(-a*tt) * signW * buf1_4d ; + bufR_4d = bufR_4d + Amm * pow(ema,ss) * pow(ema,tt) * signW * buf1_4d ; //GL buf2_4d = Zero(); ExtractSlice(buf2_4d, PLsource, (tt-1), 0); //G(s,t) - bufL_4d = bufL_4d + A * exp(a*Ls) * exp(-a*f) * signW * buf2_4d + A * exp(-a*Ls) * exp(a*f) * signW * buf2_4d; + bufL_4d = bufL_4d + A * eaLs * pow(ema,f) * signW * buf2_4d + A * emaLs * pow(ea,f) * signW * buf2_4d; //B++*exp(a(s+t)) - bufL_4d = bufL_4d + Bpp * exp(a*ss) * exp(a*tt) * signW * buf2_4d ; + bufL_4d = bufL_4d + Bpp * pow(ea,ss) * pow(ea,tt) * signW * buf2_4d ; //B+-*exp(a(s-t)) - bufL_4d = bufL_4d + ABpm * exp(a*ss) * exp(-a*tt) * signW * buf2_4d ; + bufL_4d = bufL_4d + ABpm * pow(ea,ss) * pow(ema,tt) * signW * buf2_4d ; //B-+*exp(a(-s+t)) - bufL_4d = bufL_4d + ABpm * exp(-a*ss) * exp(a*tt) * signW * buf2_4d ; + bufL_4d = bufL_4d + ABpm * pow(ema,ss) * pow(ea,tt) * signW * buf2_4d ; //B--*exp(a(-s-t)) - bufL_4d = bufL_4d + Bmm * exp(-a*ss) * exp(-a*tt) * signW * buf2_4d ; + bufL_4d = bufL_4d + Bmm * pow(ema,ss) * pow(ema,tt) * signW * buf2_4d ; } InsertSlice(bufR_4d, GR, (ss-1), 0); InsertSlice(bufL_4d, GL, (ss-1), 0); @@ -775,28 +721,12 @@ void WilsonFermion5D::MomentumSpacePropagatorHt(FermionField &out,const Fe W = one - M5 + sk2; //////////////////////////////////////////// - // Cosh alpha -> alpha + // Cosh alpha -> exp(+/- alpha) //////////////////////////////////////////// cosha = (one + W*W + sk) / (abs(W)*2.0); - // FIXME Need a Lattice acosh - { - autoView(cosha_v,cosha,CpuRead); - autoView(a_v,a,CpuWrite); - for(int idx=0;idx<_grid->lSites();idx++){ - Coordinate lcoor(Nd); - Tcomplex cc; - // RealD sgn; - _grid->LocalIndexToLocalCoor(idx,lcoor); - peekLocalSite(cc,cosha_v,lcoor); - assert((double)real(cc)>=1.0); - assert(fabs((double)imag(cc))<=1.0e-15); - cc = ScalComplex(::acosh(real(cc)),0.0); - pokeLocalSite(cc,a_v,lcoor); - }} - - Wea = ( exp( a) * abs(W) ); - Wema= ( exp(-a) * abs(W) ); + Wea = abs(W)*(cosha + sqrt(cosha*cosha-one)); + Wema= abs(W)*(cosha - sqrt(cosha*cosha-one)); num = num + ( one - Wema ) * mass * in; denom= ( Wea - one ) + mass*mass * (one - Wema); diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index c958019d..1a262533 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -60,6 +60,9 @@ WilsonFermion::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, _tmp(&Hgrid), anisotropyCoeff(anis) { + Stencil.lo = &Lebesgue; + StencilEven.lo = &LebesgueEvenOdd; + StencilOdd.lo = &LebesgueEvenOdd; // Allocate the required comms buffer ImportGauge(_Umu); if (anisotropyCoeff.isAnisotropic){ @@ -76,91 +79,6 @@ WilsonFermion::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, StencilOdd.BuildSurfaceList(1,vol4); } -template -void WilsonFermion::Report(void) -{ - RealD NP = _grid->_Nprocessors; - RealD NN = _grid->NodeCount(); - RealD volume = 1; - Coordinate latt = _grid->GlobalDimensions(); - for(int mu=0;mu 0 ) { - std::cout << GridLogMessage << "#### Dhop calls report " << std::endl; - std::cout << GridLogMessage << "WilsonFermion Number of DhopEO Calls : " << DhopCalls << std::endl; - std::cout << GridLogMessage << "WilsonFermion TotalTime /Calls : " << DhopTotalTime / 
DhopCalls << " us" << std::endl; - std::cout << GridLogMessage << "WilsonFermion CommTime /Calls : " << DhopCommTime / DhopCalls << " us" << std::endl; - std::cout << GridLogMessage << "WilsonFermion FaceTime /Calls : " << DhopFaceTime / DhopCalls << " us" << std::endl; - std::cout << GridLogMessage << "WilsonFermion ComputeTime1/Calls : " << DhopComputeTime / DhopCalls << " us" << std::endl; - std::cout << GridLogMessage << "WilsonFermion ComputeTime2/Calls : " << DhopComputeTime2/ DhopCalls << " us" << std::endl; - - // Average the compute time - _grid->GlobalSum(DhopComputeTime); - DhopComputeTime/=NP; - RealD mflops = 1320*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting - std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl; - - RealD Fullmflops = 1320*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting - std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl; - std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl; - - } - - if ( DerivCalls > 0 ) { - std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl; - std::cout << GridLogMessage << "WilsonFermion Number of Deriv Calls : " < 0 || DhopCalls > 0){ - std::cout << GridLogMessage << "WilsonFermion Stencil" < 0){ - std::cout << GridLogMessage << "WilsonFermion Stencil Reporti()" < -void WilsonFermion::ZeroCounters(void) { - DhopCalls = 0; // ok - DhopCommTime = 0; - DhopComputeTime = 0; - DhopComputeTime2= 0; - DhopFaceTime = 0; - DhopTotalTime = 0; - - DerivCalls = 0; // ok - DerivCommTime = 0; - DerivComputeTime = 0; - DerivDhopComputeTime = 0; - - Stencil.ZeroCounters(); - StencilEven.ZeroCounters(); - StencilOdd.ZeroCounters(); - Stencil.ZeroCountersi(); - StencilEven.ZeroCountersi(); - StencilOdd.ZeroCountersi(); -} - - template void WilsonFermion::ImportGauge(const GaugeField &_Umu) { @@ -320,7 +238,6 @@ template void WilsonFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, GaugeField &mat, const FermionField &A, const FermionField &B, int dag) { - DerivCalls++; assert((dag == DaggerNo) || (dag == DaggerYes)); Compressor compressor(dag); @@ -329,11 +246,8 @@ void WilsonFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, FermionField Atilde(B.Grid()); Atilde = A; - DerivCommTime-=usecond(); st.HaloExchange(B, compressor); - DerivCommTime+=usecond(); - DerivComputeTime-=usecond(); for (int mu = 0; mu < Nd; mu++) { //////////////////////////////////////////////////////////////////////// // Flip gamma (1+g)<->(1-g) if dag @@ -341,7 +255,6 @@ void WilsonFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, int gamma = mu; if (!dag) gamma += Nd; - DerivDhopComputeTime -= usecond(); int Ls=1; Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, B.Grid()->oSites(), B, Btilde, mu, gamma); @@ -349,9 +262,7 @@ void WilsonFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, // spin trace outer product ////////////////////////////////////////////////// Impl::InsertForce4D(mat, Btilde, Atilde, mu); - DerivDhopComputeTime += usecond(); } - DerivComputeTime += usecond(); } template @@ -398,7 +309,6 @@ void WilsonFermion::DhopDerivEO(GaugeField 
&mat, const FermionField &U, co template void WilsonFermion::Dhop(const FermionField &in, FermionField &out, int dag) { - DhopCalls+=2; conformable(in.Grid(), _grid); // verifies full grid conformable(in.Grid(), out.Grid()); @@ -410,7 +320,6 @@ void WilsonFermion::Dhop(const FermionField &in, FermionField &out, int da template void WilsonFermion::DhopOE(const FermionField &in, FermionField &out, int dag) { - DhopCalls++; conformable(in.Grid(), _cbgrid); // verifies half grid conformable(in.Grid(), out.Grid()); // drops the cb check @@ -423,7 +332,6 @@ void WilsonFermion::DhopOE(const FermionField &in, FermionField &out, int template void WilsonFermion::DhopEO(const FermionField &in, FermionField &out,int dag) { - DhopCalls++; conformable(in.Grid(), _cbgrid); // verifies half grid conformable(in.Grid(), out.Grid()); // drops the cb check @@ -488,14 +396,12 @@ void WilsonFermion::DhopInternal(StencilImpl &st, LebesgueOrder &lo, const FermionField &in, FermionField &out, int dag) { - DhopTotalTime-=usecond(); #ifdef GRID_OMP if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) DhopInternalOverlappedComms(st,lo,U,in,out,dag); else #endif DhopInternalSerial(st,lo,U,in,out,dag); - DhopTotalTime+=usecond(); } template @@ -504,6 +410,7 @@ void WilsonFermion::DhopInternalOverlappedComms(StencilImpl &st, LebesgueO const FermionField &in, FermionField &out, int dag) { + GRID_TRACE("DhopOverlapped"); assert((dag == DaggerNo) || (dag == DaggerYes)); Compressor compressor(dag); @@ -514,53 +421,55 @@ void WilsonFermion::DhopInternalOverlappedComms(StencilImpl &st, LebesgueO ///////////////////////////// std::vector > requests; st.Prepare(); - DhopFaceTime-=usecond(); - st.HaloGather(in,compressor); - DhopFaceTime+=usecond(); + { + GRID_TRACE("Gather"); + st.HaloGather(in,compressor); + } - DhopCommTime -=usecond(); + tracePush("Communication"); st.CommunicateBegin(requests); ///////////////////////////// // Overlap with comms ///////////////////////////// - DhopFaceTime-=usecond(); - st.CommsMergeSHM(compressor); - DhopFaceTime+=usecond(); + { + GRID_TRACE("MergeSHM"); + st.CommsMergeSHM(compressor); + } ///////////////////////////// // do the compute interior ///////////////////////////// int Opt = WilsonKernelsStatic::Opt; - DhopComputeTime-=usecond(); if (dag == DaggerYes) { + GRID_TRACE("DhopDagInterior"); Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0); } else { + GRID_TRACE("DhopInterior"); Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0); } - DhopComputeTime+=usecond(); ///////////////////////////// // Complete comms ///////////////////////////// st.CommunicateComplete(requests); - DhopCommTime +=usecond(); - - DhopFaceTime-=usecond(); - st.CommsMerge(compressor); - DhopFaceTime+=usecond(); + tracePop("Communication"); + { + GRID_TRACE("Merge"); + st.CommsMerge(compressor); + } ///////////////////////////// // do the compute exterior ///////////////////////////// - DhopComputeTime2-=usecond(); if (dag == DaggerYes) { + GRID_TRACE("DhopDagExterior"); Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1); } else { + GRID_TRACE("DhopExterior"); Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1); } - DhopComputeTime2+=usecond(); }; @@ -570,20 +479,22 @@ void WilsonFermion::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo, const FermionField &in, FermionField &out, int dag) { + GRID_TRACE("DhopSerial"); assert((dag == DaggerNo) || (dag == DaggerYes)); Compressor compressor(dag); - DhopCommTime-=usecond(); 
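// The usecond() bracketing deleted here is replaced throughout this patch by
// tracer hooks: GRID_TRACE("name") marks a scoped region (hence the extra
// braces introduced below), tracePush/tracePop bracket a named region
// explicitly, and traceStart returns an id consumed by traceStop, as in the
// overlapped 5d path:
//
//   auto id = traceStart("Communicate overlapped");
//   st.CommunicateBegin(requests);
//   /* ... interior compute ... */
//   st.CommunicateComplete(requests);
//   traceStop(id);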
- st.HaloExchange(in, compressor); - DhopCommTime+=usecond(); + { + GRID_TRACE("HaloExchange"); + st.HaloExchange(in, compressor); + } - DhopComputeTime-=usecond(); int Opt = WilsonKernelsStatic::Opt; if (dag == DaggerYes) { + GRID_TRACE("DhopDag"); Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out); } else { + GRID_TRACE("Dhop"); Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out); } - DhopComputeTime+=usecond(); }; /*Change ends */ diff --git a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h index 9f6960af..e9a3a500 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h @@ -72,20 +72,15 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip) if (SE->_is_local) { \ int perm= SE->_permute; \ auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \ - spProj(chi,tmp); \ - } else if ( st.same_node[Dir] ) { \ - chi = coalescedRead(buf[SE->_offset],lane); \ - } \ - acceleratorSynchronise(); \ - if (SE->_is_local || st.same_node[Dir] ) { \ - Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \ - Recon(result, Uchi); \ - } \ + spProj(chi,tmp); \ + Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \ + Recon(result, Uchi); \ + } \ acceleratorSynchronise(); #define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \ SE = st.GetEntry(ptype, Dir, sF); \ - if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \ + if (!SE->_is_local ) { \ auto chi = coalescedRead(buf[SE->_offset],lane); \ Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \ Recon(result, Uchi); \ @@ -416,19 +411,6 @@ void WilsonKernels::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S #undef LoopBody } -#define KERNEL_CALL_TMP(A) \ - const uint64_t NN = Nsite*Ls; \ - auto U_p = & U_v[0]; \ - auto in_p = & in_v[0]; \ - auto out_p = & out_v[0]; \ - auto st_p = st_v._entries_p; \ - auto st_perm = st_v._permute_type; \ - accelerator_forNB( ss, NN, Simd::Nsimd(), { \ - int sF = ss; \ - int sU = ss/Ls; \ - WilsonKernels::A(st_perm,st_p,U_p,buf,sF,sU,in_p,out_p); \ - }); \ - accelerator_barrier(); #define KERNEL_CALLNB(A) \ const uint64_t NN = Nsite*Ls; \ @@ -440,12 +422,35 @@ void WilsonKernels::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S #define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier(); +#define KERNEL_CALL_EXT(A) \ + const uint64_t NN = Nsite*Ls; \ + const uint64_t sz = st.surface_list.size(); \ + auto ptr = &st.surface_list[0]; \ + accelerator_forNB( ss, sz, Simd::Nsimd(), { \ + int sF = ptr[ss]; \ + int sU = sF/Ls; \ + WilsonKernels::A(st_v,U_v,buf,sF,sU,in_v,out_v); \ + }); \ + accelerator_barrier(); + #define ASM_CALL(A) \ - thread_for( ss, Nsite, { \ + thread_for( sss, Nsite, { \ + int ss = st.lo->Reorder(sss); \ int sU = ss; \ int sF = ss*Ls; \ WilsonKernels::A(st_v,U_v,buf,sF,sU,Ls,1,in_v,out_v); \ }); +#define ASM_CALL_SLICE(A) \ + auto grid = in.Grid() ; \ + int nt = grid->LocalDimensions()[4]; \ + int nxyz = Nsite/nt ; \ + for(int t=0;t::A(st_v,U_v,buf,sF,sU,Ls,1,in_v,out_v); \ + });} template void WilsonKernels::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf, @@ -459,11 +464,7 @@ void WilsonKernels::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField if( interior && exterior ) { if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;} -#ifdef SYCL_HACK - if (Opt == WilsonKernelsStatic::OptHandUnroll ) { 
KERNEL_CALL_TMP(HandDhopSiteSycl); return; } -#else if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;} -#endif #ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); return;} #endif @@ -474,8 +475,10 @@ void WilsonKernels::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); return;} #endif } else if( exterior ) { - if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;} - if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;} + // dependent on result of merge + acceleratorFenceComputeStream(); + if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL_EXT(GenericDhopSiteExt); return;} + if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_EXT(HandDhopSiteExt); return;} #ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); return;} #endif @@ -498,21 +501,20 @@ void WilsonKernels::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField #ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDag); return;} #endif - acceleratorFenceComputeStream(); } else if( interior ) { - if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagInt); return;} - if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagInt); return;} + if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALLNB(GenericDhopSiteDagInt); return;} + if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALLNB(HandDhopSiteDagInt); return;} #ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagInt); return;} #endif } else if( exterior ) { + // Dependent on result of merge acceleratorFenceComputeStream(); - if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagExt); return;} - if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt); return;} + if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL_EXT(GenericDhopSiteDagExt); return;} + if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_EXT(HandDhopSiteDagExt); return;} #ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagExt); return;} #endif - acceleratorFenceComputeStream(); } assert(0 && " Kernel optimisation case not covered "); } diff --git a/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh b/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh index d6845e75..4ccc01e8 100755 --- a/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh +++ b/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh @@ -9,6 +9,7 @@ STAG5_IMPL_LIST="" WILSON_IMPL_LIST=" \ WilsonImplF \ WilsonImplD \ + WilsonImplD2 \ WilsonAdjImplF \ WilsonAdjImplD \ WilsonTwoIndexSymmetricImplF \ @@ -25,8 +26,9 @@ COMPACT_WILSON_IMPL_LIST=" \ DWF_IMPL_LIST=" \ WilsonImplF \ WilsonImplD \ + WilsonImplD2 \ ZWilsonImplF \ - ZWilsonImplD " + ZWilsonImplD2 " GDWF_IMPL_LIST=" \ GparityWilsonImplF \ diff --git a/Grid/qcd/action/filters/DDHMCFilter.h b/Grid/qcd/action/filters/DDHMCFilter.h new file mode 100644 index 00000000..f2ea358e --- /dev/null +++ b/Grid/qcd/action/filters/DDHMCFilter.h @@ -0,0 +1,115 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/hmc/integrators/DirichletFilter.h + 
+Copyright (C) 2015
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/* END LEGAL */
+//--------------------------------------------------------------------
+#pragma once
+
+NAMESPACE_BEGIN(Grid);
+////////////////////////////////////////////////////
+// DDHMC filter with sub-block size B[mu]
+////////////////////////////////////////////////////
+
+template<typename GaugeField>
+struct DDHMCFilter: public MomentumFilterBase<GaugeField>
+{
+  Coordinate Block;
+  int Width;
+
+  DDHMCFilter(const Coordinate &_Block,int _Width=2): Block(_Block) { Width=_Width; }
+
+  void applyFilter(GaugeField &U) const override
+  {
+    GridBase *grid = U.Grid();
+    Coordinate Global=grid->GlobalDimensions();
+    GaugeField zzz(grid); zzz = Zero();
+    LatticeInteger coor(grid);
+
+    auto zzz_mu = PeekIndex<LorentzIndex>(zzz,0);
+    ////////////////////////////////////////////////////
+    // Zero BDY layers
+    ////////////////////////////////////////////////////
+    std::cout<<GridLogMessage<<" DDHMC filter Block "<<Block<<" width "<<Width<<std::endl;
+    for(int mu=0;mu<Nd;mu++) {
+      Integer B1 = Block[mu];
+      if ( B1 && (B1 <= Global[mu]) ) {
+        LatticeCoordinate(coor,mu);
+        if ( Width==1) {
+          U = where(mod(coor,B1)==Integer(B1-1),zzz,U);
+          U = where(mod(coor,B1)==Integer(0)   ,zzz,U);
+          auto U_mu = PeekIndex<LorentzIndex>(U,mu);
+          U_mu = where(mod(coor,B1)==Integer(B1-2),zzz_mu,U_mu);
+          PokeIndex<LorentzIndex>(U, U_mu, mu);
+        }
+        if ( Width==2) {
+          U = where(mod(coor,B1)==Integer(B1-2),zzz,U);
+          U = where(mod(coor,B1)==Integer(B1-1),zzz,U);
+          U = where(mod(coor,B1)==Integer(0)   ,zzz,U);
+          U = where(mod(coor,B1)==Integer(1)   ,zzz,U);
+          auto U_mu = PeekIndex<LorentzIndex>(U,mu);
+          U_mu = where(mod(coor,B1)==Integer(B1-3),zzz_mu,U_mu);
+          PokeIndex<LorentzIndex>(U, U_mu, mu);
+        }
+        if ( Width==3) {
+          U = where(mod(coor,B1)==Integer(B1-3),zzz,U);
+          U = where(mod(coor,B1)==Integer(B1-2),zzz,U);
+          U = where(mod(coor,B1)==Integer(B1-1),zzz,U);
+          U = where(mod(coor,B1)==Integer(0)   ,zzz,U);
+          U = where(mod(coor,B1)==Integer(1)   ,zzz,U);
+          U = where(mod(coor,B1)==Integer(2)   ,zzz,U);
+          auto U_mu = PeekIndex<LorentzIndex>(U,mu);
+          U_mu = where(mod(coor,B1)==Integer(B1-4),zzz_mu,U_mu);
+          PokeIndex<LorentzIndex>(U, U_mu, mu);
+        }
+        if ( Width==4) {
+          U = where(mod(coor,B1)==Integer(B1-4),zzz,U);
+          U = where(mod(coor,B1)==Integer(B1-3),zzz,U);
+          U = where(mod(coor,B1)==Integer(B1-2),zzz,U);
+          U = where(mod(coor,B1)==Integer(B1-1),zzz,U);
+          U = where(mod(coor,B1)==Integer(0)   ,zzz,U);
+          U = where(mod(coor,B1)==Integer(1)   ,zzz,U);
+          U = where(mod(coor,B1)==Integer(2)   ,zzz,U);
+          U = where(mod(coor,B1)==Integer(3)   ,zzz,U);
+          auto U_mu = PeekIndex<LorentzIndex>(U,mu);
+          U_mu = where(mod(coor,B1)==Integer(B1-5),zzz_mu,U_mu);
+          PokeIndex<LorentzIndex>(U, U_mu, mu);
+        }
+      }
+
+    }
+
+  }
+};
+
+NAMESPACE_END(Grid);
+
diff --git a/Grid/qcd/action/filters/DirichletFilter.h b/Grid/qcd/action/filters/DirichletFilter.h
new file mode 100644
index 00000000..e388891f
--- /dev/null
+++ b/Grid/qcd/action/filters/DirichletFilter.h
@@ -0,0 +1,71 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./lib/qcd/hmc/integrators/DirichletFilter.h
./lib/qcd/hmc/integrators/DirichletFilter.h + +Copyright (C) 2015 + +Author: Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +//-------------------------------------------------------------------- +#pragma once + +NAMESPACE_BEGIN(Grid); + +template +struct DirichletFilter: public MomentumFilterBase +{ + typedef typename MomentaField::vector_type vector_type; //SIMD-vectorized complex type + typedef typename MomentaField::scalar_type scalar_type; //scalar complex type + + typedef iScalar > > ScalarType; //complex phase for each site + + Coordinate Block; + + DirichletFilter(const Coordinate &_Block): Block(_Block){} + + void applyFilter(MomentaField &P) const override + { + GridBase *grid = P.Grid(); + typedef decltype(PeekIndex(P, 0)) LatCM; + //////////////////////////////////////////////////// + // Zero strictly links crossing between domains + //////////////////////////////////////////////////// + LatticeInteger coor(grid); + LatCM zz(grid); zz = Zero(); + for(int mu=0;muGlobalDimensions()[mu] ) ) { + // If costly could provide Grid earlier and precompute masks + std::cout << GridLogMessage << " Dirichlet in mu="<(P, mu); + P_mu = where(mod(coor,Block[mu])==Integer(Block[mu]-1),zz,P_mu); + PokeIndex(P, P_mu, mu); + } + } + } +}; + + + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/hmc/integrators/MomentumFilter.h b/Grid/qcd/action/filters/MomentumFilter.h similarity index 97% rename from Grid/qcd/hmc/integrators/MomentumFilter.h rename to Grid/qcd/action/filters/MomentumFilter.h index 2a15d80c..275f2c9c 100644 --- a/Grid/qcd/hmc/integrators/MomentumFilter.h +++ b/Grid/qcd/action/filters/MomentumFilter.h @@ -37,7 +37,8 @@ NAMESPACE_BEGIN(Grid); template struct MomentumFilterBase{ - virtual void applyFilter(MomentaField &P) const; + virtual void applyFilter(MomentaField &P) const = 0; + virtual ~MomentumFilterBase(){}; }; //Do nothing @@ -83,7 +84,6 @@ struct MomentumFilterApplyPhase: public MomentumFilterBase{ } - }; diff --git a/Grid/qcd/action/gauge/GaugeImplementations.h b/Grid/qcd/action/gauge/GaugeImplementations.h index 16147c77..f518b236 100644 --- a/Grid/qcd/action/gauge/GaugeImplementations.h +++ b/Grid/qcd/action/gauge/GaugeImplementations.h @@ -69,6 +69,11 @@ public: return PeriodicBC::ShiftStaple(Link,mu); } + //Same as Cshift for periodic BCs + static inline GaugeLinkField CshiftLink(const GaugeLinkField &Link, int mu, int shift){ + return PeriodicBC::CshiftLink(Link,mu,shift); + } + static inline bool isPeriodicGaugeField(void) { return true; } }; @@ -110,6 +115,11 @@ public: return PeriodicBC::CovShiftBackward(Link, mu, field); } + //If mu is a conjugate BC direction + //Out(x) = U^dag_\mu(x-mu) | x_\mu != 0 + // = U^T_\mu(L-1) | x_\mu == 0 
+ //else + //Out(x) = U^dag_\mu(x-mu mod L) static inline GaugeLinkField CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) { @@ -129,6 +139,13 @@ public: return PeriodicBC::CovShiftIdentityForward(Link,mu); } + + //If mu is a conjugate BC direction + //Out(x) = S_\mu(x+mu) | x_\mu != L-1 + // = S*_\mu(x+mu) | x_\mu == L-1 + //else + //Out(x) = S_\mu(x+mu mod L) + //Note: While this is used for Staples it is also applicable for shifting gauge links or gauge transformation matrices static inline GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu) { assert(_conjDirs.size() == Nd); @@ -138,6 +155,27 @@ public: return PeriodicBC::ShiftStaple(Link,mu); } + //Boundary-aware C-shift of gauge links / gauge transformation matrices + //For conjugate BC direction + //shift = 1 + //Out(x) = U_\mu(x+\hat\mu) | x_\mu != L-1 + // = U*_\mu(0) | x_\mu == L-1 + //shift = -1 + //Out(x) = U_\mu(x-mu) | x_\mu != 0 + // = U*_\mu(L-1) | x_\mu == 0 + //else + //shift = 1 + //Out(x) = U_\mu(x+\hat\mu mod L) + //shift = -1 + //Out(x) = U_\mu(x-\hat\mu mod L) + static inline GaugeLinkField CshiftLink(const GaugeLinkField &Link, int mu, int shift){ + assert(_conjDirs.size() == Nd); + if(_conjDirs[mu]) + return ConjugateBC::CshiftLink(Link,mu,shift); + else + return PeriodicBC::CshiftLink(Link,mu,shift); + } + static inline void setDirections(std::vector &conjDirs) { _conjDirs=conjDirs; } static inline std::vector getDirections(void) { return _conjDirs; } static inline bool isPeriodicGaugeField(void) { return false; } diff --git a/Grid/qcd/action/pseudofermion/Bounds.h b/Grid/qcd/action/pseudofermion/Bounds.h index 535e1a49..8864b1d7 100644 --- a/Grid/qcd/action/pseudofermion/Bounds.h +++ b/Grid/qcd/action/pseudofermion/Bounds.h @@ -13,6 +13,31 @@ NAMESPACE_BEGIN(Grid); std::cout << GridLogMessage << "Pseudofermion action lamda_max "< void ChebyBoundsCheck(LinearOperatorBase &HermOp, + Field &GaussNoise, + RealD lo,RealD hi) + { + int orderfilter = 1000; + Chebyshev Cheb(lo,hi,orderfilter); + + GridBase *FermionGrid = GaussNoise.Grid(); + + Field X(FermionGrid); + Field Z(FermionGrid); + + X=GaussNoise; + RealD Nx = norm2(X); + Cheb(HermOp,X,Z); + RealD Nz = norm2(Z); + + std::cout << "************************* "< void InverseSqrtBoundsCheck(int MaxIter,double tol, LinearOperatorBase &HermOp, @@ -40,13 +65,65 @@ NAMESPACE_BEGIN(Grid); X=X-Y; RealD Nd = norm2(X); std::cout << "************************* "< void InversePowerBoundsCheck(int inv_pow, + int MaxIter,double tol, + LinearOperatorBase &HermOp, + Field &GaussNoise, + MultiShiftFunction &ApproxNegPow) + { + GridBase *FermionGrid = GaussNoise.Grid(); + + Field X(FermionGrid); + Field Y(FermionGrid); + Field Z(FermionGrid); + + Field tmp1(FermionGrid), tmp2(FermionGrid); + + X=GaussNoise; + RealD Nx = norm2(X); + + ConjugateGradientMultiShift msCG(MaxIter,ApproxNegPow); + + tmp1 = X; + + Field* in = &tmp1; + Field* out = &tmp2; + for(int i=0;i + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + +/////////////////////////////////////// +// Two flavour ratio +/////////////////////////////////////// +template +class DomainDecomposedBoundaryTwoFlavourBosonPseudoFermion : public Action { +public: + INHERIT_IMPL_TYPES(ImplD); + +private: + SchurFactoredFermionOperator & NumOp;// the basic operator + RealD InnerStoppingCondition; + RealD ActionStoppingCondition; + RealD DerivativeStoppingCondition; + FermionField Phi; // the pseudo fermion field for this trajectory +public: + DomainDecomposedBoundaryTwoFlavourBosonPseudoFermion(SchurFactoredFermionOperator &_NumOp,RealD _DerivativeTol, RealD _ActionTol, RealD _InnerTol=1.0e-6) + : NumOp(_NumOp), + DerivativeStoppingCondition(_DerivativeTol), + ActionStoppingCondition(_ActionTol), + InnerStoppingCondition(_InnerTol), + Phi(_NumOp.FermionGrid()) {}; + + virtual std::string action_name(){return "DomainDecomposedBoundaryTwoFlavourBosonPseudoFermion";} + + virtual std::string LogParameters(){ + std::stringstream sstream; + return sstream.str(); + } + + virtual void refresh(const GaugeField &U, GridSerialRNG& sRNG, GridParallelRNG& pRNG) + { + // P(phi) = e^{- phi^dag P^dag P phi} + // + // NumOp == P + // + // Take phi = P^{-1} eta ; eta = P Phi + // + // P(eta) = e^{- eta^dag eta} + // + // e^{x^2/2 sig^2} => sig^2 = 0.5. + // + // So eta should be of width sig = 1/sqrt(2) and must multiply by 0.707.... 
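+  // (Spelled out: P(eta) ~ e^{-eta^dag eta} weights each real component x with
+  //  e^{-x^2} = e^{-x^2/(2 sig^2)}, so sig^2 = 1/2 and sig = 1/sqrt(2) ~ 0.707;
+  //  gaussian(pRNG,eta) below draws unit-variance noise, hence the rescale by sqrt(0.5).)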
+ // + RealD scale = std::sqrt(0.5); + + NumOp.tolinner=InnerStoppingCondition; + NumOp.tol=ActionStoppingCondition; + NumOp.ImportGauge(U); + + FermionField eta(NumOp.FermionGrid()); + + gaussian(pRNG,eta); eta=eta*scale; + + NumOp.ProjectBoundaryBar(eta); + //DumpSliceNorm("eta",eta); + NumOp.RInv(eta,Phi); + + //DumpSliceNorm("Phi",Phi); + + }; + + ////////////////////////////////////////////////////// + // S = phi^dag Pdag P phi + ////////////////////////////////////////////////////// + virtual RealD S(const GaugeField &U) { + + NumOp.tolinner=InnerStoppingCondition; + NumOp.tol=ActionStoppingCondition; + NumOp.ImportGauge(U); + + FermionField Y(NumOp.FermionGrid()); + + NumOp.R(Phi,Y); + + RealD action = norm2(Y); + + return action; + }; + + virtual void deriv(const GaugeField &U,GaugeField & dSdU) + { + NumOp.tolinner=InnerStoppingCondition; + NumOp.tol=DerivativeStoppingCondition; + NumOp.ImportGauge(U); + + GridBase *fgrid = NumOp.FermionGrid(); + GridBase *ugrid = NumOp.GaugeGrid(); + + FermionField X(fgrid); + FermionField Y(fgrid); + FermionField tmp(fgrid); + + GaugeField force(ugrid); + + FermionField DobiDdbPhi(fgrid); // Vector A in my notes + FermionField DoiDdDobiDdbPhi(fgrid); // Vector B in my notes + FermionField DoidP_Phi(fgrid); // Vector E in my notes + FermionField DobidDddDoidP_Phi(fgrid); // Vector F in my notes + + FermionField P_Phi(fgrid); + + // P term + NumOp.dBoundaryBar(Phi,tmp); + NumOp.dOmegaBarInv(tmp,DobiDdbPhi); // Vector A + NumOp.dBoundary(DobiDdbPhi,tmp); + NumOp.dOmegaInv(tmp,DoiDdDobiDdbPhi); // Vector B + P_Phi = Phi - DoiDdDobiDdbPhi; + NumOp.ProjectBoundaryBar(P_Phi); + + // P^dag P term + NumOp.dOmegaDagInv(P_Phi,DoidP_Phi); // Vector E + NumOp.dBoundaryDag(DoidP_Phi,tmp); + NumOp.dOmegaBarDagInv(tmp,DobidDddDoidP_Phi); // Vector F + NumOp.dBoundaryBarDag(DobidDddDoidP_Phi,tmp); + + X = DobiDdbPhi; + Y = DobidDddDoidP_Phi; + NumOp.DirichletFermOpD.MDeriv(force,Y,X,DaggerNo); dSdU=force; + NumOp.DirichletFermOpD.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU+force; + + X = DoiDdDobiDdbPhi; + Y = DoidP_Phi; + NumOp.DirichletFermOpD.MDeriv(force,Y,X,DaggerNo); dSdU=dSdU+force; + NumOp.DirichletFermOpD.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU+force; + + dSdU *= -1.0; + + }; +}; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/pseudofermion/DomainDecomposedBoundaryTwoFlavourPseudoFermion.h b/Grid/qcd/action/pseudofermion/DomainDecomposedBoundaryTwoFlavourPseudoFermion.h new file mode 100644 index 00000000..1f3687ca --- /dev/null +++ b/Grid/qcd/action/pseudofermion/DomainDecomposedBoundaryTwoFlavourPseudoFermion.h @@ -0,0 +1,158 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/pseudofermion/DomainDecomposedTwoFlavourBoundary.h + + Copyright (C) 2021 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + +/////////////////////////////////////// +// Two flavour ratio +/////////////////////////////////////// +template +class DomainDecomposedBoundaryTwoFlavourPseudoFermion : public Action { +public: + INHERIT_IMPL_TYPES(ImplD); + +private: + SchurFactoredFermionOperator & DenOp;// the basic operator + RealD ActionStoppingCondition; + RealD DerivativeStoppingCondition; + RealD InnerStoppingCondition; + + FermionField Phi; // the pseudo fermion field for this trajectory + + RealD refresh_action; +public: + DomainDecomposedBoundaryTwoFlavourPseudoFermion(SchurFactoredFermionOperator &_DenOp,RealD _DerivativeTol, RealD _ActionTol, RealD _InnerTol = 1.0e-6 ) + : DenOp(_DenOp), + DerivativeStoppingCondition(_DerivativeTol), + ActionStoppingCondition(_ActionTol), + InnerStoppingCondition(_InnerTol), + Phi(_DenOp.FermionGrid()) {}; + + virtual std::string action_name(){return "DomainDecomposedBoundaryTwoFlavourPseudoFermion";} + + + virtual std::string LogParameters(){ + std::stringstream sstream; + return sstream.str(); + } + + virtual void refresh(const GaugeField &U, GridSerialRNG& sRNG, GridParallelRNG& pRNG) + { + // P(phi) = e^{- phi^dag Rdag^-1 R^-1 phi} + // + // DenOp == R + // + // Take phi = R eta ; eta = R^-1 Phi + // + // P(eta) = e^{- eta^dag eta} + // + // e^{x^2/2 sig^2} => sig^2 = 0.5. + // + // So eta should be of width sig = 1/sqrt(2) and must multiply by 0.707.... 
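+  // (Note: with Phi = R eta the first action evaluation should give
+  //  S = |R^{-1} Phi|^2 = |eta|^2 up to the solver tolerance; refresh_action
+  //  below records norm2(eta) so this identity can be checked cheaply.)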
+ // + RealD scale = std::sqrt(0.5); + + DenOp.tolinner=InnerStoppingCondition; + DenOp.tol =ActionStoppingCondition; + DenOp.ImportGauge(U); + + FermionField eta(DenOp.FermionGrid()); + + gaussian(pRNG,eta); eta=eta*scale; + + DenOp.ProjectBoundaryBar(eta); + DenOp.R(eta,Phi); + //DumpSliceNorm("Phi",Phi); + refresh_action = norm2(eta); + }; + + ////////////////////////////////////////////////////// + // S = phi^dag Rdag^-1 R^-1 phi + ////////////////////////////////////////////////////// + virtual RealD S(const GaugeField &U) { + + DenOp.tolinner=InnerStoppingCondition; + DenOp.tol=ActionStoppingCondition; + DenOp.ImportGauge(U); + + FermionField X(DenOp.FermionGrid()); + + DenOp.RInv(Phi,X); + + RealD action = norm2(X); + + return action; + }; + + virtual void deriv(const GaugeField &U,GaugeField & dSdU) + { + DenOp.tolinner=InnerStoppingCondition; + DenOp.tol=DerivativeStoppingCondition; + DenOp.ImportGauge(U); + + GridBase *fgrid = DenOp.FermionGrid(); + GridBase *ugrid = DenOp.GaugeGrid(); + + FermionField X(fgrid); + FermionField Y(fgrid); + FermionField tmp(fgrid); + + GaugeField force(ugrid); + + FermionField DiDdb_Phi(fgrid); // Vector C in my notes + FermionField DidRinv_Phi(fgrid); // Vector D in my notes + FermionField Rinv_Phi(fgrid); + +// FermionField RinvDagRinv_Phi(fgrid); +// FermionField DdbdDidRinv_Phi(fgrid); + + // R^-1 term + DenOp.dBoundaryBar(Phi,tmp); + DenOp.Dinverse(tmp,DiDdb_Phi); // Vector C + Rinv_Phi = Phi - DiDdb_Phi; + DenOp.ProjectBoundaryBar(Rinv_Phi); + + // R^-dagger R^-1 term + DenOp.DinverseDag(Rinv_Phi,DidRinv_Phi); // Vector D +/* + DenOp.dBoundaryBarDag(DidRinv_Phi,DdbdDidRinv_Phi); + RinvDagRinv_Phi = Rinv_Phi - DdbdDidRinv_Phi; + DenOp.ProjectBoundaryBar(RinvDagRinv_Phi); +*/ + X = DiDdb_Phi; + Y = DidRinv_Phi; + DenOp.PeriodicFermOpD.MDeriv(force,Y,X,DaggerNo); dSdU=force; + DenOp.PeriodicFermOpD.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU+force; + DumpSliceNorm("force",dSdU); + dSdU *= -1.0; + }; +}; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/pseudofermion/DomainDecomposedBoundaryTwoFlavourRatioPseudoFermion.h b/Grid/qcd/action/pseudofermion/DomainDecomposedBoundaryTwoFlavourRatioPseudoFermion.h new file mode 100644 index 00000000..cb9ce0a4 --- /dev/null +++ b/Grid/qcd/action/pseudofermion/DomainDecomposedBoundaryTwoFlavourRatioPseudoFermion.h @@ -0,0 +1,237 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/pseudofermion/DomainDecomposedTwoFlavourBoundary.h + + Copyright (C) 2021 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + +/////////////////////////////////////// +// Two flavour ratio +/////////////////////////////////////// +template +class DomainDecomposedBoundaryTwoFlavourRatioPseudoFermion : public Action { +public: + INHERIT_IMPL_TYPES(ImplD); + +private: + SchurFactoredFermionOperator & NumOp;// the basic operator + SchurFactoredFermionOperator & DenOp;// the basic operator + + RealD InnerStoppingCondition; + RealD ActionStoppingCondition; + RealD DerivativeStoppingCondition; + + FermionField Phi; // the pseudo fermion field for this trajectory + +public: + DomainDecomposedBoundaryTwoFlavourRatioPseudoFermion(SchurFactoredFermionOperator &_NumOp, + SchurFactoredFermionOperator &_DenOp, + RealD _DerivativeTol, RealD _ActionTol, RealD _InnerTol=1.0e-6) + : NumOp(_NumOp), DenOp(_DenOp), + Phi(_NumOp.PeriodicFermOpD.FermionGrid()), + InnerStoppingCondition(_InnerTol), + DerivativeStoppingCondition(_DerivativeTol), + ActionStoppingCondition(_ActionTol) + {}; + + virtual std::string action_name(){return "DomainDecomposedBoundaryTwoFlavourRatioPseudoFermion";} + + virtual std::string LogParameters(){ + std::stringstream sstream; + return sstream.str(); + } + + virtual void refresh(const GaugeField &U, GridSerialRNG& sRNG, GridParallelRNG& pRNG) + { + NumOp.ImportGauge(U); + DenOp.ImportGauge(U); + + FermionField eta(NumOp.PeriodicFermOpD.FermionGrid()); + FermionField tmp(NumOp.PeriodicFermOpD.FermionGrid()); + + // P(phi) = e^{- phi^dag P^dag Rdag^-1 R^-1 P phi} + // + // NumOp == P + // DenOp == R + // + // Take phi = P^{-1} R eta ; eta = R^-1 P Phi + // + // P(eta) = e^{- eta^dag eta} + // + // e^{x^2/2 sig^2} => sig^2 = 0.5. + // + // So eta should be of width sig = 1/sqrt(2) and must multiply by 0.707.... 
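+  // (Same unit-variance -> sqrt(0.5) rescale as in the other DDHMC pseudofermions.
+  //  The boundary-projected eta then gives Phi = P^{-1} R eta below, so that
+  //  S = |R^{-1} P Phi|^2 = |eta|^2 at refresh, up to the solver tolerances.)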
+ // + RealD scale = std::sqrt(0.5); + + gaussian(pRNG,eta); eta=eta*scale; + + NumOp.ProjectBoundaryBar(eta); + NumOp.tolinner=InnerStoppingCondition; + DenOp.tolinner=InnerStoppingCondition; + DenOp.tol = ActionStoppingCondition; + NumOp.tol = ActionStoppingCondition; + DenOp.R(eta,tmp); + NumOp.RInv(tmp,Phi); + DumpSliceNorm("Phi",Phi); + + }; + + ////////////////////////////////////////////////////// + // S = phi^dag Pdag Rdag^-1 R^-1 P phi + ////////////////////////////////////////////////////// + virtual RealD S(const GaugeField &U) { + + NumOp.ImportGauge(U); + DenOp.ImportGauge(U); + + FermionField X(NumOp.PeriodicFermOpD.FermionGrid()); + FermionField Y(NumOp.PeriodicFermOpD.FermionGrid()); + + NumOp.tolinner=InnerStoppingCondition; + DenOp.tolinner=InnerStoppingCondition; + DenOp.tol = ActionStoppingCondition; + NumOp.tol = ActionStoppingCondition; + NumOp.R(Phi,Y); + DenOp.RInv(Y,X); + + RealD action = norm2(X); + // std::cout << " DD boundary action is " < class ExactOneFlavourRatioPseudoFermionAction : public Action { @@ -57,37 +61,60 @@ NAMESPACE_BEGIN(Grid); bool use_heatbath_forecasting; AbstractEOFAFermion& Lop; // the basic LH operator AbstractEOFAFermion& Rop; // the basic RH operator - SchurRedBlackDiagMooeeSolve SolverHB; + SchurRedBlackDiagMooeeSolve SolverHBL; + SchurRedBlackDiagMooeeSolve SolverHBR; SchurRedBlackDiagMooeeSolve SolverL; SchurRedBlackDiagMooeeSolve SolverR; SchurRedBlackDiagMooeeSolve DerivativeSolverL; SchurRedBlackDiagMooeeSolve DerivativeSolverR; FermionField Phi; // the pseudofermion field for this trajectory + RealD norm2_eta; //|eta|^2 where eta is the random gaussian field used to generate the pseudofermion field + bool initial_action; //true for the first call to S after refresh, for which the identity S = |eta|^2 holds provided the rational approx is good public: + //Used in the heatbath, refresh the shift coefficients of the L (LorR=0) or R (LorR=1) operator + virtual void heatbathRefreshShiftCoefficients(int LorR, RealD to){ + AbstractEOFAFermion&op = LorR == 0 ? 
Lop : Rop; + op.RefreshShiftCoefficients(to); + } + + + //Use the same solver for L,R in all cases ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion& _Lop, AbstractEOFAFermion& _Rop, OperatorFunction& CG, Params& p, bool use_fc=false) - : ExactOneFlavourRatioPseudoFermionAction(_Lop,_Rop,CG,CG,CG,CG,CG,p,use_fc) {}; - + : ExactOneFlavourRatioPseudoFermionAction(_Lop,_Rop,CG,CG,CG,CG,CG,CG,p,use_fc) {}; + + //Use the same solver for L,R in the heatbath but different solvers elsewhere ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion& _Lop, AbstractEOFAFermion& _Rop, - OperatorFunction& HeatbathCG, + OperatorFunction& HeatbathCG, + OperatorFunction& ActionCGL, OperatorFunction& ActionCGR, + OperatorFunction& DerivCGL , OperatorFunction& DerivCGR, + Params& p, + bool use_fc=false) + : ExactOneFlavourRatioPseudoFermionAction(_Lop,_Rop,HeatbathCG,HeatbathCG, ActionCGL, ActionCGR, DerivCGL,DerivCGR,p,use_fc) {}; + + //Use different solvers for L,R in all cases + ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion& _Lop, + AbstractEOFAFermion& _Rop, + OperatorFunction& HeatbathCGL, OperatorFunction& HeatbathCGR, OperatorFunction& ActionCGL, OperatorFunction& ActionCGR, OperatorFunction& DerivCGL , OperatorFunction& DerivCGR, Params& p, bool use_fc=false) : Lop(_Lop), Rop(_Rop), - SolverHB(HeatbathCG,false,true), + SolverHBL(HeatbathCGL,false,true), SolverHBR(HeatbathCGR,false,true), SolverL(ActionCGL, false, true), SolverR(ActionCGR, false, true), DerivativeSolverL(DerivCGL, false, true), DerivativeSolverR(DerivCGR, false, true), Phi(_Lop.FermionGrid()), param(p), - use_heatbath_forecasting(use_fc) + use_heatbath_forecasting(use_fc), + initial_action(false) { AlgRemez remez(param.lo, param.hi, param.precision); @@ -97,6 +124,8 @@ NAMESPACE_BEGIN(Grid); PowerNegHalf.Init(remez, param.tolerance, true); }; + const FermionField &getPhi() const{ return Phi; } + virtual std::string action_name() { return "ExactOneFlavourRatioPseudoFermionAction"; } virtual std::string LogParameters() { @@ -117,6 +146,19 @@ NAMESPACE_BEGIN(Grid); else{ for(int s=0; s sig^2 = 0.5. + // + RealD scale = std::sqrt(0.5); + + FermionField eta (Lop.FermionGrid()); + gaussian(pRNG,eta); eta = eta * scale; + + refresh(U,eta); + } + // EOFA heatbath: see Eqn. 
(29) of arXiv:1706.05843 // We generate a Gaussian noise vector \eta, and then compute // \Phi = M_{\rm EOFA}^{-1/2} * \eta @@ -124,12 +166,10 @@ NAMESPACE_BEGIN(Grid); // // As a check of rational require \Phi^dag M_{EOFA} \Phi == eta^dag M^-1/2^dag M M^-1/2 eta = eta^dag eta // - virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) - { + void refresh(const GaugeField &U, const FermionField &eta) { Lop.ImportGauge(U); Rop.ImportGauge(U); - FermionField eta (Lop.FermionGrid()); FermionField CG_src (Lop.FermionGrid()); FermionField CG_soln (Lop.FermionGrid()); FermionField Forecast_src(Lop.FermionGrid()); @@ -140,11 +180,6 @@ NAMESPACE_BEGIN(Grid); if(use_heatbath_forecasting){ prev_solns.reserve(param.degree); } ChronoForecast, FermionField> Forecast; - // Seed with Gaussian noise vector (var = 0.5) - RealD scale = std::sqrt(0.5); - gaussian(pRNG,eta); - eta = eta * scale; - // \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta RealD N(PowerNegHalf.norm); for(int k=0; k tmp(2, Lop.FermionGrid()); - mPhi = phi; + out = in; // LH term: S = S - k <\Phi| P_{-} \Omega_{-}^{\dagger} H(mf)^{-1} \Omega_{-} P_{-} |\Phi> - spProj(Phi, spProj_Phi, -1, Lop.Ls); - Lop.Omega(spProj_Phi, tmp[0], -1, 0); + spProj(in, spProj_in, -1, Lop.Ls); + Lop.Omega(spProj_in, tmp[0], -1, 0); G5R5(tmp[1], tmp[0]); tmp[0] = Zero(); SolverL(Lop, tmp[1], tmp[0]); Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back Lop.Omega(tmp[1], tmp[0], -1, 1); - mPhi = mPhi - Lop.k * innerProduct(spProj_Phi, tmp[0]).real(); + spProj(tmp[0], tmp[1], -1, Lop.Ls); + + out = out - Lop.k * tmp[1]; // RH term: S = S + k <\Phi| P_{+} \Omega_{+}^{\dagger} ( H(mb) - // - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{-} P_{-} |\Phi> - spProj(Phi, spProj_Phi, 1, Rop.Ls); - Rop.Omega(spProj_Phi, tmp[0], 1, 0); + // - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} |\Phi> + spProj(in, spProj_in, 1, Rop.Ls); + Rop.Omega(spProj_in, tmp[0], 1, 0); G5R5(tmp[1], tmp[0]); tmp[0] = Zero(); SolverR(Rop, tmp[1], tmp[0]); Rop.Dtilde(tmp[0], tmp[1]); Rop.Omega(tmp[1], tmp[0], 1, 1); - action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real(); -#endif + spProj(tmp[0], tmp[1], 1, Rop.Ls); + + out = out + Rop.k * tmp[1]; } + //Due to the structure of EOFA, it is no more expensive to compute the inverse of Meofa + //To ensure correctness we can simply reuse the heatbath code but use the rational approx + //f(x) = 1/x which corresponds to alpha_0=0, alpha_1=1, beta_1=0 => gamma_1=1 + void MeofaInv(const GaugeField &U, const FermionField &in, FermionField &out) { + Lop.ImportGauge(U); + Rop.ImportGauge(U); + + FermionField CG_src (Lop.FermionGrid()); + FermionField CG_soln (Lop.FermionGrid()); + std::vector tmp(2, Lop.FermionGrid()); + + // \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta + // = 1 * \eta + out = in; + + // LH terms: + // \Phi = \Phi + k \sum_{k=1}^{N_{p}} P_{-} \Omega_{-}^{\dagger} ( H(mf) + // - \gamma_{l} \Delta_{-}(mf,mb) P_{-} )^{-1} \Omega_{-} P_{-} \eta + spProj(in, tmp[0], -1, Lop.Ls); + Lop.Omega(tmp[0], tmp[1], -1, 0); + G5R5(CG_src, tmp[1]); + { + heatbathRefreshShiftCoefficients(0, -1.); //-gamma_1 = -1. 
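+      // (As noted above: for f(x) = 1/x the rational approx degenerates to the
+      //  single pole alpha_0 = 0, alpha_1 = 1, beta_1 = 0 => gamma_1 = 1, so the
+      //  LH solve uses shift -gamma_1 = -1 here while the RH solve below uses
+      //  -gamma_1*beta_1 = 0.)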
+ + CG_soln = Zero(); // Just use zero as the initial guess + SolverHBL(Lop, CG_src, CG_soln); + + Lop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back + tmp[1] = Lop.k * tmp[0]; + } + Lop.Omega(tmp[1], tmp[0], -1, 1); + spProj(tmp[0], tmp[1], -1, Lop.Ls); + out = out + tmp[1]; + + // RH terms: + // \Phi = \Phi - k \sum_{k=1}^{N_{p}} P_{+} \Omega_{+}^{\dagger} ( H(mb) + // - \beta_l\gamma_{l} \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} \eta + spProj(in, tmp[0], 1, Rop.Ls); + Rop.Omega(tmp[0], tmp[1], 1, 0); + G5R5(CG_src, tmp[1]); + { + heatbathRefreshShiftCoefficients(1, 0.); //-gamma_1 * beta_1 = 0 + + CG_soln = Zero(); + SolverHBR(Rop, CG_src, CG_soln); + + Rop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back + tmp[1] = - Rop.k * tmp[0]; + } + Rop.Omega(tmp[1], tmp[0], 1, 1); + spProj(tmp[0], tmp[1], 1, Rop.Ls); + out = out + tmp[1]; + + // Reset shift coefficients for energy and force evals + heatbathRefreshShiftCoefficients(0, 0.0); + heatbathRefreshShiftCoefficients(1, -1.0); + }; + + + + // EOFA action: see Eqn. (10) of arXiv:1706.05843 virtual RealD S(const GaugeField& U) { @@ -271,7 +374,7 @@ NAMESPACE_BEGIN(Grid); action -= Lop.k * innerProduct(spProj_Phi, tmp[0]).real(); // RH term: S = S + k <\Phi| P_{+} \Omega_{+}^{\dagger} ( H(mb) - // - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{-} P_{-} |\Phi> + // - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} |\Phi> spProj(Phi, spProj_Phi, 1, Rop.Ls); Rop.Omega(spProj_Phi, tmp[0], 1, 0); G5R5(tmp[1], tmp[0]); @@ -281,6 +384,26 @@ NAMESPACE_BEGIN(Grid); Rop.Omega(tmp[1], tmp[0], 1, 1); action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real(); + if(initial_action){ + //For the first call to S after refresh, S = |eta|^2. 
We can use this to ensure the rational approx is good + RealD diff = action - norm2_eta; + + //S_init = eta^dag M^{-1/2} M M^{-1/2} eta + //S_init - eta^dag eta = eta^dag ( M^{-1/2} M M^{-1/2} - 1 ) eta + + //If approximate solution + //S_init - eta^dag eta = eta^dag ( [M^{-1/2}+\delta M^{-1/2}] M [M^{-1/2}+\delta M^{-1/2}] - 1 ) eta + // \approx eta^dag ( \delta M^{-1/2} M^{1/2} + M^{1/2}\delta M^{-1/2} ) eta + // We divide out |eta|^2 to remove source scaling but the tolerance on this check should still be somewhat higher than the actual approx tolerance + RealD test = fabs(diff)/norm2_eta; //test the quality of the rational approx + + std::cout << GridLogMessage << action_name() << " initial action " << action << " expect " << norm2_eta << "; diff " << diff << std::endl; + std::cout << GridLogMessage << action_name() << "[ eta^dag ( M^{-1/2} M M^{-1/2} - 1 ) eta ]/|eta^2| = " << test << " expect 0 (tol " << param.BoundsCheckTol << ")" << std::endl; + + assert( ( test < param.BoundsCheckTol ) && " Initial action check failed" ); + initial_action = false; + } + return action; }; @@ -329,6 +452,40 @@ NAMESPACE_BEGIN(Grid); }; }; + template + class ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction : public ExactOneFlavourRatioPseudoFermionAction{ + public: + INHERIT_IMPL_TYPES(ImplD); + typedef OneFlavourRationalParams Params; + + private: + AbstractEOFAFermion& LopF; // the basic LH operator + AbstractEOFAFermion& RopF; // the basic RH operator + + public: + + virtual std::string action_name() { return "ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction"; } + + //Used in the heatbath, refresh the shift coefficients of the L (LorR=0) or R (LorR=1) operator + virtual void heatbathRefreshShiftCoefficients(int LorR, RealD to){ + AbstractEOFAFermion &op = LorR == 0 ? LopF : RopF; + op.RefreshShiftCoefficients(to); + this->ExactOneFlavourRatioPseudoFermionAction::heatbathRefreshShiftCoefficients(LorR,to); + } + + ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction(AbstractEOFAFermion& _LopF, + AbstractEOFAFermion& _RopF, + AbstractEOFAFermion& _LopD, + AbstractEOFAFermion& _RopD, + OperatorFunction& HeatbathCGL, OperatorFunction& HeatbathCGR, + OperatorFunction& ActionCGL, OperatorFunction& ActionCGR, + OperatorFunction& DerivCGL , OperatorFunction& DerivCGR, + Params& p, + bool use_fc=false) : + LopF(_LopF), RopF(_RopF), ExactOneFlavourRatioPseudoFermionAction(_LopD, _RopD, HeatbathCGL, HeatbathCGR, ActionCGL, ActionCGR, DerivCGL, DerivCGR, p, use_fc){} + }; + + NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/pseudofermion/GeneralEvenOddRationalRatio.h b/Grid/qcd/action/pseudofermion/GeneralEvenOddRationalRatio.h new file mode 100644 index 00000000..f237dee4 --- /dev/null +++ b/Grid/qcd/action/pseudofermion/GeneralEvenOddRationalRatio.h @@ -0,0 +1,434 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/pseudofermion/GeneralEvenOddRationalRatio.h + + Copyright (C) 2015 + + Author: Christopher Kelly + Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_H +#define QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_H + +NAMESPACE_BEGIN(Grid); + + ///////////////////////////////////////////////////////// + // Generic rational approximation for ratios of operators + ///////////////////////////////////////////////////////// + + /* S_f = -log( det( [M^dag M]/[V^dag V] )^{1/inv_pow} ) + = chi^dag ( [M^dag M]/[V^dag V] )^{-1/inv_pow} chi\ + = chi^dag ( [V^dag V]^{-1/2} [M^dag M] [V^dag V]^{-1/2} )^{-1/inv_pow} chi\ + = chi^dag [V^dag V]^{1/(2*inv_pow)} [M^dag M]^{-1/inv_pow} [V^dag V]^{1/(2*inv_pow)} chi\ + + S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi + + BIG WARNING: + Here V^dag V is referred to in this code as the "numerator" operator and M^dag M is the *denominator* operator. + this refers to their position in the pseudofermion action, which is the *inverse* of what appears in the determinant + Thus for DWF the numerator operator is the Pauli-Villars operator + + Here P/Q \sim R_{1/(2*inv_pow)} ~ (V^dagV)^{1/(2*inv_pow)} + Here N/D \sim R_{-1/inv_pow} ~ (M^dagM)^{-1/inv_pow} + */ + + template + class GeneralEvenOddRatioRationalPseudoFermionAction : public Action { + public: + + INHERIT_IMPL_TYPES(Impl); + + typedef RationalActionParams Params; + Params param; + RealD RefreshAction; + //For action evaluation + MultiShiftFunction ApproxPowerAction ; //rational approx for X^{1/inv_pow} + MultiShiftFunction ApproxNegPowerAction; //rational approx for X^{-1/inv_pow} + MultiShiftFunction ApproxHalfPowerAction; //rational approx for X^{1/(2*inv_pow)} + MultiShiftFunction ApproxNegHalfPowerAction; //rational approx for X^{-1/(2*inv_pow)} + + //For the MD integration + MultiShiftFunction ApproxPowerMD ; //rational approx for X^{1/inv_pow} + MultiShiftFunction ApproxNegPowerMD; //rational approx for X^{-1/inv_pow} + MultiShiftFunction ApproxHalfPowerMD; //rational approx for X^{1/(2*inv_pow)} + MultiShiftFunction ApproxNegHalfPowerMD; //rational approx for X^{-1/(2*inv_pow)} + + private: + + FermionOperator & NumOp;// the basic operator + FermionOperator & DenOp;// the basic operator + FermionField PhiEven; // the pseudo fermion field for this trajectory + FermionField PhiOdd; // the pseudo fermion field for this trajectory + + //Generate the approximation to x^{1/inv_pow} (->approx) and x^{-1/inv_pow} (-> approx_inv) by an approx_degree degree rational approximation + //CG_tolerance is used to issue a warning if the approximation error is larger than the tolerance of the CG and is otherwise just stored in the MultiShiftFunction for use by the multi-shift + static void generateApprox(MultiShiftFunction &approx, MultiShiftFunction &approx_inv, int inv_pow, int approx_degree, double CG_tolerance, AlgRemez &remez){ + std::cout< CG_tolerance) + std::cout< schurOp(numerator ? 
NumOp : DenOp); + ConjugateGradientMultiShift msCG(MaxIter, approx); + msCG(schurOp,in, out); + } + virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionField &in, std::vector &out_elems, FermionField &out){ + SchurDifferentiableOperator schurOp(numerator ? NumOp : DenOp); + ConjugateGradientMultiShift msCG(MaxIter, approx); + msCG(schurOp,in, out_elems, out); + } + //Allow derived classes to override the gauge import + virtual void ImportGauge(const GaugeField &U){ + NumOp.ImportGauge(U); + DenOp.ImportGauge(U); + } + + public: + + // allow non-uniform tolerances + void SetTolerances(std::vector action_tolerance,std::vector md_tolerance) + { + assert(action_tolerance.size()==ApproxPowerAction.tolerances.size()); + assert( md_tolerance.size()==ApproxPowerMD.tolerances.size()); + + // Fix up the tolerances + for(int i=0;i &_NumOp, + FermionOperator &_DenOp, + const Params & p + ) : + NumOp(_NumOp), + DenOp(_DenOp), + PhiOdd (_NumOp.FermionRedBlackGrid()), + PhiEven(_NumOp.FermionRedBlackGrid()), + param(p) + { + std::cout< action_tolerance(ApproxHalfPowerAction.tolerances.size(),param.action_tolerance); + std::vector md_tolerance (ApproxHalfPowerMD.tolerances.size(),param.md_tolerance); + + SetTolerances(action_tolerance, md_tolerance); + + std::cout<Broadcast(0,r); + + if ( param.BoundsCheckFreq != 0 && (r % param.BoundsCheckFreq)==0 ) { + std::cout< MdagM(DenOp); + std::cout< MpvPhi_k (n_pv,NumOp.FermionRedBlackGrid()); + std::vector MpvMfMpvPhi_k(n_pv,NumOp.FermionRedBlackGrid()); + std::vector MfMpvPhi_k (n_f ,NumOp.FermionRedBlackGrid()); + + FermionField MpvPhi(NumOp.FermionRedBlackGrid()); + FermionField MfMpvPhi(NumOp.FermionRedBlackGrid()); + FermionField MpvMfMpvPhi(NumOp.FermionRedBlackGrid()); + FermionField Y(NumOp.FermionRedBlackGrid()); + + GaugeField tmp(NumOp.GaugeGrid()); + + ImportGauge(U); + + std::cout< MdagM(DenOp); + SchurDifferentiableOperator VdagV(NumOp); + + + RealD ak; + + dSdU = Zero(); + + // With these building blocks + // + // dS/dU = + // \sum_k -ak MfMpvPhi_k^dag [ dM^dag M + M^dag dM ] MfMpvPhi_k (1) + // + \sum_k -ak MpvMfMpvPhi_k^\dag [ dV^dag V + V^dag dV ] MpvPhi_k (2) + // -ak MpvPhi_k^dag [ dV^dag V + V^dag dV ] MpvMfMpvPhi_k (3) + + //(1) + std::cout< + Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_MIXED_PREC_H +#define QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_MIXED_PREC_H + +#include + +NAMESPACE_BEGIN(Grid); + + ///////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Generic rational approximation for ratios of operators utilizing the mixed precision multishift algorithm + // cf. GeneralEvenOddRational.h for details + ///////////////////////////////////////////////////////////////////////////////////////////////////////////// + + template + class GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction : public GeneralEvenOddRatioRationalPseudoFermionAction { + private: + typedef typename ImplD::FermionField FermionFieldD; + typedef typename ImplF::FermionField FermionFieldF; + + FermionOperator & NumOpD; + FermionOperator & DenOpD; + + FermionOperator & NumOpF; + FermionOperator & DenOpF; + + Integer ReliableUpdateFreq; + protected: + + //Action evaluation + //Allow derived classes to override the multishift CG + virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, FermionFieldD &out){ +#if 1 + SchurDifferentiableOperator schurOp(numerator ? NumOpD : DenOpD); + ConjugateGradientMultiShift msCG(MaxIter, approx); + msCG(schurOp,in, out); +#else + SchurDifferentiableOperator schurOpD(numerator ? NumOpD : DenOpD); + SchurDifferentiableOperator schurOpF(numerator ? NumOpF : DenOpF); + FermionFieldD inD(NumOpD.FermionRedBlackGrid()); + FermionFieldD outD(NumOpD.FermionRedBlackGrid()); + + // Action better with higher precision? + ConjugateGradientMultiShiftMixedPrec msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq); + msCG(schurOpD, in, out); +#endif + } + //Force evaluation + virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, std::vector &out_elems, FermionFieldD &out){ + SchurDifferentiableOperator schurOpD(numerator ? NumOpD : DenOpD); + SchurDifferentiableOperator schurOpF(numerator ? 
NumOpF : DenOpF); + + FermionFieldD inD(NumOpD.FermionRedBlackGrid()); + FermionFieldD outD(NumOpD.FermionRedBlackGrid()); + std::vector out_elemsD(out_elems.size(),NumOpD.FermionRedBlackGrid()); + ConjugateGradientMultiShiftMixedPrecCleanup msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq); + msCG(schurOpD, in, out_elems, out); + } + //Allow derived classes to override the gauge import + virtual void ImportGauge(const typename ImplD::GaugeField &Ud){ + + typename ImplF::GaugeField Uf(NumOpF.GaugeGrid()); + precisionChange(Uf, Ud); + + std::cout << "Importing "< &_NumOpD, FermionOperator &_DenOpD, + FermionOperator &_NumOpF, FermionOperator &_DenOpF, + const RationalActionParams & p, Integer _ReliableUpdateFreq + ) : GeneralEvenOddRatioRationalPseudoFermionAction(_NumOpD, _DenOpD, p), + ReliableUpdateFreq(_ReliableUpdateFreq), + NumOpD(_NumOpD), DenOpD(_DenOpD), + NumOpF(_NumOpF), DenOpF(_DenOpF) + {} + + virtual std::string action_name(){return "GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction";} + }; + +NAMESPACE_END(Grid); + +#endif diff --git a/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h b/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h index e968b8e4..d1e5b69d 100644 --- a/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h +++ b/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h @@ -40,249 +40,62 @@ NAMESPACE_BEGIN(Grid); // Here N/D \sim R_{-1/2} ~ (M^dagM)^{-1/2} template - class OneFlavourEvenOddRatioRationalPseudoFermionAction : public Action { + class OneFlavourEvenOddRatioRationalPseudoFermionAction : public GeneralEvenOddRatioRationalPseudoFermionAction { public: - - INHERIT_IMPL_TYPES(Impl); - typedef OneFlavourRationalParams Params; - Params param; - - MultiShiftFunction PowerHalf ; - MultiShiftFunction PowerNegHalf; - MultiShiftFunction PowerQuarter; - MultiShiftFunction PowerNegQuarter; - private: - - FermionOperator & NumOp;// the basic operator - FermionOperator & DenOp;// the basic operator - FermionField PhiEven; // the pseudo fermion field for this trajectory - FermionField PhiOdd; // the pseudo fermion field for this trajectory + static RationalActionParams transcribe(const Params &in){ + RationalActionParams out; + out.inv_pow = 2; + out.lo = in.lo; + out.hi = in.hi; + out.MaxIter = in.MaxIter; + out.action_tolerance = out.md_tolerance = in.tolerance; + out.action_degree = out.md_degree = in.degree; + out.precision = in.precision; + out.BoundsCheckFreq = in.BoundsCheckFreq; + return out; + } public: - OneFlavourEvenOddRatioRationalPseudoFermionAction(FermionOperator &_NumOp, - FermionOperator &_DenOp, - Params & p - ) : - NumOp(_NumOp), - DenOp(_DenOp), - PhiOdd (_NumOp.FermionRedBlackGrid()), - PhiEven(_NumOp.FermionRedBlackGrid()), - param(p) - { - AlgRemez remez(param.lo,param.hi,param.precision); + FermionOperator &_DenOp, + const Params & p + ) : + GeneralEvenOddRatioRationalPseudoFermionAction(_NumOp, _DenOp, transcribe(p)){} - // MdagM^(+- 1/2) - std::cout< + class OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction + : public GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction { + public: + typedef OneFlavourRationalParams Params; + private: + static RationalActionParams transcribe(const Params &in){ + RationalActionParams out; + out.inv_pow = 2; + out.lo = in.lo; + out.hi = in.hi; + out.MaxIter = in.MaxIter; + out.action_tolerance = out.md_tolerance = in.tolerance; + out.action_degree = out.md_degree = in.degree; + out.precision = in.precision; + 
out.BoundsCheckFreq = in.BoundsCheckFreq; + return out; } - - - virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) { - // S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi - // - // P(phi) = e^{- phi^dag (VdagV)^1/4 (MdagM)^-1/2 (VdagV)^1/4 phi} - // = e^{- phi^dag (VdagV)^1/4 (MdagM)^-1/4 (MdagM)^-1/4 (VdagV)^1/4 phi} - // - // Phi = (VdagV)^-1/4 Mdag^{1/4} eta - // - // P(eta) = e^{- eta^dag eta} - // - // e^{x^2/2 sig^2} => sig^2 = 0.5. - // - // So eta should be of width sig = 1/sqrt(2). + public: + OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction(FermionOperator &_NumOp, + FermionOperator &_DenOp, + FermionOperator &_NumOpF, + FermionOperator &_DenOpF, + const Params & p, Integer ReliableUpdateFreq + ) : + GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction(_NumOp, _DenOp,_NumOpF, _DenOpF, transcribe(p),ReliableUpdateFreq){} - RealD scale = std::sqrt(0.5); - - FermionField eta(NumOp.FermionGrid()); - FermionField etaOdd (NumOp.FermionRedBlackGrid()); - FermionField etaEven(NumOp.FermionRedBlackGrid()); - FermionField tmp(NumOp.FermionRedBlackGrid()); - - gaussian(pRNG,eta); eta=eta*scale; - - pickCheckerboard(Even,etaEven,eta); - pickCheckerboard(Odd,etaOdd,eta); - - NumOp.ImportGauge(U); - DenOp.ImportGauge(U); - - - // MdagM^1/4 eta - SchurDifferentiableOperator MdagM(DenOp); - ConjugateGradientMultiShift msCG_M(param.MaxIter,PowerQuarter); - msCG_M(MdagM,etaOdd,tmp); - - // VdagV^-1/4 MdagM^1/4 eta - SchurDifferentiableOperator VdagV(NumOp); - ConjugateGradientMultiShift msCG_V(param.MaxIter,PowerNegQuarter); - msCG_V(VdagV,tmp,PhiOdd); - - assert(NumOp.ConstEE() == 1); - assert(DenOp.ConstEE() == 1); - PhiEven = Zero(); - - }; - - ////////////////////////////////////////////////////// - // S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi - ////////////////////////////////////////////////////// - virtual RealD S(const GaugeField &U) { - - NumOp.ImportGauge(U); - DenOp.ImportGauge(U); - - FermionField X(NumOp.FermionRedBlackGrid()); - FermionField Y(NumOp.FermionRedBlackGrid()); - - // VdagV^1/4 Phi - SchurDifferentiableOperator VdagV(NumOp); - ConjugateGradientMultiShift msCG_V(param.MaxIter,PowerQuarter); - msCG_V(VdagV,PhiOdd,X); - - // MdagM^-1/4 VdagV^1/4 Phi - SchurDifferentiableOperator MdagM(DenOp); - ConjugateGradientMultiShift msCG_M(param.MaxIter,PowerNegQuarter); - msCG_M(MdagM,X,Y); - - // Randomly apply rational bounds checks. - auto grid = NumOp.FermionGrid(); - auto r=rand(); - grid->Broadcast(0,r); - if ( (r%param.BoundsCheckFreq)==0 ) { - FermionField gauss(NumOp.FermionRedBlackGrid()); - gauss = PhiOdd; - HighBoundCheck(MdagM,gauss,param.hi); - InverseSqrtBoundsCheck(param.MaxIter,param.tolerance*100,MdagM,gauss,PowerNegHalf); - } - - // Phidag VdagV^1/4 MdagM^-1/4 MdagM^-1/4 VdagV^1/4 Phi - RealD action = norm2(Y); - - return action; - }; - - // S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi - // - // Here, M is some 5D operator and V is the Pauli-Villars field - // N and D makeup the rat. 
poly of the M term and P and & makeup the rat.poly of the denom term - // - // Need - // dS_f/dU = chi^dag d[P/Q] N/D P/Q chi - // + chi^dag P/Q d[N/D] P/Q chi - // + chi^dag P/Q N/D d[P/Q] chi - // - // P/Q is expressed as partial fraction expansion: - // - // a0 + \sum_k ak/(V^dagV + bk) - // - // d[P/Q] is then - // - // \sum_k -ak [V^dagV+bk]^{-1} [ dV^dag V + V^dag dV ] [V^dag V + bk]^{-1} - // - // and similar for N/D. - // - // Need - // MpvPhi_k = [Vdag V + bk]^{-1} chi - // MpvPhi = {a0 + \sum_k ak [Vdag V + bk]^{-1} }chi - // - // MfMpvPhi_k = [MdagM+bk]^{-1} MpvPhi - // MfMpvPhi = {a0 + \sum_k ak [Mdag M + bk]^{-1} } MpvPhi - // - // MpvMfMpvPhi_k = [Vdag V + bk]^{-1} MfMpvchi - // - - virtual void deriv(const GaugeField &U,GaugeField & dSdU) { - - const int n_f = PowerNegHalf.poles.size(); - const int n_pv = PowerQuarter.poles.size(); - - std::vector MpvPhi_k (n_pv,NumOp.FermionRedBlackGrid()); - std::vector MpvMfMpvPhi_k(n_pv,NumOp.FermionRedBlackGrid()); - std::vector MfMpvPhi_k (n_f ,NumOp.FermionRedBlackGrid()); - - FermionField MpvPhi(NumOp.FermionRedBlackGrid()); - FermionField MfMpvPhi(NumOp.FermionRedBlackGrid()); - FermionField MpvMfMpvPhi(NumOp.FermionRedBlackGrid()); - FermionField Y(NumOp.FermionRedBlackGrid()); - - GaugeField tmp(NumOp.GaugeGrid()); - - NumOp.ImportGauge(U); - DenOp.ImportGauge(U); - - SchurDifferentiableOperator VdagV(NumOp); - SchurDifferentiableOperator MdagM(DenOp); - - ConjugateGradientMultiShift msCG_V(param.MaxIter,PowerQuarter); - ConjugateGradientMultiShift msCG_M(param.MaxIter,PowerNegHalf); - - msCG_V(VdagV,PhiOdd,MpvPhi_k,MpvPhi); - msCG_M(MdagM,MpvPhi,MfMpvPhi_k,MfMpvPhi); - msCG_V(VdagV,MfMpvPhi,MpvMfMpvPhi_k,MpvMfMpvPhi); - - RealD ak; - - dSdU = Zero(); - - // With these building blocks - // - // dS/dU = - // \sum_k -ak MfMpvPhi_k^dag [ dM^dag M + M^dag dM ] MfMpvPhi_k (1) - // + \sum_k -ak MpvMfMpvPhi_k^\dag [ dV^dag V + V^dag dV ] MpvPhi_k (2) - // -ak MpvPhi_k^dag [ dV^dag V + V^dag dV ] MpvMfMpvPhi_k (3) - - //(1) - for(int k=0;k & NumOp;// the basic operator @@ -73,15 +75,22 @@ NAMESPACE_BEGIN(Grid); remez.generateApprox(param.degree,1,2); PowerHalf.Init(remez,param.tolerance,false); PowerNegHalf.Init(remez,param.tolerance,true); + MDPowerNegHalf.Init(remez,param.mdtolerance,true); // MdagM^(+- 1/4) std::cout< MpvPhi_k (n_pv,NumOp.FermionGrid()); std::vector MpvMfMpvPhi_k(n_pv,NumOp.FermionGrid()); @@ -224,8 +233,8 @@ NAMESPACE_BEGIN(Grid); MdagMLinearOperator ,FermionField> MdagM(DenOp); MdagMLinearOperator ,FermionField> VdagV(NumOp); - ConjugateGradientMultiShift msCG_V(param.MaxIter,PowerQuarter); - ConjugateGradientMultiShift msCG_M(param.MaxIter,PowerNegHalf); + ConjugateGradientMultiShift msCG_V(param.MaxIter,MDPowerQuarter); + ConjugateGradientMultiShift msCG_M(param.MaxIter,MDPowerNegHalf); msCG_V(VdagV,Phi,MpvPhi_k,MpvPhi); msCG_M(MdagM,MpvPhi,MfMpvPhi_k,MfMpvPhi); @@ -244,7 +253,7 @@ NAMESPACE_BEGIN(Grid); //(1) for(int k=0;k #include #include +#include +#include #include #include diff --git a/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h b/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h index da628c75..ff9a6496 100644 --- a/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h +++ b/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h @@ -38,7 +38,7 @@ NAMESPACE_BEGIN(Grid); class TwoFlavourEvenOddRatioPseudoFermionAction : public Action { public: INHERIT_IMPL_TYPES(Impl); - + private: FermionOperator & NumOp;// the basic operator FermionOperator & DenOp;// the basic operator @@ -50,6 +50,8 @@ 
NAMESPACE_BEGIN(Grid); FermionField PhiOdd; // the pseudo fermion field for this trajectory FermionField PhiEven; // the pseudo fermion field for this trajectory + RealD RefreshAction; + public: TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator &_NumOp, FermionOperator &_DenOp, @@ -75,24 +77,22 @@ NAMESPACE_BEGIN(Grid); conformable(_NumOp.GaugeRedBlackGrid(), _DenOp.GaugeRedBlackGrid()); }; - virtual std::string action_name(){return "TwoFlavourEvenOddRatioPseudoFermionAction";} + virtual std::string action_name(){ + std::stringstream sstream; + sstream<<"TwoFlavourEvenOddRatioPseudoFermionAction det("< sig^2 = 0.5. @@ -100,39 +100,59 @@ NAMESPACE_BEGIN(Grid); RealD scale = std::sqrt(0.5); FermionField eta (NumOp.FermionGrid()); + gaussian(pRNG,eta); eta = eta * scale; + + refresh(U,eta); + } + + void refresh(const GaugeField &U, const FermionField &eta) { + + // P(phi) = e^{- phi^dag Vpc (MpcdagMpc)^-1 Vpcdag phi} + // + // NumOp == V + // DenOp == M + // FermionField etaOdd (NumOp.FermionRedBlackGrid()); FermionField etaEven(NumOp.FermionRedBlackGrid()); FermionField tmp (NumOp.FermionRedBlackGrid()); - gaussian(pRNG,eta); - pickCheckerboard(Even,etaEven,eta); pickCheckerboard(Odd,etaOdd,eta); NumOp.ImportGauge(U); DenOp.ImportGauge(U); + std::cout << " TwoFlavourRefresh: Imported gauge "< Mpc(DenOp); SchurDifferentiableOperator Vpc(NumOp); + std::cout << " TwoFlavourRefresh: Diff ops "< +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + +/////////////////////////////////////// +// Two flavour ratio +/////////////////////////////////////// +template +class TwoFlavourRatioEO4DPseudoFermionAction : public Action { +public: + INHERIT_IMPL_TYPES(Impl); + +private: + typedef FermionOperator FermOp; + FermionOperator & NumOp;// the basic operator + FermionOperator & DenOp;// the basic operator + + OperatorFunction &DerivativeSolver; + OperatorFunction &DerivativeDagSolver; + OperatorFunction &ActionSolver; + OperatorFunction &HeatbathSolver; + + FermionField phi4; // the pseudo fermion field for this trajectory + +public: + TwoFlavourRatioEO4DPseudoFermionAction(FermionOperator &_NumOp, + FermionOperator &_DenOp, + OperatorFunction & DS, + OperatorFunction & AS ) : + TwoFlavourRatioEO4DPseudoFermionAction(_NumOp,_DenOp, DS,DS,AS,AS) {}; + TwoFlavourRatioEO4DPseudoFermionAction(FermionOperator &_NumOp, + FermionOperator &_DenOp, + OperatorFunction & DS, + OperatorFunction & DDS, + OperatorFunction & AS, + OperatorFunction & HS + ) : NumOp(_NumOp), + DenOp(_DenOp), + DerivativeSolver(DS), + DerivativeDagSolver(DDS), + ActionSolver(AS), + HeatbathSolver(HS), + phi4(_NumOp.GaugeGrid()) + {}; + + virtual std::string action_name(){return "TwoFlavourRatioEO4DPseudoFermionAction";} + + virtual std::string LogParameters(){ + std::stringstream sstream; + sstream << GridLogMessage << "["< sig^2 = 0.5. + // + // So eta should be of width sig = 1/sqrt(2) and must multiply by 0.707.... + // + RealD scale = std::sqrt(0.5); + + FermionField eta4(NumOp.GaugeGrid()); + FermionField eta5(NumOp.FermionGrid()); + FermionField tmp(NumOp.FermionGrid()); + FermionField phi5(NumOp.FermionGrid()); + + gaussian(pRNG,eta4); + NumOp.ImportFourDimPseudoFermion(eta4,eta5); + NumOp.ImportGauge(U); + DenOp.ImportGauge(U); + + SchurRedBlackDiagMooeeSolve PrecSolve(HeatbathSolver); + + DenOp.M(eta5,tmp); // M eta + PrecSolve(NumOp,tmp,phi5); // phi = V^-1 M eta + phi5=phi5*scale; + std::cout << GridLogMessage << "4d pf refresh "<< norm2(phi5)<<"\n"; + // Project to 4d + NumOp.ExportFourDimPseudoFermion(phi5,phi4); + + }; + + ////////////////////////////////////////////////////// + // S = phi^dag (V^dag M^-dag)_11 (M^-1 V)_11 phi + ////////////////////////////////////////////////////// + virtual RealD S(const GaugeField &U) { + + NumOp.ImportGauge(U); + DenOp.ImportGauge(U); + + FermionField Y4(NumOp.GaugeGrid()); + FermionField X(NumOp.FermionGrid()); + FermionField Y(NumOp.FermionGrid()); + FermionField phi5(NumOp.FermionGrid()); + + MdagMLinearOperator ,FermionField> MdagMOp(DenOp); + SchurRedBlackDiagMooeeSolve PrecSolve(ActionSolver); + + NumOp.ImportFourDimPseudoFermion(phi4,phi5); + NumOp.M(phi5,X); // X= V phi + PrecSolve(DenOp,X,Y); // Y= (MdagM)^-1 Mdag Vdag phi = M^-1 V phi + NumOp.ExportFourDimPseudoFermion(Y,Y4); + + RealD action = norm2(Y4); + + return action; + }; + + ////////////////////////////////////////////////////// + // dS/du = 2 Re phi^dag (V^dag M^-dag)_11 (M^-1 d V)_11 phi + // - 2 Re phi^dag (dV^dag M^-dag)_11 (M^-1 dM M^-1 V)_11 phi + ////////////////////////////////////////////////////// + virtual void deriv(const GaugeField &U,GaugeField & dSdU) { + + NumOp.ImportGauge(U); + DenOp.ImportGauge(U); + + FermionField X(NumOp.FermionGrid()); + FermionField Y(NumOp.FermionGrid()); + FermionField 
phi(NumOp.FermionGrid()); + FermionField Vphi(NumOp.FermionGrid()); + FermionField MinvVphi(NumOp.FermionGrid()); + FermionField tmp4(NumOp.GaugeGrid()); + FermionField MdagInvMinvVphi(NumOp.FermionGrid()); + + GaugeField force(NumOp.GaugeGrid()); + + //Y=V phi + //X = (Mdag V phi + //Y = (Mdag M)^-1 Mdag V phi = M^-1 V Phi + NumOp.ImportFourDimPseudoFermion(phi4,phi); + NumOp.M(phi,Vphi); // V phi + SchurRedBlackDiagMooeeSolve PrecSolve(DerivativeSolver); + PrecSolve(DenOp,Vphi,MinvVphi);// M^-1 V phi + std::cout << GridLogMessage << "4d deriv solve "<< norm2(MinvVphi)<<"\n"; + + // Projects onto the physical space and back + NumOp.ExportFourDimPseudoFermion(MinvVphi,tmp4); + NumOp.ImportFourDimPseudoFermion(tmp4,Y); + + SchurRedBlackDiagMooeeDagSolve PrecDagSolve(DerivativeDagSolver); + // X = proj M^-dag V phi + // Need an adjoint solve + PrecDagSolve(DenOp,Y,MdagInvMinvVphi); + std::cout << GridLogMessage << "4d deriv solve dag "<< norm2(MdagInvMinvVphi)<<"\n"; + + // phi^dag (Vdag Mdag^-1) (M^-1 dV) phi + NumOp.MDeriv(force ,MdagInvMinvVphi , phi, DaggerNo ); dSdU=force; + + // phi^dag (dVdag Mdag^-1) (M^-1 V) phi + NumOp.MDeriv(force , phi, MdagInvMinvVphi ,DaggerYes ); dSdU=dSdU+force; + + // - 2 Re phi^dag (dV^dag M^-dag)_11 (M^-1 dM M^-1 V)_11 phi + DenOp.MDeriv(force,MdagInvMinvVphi,MinvVphi,DaggerNo); dSdU=dSdU-force; + DenOp.MDeriv(force,MinvVphi,MdagInvMinvVphi,DaggerYes); dSdU=dSdU-force; + + dSdU *= -1.0; + //dSdU = - Ta(dSdU); + + }; +}; + +NAMESPACE_END(Grid); + + diff --git a/Grid/qcd/action/scalar/ScalarInteractionAction.h b/Grid/qcd/action/scalar/ScalarInteractionAction.h index e04dd486..7708a489 100644 --- a/Grid/qcd/action/scalar/ScalarInteractionAction.h +++ b/Grid/qcd/action/scalar/ScalarInteractionAction.h @@ -47,7 +47,7 @@ private: const unsigned int N = Impl::Group::Dimension; typedef typename Field::vector_object vobj; - typedef CartesianStencil Stencil; + typedef CartesianStencil Stencil; SimpleCompressor compressor; int npoint = 2 * Ndim; @@ -82,7 +82,7 @@ public: virtual RealD S(const Field &p) { assert(p.Grid()->Nd() == Ndim); - static Stencil phiStencil(p.Grid(), npoint, 0, directions, displacements,0); + static Stencil phiStencil(p.Grid(), npoint, 0, directions, displacements); phiStencil.HaloExchange(p, compressor); Field action(p.Grid()), pshift(p.Grid()), phisquared(p.Grid()); phisquared = p * p; @@ -133,7 +133,7 @@ public: double interm_t = usecond(); // move this outside - static Stencil phiStencil(p.Grid(), npoint, 0, directions, displacements,0); + static Stencil phiStencil(p.Grid(), npoint, 0, directions, displacements); phiStencil.HaloExchange(p, compressor); double halo_t = usecond(); diff --git a/Grid/qcd/gparity/Gparity.h b/Grid/qcd/gparity/Gparity.h new file mode 100644 index 00000000..ce1c70eb --- /dev/null +++ b/Grid/qcd/gparity/Gparity.h @@ -0,0 +1,6 @@ +#ifndef GRID_GPARITY_H_ +#define GRID_GPARITY_H_ + +#include + +#endif diff --git a/Grid/qcd/gparity/GparityFlavour.cc b/Grid/qcd/gparity/GparityFlavour.cc new file mode 100644 index 00000000..4596f96b --- /dev/null +++ b/Grid/qcd/gparity/GparityFlavour.cc @@ -0,0 +1,34 @@ +#include + +NAMESPACE_BEGIN(Grid); + +const std::array GparityFlavour::sigma_mu = {{ + GparityFlavour(GparityFlavour::Algebra::SigmaX), + GparityFlavour(GparityFlavour::Algebra::SigmaY), + GparityFlavour(GparityFlavour::Algebra::SigmaZ) + }}; + +const std::array GparityFlavour::sigma_all = {{ + GparityFlavour(GparityFlavour::Algebra::Identity), + GparityFlavour(GparityFlavour::Algebra::SigmaX), + 
GparityFlavour(GparityFlavour::Algebra::SigmaY), + GparityFlavour(GparityFlavour::Algebra::SigmaZ), + GparityFlavour(GparityFlavour::Algebra::ProjPlus), + GparityFlavour(GparityFlavour::Algebra::ProjMinus) +}}; + +const std::array GparityFlavour::name = {{ + "SigmaX", + "MinusSigmaX", + "SigmaY", + "MinusSigmaY", + "SigmaZ", + "MinusSigmaZ", + "Identity", + "MinusIdentity", + "ProjPlus", + "MinusProjPlus", + "ProjMinus", + "MinusProjMinus"}}; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/gparity/GparityFlavour.h b/Grid/qcd/gparity/GparityFlavour.h new file mode 100644 index 00000000..b2009235 --- /dev/null +++ b/Grid/qcd/gparity/GparityFlavour.h @@ -0,0 +1,475 @@ +#ifndef GRID_QCD_GPARITY_FLAVOUR_H +#define GRID_QCD_GPARITY_FLAVOUR_H + +//Support for flavour-matrix operations acting on the G-parity flavour index + +#include + +NAMESPACE_BEGIN(Grid); + +class GparityFlavour { + public: + GRID_SERIALIZABLE_ENUM(Algebra, undef, + SigmaX, 0, + MinusSigmaX, 1, + SigmaY, 2, + MinusSigmaY, 3, + SigmaZ, 4, + MinusSigmaZ, 5, + Identity, 6, + MinusIdentity, 7, + ProjPlus, 8, + MinusProjPlus, 9, + ProjMinus, 10, + MinusProjMinus, 11 + ); + static constexpr unsigned int nSigma = 12; + static const std::array name; + static const std::array sigma_mu; + static const std::array sigma_all; + Algebra g; + public: + accelerator GparityFlavour(Algebra initg): g(initg) {} +}; + + + +// 0 1 x vector +// 1 0 +template +accelerator_inline void multFlavourSigmaX(iVector &ret, const iVector &rhs) +{ + ret(0) = rhs(1); + ret(1) = rhs(0); +}; +template +accelerator_inline void lmultFlavourSigmaX(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = rhs(1,0); + ret(0,1) = rhs(1,1); + ret(1,0) = rhs(0,0); + ret(1,1) = rhs(0,1); +}; +template +accelerator_inline void rmultFlavourSigmaX(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = rhs(0,1); + ret(0,1) = rhs(0,0); + ret(1,0) = rhs(1,1); + ret(1,1) = rhs(1,0); +}; + + +template +accelerator_inline void multFlavourMinusSigmaX(iVector &ret, const iVector &rhs) +{ + ret(0) = -rhs(1); + ret(1) = -rhs(0); +}; +template +accelerator_inline void lmultFlavourMinusSigmaX(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = -rhs(1,0); + ret(0,1) = -rhs(1,1); + ret(1,0) = -rhs(0,0); + ret(1,1) = -rhs(0,1); +}; +template +accelerator_inline void rmultFlavourMinusSigmaX(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = -rhs(0,1); + ret(0,1) = -rhs(0,0); + ret(1,0) = -rhs(1,1); + ret(1,1) = -rhs(1,0); +}; + + + + + +// 0 -i x vector +// i 0 +template +accelerator_inline void multFlavourSigmaY(iVector &ret, const iVector &rhs) +{ + ret(0) = timesMinusI(rhs(1)); + ret(1) = timesI(rhs(0)); +}; +template +accelerator_inline void lmultFlavourSigmaY(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = timesMinusI(rhs(1,0)); + ret(0,1) = timesMinusI(rhs(1,1)); + ret(1,0) = timesI(rhs(0,0)); + ret(1,1) = timesI(rhs(0,1)); +}; +template +accelerator_inline void rmultFlavourSigmaY(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = timesI(rhs(0,1)); + ret(0,1) = timesMinusI(rhs(0,0)); + ret(1,0) = timesI(rhs(1,1)); + ret(1,1) = timesMinusI(rhs(1,0)); +}; + +template +accelerator_inline void multFlavourMinusSigmaY(iVector &ret, const iVector &rhs) +{ + ret(0) = timesI(rhs(1)); + ret(1) = timesMinusI(rhs(0)); +}; +template +accelerator_inline void lmultFlavourMinusSigmaY(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = timesI(rhs(1,0)); + ret(0,1) = timesI(rhs(1,1)); + ret(1,0) = timesMinusI(rhs(0,0)); + ret(1,1) = timesMinusI(rhs(0,1)); +}; +template +accelerator_inline void 
rmultFlavourMinusSigmaY(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = timesMinusI(rhs(0,1)); + ret(0,1) = timesI(rhs(0,0)); + ret(1,0) = timesMinusI(rhs(1,1)); + ret(1,1) = timesI(rhs(1,0)); +}; + + + + + +// 1 0 x vector +// 0 -1 +template +accelerator_inline void multFlavourSigmaZ(iVector &ret, const iVector &rhs) +{ + ret(0) = rhs(0); + ret(1) = -rhs(1); +}; +template +accelerator_inline void lmultFlavourSigmaZ(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = rhs(0,0); + ret(0,1) = rhs(0,1); + ret(1,0) = -rhs(1,0); + ret(1,1) = -rhs(1,1); +}; +template +accelerator_inline void rmultFlavourSigmaZ(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = rhs(0,0); + ret(0,1) = -rhs(0,1); + ret(1,0) = rhs(1,0); + ret(1,1) = -rhs(1,1); +}; + + +template +accelerator_inline void multFlavourMinusSigmaZ(iVector &ret, const iVector &rhs) +{ + ret(0) = -rhs(0); + ret(1) = rhs(1); +}; +template +accelerator_inline void lmultFlavourMinusSigmaZ(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = -rhs(0,0); + ret(0,1) = -rhs(0,1); + ret(1,0) = rhs(1,0); + ret(1,1) = rhs(1,1); +}; +template +accelerator_inline void rmultFlavourMinusSigmaZ(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = -rhs(0,0); + ret(0,1) = rhs(0,1); + ret(1,0) = -rhs(1,0); + ret(1,1) = rhs(1,1); +}; + + + + + + +template +accelerator_inline void multFlavourIdentity(iVector &ret, const iVector &rhs) +{ + ret(0) = rhs(0); + ret(1) = rhs(1); +}; +template +accelerator_inline void lmultFlavourIdentity(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = rhs(0,0); + ret(0,1) = rhs(0,1); + ret(1,0) = rhs(1,0); + ret(1,1) = rhs(1,1); +}; +template +accelerator_inline void rmultFlavourIdentity(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = rhs(0,0); + ret(0,1) = rhs(0,1); + ret(1,0) = rhs(1,0); + ret(1,1) = rhs(1,1); +}; + +template +accelerator_inline void multFlavourMinusIdentity(iVector &ret, const iVector &rhs) +{ + ret(0) = -rhs(0); + ret(1) = -rhs(1); +}; +template +accelerator_inline void lmultFlavourMinusIdentity(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = -rhs(0,0); + ret(0,1) = -rhs(0,1); + ret(1,0) = -rhs(1,0); + ret(1,1) = -rhs(1,1); +}; +template +accelerator_inline void rmultFlavourMinusIdentity(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = -rhs(0,0); + ret(0,1) = -rhs(0,1); + ret(1,0) = -rhs(1,0); + ret(1,1) = -rhs(1,1); +}; + + + + + +//G-parity flavour projection 1/2(1+\sigma_2) +//1 -i +//i 1 +template +accelerator_inline void multFlavourProjPlus(iVector &ret, const iVector &rhs) +{ + ret(0) = 0.5*rhs(0) + 0.5*timesMinusI(rhs(1)); + ret(1) = 0.5*timesI(rhs(0)) + 0.5*rhs(1); +}; +template +accelerator_inline void lmultFlavourProjPlus(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = 0.5*rhs(0,0) + 0.5*timesMinusI(rhs(1,0)); + ret(0,1) = 0.5*rhs(0,1) + 0.5*timesMinusI(rhs(1,1)); + ret(1,0) = 0.5*timesI(rhs(0,0)) + 0.5*rhs(1,0); + ret(1,1) = 0.5*timesI(rhs(0,1)) + 0.5*rhs(1,1); +}; +template +accelerator_inline void rmultFlavourProjPlus(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = 0.5*rhs(0,0) + 0.5*timesI(rhs(0,1)); + ret(0,1) = 0.5*timesMinusI(rhs(0,0)) + 0.5*rhs(0,1); + ret(1,0) = 0.5*rhs(1,0) + 0.5*timesI(rhs(1,1)); + ret(1,1) = 0.5*timesMinusI(rhs(1,0)) + 0.5*rhs(1,1); +}; + + +template +accelerator_inline void multFlavourMinusProjPlus(iVector &ret, const iVector &rhs) +{ + ret(0) = -0.5*rhs(0) + 0.5*timesI(rhs(1)); + ret(1) = 0.5*timesMinusI(rhs(0)) - 0.5*rhs(1); +}; +template +accelerator_inline void lmultFlavourMinusProjPlus(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = -0.5*rhs(0,0) + 
0.5*timesI(rhs(1,0)); + ret(0,1) = -0.5*rhs(0,1) + 0.5*timesI(rhs(1,1)); + ret(1,0) = 0.5*timesMinusI(rhs(0,0)) - 0.5*rhs(1,0); + ret(1,1) = 0.5*timesMinusI(rhs(0,1)) - 0.5*rhs(1,1); +}; +template +accelerator_inline void rmultFlavourMinusProjPlus(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = -0.5*rhs(0,0) + 0.5*timesMinusI(rhs(0,1)); + ret(0,1) = 0.5*timesI(rhs(0,0)) - 0.5*rhs(0,1); + ret(1,0) = -0.5*rhs(1,0) + 0.5*timesMinusI(rhs(1,1)); + ret(1,1) = 0.5*timesI(rhs(1,0)) - 0.5*rhs(1,1); +}; + + + + + +//G-parity flavour projection 1/2(1-\sigma_2) +//1 i +//-i 1 +template +accelerator_inline void multFlavourProjMinus(iVector &ret, const iVector &rhs) +{ + ret(0) = 0.5*rhs(0) + 0.5*timesI(rhs(1)); + ret(1) = 0.5*timesMinusI(rhs(0)) + 0.5*rhs(1); +}; +template +accelerator_inline void lmultFlavourProjMinus(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = 0.5*rhs(0,0) + 0.5*timesI(rhs(1,0)); + ret(0,1) = 0.5*rhs(0,1) + 0.5*timesI(rhs(1,1)); + ret(1,0) = 0.5*timesMinusI(rhs(0,0)) + 0.5*rhs(1,0); + ret(1,1) = 0.5*timesMinusI(rhs(0,1)) + 0.5*rhs(1,1); +}; +template +accelerator_inline void rmultFlavourProjMinus(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = 0.5*rhs(0,0) + 0.5*timesMinusI(rhs(0,1)); + ret(0,1) = 0.5*timesI(rhs(0,0)) + 0.5*rhs(0,1); + ret(1,0) = 0.5*rhs(1,0) + 0.5*timesMinusI(rhs(1,1)); + ret(1,1) = 0.5*timesI(rhs(1,0)) + 0.5*rhs(1,1); +}; + + +template +accelerator_inline void multFlavourMinusProjMinus(iVector &ret, const iVector &rhs) +{ + ret(0) = -0.5*rhs(0) + 0.5*timesMinusI(rhs(1)); + ret(1) = 0.5*timesI(rhs(0)) - 0.5*rhs(1); +}; +template +accelerator_inline void lmultFlavourMinusProjMinus(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = -0.5*rhs(0,0) + 0.5*timesMinusI(rhs(1,0)); + ret(0,1) = -0.5*rhs(0,1) + 0.5*timesMinusI(rhs(1,1)); + ret(1,0) = 0.5*timesI(rhs(0,0)) - 0.5*rhs(1,0); + ret(1,1) = 0.5*timesI(rhs(0,1)) - 0.5*rhs(1,1); +}; +template +accelerator_inline void rmultFlavourMinusProjMinus(iMatrix &ret, const iMatrix &rhs) +{ + ret(0,0) = -0.5*rhs(0,0) + 0.5*timesI(rhs(0,1)); + ret(0,1) = 0.5*timesMinusI(rhs(0,0)) - 0.5*rhs(0,1); + ret(1,0) = -0.5*rhs(1,0) + 0.5*timesI(rhs(1,1)); + ret(1,1) = 0.5*timesMinusI(rhs(1,0)) - 0.5*rhs(1,1); +}; + + + + + + + + + + +template +accelerator_inline auto operator*(const GparityFlavour &G, const iVector &arg) +->typename std::enable_if, GparityFlavourTensorIndex>::value, iVector>::type +{ + iVector ret; + + switch (G.g) + { + case GparityFlavour::Algebra::SigmaX: + multFlavourSigmaX(ret, arg); break; + case GparityFlavour::Algebra::MinusSigmaX: + multFlavourMinusSigmaX(ret, arg); break; + case GparityFlavour::Algebra::SigmaY: + multFlavourSigmaY(ret, arg); break; + case GparityFlavour::Algebra::MinusSigmaY: + multFlavourMinusSigmaY(ret, arg); break; + case GparityFlavour::Algebra::SigmaZ: + multFlavourSigmaZ(ret, arg); break; + case GparityFlavour::Algebra::MinusSigmaZ: + multFlavourMinusSigmaZ(ret, arg); break; + case GparityFlavour::Algebra::Identity: + multFlavourIdentity(ret, arg); break; + case GparityFlavour::Algebra::MinusIdentity: + multFlavourMinusIdentity(ret, arg); break; + case GparityFlavour::Algebra::ProjPlus: + multFlavourProjPlus(ret, arg); break; + case GparityFlavour::Algebra::MinusProjPlus: + multFlavourMinusProjPlus(ret, arg); break; + case GparityFlavour::Algebra::ProjMinus: + multFlavourProjMinus(ret, arg); break; + case GparityFlavour::Algebra::MinusProjMinus: + multFlavourMinusProjMinus(ret, arg); break; + default: assert(0); + } + + return ret; +} + +template +accelerator_inline auto 
operator*(const GparityFlavour &G, const iMatrix &arg) +->typename std::enable_if, GparityFlavourTensorIndex>::value, iMatrix>::type +{ + iMatrix ret; + + switch (G.g) + { + case GparityFlavour::Algebra::SigmaX: + lmultFlavourSigmaX(ret, arg); break; + case GparityFlavour::Algebra::MinusSigmaX: + lmultFlavourMinusSigmaX(ret, arg); break; + case GparityFlavour::Algebra::SigmaY: + lmultFlavourSigmaY(ret, arg); break; + case GparityFlavour::Algebra::MinusSigmaY: + lmultFlavourMinusSigmaY(ret, arg); break; + case GparityFlavour::Algebra::SigmaZ: + lmultFlavourSigmaZ(ret, arg); break; + case GparityFlavour::Algebra::MinusSigmaZ: + lmultFlavourMinusSigmaZ(ret, arg); break; + case GparityFlavour::Algebra::Identity: + lmultFlavourIdentity(ret, arg); break; + case GparityFlavour::Algebra::MinusIdentity: + lmultFlavourMinusIdentity(ret, arg); break; + case GparityFlavour::Algebra::ProjPlus: + lmultFlavourProjPlus(ret, arg); break; + case GparityFlavour::Algebra::MinusProjPlus: + lmultFlavourMinusProjPlus(ret, arg); break; + case GparityFlavour::Algebra::ProjMinus: + lmultFlavourProjMinus(ret, arg); break; + case GparityFlavour::Algebra::MinusProjMinus: + lmultFlavourMinusProjMinus(ret, arg); break; + default: assert(0); + } + + return ret; +} + +template +accelerator_inline auto operator*(const iMatrix &arg, const GparityFlavour &G) +->typename std::enable_if, GparityFlavourTensorIndex>::value, iMatrix>::type +{ + iMatrix ret; + + switch (G.g) + { + case GparityFlavour::Algebra::SigmaX: + rmultFlavourSigmaX(ret, arg); break; + case GparityFlavour::Algebra::MinusSigmaX: + rmultFlavourMinusSigmaX(ret, arg); break; + case GparityFlavour::Algebra::SigmaY: + rmultFlavourSigmaY(ret, arg); break; + case GparityFlavour::Algebra::MinusSigmaY: + rmultFlavourMinusSigmaY(ret, arg); break; + case GparityFlavour::Algebra::SigmaZ: + rmultFlavourSigmaZ(ret, arg); break; + case GparityFlavour::Algebra::MinusSigmaZ: + rmultFlavourMinusSigmaZ(ret, arg); break; + case GparityFlavour::Algebra::Identity: + rmultFlavourIdentity(ret, arg); break; + case GparityFlavour::Algebra::MinusIdentity: + rmultFlavourMinusIdentity(ret, arg); break; + case GparityFlavour::Algebra::ProjPlus: + rmultFlavourProjPlus(ret, arg); break; + case GparityFlavour::Algebra::MinusProjPlus: + rmultFlavourMinusProjPlus(ret, arg); break; + case GparityFlavour::Algebra::ProjMinus: + rmultFlavourProjMinus(ret, arg); break; + case GparityFlavour::Algebra::MinusProjMinus: + rmultFlavourMinusProjMinus(ret, arg); break; + default: assert(0); + } + + return ret; +} + +NAMESPACE_END(Grid); + +#endif // include guard diff --git a/Grid/qcd/hmc/GenericHMCrunner.h b/Grid/qcd/hmc/GenericHMCrunner.h index 98e8175a..727b3e24 100644 --- a/Grid/qcd/hmc/GenericHMCrunner.h +++ b/Grid/qcd/hmc/GenericHMCrunner.h @@ -129,18 +129,10 @@ public: Runner(S); } - ////////////////////////////////////////////////////////////////// - -private: - template - void Runner(SmearingPolicy &Smearing) { - auto UGrid = Resources.GetCartesian(); - Resources.AddRNGs(); - Field U(UGrid); - - // Can move this outside? - typedef IntegratorType TheIntegrator; - TheIntegrator MDynamics(UGrid, Parameters.MD, TheAction, Smearing); + //Use the checkpointer to initialize the RNGs and the gauge field, writing the resulting gauge field into U. + //This is called automatically by Run but may be useful elsewhere, e.g. 
for integrator tuning experiments + void initializeGaugeFieldAndRNGs(Field &U){ + if(!Resources.haveRNGs()) Resources.AddRNGs(); if (Parameters.StartingType == "HotStart") { // Hot start @@ -159,14 +151,43 @@ private: Resources.GetCheckPointer()->CheckpointRestore(Parameters.StartTrajectory, U, Resources.GetSerialRNG(), Resources.GetParallelRNG()); + } else if (Parameters.StartingType == "CheckpointStartReseed") { + // Same as CheckpointStart but reseed the RNGs using the fixed integer seeding used for ColdStart and HotStart + // Useful for creating new evolution streams from an existing stream + + // WARNING: the checkpointer does not presently allow the RNG state and the gauge field to be restored separately, + // so an existing RNG checkpoint must be loaded first; make sure one is available and named correctly + Resources.GetCheckPointer()->CheckpointRestore(Parameters.StartTrajectory, U, + Resources.GetSerialRNG(), + Resources.GetParallelRNG()); + Resources.SeedFixedIntegers(); } else { // others std::cout << GridLogError << "Unrecognized StartingType\n"; std::cout << GridLogError - << "Valid [HotStart, ColdStart, TepidStart, CheckpointStart]\n"; + << "Valid [HotStart, ColdStart, TepidStart, CheckpointStart, CheckpointStartReseed]\n"; exit(1); } + } + + + + ////////////////////////////////////////////////////////////////// + +private: + template + void Runner(SmearingPolicy &Smearing) { + auto UGrid = Resources.GetCartesian(); + Field U(UGrid); + + initializeGaugeFieldAndRNGs(U); + + typedef IntegratorType TheIntegrator; + TheIntegrator MDynamics(UGrid, Parameters.MD, TheAction, Smearing); + + // Sets the momentum filter + MDynamics.setMomentumFilter(*(Resources.GetMomentumFilter())); Smearing.set_Field(U); diff --git a/Grid/qcd/hmc/HMC.h b/Grid/qcd/hmc/HMC.h index 44674ea5..d4739fb0 100644 --- a/Grid/qcd/hmc/HMC.h +++ b/Grid/qcd/hmc/HMC.h @@ -34,6 +34,7 @@ directory * @brief Classes for Hybrid Monte Carlo update * * @author Guido Cossu + * @author Peter Boyle */ //-------------------------------------------------------------------- #pragma once @@ -52,6 +53,7 @@ struct HMCparameters: Serializable { Integer, Trajectories, /* @brief Number of sweeps in this run */ bool, MetropolisTest, Integer, NoMetropolisUntil, + bool, PerformRandomShift, /* @brief Randomly shift the gauge configuration at the start of a trajectory */ std::string, StartingType, IntegratorParameters, MD) @@ -62,6 +64,7 @@ struct HMCparameters: Serializable { StartTrajectory = 0; Trajectories = 10; StartingType = "HotStart"; + PerformRandomShift = true; ///////////////////////////////// } @@ -82,6 +85,7 @@ struct HMCparameters: Serializable { std::cout << GridLogMessage << "[HMC parameters] Start trajectory : " << StartTrajectory << "\n"; std::cout << GridLogMessage << "[HMC parameters] Metropolis test (on/off): " << std::boolalpha << MetropolisTest << "\n"; std::cout << GridLogMessage << "[HMC parameters] Thermalization trajs : " << NoMetropolisUntil << "\n"; + std::cout << GridLogMessage << "[HMC parameters] Doing random shift : " << std::boolalpha << PerformRandomShift << "\n"; std::cout << GridLogMessage << "[HMC parameters] Starting type : " << StartingType << "\n"; MD.print_parameters(); } @@ -94,6 +98,7 @@ private: const HMCparameters Params; typedef typename IntegratorType::Field Field; + typedef typename IntegratorType::FieldImplementation FieldImplementation; typedef std::vector< HmcObservable * > ObsListType; //pass these from the resource manager
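A minimal sketch of driving the new options from user code (TheHMC and the runner type are assumed from the standard Grid HMC examples, not from this patch):

  GenericHMCRunner<MinimumNorm2> TheHMC;              // assumed typical runner
  TheHMC.Parameters.StartingType       = "CheckpointStartReseed"; // branch a new stream
  TheHMC.Parameters.StartTrajectory    = 100;         // trajectory whose checkpoint (and RNG file) is read
  TheHMC.Parameters.PerformRandomShift = false;       // new flag, defaults to true

The reseed happens after the restore, so the gauge configuration is inherited while the serial and parallel RNGs restart from the fixed integer seeds.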
@@ -115,22 +120,17 @@ private: random(sRNG, rn_test); - std::cout << GridLogMessage - << "--------------------------------------------------\n"; - std::cout << GridLogMessage << "exp(-dH) = " << prob - << " Random = " << rn_test << "\n"; - std::cout << GridLogMessage - << "Acc. Probability = " << ((prob < 1.0) ? prob : 1.0) << "\n"; + std::cout << GridLogHMC << "--------------------------------------------------\n"; + std::cout << GridLogHMC << "exp(-dH) = " << prob << " Random = " << rn_test << "\n"; + std::cout << GridLogHMC << "Acc. Probability = " << ((prob < 1.0) ? prob : 1.0) << "\n"; if ((prob > 1.0) || (rn_test <= prob)) { // accepted - std::cout << GridLogMessage << "Metropolis_test -- ACCEPTED\n"; - std::cout << GridLogMessage - << "--------------------------------------------------\n"; + std::cout << GridLogHMC << "Metropolis_test -- ACCEPTED\n"; + std::cout << GridLogHMC << "--------------------------------------------------\n"; return true; } else { // rejected - std::cout << GridLogMessage << "Metropolis_test -- REJECTED\n"; - std::cout << GridLogMessage - << "--------------------------------------------------\n"; + std::cout << GridLogHMC << "Metropolis_test -- REJECTED\n"; + std::cout << GridLogHMC << "--------------------------------------------------\n"; return false; } } @@ -139,19 +139,80 @@ private: // Evolution ///////////////////////////////////////////////////////// RealD evolve_hmc_step(Field &U) { - TheIntegrator.refresh(U, sRNG, pRNG); // set U and initialize P and phi's - RealD H0 = TheIntegrator.S(U); // initial state action + GridBase *Grid = U.Grid(); + + if(Params.PerformRandomShift){ +#if 0 + ////////////////////////////////////////////////////////////////////////////////////////////////////// + // Mainly for DDHMC perform a random translation of U modulo volume + ////////////////////////////////////////////////////////////////////////////////////////////////////// + std::cout << GridLogMessage << "--------------------------------------------------\n"; + std::cout << GridLogMessage << "Random shifting gauge field by ["; + + std::vector Umu(Grid->Nd(), U.Grid()); + for(int mu=0;mu<Grid->Nd();mu++) Umu[mu] = PeekIndex(U, mu); + + for(int d=0;d<Grid->Nd();d++) { + + int L = Grid->GlobalDimensions()[d]; + + RealD rn_uniform; random(sRNG, rn_uniform); + + int shift = (int) (rn_uniform*L); + + std::cout << shift; + if(d<Grid->Nd()-1) std::cout <<","; + else std::cout <<"]\n"; + + //shift all fields together in a way that respects the gauge BCs + for(int mu=0; mu < Grid->Nd(); mu++) + Umu[mu] = FieldImplementation::CshiftLink(Umu[mu],d,shift); + + for(int mu=0;mu<Grid->Nd();mu++) PokeIndex(U,Umu[mu],mu); + } + std::cout << GridLogMessage << "--------------------------------------------------\n"; +#endif + } + + TheIntegrator.reset_timer(); + + ////////////////////////////////////////////////////////////////////////////////////////////////////// + // set U and initialize P and phi's + ////////////////////////////////////////////////////////////////////////////////////////////////////// + std::cout << GridLogMessage << "--------------------------------------------------\n"; + std::cout << GridLogMessage << "Refresh momenta and pseudofermions"; + TheIntegrator.refresh(U, sRNG, pRNG); + std::cout << GridLogMessage << "--------------------------------------------------\n"; + + ////////////////////////////////////////////////////////////////////////////////////////////////////// + // initial state action + ////////////////////////////////////////////////////////////////////////////////////////////////////// + std::cout << GridLogMessage << 
"--------------------------------------------------\n"; + std::cout << GridLogMessage << "Compute initial action"; + RealD H0 = TheIntegrator.Sinitial(U); + std::cout << GridLogMessage << "--------------------------------------------------\n"; std::streamsize current_precision = std::cout.precision(); std::cout.precision(15); - std::cout << GridLogMessage << "Total H before trajectory = " << H0 << "\n"; + std::cout << GridLogHMC << "Total H before trajectory = " << H0 << "\n"; std::cout.precision(current_precision); + std::cout << GridLogMessage << "--------------------------------------------------\n"; + std::cout << GridLogMessage << " Molecular Dynamics evolution "; TheIntegrator.integrate(U); + std::cout << GridLogMessage << "--------------------------------------------------\n"; - RealD H1 = TheIntegrator.S(U); // updated state action + ////////////////////////////////////////////////////////////////////////////////////////////////////// + // updated state action + ////////////////////////////////////////////////////////////////////////////////////////////////////// + std::cout << GridLogMessage << "--------------------------------------------------\n"; + std::cout << GridLogMessage << "Compute final action"; + RealD H1 = TheIntegrator.S(U); + std::cout << GridLogMessage << "--------------------------------------------------\n"; + + /////////////////////////////////////////////////////////// if(0){ std::cout << "------------------------- Reversibility test" << std::endl; @@ -163,17 +224,16 @@ private: } /////////////////////////////////////////////////////////// - std::cout.precision(15); - std::cout << GridLogMessage << "Total H after trajectory = " << H1 - << " dH = " << H1 - H0 << "\n"; + + std::cout << GridLogHMC << "--------------------------------------------------\n"; + std::cout << GridLogHMC << "Total H after trajectory = " << H1 << " dH = " << H1 - H0 << "\n"; + std::cout << GridLogHMC << "--------------------------------------------------\n"; + std::cout.precision(current_precision); return (H1 - H0); } - - - public: ///////////////////////////////////////// @@ -195,10 +255,13 @@ public: // Actual updates (evolve a copy Ucopy then copy back eventually) unsigned int FinalTrajectory = Params.Trajectories + Params.NoMetropolisUntil + Params.StartTrajectory; + for (int traj = Params.StartTrajectory; traj < FinalTrajectory; ++traj) { - std::cout << GridLogMessage << "-- # Trajectory = " << traj << "\n"; + + std::cout << GridLogHMC << "-- # Trajectory = " << traj << "\n"; + if (traj < Params.StartTrajectory + Params.NoMetropolisUntil) { - std::cout << GridLogMessage << "-- Thermalization" << std::endl; + std::cout << GridLogHMC << "-- Thermalization" << std::endl; } double t0=usecond(); @@ -207,20 +270,19 @@ public: DeltaH = evolve_hmc_step(Ucopy); // Metropolis-Hastings test bool accept = true; - if (traj >= Params.StartTrajectory + Params.NoMetropolisUntil) { + if (Params.MetropolisTest && traj >= Params.StartTrajectory + Params.NoMetropolisUntil) { accept = metropolis_test(DeltaH); } else { - std::cout << GridLogMessage << "Skipping Metropolis test" << std::endl; + std::cout << GridLogHMC << "Skipping Metropolis test" << std::endl; } if (accept) Ucur = Ucopy; - - double t1=usecond(); - std::cout << GridLogMessage << "Total time for trajectory (s): " << (t1-t0)/1e6 << std::endl; + std::cout << GridLogHMC << "Total time for trajectory (s): " << (t1-t0)/1e6 << std::endl; + TheIntegrator.print_timer(); for (int obs = 0; obs < Observables.size(); obs++) { std::cout << GridLogDebug 
<< "Observables # " << obs << std::endl; @@ -228,7 +290,7 @@ public: std::cout << GridLogDebug << "Observables pointer " << Observables[obs] << std::endl; Observables[obs]->TrajectoryComplete(traj + 1, Ucur, sRNG, pRNG); } - std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::::" << std::endl; + std::cout << GridLogHMC << ":::::::::::::::::::::::::::::::::::::::::::" << std::endl; } } diff --git a/Grid/qcd/hmc/HMCModules.h b/Grid/qcd/hmc/HMCModules.h index 4c61a006..cf0edd26 100644 --- a/Grid/qcd/hmc/HMCModules.h +++ b/Grid/qcd/hmc/HMCModules.h @@ -80,7 +80,9 @@ public: std::cout << GridLogError << "Seeds not initialized" << std::endl; exit(1); } + std::cout << GridLogMessage << "Reseeding serial RNG with seed vector " << SerialSeeds << std::endl; sRNG_.SeedFixedIntegers(SerialSeeds); + std::cout << GridLogMessage << "Reseeding parallel RNG with seed vector " << ParallelSeeds << std::endl; pRNG_->SeedFixedIntegers(ParallelSeeds); } }; diff --git a/Grid/qcd/hmc/HMCResourceManager.h b/Grid/qcd/hmc/HMCResourceManager.h index 783e4890..19bee923 100644 --- a/Grid/qcd/hmc/HMCResourceManager.h +++ b/Grid/qcd/hmc/HMCResourceManager.h @@ -72,6 +72,8 @@ class HMCResourceManager { typedef HMCModuleBase< BaseHmcCheckpointer > CheckpointerBaseModule; typedef HMCModuleBase< HmcObservable > ObservableBaseModule; typedef ActionModuleBase< Action, GridModule > ActionBaseModule; + typedef typename ImplementationPolicy::Field MomentaField; + typedef typename ImplementationPolicy::Field Field; // Named storage for grid pairs (std + red-black) std::unordered_map Grids; @@ -80,6 +82,9 @@ class HMCResourceManager { // SmearingModule Smearing; std::unique_ptr CP; + // Momentum filter + std::unique_ptr > Filter; + // A vector of HmcObservable modules std::vector > ObservablesList; @@ -90,6 +95,7 @@ class HMCResourceManager { bool have_RNG; bool have_CheckPointer; + bool have_Filter; // NOTE: operator << is not overloaded for std::vector // so this function is necessary @@ -101,7 +107,7 @@ class HMCResourceManager { public: - HMCResourceManager() : have_RNG(false), have_CheckPointer(false) {} + HMCResourceManager() : have_RNG(false), have_CheckPointer(false), have_Filter(false) {} template void initialize(ReaderClass &Read){ @@ -129,6 +135,7 @@ public: RNGModuleParameters RNGpar(Read); SetRNGSeeds(RNGpar); + // Observables auto &ObsFactory = HMC_ObservablesModuleFactory::getInstance(); Read.push(observable_string);// here must check if existing... 
@@ -208,6 +215,16 @@ public: AddGrid(s, Mod); } + void SetMomentumFilter( MomentumFilterBase * MomFilter) { + assert(have_Filter==false); + Filter = std::unique_ptr >(MomFilter); + have_Filter = true; + } + MomentumFilterBase *GetMomentumFilter(void) { + if ( !have_Filter) + SetMomentumFilter(new MomentumFilterNone()); + return Filter.get(); + } GridCartesian* GetCartesian(std::string s = "") { if (s.empty()) s = Grids.begin()->first; @@ -226,6 +243,9 @@ public: ////////////////////////////////////////////////////// // Random number generators ////////////////////////////////////////////////////// + + //Return true if the RNG objects have been instantiated + bool haveRNGs() const{ return have_RNG; } void AddRNGs(std::string s = "") { // Couple the RNGs to the GridModule tagged by s diff --git a/Grid/qcd/hmc/integrators/Integrator.h b/Grid/qcd/hmc/integrators/Integrator.h index aa28c6c8..62e7ad44 100644 --- a/Grid/qcd/hmc/integrators/Integrator.h +++ b/Grid/qcd/hmc/integrators/Integrator.h @@ -33,7 +33,6 @@ directory #define INTEGRATOR_INCLUDED #include -#include "MomentumFilter.h" NAMESPACE_BEGIN(Grid); @@ -64,9 +63,10 @@ public: }; /*! @brief Class for Molecular Dynamics management */ -template +template class Integrator { protected: + typedef FieldImplementation_ FieldImplementation; typedef typename FieldImplementation::Field MomentaField; //for readability typedef typename FieldImplementation::Field Field; @@ -119,36 +119,65 @@ protected: } } update_P_hireps{}; + void update_P(MomentaField& Mom, Field& U, int level, double ep) { // input U actually not used in the fundamental case // Fundamental updates, include smearing for (int a = 0; a < as[level].actions.size(); ++a) { + double start_full = usecond(); Field force(U.Grid()); conformable(U.Grid(), Mom.Grid()); Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared); double start_force = usecond(); + + std::cout << GridLogMessage << "AuditForce["<deriv_timer_start(); as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta + as[level].actions.at(a)->deriv_timer_stop(); + + std::cout << GridLogMessage << "AuditForce["<is_smeared << std::endl; + auto name = as[level].actions.at(a)->action_name(); if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force); + force = FieldImplementation::projectForce(force); // Ta for gauge fields double end_force = usecond(); - Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites()); - std::cout << GridLogIntegrator << "["<applyFilter(force); + std::cout << GridLogIntegrator << " update_P : Level [" << level <<"]["<gSites()); //average per-site norm. nb. 
norm2(latt) = \sum_x norm2(latt[x]) + Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR; + + Real force_max = std::sqrt(maxLocalNorm2(force)); + Real impulse_max = force_max * ep * HMC_MOMENTUM_DENOMINATOR; + + as[level].actions.at(a)->deriv_log(force_abs,force_max,impulse_abs,impulse_max); + + std::cout << GridLogIntegrator<< "["<applyFilter(Mom); } void update_U(Field& U, double ep) @@ -162,8 +191,12 @@ protected: void update_U(MomentaField& Mom, Field& U, double ep) { + MomentaField MomFiltered(Mom.Grid()); + MomFiltered = Mom; + MomFilter->applyFilter(MomFiltered); + // exponential of Mom*U in the gauge fields case - FieldImplementation::update_field(Mom, U, ep); + FieldImplementation::update_field(MomFiltered, U, ep); // Update the smeared fields, can be implemented as observer Smearer.set_Field(U); @@ -206,6 +239,77 @@ public: const MomentaField & getMomentum() const{ return P; } + void reset_timer(void) + { + for (int level = 0; level < as.size(); ++level) { + for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) { + as[level].actions.at(actionID)->reset_timer(); + } + } + } + void print_timer(void) + { + std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::" << std::endl; + std::cout << GridLogMessage << " Refresh cumulative timings "<action_name() + <<"["<refresh_us*1.0e-6<<" s"<< std::endl; + } + } + std::cout << GridLogMessage << "--------------------------- "<action_name() + <<"["<S_us*1.0e-6<<" s"<< std::endl; + } + } + std::cout << GridLogMessage << "--------------------------- "<action_name() + <<"["<deriv_us*1.0e-6<<" s"<< std::endl; + } + } + std::cout << GridLogMessage << "--------------------------- "<action_name() + <<"["<deriv_max_average() + <<" norm " << as[level].actions.at(actionID)->deriv_norm_average() + <<" Fdt max " << as[level].actions.at(actionID)->Fdt_max_average() + <<" Fdt norm " << as[level].actions.at(actionID)->Fdt_norm_average() + <<" calls " << as[level].actions.at(actionID)->deriv_num + << std::endl; + } + } + std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::"<< std::endl; + } + void print_parameters() { std::cout << GridLogMessage << "[Integrator] Name : "<< integrator_name() << std::endl; @@ -224,7 +328,6 @@ public: } } std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::"<< std::endl; - } void reverse_momenta() @@ -249,15 +352,19 @@ public: void refresh(Field& U, GridSerialRNG & sRNG, GridParallelRNG& pRNG) { assert(P.Grid() == U.Grid()); - std::cout << GridLogIntegrator << "Integrator refresh\n"; + std::cout << GridLogIntegrator << "Integrator refresh" << std::endl; + std::cout << GridLogIntegrator << "Generating momentum" << std::endl; FieldImplementation::generate_momenta(P, sRNG, pRNG); // Update the smeared fields, can be implemented as observer // necessary to keep the fields updated even after a reject // of the Metropolis + std::cout << GridLogIntegrator << "Updating smeared fields" << std::endl; Smearer.set_Field(U); // Set the (eventual) representations gauge fields + + std::cout << GridLogIntegrator << "Updating representations" << std::endl; Representations.update(U); // The Smearer is attached to a pointer of the gauge field @@ -267,15 +374,24 @@ public: for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) { // get gauge field from the SmearingPolicy and // based on the boolean is_smeared in actionID + auto name = as[level].actions.at(actionID)->action_name(); + std::cout << GridLogMessage << "refresh [" << level << "][" << 
actionID << "] "<is_smeared); + + std::cout << GridLogMessage << "AuditRefresh["<refresh_timer_start(); as[level].actions.at(actionID)->refresh(Us, sRNG, pRNG); + as[level].actions.at(actionID)->refresh_timer_stop(); + std::cout << GridLogMessage << "AuditRefresh["<applyFilter(P); } // to be used by the actionlevel class to iterate @@ -306,13 +422,17 @@ public: // Actions for (int level = 0; level < as.size(); ++level) { for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) { + // get gauge field from the SmearingPolicy and // based on the boolean is_smeared in actionID Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared); std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl; + as[level].actions.at(actionID)->S_timer_start(); Hterm = as[level].actions.at(actionID)->S(Us); + as[level].actions.at(actionID)->S_timer_stop(); std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl; H += Hterm; + } as[level].apply(S_hireps, Representations, level, H); } @@ -320,6 +440,52 @@ public: return H; } + struct _Sinitial { + template + void operator()(std::vector*> repr_set, Repr& Rep, int level, RealD& H) { + + for (int a = 0; a < repr_set.size(); ++a) { + + RealD Hterm = repr_set.at(a)->Sinitial(Rep.U); + + std::cout << GridLogMessage << "Sinitial Level " << level << " term " << a << " H Hirep = " << Hterm << std::endl; + H += Hterm; + + } + } + } Sinitial_hireps{}; + + RealD Sinitial(Field& U) + { // here also U not used + + std::cout << GridLogIntegrator << "Integrator initial action\n"; + + RealD H = - FieldImplementation::FieldSquareNorm(P)/HMC_MOMENTUM_DENOMINATOR; // - trace (P*P)/denom + + RealD Hterm; + + // Actions + for (int level = 0; level < as.size(); ++level) { + for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) { + // get gauge field from the SmearingPolicy and + // based on the boolean is_smeared in actionID + Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared); + std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl; + as[level].actions.at(actionID)->S_timer_start(); + + Hterm = as[level].actions.at(actionID)->Sinitial(Us); + as[level].actions.at(actionID)->S_timer_stop(); + + std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl; + H += Hterm; + } + as[level].apply(Sinitial_hireps, Representations, level, H); + } + + return H; + } + + void integrate(Field& U) { // reset the clocks diff --git a/Grid/qcd/hmc/integrators/Integrator_algorithm.h b/Grid/qcd/hmc/integrators/Integrator_algorithm.h index b05c4ea8..9c70fd1f 100644 --- a/Grid/qcd/hmc/integrators/Integrator_algorithm.h +++ b/Grid/qcd/hmc/integrators/Integrator_algorithm.h @@ -92,10 +92,11 @@ NAMESPACE_BEGIN(Grid); * P 1/2 P 1/2 */ -template > -class LeapFrog : public Integrator +template > +class LeapFrog : public Integrator { public: + typedef FieldImplementation_ FieldImplementation; typedef LeapFrog Algorithm; INHERIT_FIELD_TYPES(FieldImplementation); @@ -135,13 +136,14 @@ public: } }; -template > -class MinimumNorm2 : public Integrator +template > +class MinimumNorm2 : public Integrator { private: const RealD lambda = 0.1931833275037836; public: + typedef FieldImplementation_ FieldImplementation; INHERIT_FIELD_TYPES(FieldImplementation); MinimumNorm2(GridBase* grid, IntegratorParameters Par, ActionSet& Aset, SmearingPolicy& Sm) @@ -192,8 +194,8 @@ public: } }; -template > 
-class ForceGradient : public Integrator +template > +class ForceGradient : public Integrator { private: const RealD lambda = 1.0 / 6.0; @@ -202,6 +204,7 @@ private: const RealD theta = 0.0; public: + typedef FieldImplementation_ FieldImplementation; INHERIT_FIELD_TYPES(FieldImplementation); // Looks like dH scales as dt^4. tested wilson/wilson 2 level. @@ -227,7 +230,8 @@ public: // Presently 4 force evals, and should have 3, so 1.33x too expensive. // could reduce this with sloppy CG to perhaps 1.15x too expensive // even without prediction. - this->update_P(Pfg, Ufg, level, 1.0); + this->update_P(Pfg, Ufg, level, fg_dt); + Pfg = Pfg*(1.0/fg_dt); this->update_U(Pfg, Ufg, fg_dt); this->update_P(Ufg, level, ep); } diff --git a/Grid/qcd/modules/Registration.h b/Grid/qcd/modules/Registration.h index 28a9fdae..db43abe1 100644 --- a/Grid/qcd/modules/Registration.h +++ b/Grid/qcd/modules/Registration.h @@ -78,13 +78,13 @@ static Registrar, // Now a specific registration with a fermion field // here must instantiate CG and CR for every new fermion field type (macro!!) -static Registrar< ConjugateGradientModule, - HMC_SolverModuleFactory > __CGWFmodXMLInit("ConjugateGradient"); +static Registrar< ConjugateGradientModule, + HMC_SolverModuleFactory > __CGWFmodXMLInit("ConjugateGradient"); -static Registrar< BiCGSTABModule, - HMC_SolverModuleFactory > __BiCGWFmodXMLInit("BiCGSTAB"); -static Registrar< ConjugateResidualModule, - HMC_SolverModuleFactory > __CRWFmodXMLInit("ConjugateResidual"); +static Registrar< BiCGSTABModule, + HMC_SolverModuleFactory > __BiCGWFmodXMLInit("BiCGSTAB"); +static Registrar< ConjugateResidualModule, + HMC_SolverModuleFactory > __CRWFmodXMLInit("ConjugateResidual"); // add the staggered, scalar versions here diff --git a/Grid/qcd/observables/topological_charge.h b/Grid/qcd/observables/topological_charge.h index 4f116496..220ed738 100644 --- a/Grid/qcd/observables/topological_charge.h +++ b/Grid/qcd/observables/topological_charge.h @@ -31,15 +31,16 @@ directory NAMESPACE_BEGIN(Grid); + struct TopologySmearingParameters : Serializable { GRID_SERIALIZABLE_CLASS_MEMBERS(TopologySmearingParameters, - int, steps, - float, step_size, int, meas_interval, - float, maxTau); + float, init_step_size, + float, maxTau, + float, tolerance); - TopologySmearingParameters(int s = 0, float ss = 0.0f, int mi = 0, float mT = 0.0f): - steps(s), step_size(ss), meas_interval(mi), maxTau(mT){} + TopologySmearingParameters(float ss = 0.0f, int mi = 0, float mT = 0.0f, float tol = 1e-4): + init_step_size(ss), meas_interval(mi), maxTau(mT), tolerance(tol){} template < class ReaderClass > TopologySmearingParameters(Reader& Reader){ @@ -97,9 +98,9 @@ public: if (Pars.do_smearing){ // using wilson flow by default here - WilsonFlow WF(Pars.Smearing.steps, Pars.Smearing.step_size, Pars.Smearing.meas_interval); - WF.smear_adaptive(Usmear, U, Pars.Smearing.maxTau); - Real T0 = WF.energyDensityPlaquette(Usmear); + WilsonFlowAdaptive WF(Pars.Smearing.init_step_size, Pars.Smearing.maxTau, Pars.Smearing.tolerance, Pars.Smearing.meas_interval); + WF.smear(Usmear, U); + Real T0 = WF.energyDensityPlaquette(Pars.Smearing.maxTau, Usmear); std::cout << GridLogMessage << std::setprecision(std::numeric_limits::digits10 + 1) << "T0 : [ " << traj << " ] "<< T0 << std::endl; } diff --git a/Grid/qcd/smearing/WilsonFlow.h b/Grid/qcd/smearing/WilsonFlow.h index 19fd94e2..f169d02b 100644 --- a/Grid/qcd/smearing/WilsonFlow.h +++ b/Grid/qcd/smearing/WilsonFlow.h @@ -7,6 +7,7 @@ Source file: ./lib/qcd/modules/plaquette.h 
Copyright (C) 2017 Author: Guido Cossu +Author: Christopher Kelly This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,177 +33,318 @@ directory NAMESPACE_BEGIN(Grid); template -class WilsonFlow: public Smear{ - unsigned int Nstep; - unsigned int measure_interval; - mutable RealD epsilon, taus; +class WilsonFlowBase: public Smear{ +public: + //Store generic measurements to take during smearing process using std::function + typedef std::function FunctionType; //int: step, RealD: flow time, GaugeField : the gauge field +protected: + std::vector< std::pair > functions; //The int maps to the measurement frequency mutable WilsonGaugeAction SG; - - void evolve_step(typename Gimpl::GaugeField&) const; - void evolve_step_adaptive(typename Gimpl::GaugeField&, RealD); - RealD tau(unsigned int t)const {return epsilon*(t+1.0); } - + public: INHERIT_GIMPL_TYPES(Gimpl) - explicit WilsonFlow(unsigned int Nstep, RealD epsilon, unsigned int interval = 1): - Nstep(Nstep), - epsilon(epsilon), - measure_interval(interval), + explicit WilsonFlowBase(unsigned int meas_interval =1): SG(WilsonGaugeAction(3.0)) { // WilsonGaugeAction with beta 3.0 - assert(epsilon > 0.0); - LogMessage(); + setDefaultMeasurements(meas_interval); } + + void resetActions(){ functions.clear(); } - void LogMessage() { - std::cout << GridLogMessage - << "[WilsonFlow] Nstep : " << Nstep << std::endl; - std::cout << GridLogMessage - << "[WilsonFlow] epsilon : " << epsilon << std::endl; - std::cout << GridLogMessage - << "[WilsonFlow] full trajectory : " << Nstep * epsilon << std::endl; - } + void addMeasurement(int meas_interval, FunctionType meas){ functions.push_back({meas_interval, meas}); } - virtual void smear(GaugeField&, const GaugeField&) const; + //Set the class to perform the default measurements: + //the plaquette energy density every step + //the topological charge every 'topq_meas_interval' steps + //and output to stdout + void setDefaultMeasurements(int topq_meas_interval = 1); - virtual void derivative(GaugeField&, const GaugeField&, const GaugeField&) const { + void derivative(GaugeField&, const GaugeField&, const GaugeField&) const override{ assert(0); // undefined for WilsonFlow } - void smear_adaptive(GaugeField&, const GaugeField&, RealD maxTau); - RealD energyDensityPlaquette(unsigned int step, const GaugeField& U) const; - RealD energyDensityPlaquette(const GaugeField& U) const; + //Compute t^2 <E> for time t from the plaquette + static RealD energyDensityPlaquette(const RealD t, const GaugeField& U); + + //Compute t^2 <E> for time t from the 1x1 cloverleaf form + //t is the Wilson flow time + static RealD energyDensityCloverleaf(const RealD t, const GaugeField& U); + + //Evolve the gauge field by Nstep steps of epsilon and return the energy density computed every interval steps + //The smeared field is output as V + std::vector flowMeasureEnergyDensityPlaquette(GaugeField &V, const GaugeField& U, int measure_interval = 1); + + //Version that does not return the smeared field + std::vector flowMeasureEnergyDensityPlaquette(const GaugeField& U, int measure_interval = 1); + + + //Evolve the gauge field by Nstep steps of epsilon and return the Cloverleaf energy density computed every interval steps + //The smeared field is output as V + std::vector flowMeasureEnergyDensityCloverleaf(GaugeField &V, const GaugeField& U, int measure_interval = 1); + + //Version that does not return the smeared field + std::vector flowMeasureEnergyDensityCloverleaf(const GaugeField& U, int measure_interval = 1); };
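+ +//Example usage (a minimal sketch, assuming the standard PeriodicGimplR implementation +//and LatticeGaugeField types; eps=0.01, Nstep=500 are illustrative values only): +// WilsonFlow<PeriodicGimplR> wflow(0.01, 500, 10); // step size, Nstep, default measurements every 10 steps +// wflow.resetActions(); // drop the default measurements +// wflow.addMeasurement(10, [](int step, RealD t, const LatticeGaugeField &U){ +// std::cout << "t^2 <E> = " << WilsonFlow<PeriodicGimplR>::energyDensityCloverleaf(t,U) << std::endl; +// }); +// wflow.smear(Vsmeared, U); // Vsmeared receives the flowed field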
+//Basic iterative Wilson flow +template +class WilsonFlow: public WilsonFlowBase{ +private: + int Nstep; //number of steps + RealD epsilon; //step size + + //Evolve the gauge field by 1 step of size eps and update tau + void evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const; + +public: + INHERIT_GIMPL_TYPES(Gimpl) + + //Integrate the Wilson flow for Nstep steps of size epsilon + WilsonFlow(const RealD epsilon, const int Nstep, unsigned int meas_interval = 1): WilsonFlowBase(meas_interval), Nstep(Nstep), epsilon(epsilon){} + + void smear(GaugeField& out, const GaugeField& in) const override; +}; + +//Wilson flow with adaptive step size +template +class WilsonFlowAdaptive: public WilsonFlowBase{ +private: + RealD init_epsilon; //initial step size + RealD maxTau; //integrate to t=maxTau + RealD tolerance; //integration error tolerance + + //Evolve the gauge field by 1 step and update tau and the current time step eps + // + //If the step size eps is so large that a significant integration error results, + //the gauge field (U) and tau will not be updated and the function will return 0; eps will be adjusted to a smaller + //value for the next iteration. + // + //For a successful integration step the function will return 1 + int evolve_step_adaptive(typename Gimpl::GaugeField&U, RealD &tau, RealD &eps) const; + +public: + INHERIT_GIMPL_TYPES(Gimpl) + + WilsonFlowAdaptive(const RealD init_epsilon, const RealD maxTau, const RealD tolerance, unsigned int meas_interval = 1): + WilsonFlowBase(meas_interval), init_epsilon(init_epsilon), maxTau(maxTau), tolerance(tolerance){} + + void smear(GaugeField& out, const GaugeField& in) const override; +}; //////////////////////////////////////////////////////////////////////////////// // Implementations //////////////////////////////////////////////////////////////////////////////// template -void WilsonFlow::evolve_step(typename Gimpl::GaugeField &U) const{ +RealD WilsonFlowBase::energyDensityPlaquette(const RealD t, const GaugeField& U){ + static WilsonGaugeAction SG(3.0); + return 2.0 * t * t * SG.S(U)/U.Grid()->gSites(); +} + +//Compute t^2 <E> for time t from the 1x1 cloverleaf form +template +RealD WilsonFlowBase::energyDensityCloverleaf(const RealD t, const GaugeField& U){ + typedef typename Gimpl::GaugeLinkField GaugeMat; + typedef typename Gimpl::GaugeField GaugeLorentz; + + assert(Nd == 4); + //E = 1/2 tr( F_munu F_munu ) + //However as F_numu = -F_munu, only need to sum the trace of the squares of the following 6 field strengths: + //F_01 F_02 F_03 F_12 F_13 F_23 + GaugeMat F(U.Grid()); + LatticeComplexD R(U.Grid()); + R = Zero(); + + for(int mu=0;mu<3;mu++){ + for(int nu=mu+1;nu<4;nu++){ + WilsonLoops::FieldStrength(F, U, mu, nu); + R = R + trace(F*F); + } + } + ComplexD out = sum(R); + out = t*t*out / RealD(U.Grid()->gSites()); + return -real(out); //minus sign necessary for +ve energy +} + + +template +std::vector WilsonFlowBase::flowMeasureEnergyDensityPlaquette(GaugeField &V, const GaugeField& U, int measure_interval){ + std::vector out; + resetActions(); + addMeasurement(measure_interval, [&out](int step, RealD t, const typename Gimpl::GaugeField &U){ + std::cout << GridLogMessage << "[WilsonFlow] Computing plaquette energy density for step " << step << std::endl; + out.push_back( energyDensityPlaquette(t,U) ); + }); + smear(V,U); + return out; +} + +template +std::vector WilsonFlowBase::flowMeasureEnergyDensityPlaquette(const GaugeField& U, int 
measure_interval){ + GaugeField V(U); + return flowMeasureEnergyDensityPlaquette(V,U, measure_interval); +} + +template +std::vector WilsonFlowBase::flowMeasureEnergyDensityCloverleaf(GaugeField &V, const GaugeField& U, int measure_interval){ + std::vector out; + resetActions(); + addMeasurement(measure_interval, [&out](int step, RealD t, const typename Gimpl::GaugeField &U){ + std::cout << GridLogMessage << "[WilsonFlow] Computing Cloverleaf energy density for step " << step << std::endl; + out.push_back( energyDensityCloverleaf(t,U) ); + }); + smear(V,U); + return out; +} + +template +std::vector WilsonFlowBase::flowMeasureEnergyDensityCloverleaf(const GaugeField& U, int measure_interval){ + GaugeField V(U); + return flowMeasureEnergyDensityCloverleaf(V,U, measure_interval); +} + +template +void WilsonFlowBase::setDefaultMeasurements(int topq_meas_interval){ + addMeasurement(1, [](int step, RealD t, const typename Gimpl::GaugeField &U){ + std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " << step << " " << t << " " << energyDensityPlaquette(t,U) << std::endl; + }); + addMeasurement(topq_meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){ + std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " " << WilsonLoops::TopologicalCharge(U) << std::endl; + }); +} + + + +template +void WilsonFlow::evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const{ GaugeField Z(U.Grid()); GaugeField tmp(U.Grid()); - SG.deriv(U, Z); + this->SG.deriv(U, Z); Z *= 0.25; // Z0 = 1/4 * F(U) Gimpl::update_field(Z, U, -2.0*epsilon); // U = W1 = exp(ep*Z0)*W0 Z *= -17.0/8.0; - SG.deriv(U, tmp); Z += tmp; // -17/32*Z0 +Z1 + this->SG.deriv(U, tmp); Z += tmp; // -17/32*Z0 +Z1 Z *= 8.0/9.0; // Z = -17/36*Z0 +8/9*Z1 Gimpl::update_field(Z, U, -2.0*epsilon); // U_= W2 = exp(ep*Z)*W1 Z *= -4.0/3.0; - SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2 + this->SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2 Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2 Gimpl::update_field(Z, U, -2.0*epsilon); // V(t+e) = exp(ep*Z)*W2 + tau += epsilon; } template -void WilsonFlow::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD maxTau) { - if (maxTau - taus < epsilon){ - epsilon = maxTau-taus; - } - //std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl; - GaugeField Z(U.Grid()); - GaugeField Zprime(U.Grid()); - GaugeField tmp(U.Grid()), Uprime(U.Grid()); - Uprime = U; - SG.deriv(U, Z); - Zprime = -Z; - Z *= 0.25; // Z0 = 1/4 * F(U) - Gimpl::update_field(Z, U, -2.0*epsilon); // U = W1 = exp(ep*Z0)*W0 +void WilsonFlow::smear(GaugeField& out, const GaugeField& in) const{ + std::cout << GridLogMessage + << "[WilsonFlow] Nstep : " << Nstep << std::endl; + std::cout << GridLogMessage + << "[WilsonFlow] epsilon : " << epsilon << std::endl; + std::cout << GridLogMessage + << "[WilsonFlow] full trajectory : " << Nstep * epsilon << std::endl; - Z *= -17.0/8.0; - SG.deriv(U, tmp); Z += tmp; // -17/32*Z0 +Z1 - Zprime += 2.0*tmp; - Z *= 8.0/9.0; // Z = -17/36*Z0 +8/9*Z1 - Gimpl::update_field(Z, U, -2.0*epsilon); // U_= W2 = exp(ep*Z)*W1 - - - Z *= -4.0/3.0; - SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2 - Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2 - Gimpl::update_field(Z, U, -2.0*epsilon); // V(t+e) = exp(ep*Z)*W2 - - // Ramos - Gimpl::update_field(Zprime, Uprime, -2.0*epsilon); // V'(t+e) = exp(ep*Z')*W0 - // Compute distance as norm^2 of the difference - GaugeField diffU = U - Uprime; - RealD diff = norm2(diffU); - // 
adjust integration step + + taus += epsilon; + //std::cout << GridLogMessage << "Adjusting integration step with distance: " << diff << std::endl; + + epsilon = epsilon*0.95*std::pow(1e-4/diff,1./3.); + //std::cout << GridLogMessage << "New epsilon : " << epsilon << std::endl; + -} - -template -RealD WilsonFlow::energyDensityPlaquette(unsigned int step, const GaugeField& U) const { - RealD td = tau(step); - return 2.0 * td * td * SG.S(U)/U.Grid()->gSites(); -} - -template -RealD WilsonFlow::energyDensityPlaquette(const GaugeField& U) const { - return 2.0 * taus * taus * SG.S(U)/U.Grid()->gSites(); -} - - -//#define WF_TIMING - - - -template -void WilsonFlow::smear(GaugeField& out, const GaugeField& in) const { out = in; - for (unsigned int step = 1; step <= Nstep; step++) { + RealD taus = 0.; + for (unsigned int step = 1; step <= Nstep; step++) { //step indicates the number of smearing steps applied at the time of measurement auto start = std::chrono::high_resolution_clock::now(); - evolve_step(out); + evolve_step(out, taus); auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration diff = end - start; #ifdef WF_TIMING std::cout << "Time to evolve " << diff.count() << " s\n"; #endif - std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " - << step << " " << tau(step) << " " - << energyDensityPlaquette(step,out) << std::endl; - if( step % measure_interval == 0){ - std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " - << step << " " - << WilsonLoops::TopologicalCharge(out) << std::endl; - } + //Perform measurements + for(auto const &meas : this->functions) + if( step % meas.first == 0 ) meas.second(step,taus,out); } } + + template -void WilsonFlow::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau){ +int WilsonFlowAdaptive::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD &tau, RealD &eps) const{ + if (maxTau - tau < eps){ + eps = maxTau-tau; + } + //std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl; + GaugeField Z(U.Grid()); + GaugeField Zprime(U.Grid()); + GaugeField tmp(U.Grid()), Uprime(U.Grid()), Usave(U.Grid()); + Uprime = U; + Usave = U; + + this->SG.deriv(U, Z); + Zprime = -Z; + Z *= 0.25; // Z0 = 1/4 * F(U) + Gimpl::update_field(Z, U, -2.0*eps); // U = W1 = exp(ep*Z0)*W0 + + Z *= -17.0/8.0; + this->SG.deriv(U, tmp); Z += tmp; // -17/32*Z0 +Z1 + Zprime += 2.0*tmp; + Z *= 8.0/9.0; // Z = -17/36*Z0 +8/9*Z1 + Gimpl::update_field(Z, U, -2.0*eps); // U_= W2 = exp(ep*Z)*W1 + + + Z *= -4.0/3.0; + this->SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2 + Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2 + Gimpl::update_field(Z, U, -2.0*eps); // V(t+e) = exp(ep*Z)*W2 + + // Ramos arXiv:1301.4388 + Gimpl::update_field(Zprime, Uprime, -2.0*eps); // V'(t+e) = exp(ep*Z')*W0 + + // Compute distance using Ramos' definition + GaugeField diffU = U - Uprime; + RealD max_dist = 0; + + for(int mu=0;mu<Nd;mu++){ + GaugeLinkField diffU_mu = PeekIndex<LorentzIndex>(diffU, mu); + RealD dist_mu = sqrt( maxLocalNorm2(diffU_mu) ) /Nc/Nc; //maximize over sites + max_dist = std::max(max_dist, dist_mu); //maximize over mu + } + + int ret; + if(max_dist < tolerance) { + tau += eps; + ret = 1; + } else { + U = Usave; + ret = 0; + } + eps = eps*0.95*std::pow(tolerance/max_dist,1./3.); + std::cout << GridLogMessage << "Adaptive smearing : Distance: "<< max_dist <<" Step successful: " << ret << " New epsilon: " << eps << std::endl; + + return ret; +} + +template +void WilsonFlowAdaptive::smear(GaugeField& out, const GaugeField& in) const{ + std::cout << GridLogMessage + << 
"[WilsonFlow] initial epsilon : " << init_epsilon << std::endl; + std::cout << GridLogMessage + << "[WilsonFlow] full trajectory : " << maxTau << std::endl; + std::cout << GridLogMessage + << "[WilsonFlow] tolerance : " << tolerance << std::endl; out = in; - taus = epsilon; + RealD taus = 0.; + RealD eps = init_epsilon; unsigned int step = 0; do{ - step++; - //std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl; - evolve_step_adaptive(out, maxTau); - std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " - << step << " " << taus << " " - << energyDensityPlaquette(out) << std::endl; - if( step % measure_interval == 0){ - std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " - << step << " " - << WilsonLoops::TopologicalCharge(out) << std::endl; - } + int step_success = evolve_step_adaptive(out, taus, eps); + step += step_success; //step will not be incremented if the integration step fails + + //Perform measurements + if(step_success) + for(auto const &meas : this->functions) + if( step % meas.first == 0 ) meas.second(step,taus,out); } while (taus < maxTau); - - - } + + NAMESPACE_END(Grid); diff --git a/Grid/qcd/utils/CovariantCshift.h b/Grid/qcd/utils/CovariantCshift.h index 6c70706f..23984145 100644 --- a/Grid/qcd/utils/CovariantCshift.h +++ b/Grid/qcd/utils/CovariantCshift.h @@ -88,6 +88,12 @@ namespace PeriodicBC { return CovShiftBackward(Link,mu,arg); } + //Boundary-aware C-shift of gauge links / gauge transformation matrices + template Lattice + CshiftLink(const Lattice &Link, int mu, int shift) + { + return Cshift(Link, mu, shift); + } } @@ -158,6 +164,9 @@ namespace ConjugateBC { // std::cout<<"Gparity::CovCshiftBackward mu="< Lattice CovShiftIdentityBackward(const Lattice &Link, int mu) { GridBase *grid = Link.Grid(); @@ -176,6 +185,9 @@ namespace ConjugateBC { return Link; } + //Out(x) = S_\mu(x+\hat\mu) | x_\mu != L-1 + // = S*_\mu(0) | x_\mu == L-1 + //Note: While this is used for Staples it is also applicable for shifting gauge links or gauge transformation matrices template Lattice ShiftStaple(const Lattice &Link, int mu) { @@ -208,6 +220,47 @@ namespace ConjugateBC { return CovShiftBackward(Link,mu,arg); } + //Boundary-aware C-shift of gauge links / gauge transformation matrices + //shift = 1 + //Out(x) = U_\mu(x+\hat\mu) | x_\mu != L-1 + // = U*_\mu(0) | x_\mu == L-1 + //shift = -1 + //Out(x) = U_\mu(x-mu) | x_\mu != 0 + // = U*_\mu(L-1) | x_\mu == 0 + //shift = 2 + //Out(x) = U_\mu(x+2\hat\mu) | x_\mu < L-2 + // = U*_\mu(1) | x_\mu == L-1 + // = U*_\mu(0) | x_\mu == L-2 + //shift = -2 + //Out(x) = U_\mu(x-2mu) | x_\mu > 1 + // = U*_\mu(L-2) | x_\mu == 0 + // = U*_\mu(L-1) | x_\mu == 1 + //etc + template Lattice + CshiftLink(const Lattice &Link, int mu, int shift) + { + GridBase *grid = Link.Grid(); + int Lmu = grid->GlobalDimensions()[mu]; + assert(abs(shift) < Lmu && "Invalid shift value"); + + Lattice> coor(grid); + LatticeCoordinate(coor, mu); + + Lattice tmp(grid); + if(shift > 0){ + tmp = Cshift(Link, mu, shift); + tmp = where(coor >= Lmu-shift, conjugate(tmp), tmp); + return tmp; + }else if(shift < 0){ + tmp = Link; + tmp = where(coor >= Lmu+shift, conjugate(tmp), tmp); + return Cshift(tmp, mu, shift); + } + + //shift == 0 + return Link; + } + } diff --git a/Grid/qcd/utils/GaugeFix.h b/Grid/qcd/utils/GaugeFix.h index 29184a88..fc723fe3 100644 --- a/Grid/qcd/utils/GaugeFix.h +++ b/Grid/qcd/utils/GaugeFix.h @@ -40,27 +40,45 @@ public: typedef typename Gimpl::GaugeLinkField GaugeMat; typedef typename Gimpl::GaugeField 
GaugeLorentz; - static void GaugeLinkToLieAlgebraField(const std::vector &U,std::vector &A) { - for(int mu=0;mu &A,GaugeMat &dmuAmu,int orthog) { + + //The derivative of the Lie algebra field + static void DmuAmu(const std::vector &U, GaugeMat &dmuAmu,int orthog) { + GridBase* grid = U[0].Grid(); + GaugeMat Ax(grid); + GaugeMat Axm1(grid); + GaugeMat Utmp(grid); + dmuAmu=Zero(); for(int mu=0;mu &U,GaugeMat &xform,Real & alpha, GaugeMat & dmuAmu,int orthog) { + static Real SteepestDescentStep(std::vector &U,GaugeMat &xform, Real alpha, GaugeMat & dmuAmu,int orthog) { GridBase *grid = U[0].Grid(); - std::vector A(Nd,grid); GaugeMat g(grid); - - GaugeLinkToLieAlgebraField(U,A); - ExpiAlphaDmuAmu(A,g,alpha,dmuAmu,orthog); - + ExpiAlphaDmuAmu(U,g,alpha,dmuAmu,orthog); Real vol = grid->gSites(); Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc; xform = g*xform ; - SU::GaugeTransform(U,g); + SU::GaugeTransform(U,g); return trG; } - static Real FourierAccelSteepestDescentStep(std::vector &U,GaugeMat &xform,Real & alpha, GaugeMat & dmuAmu,int orthog) { + static Real FourierAccelSteepestDescentStep(std::vector &U,GaugeMat &xform, Real alpha, GaugeMat & dmuAmu,int orthog) { GridBase *grid = U[0].Grid(); @@ -159,11 +174,7 @@ public: GaugeMat g(grid); GaugeMat dmuAmu_p(grid); - std::vector A(Nd,grid); - - GaugeLinkToLieAlgebraField(U,A); - - DmuAmu(A,dmuAmu,orthog); + DmuAmu(U,dmuAmu,orthog); std::vector mask(Nd,1); for(int mu=0;mu::GaugeTransform(U,g); + SU::GaugeTransform(U,g); return trG; } - static void ExpiAlphaDmuAmu(const std::vector &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu,int orthog) { + static void ExpiAlphaDmuAmu(const std::vector &U,GaugeMat &g, Real alpha, GaugeMat &dmuAmu,int orthog) { GridBase *grid = g.Grid(); Complex cialpha(0.0,-alpha); GaugeMat ciadmam(grid); - DmuAmu(A,dmuAmu,orthog); + DmuAmu(U,dmuAmu,orthog); ciadmam = dmuAmu*cialpha; SU::taExp(ciadmam,g); } diff --git a/Grid/qcd/utils/SUn.h b/Grid/qcd/utils/SUn.h index 675493b3..23eceea3 100644 --- a/Grid/qcd/utils/SUn.h +++ b/Grid/qcd/utils/SUn.h @@ -615,7 +615,6 @@ public: GridBase *grid = out.Grid(); typedef typename LatticeMatrixType::vector_type vector_type; - typedef typename LatticeMatrixType::scalar_type scalar_type; typedef iSinglet vTComplexType; @@ -694,32 +693,32 @@ public: * Adjoint rep gauge xform */ - template - static void GaugeTransform( GaugeField &Umu, GaugeMat &g){ + template + static void GaugeTransform(typename Gimpl::GaugeField &Umu, typename Gimpl::GaugeLinkField &g){ GridBase *grid = Umu.Grid(); conformable(grid,g.Grid()); - GaugeMat U(grid); - GaugeMat ag(grid); ag = adj(g); + typename Gimpl::GaugeLinkField U(grid); + typename Gimpl::GaugeLinkField ag(grid); ag = adj(g); for(int mu=0;mu(Umu,mu); - U = g*U*Cshift(ag, mu, 1); + U = g*U*Gimpl::CshiftLink(ag, mu, 1); //BC-aware PokeIndex(Umu,U,mu); } } - template - static void GaugeTransform( std::vector &U, GaugeMat &g){ + template + static void GaugeTransform( std::vector &U, typename Gimpl::GaugeLinkField &g){ GridBase *grid = g.Grid(); - GaugeMat ag(grid); ag = adj(g); + typename Gimpl::GaugeLinkField ag(grid); ag = adj(g); for(int mu=0;mu - static void RandomGaugeTransform(GridParallelRNG &pRNG, GaugeField &Umu, GaugeMat &g){ + template + static void RandomGaugeTransform(GridParallelRNG &pRNG, typename Gimpl::GaugeField &Umu, typename Gimpl::GaugeLinkField &g){ LieRandomize(pRNG,g,1.0); - GaugeTransform(Umu,g); + GaugeTransform(Umu,g); } // Projects the algebra components a lattice matrix (of dimension ncol*ncol -1 ) diff --git 
a/Grid/qcd/utils/WilsonLoops.h b/Grid/qcd/utils/WilsonLoops.h index e002e3d5..d6efbd5d 100644 --- a/Grid/qcd/utils/WilsonLoops.h +++ b/Grid/qcd/utils/WilsonLoops.h @@ -125,6 +125,57 @@ public: return sumplaq / vol / faces / Nc; // Nd , Nc dependent... FIXME } + ////////////////////////////////////////////////// + // sum over all spatial planes of plaquette + ////////////////////////////////////////////////// + static void siteSpatialPlaquette(ComplexField &Plaq, + const std::vector &U) { + ComplexField sitePlaq(U[0].Grid()); + Plaq = Zero(); + for (int mu = 1; mu < Nd-1; mu++) { + for (int nu = 0; nu < mu; nu++) { + traceDirPlaquette(sitePlaq, U, mu, nu); + Plaq = Plaq + sitePlaq; + } + } + } + + //////////////////////////////////// + // sum over all x,y,z and over all spatial planes of plaquette + ////////////////////////////////////////////////// + static std::vector timesliceSumSpatialPlaquette(const GaugeLorentz &Umu) { + std::vector U(Nd, Umu.Grid()); + // inefficient here + for (int mu = 0; mu < Nd; mu++) { + U[mu] = PeekIndex(Umu, mu); + } + + ComplexField Plaq(Umu.Grid()); + + siteSpatialPlaquette(Plaq, U); + typedef typename ComplexField::scalar_object sobj; + std::vector Tq; + sliceSum(Plaq, Tq, Nd-1); + + std::vector out(Tq.size()); + for(int t=0;t timesliceAvgSpatialPlaquette(const GaugeLorentz &Umu) { + std::vector sumplaq = timesliceSumSpatialPlaquette(Umu); + int Lt = Umu.Grid()->FullDimensions()[Nd-1]; + assert(sumplaq.size() == Lt); + double vol = Umu.Grid()->gSites() / Lt; + double faces = (1.0 * (Nd - 1)* (Nd - 2)) / 2.0; + for(int t=0;t(Umu, mu); // some redundant copies GaugeMat vu = v*u; //FS = 0.25*Ta(u*v + Cshift(vu, mu, -1)); - FS = (u*v + Cshift(vu, mu, -1)); + FS = (u*v + Gimpl::CshiftLink(vu, mu, -1)); FS = 0.125*(FS - adj(FS)); } - static Real TopologicalCharge(GaugeLorentz &U){ + static Real TopologicalCharge(const GaugeLorentz &U){ // 4d topological charge assert(Nd==4); // Bx = -iF(y,z), By = -iF(z,y), Bz = -iF(x,y) @@ -389,6 +440,203 @@ public: } + //Clover-leaf Wilson loop combination for arbitrary mu-extent M and nu extent N, mu >= nu + //cf https://arxiv.org/pdf/hep-lat/9701012.pdf Eq 7 for 1x2 Wilson loop + //Clockwise ordering + static void CloverleafMxN(GaugeMat &FS, const GaugeMat &Umu, const GaugeMat &Unu, int mu, int nu, int M, int N){ +#define Fmu(A) Gimpl::CovShiftForward(Umu, mu, A) +#define Bmu(A) Gimpl::CovShiftBackward(Umu, mu, A) +#define Fnu(A) Gimpl::CovShiftForward(Unu, nu, A) +#define Bnu(A) Gimpl::CovShiftBackward(Unu, nu, A) +#define FmuI Gimpl::CovShiftIdentityForward(Umu, mu) +#define BmuI Gimpl::CovShiftIdentityBackward(Umu, mu) +#define FnuI Gimpl::CovShiftIdentityForward(Unu, nu) +#define BnuI Gimpl::CovShiftIdentityBackward(Unu, nu) + + //Upper right loop + GaugeMat tmp = BmuI; + for(int i=1;i(U, mu); + GaugeMat Unu = PeekIndex(U, nu); + if(M == N){ + GaugeMat F(Umu.Grid()); + CloverleafMxN(F, Umu, Unu, mu, nu, M, N); + FS = 0.125 * ( F - adj(F) ); + }else{ + //Average over both orientations + GaugeMat horizontal(Umu.Grid()), vertical(Umu.Grid()); + CloverleafMxN(horizontal, Umu, Unu, mu, nu, M, N); + CloverleafMxN(vertical, Umu, Unu, mu, nu, N, M); + FS = 0.0625 * ( horizontal - adj(horizontal) + vertical - adj(vertical) ); + } + } + + //Topological charge contribution from MxN Wilson loops + //cf https://arxiv.org/pdf/hep-lat/9701012.pdf Eq 6 + //output is the charge by timeslice: sum over timeslices to obtain the total + static std::vector TimesliceTopologicalChargeMxN(const GaugeLorentz &U, int M, int N){ + assert(Nd == 4); + 
std::vector > F(Nd,std::vector(Nd,nullptr)); + //Note F_numu = - F_munu + //hence we only need to loop over mu,nu,rho,sigma that aren't related by permuting mu,nu or rho,sigma + //Use nu > mu + for(int mu=0;mu Tq; + sliceSum(fsum, Tq, Nd-1); + + std::vector out(Tq.size()); + for(int t=0;t Tq = TimesliceTopologicalChargeMxN(U,M,N); + Real out(0); + for(int t=0;t > TimesliceTopologicalCharge5LiContributions(const GaugeLorentz &U){ + static const int exts[5][2] = { {1,1}, {2,2}, {1,2}, {1,3}, {3,3} }; + std::vector > out(5); + for(int i=0;i<5;i++){ + out[i] = TimesliceTopologicalChargeMxN(U,exts[i][0],exts[i][1]); + } + return out; + } + + static std::vector TopologicalCharge5LiContributions(const GaugeLorentz &U){ + static const int exts[5][2] = { {1,1}, {2,2}, {1,2}, {1,3}, {3,3} }; + std::vector out(5); + std::cout << GridLogMessage << "Computing topological charge" << std::endl; + for(int i=0;i<5;i++){ + out[i] = TopologicalChargeMxN(U,exts[i][0],exts[i][1]); + std::cout << GridLogMessage << exts[i][0] << "x" << exts[i][1] << " Wilson loop contribution " << out[i] << std::endl; + } + return out; + } + + //Compute the 5Li topological charge + static std::vector TimesliceTopologicalCharge5Li(const GaugeLorentz &U){ + std::vector > loops = TimesliceTopologicalCharge5LiContributions(U); + + double c5=1./20.; + double c4=1./5.-2.*c5; + double c3=(-64.+640.*c5)/45.; + double c2=(1-64.*c5)/9.; + double c1=(19.-55.*c5)/9.; + + int Lt = loops[0].size(); + std::vector out(Lt,0.); + for(int t=0;t Qt = TimesliceTopologicalCharge5Li(U); + Real Q = 0.; + for(int t=0;t - inline vec operator()(vec a, vec b){ + inline vec operator()(vec a){ vec out; const vec::uint> tbl_swap = acle::tbl_swap(); svbool_t pg1 = acle::pg1(); @@ -520,7 +520,7 @@ struct TimesMinusI{ struct TimesI{ // Complex template - inline vec operator()(vec a, vec b){ + inline vec operator()(vec a){ vec out; const vec::uint> tbl_swap = acle::tbl_swap(); svbool_t pg1 = acle::pg1(); diff --git a/Grid/simd/Grid_a64fx-fixedsize.h b/Grid/simd/Grid_a64fx-fixedsize.h index 6b450012..5bf1b0a3 100644 --- a/Grid/simd/Grid_a64fx-fixedsize.h +++ b/Grid/simd/Grid_a64fx-fixedsize.h @@ -418,7 +418,7 @@ struct Conj{ struct TimesMinusI{ // Complex float - inline vecf operator()(vecf a, vecf b){ + inline vecf operator()(vecf a){ lutf tbl_swap = acle::tbl_swap(); pred pg1 = acle::pg1(); pred pg_odd = acle::pg_odd(); @@ -428,7 +428,7 @@ struct TimesMinusI{ return svneg_m(a_v, pg_odd, a_v); } // Complex double - inline vecd operator()(vecd a, vecd b){ + inline vecd operator()(vecd a){ lutd tbl_swap = acle::tbl_swap(); pred pg1 = acle::pg1(); pred pg_odd = acle::pg_odd(); @@ -441,7 +441,7 @@ struct TimesMinusI{ struct TimesI{ // Complex float - inline vecf operator()(vecf a, vecf b){ + inline vecf operator()(vecf a){ lutf tbl_swap = acle::tbl_swap(); pred pg1 = acle::pg1(); pred pg_even = acle::pg_even(); @@ -451,7 +451,7 @@ struct TimesI{ return svneg_m(a_v, pg_even, a_v); } // Complex double - inline vecd operator()(vecd a, vecd b){ + inline vecd operator()(vecd a){ lutd tbl_swap = acle::tbl_swap(); pred pg1 = acle::pg1(); pred pg_even = acle::pg_even(); diff --git a/Grid/simd/Grid_avx.h b/Grid/simd/Grid_avx.h index ad9800fb..f8962714 100644 --- a/Grid/simd/Grid_avx.h +++ b/Grid/simd/Grid_avx.h @@ -405,12 +405,12 @@ struct Conj{ struct TimesMinusI{ //Complex single - inline __m256 operator()(__m256 in, __m256 ret){ + inline __m256 operator()(__m256 in){ __m256 tmp =_mm256_addsub_ps(_mm256_setzero_ps(),in); // r,-i return 
_mm256_shuffle_ps(tmp,tmp,_MM_SELECT_FOUR_FOUR(2,3,0,1)); //-i,r } //Complex double - inline __m256d operator()(__m256d in, __m256d ret){ + inline __m256d operator()(__m256d in){ __m256d tmp = _mm256_addsub_pd(_mm256_setzero_pd(),in); // r,-i return _mm256_shuffle_pd(tmp,tmp,0x5); } @@ -418,12 +418,12 @@ struct TimesMinusI{ struct TimesI{ //Complex single - inline __m256 operator()(__m256 in, __m256 ret){ + inline __m256 operator()(__m256 in){ __m256 tmp =_mm256_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1)); // i,r return _mm256_addsub_ps(_mm256_setzero_ps(),tmp); // i,-r } //Complex double - inline __m256d operator()(__m256d in, __m256d ret){ + inline __m256d operator()(__m256d in){ __m256d tmp = _mm256_shuffle_pd(in,in,0x5); return _mm256_addsub_pd(_mm256_setzero_pd(),tmp); // i,-r } diff --git a/Grid/simd/Grid_avx512.h b/Grid/simd/Grid_avx512.h index 839d4554..95b96143 100644 --- a/Grid/simd/Grid_avx512.h +++ b/Grid/simd/Grid_avx512.h @@ -271,14 +271,14 @@ struct Conj{ struct TimesMinusI{ //Complex single - inline __m512 operator()(__m512 in, __m512 ret){ + inline __m512 operator()(__m512 in){ //__m512 tmp = _mm512_mask_sub_ps(in,0xaaaa,_mm512_setzero_ps(),in); // real -imag //return _mm512_shuffle_ps(tmp,tmp,_MM_SELECT_FOUR_FOUR(2,3,1,0)); // 0x4E?? __m512 tmp = _mm512_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1)); return _mm512_mask_sub_ps(tmp,0xaaaa,_mm512_setzero_ps(),tmp); } //Complex double - inline __m512d operator()(__m512d in, __m512d ret){ + inline __m512d operator()(__m512d in){ //__m512d tmp = _mm512_mask_sub_pd(in,0xaa,_mm512_setzero_pd(),in); // real -imag //return _mm512_shuffle_pd(tmp,tmp,0x55); __m512d tmp = _mm512_shuffle_pd(in,in,0x55); @@ -288,17 +288,16 @@ struct TimesMinusI{ struct TimesI{ //Complex single - inline __m512 operator()(__m512 in, __m512 ret){ + inline __m512 operator()(__m512 in){ __m512 tmp = _mm512_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1)); return _mm512_mask_sub_ps(tmp,0x5555,_mm512_setzero_ps(),tmp); } //Complex double - inline __m512d operator()(__m512d in, __m512d ret){ + inline __m512d operator()(__m512d in){ __m512d tmp = _mm512_shuffle_pd(in,in,0x55); return _mm512_mask_sub_pd(tmp,0x55,_mm512_setzero_pd(),tmp); } - }; // Gpermute utilities consider coalescing into 1 Gpermute diff --git a/Grid/simd/Grid_doubled_vector.h b/Grid/simd/Grid_doubled_vector.h new file mode 100644 index 00000000..ee604750 --- /dev/null +++ b/Grid/simd/Grid_doubled_vector.h @@ -0,0 +1,666 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/simd/Grid_vector_types.h + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + +template +class Grid_simd2 { +public: + typedef typename RealPart::type Real; + typedef Vector_type vector_type; + typedef Scalar_type scalar_type; + + typedef union conv_t_union { + Vector_type v; + Scalar_type s[sizeof(Vector_type) / sizeof(Scalar_type)]; + accelerator_inline conv_t_union(){}; + } conv_t; + + static constexpr int nvec=2; + Vector_type v[nvec]; + + static accelerator_inline constexpr int Nsimd(void) { + static_assert( (sizeof(Vector_type) / sizeof(Scalar_type) >= 1), " size mismatch " ); + + return nvec*sizeof(Vector_type) / sizeof(Scalar_type); + } + + accelerator_inline Grid_simd2 &operator=(const Grid_simd2 &&rhs) { + for(int n=0;n accelerator_inline + Grid_simd2(const typename std::enable_if::value, S>::type a) { + vsplat(*this, a); + }; + + ///////////////////////////// + // Constructors + ///////////////////////////// + accelerator_inline Grid_simd2 & operator=(const Zero &z) { + vzero(*this); + return (*this); + } + + /////////////////////////////////////////////// + // mac, mult, sub, add, adj + /////////////////////////////////////////////// + + friend accelerator_inline void mac(Grid_simd2 *__restrict__ y, + const Grid_simd2 *__restrict__ a, + const Grid_simd2 *__restrict__ x) { + *y = (*a) * (*x) + (*y); + }; + + friend accelerator_inline void mult(Grid_simd2 *__restrict__ y, + const Grid_simd2 *__restrict__ l, + const Grid_simd2 *__restrict__ r) { + *y = (*l) * (*r); + } + + friend accelerator_inline void sub(Grid_simd2 *__restrict__ y, + const Grid_simd2 *__restrict__ l, + const Grid_simd2 *__restrict__ r) { + *y = (*l) - (*r); + } + friend accelerator_inline void add(Grid_simd2 *__restrict__ y, + const Grid_simd2 *__restrict__ l, + const Grid_simd2 *__restrict__ r) { + *y = (*l) + (*r); + } + friend accelerator_inline void mac(Grid_simd2 *__restrict__ y, + const Scalar_type *__restrict__ a, + const Grid_simd2 *__restrict__ x) { + *y = (*a) * (*x) + (*y); + }; + friend accelerator_inline void mult(Grid_simd2 *__restrict__ y, + const Scalar_type *__restrict__ l, + const Grid_simd2 *__restrict__ r) { + *y = (*l) * (*r); + } + friend accelerator_inline void sub(Grid_simd2 *__restrict__ y, + const Scalar_type *__restrict__ l, + const Grid_simd2 *__restrict__ r) { + *y = (*l) - (*r); + } + friend accelerator_inline void add(Grid_simd2 *__restrict__ y, + const Scalar_type *__restrict__ l, + const Grid_simd2 *__restrict__ r) { + *y = (*l) + (*r); + } + + friend accelerator_inline void mac(Grid_simd2 *__restrict__ y, + const Grid_simd2 *__restrict__ a, + const Scalar_type *__restrict__ x) { + *y = (*a) * (*x) + (*y); + }; + friend accelerator_inline void mult(Grid_simd2 *__restrict__ y, + const Grid_simd2 *__restrict__ l, + const Scalar_type *__restrict__ r) { + *y = (*l) * (*r); + } + friend accelerator_inline void sub(Grid_simd2 *__restrict__ y, + const Grid_simd2 *__restrict__ l, + const Scalar_type *__restrict__ r) { + *y = (*l) - (*r); + } + friend accelerator_inline void add(Grid_simd2 *__restrict__ y, + const Grid_simd2 *__restrict__ l, + const Scalar_type *__restrict__ r) { + *y = (*l) + (*r); + } + + //////////////////////////////////////////////////////////////////////// + // FIXME: gonna remove these load/store, get, set, prefetch + //////////////////////////////////////////////////////////////////////// + friend 
accelerator_inline void vset(Grid_simd2 &ret, Scalar_type *a) { + for(int n=0;n + friend accelerator_inline Grid_simd2 SimdApply(const functor &func, const Grid_simd2 &v) { + Grid_simd2 ret; + for(int n=0;n + friend accelerator_inline Grid_simd2 SimdApplyBinop(const functor &func, + const Grid_simd2 &x, + const Grid_simd2 &y) { + Grid_simd2 ret; + for(int n=0;n Al Bl Ah,Bh + /////////////////////// + friend accelerator_inline void exchange0(Grid_simd2 &out1,Grid_simd2 &out2,Grid_simd2 in1,Grid_simd2 in2){ + out1.v[0] = in1.v[0]; + out1.v[1] = in2.v[0]; + out2.v[0] = in1.v[1]; + out2.v[1] = in2.v[1]; + } + friend accelerator_inline void exchange1(Grid_simd2 &out1,Grid_simd2 &out2,Grid_simd2 in1,Grid_simd2 in2){ + exchange0(out1.v[0],out2.v[0],in1.v[0],in2.v[0]); + exchange0(out1.v[1],out2.v[1],in1.v[1],in2.v[1]); + } + friend accelerator_inline void exchange2(Grid_simd2 &out1,Grid_simd2 &out2,Grid_simd2 in1,Grid_simd2 in2){ + exchange1(out1.v[0],out2.v[0],in1.v[0],in2.v[0]); + exchange1(out1.v[1],out2.v[1],in1.v[1],in2.v[1]); + } + friend accelerator_inline void exchange3(Grid_simd2 &out1,Grid_simd2 &out2,Grid_simd2 in1,Grid_simd2 in2){ + exchange2(out1.v[0],out2.v[0],in1.v[0],in2.v[0]); + exchange2(out1.v[1],out2.v[1],in1.v[1],in2.v[1]); + } + friend accelerator_inline void exchange4(Grid_simd2 &out1,Grid_simd2 &out2,Grid_simd2 in1,Grid_simd2 in2){ + exchange3(out1.v[0],out2.v[0],in1.v[0],in2.v[0]); + exchange3(out1.v[1],out2.v[1],in1.v[1],in2.v[1]); + } + friend accelerator_inline void exchange(Grid_simd2 &out1,Grid_simd2 &out2,Grid_simd2 in1,Grid_simd2 in2,int n) + { + if (n==3) { + exchange3(out1,out2,in1,in2); + } else if(n==2) { + exchange2(out1,out2,in1,in2); + } else if(n==1) { + exchange1(out1,out2,in1,in2); + } else if(n==0) { + exchange0(out1,out2,in1,in2); + } + } + //////////////////////////////////////////////////////////////////// + // General permute; assumes vector length is same across + // all subtypes; may not be a good assumption, but could + // add the vector width as a template param for BG/Q for example + //////////////////////////////////////////////////////////////////// + friend accelerator_inline void permute0(Grid_simd2 &y, Grid_simd2 b) { + y.v[0]=b.v[1]; + y.v[1]=b.v[0]; + } + friend accelerator_inline void permute1(Grid_simd2 &y, Grid_simd2 b) { + permute0(y.v[0],b.v[0]); + permute0(y.v[1],b.v[1]); + } + friend accelerator_inline void permute2(Grid_simd2 &y, Grid_simd2 b) { + permute1(y.v[0],b.v[0]); + permute1(y.v[1],b.v[1]); + } + friend accelerator_inline void permute3(Grid_simd2 &y, Grid_simd2 b) { + permute2(y.v[0],b.v[0]); + permute2(y.v[1],b.v[1]); + } + friend accelerator_inline void permute4(Grid_simd2 &y, Grid_simd2 b) { + permute3(y.v[0],b.v[0]); + permute3(y.v[1],b.v[1]); + } + friend accelerator_inline void permute(Grid_simd2 &y, Grid_simd2 b, int perm) { + if(perm==3) permute3(y, b); + else if(perm==2) permute2(y, b); + else if(perm==1) permute1(y, b); + else if(perm==0) permute0(y, b); + } + + /////////////////////////////// + // Getting single lanes + /////////////////////////////// + accelerator_inline Scalar_type getlane(int lane) const { + if(lane < vector_type::Nsimd() ) return v[0].getlane(lane); + else return v[1].getlane(lane%vector_type::Nsimd()); + } + + accelerator_inline void putlane(const Scalar_type &S, int lane){ + if(lane < vector_type::Nsimd() ) v[0].putlane(S,lane); + else v[1].putlane(S,lane%vector_type::Nsimd()); + } +}; // end of Grid_simd2 class definition + +/////////////////////////////// +// Define available types 
+/////////////////////////////// + +typedef Grid_simd2 , vComplexD> vComplexD2; +typedef Grid_simd2 vRealD2; + + + +///////////////////////////////////////// +// Some traits to recognise the types +///////////////////////////////////////// +template +struct is_simd : public std::false_type {}; +template <> struct is_simd : public std::true_type {}; +template <> struct is_simd : public std::true_type {}; +template <> struct is_simd : public std::true_type {}; +template <> struct is_simd : public std::true_type {}; +template <> struct is_simd : public std::true_type {}; +template <> struct is_simd : public std::true_type {}; +template <> struct is_simd : public std::true_type {}; +template <> struct is_simd : public std::true_type {}; +template <> struct is_simd : public std::true_type {}; + +template using IfSimd = Invoke::value, int> >; +template using IfNotSimd = Invoke::value, unsigned> >; + +/////////////////////////////////////////////// +// insert / extract with complex support +/////////////////////////////////////////////// +template +accelerator_inline S getlane(const Grid_simd &in,int lane) { + return in.getlane(lane); +} +template +accelerator_inline void putlane(Grid_simd &vec,const S &_S, int lane){ + vec.putlane(_S,lane); +} +template = 0 > +accelerator_inline S getlane(const S &in,int lane) { + return in; +} +template = 0 > +accelerator_inline void putlane(S &vec,const S &_S, int lane){ + vec = _S; +} +template +accelerator_inline S getlane(const Grid_simd2 &in,int lane) { + return in.getlane(lane); +} +template +accelerator_inline void putlane(Grid_simd2 &vec,const S &_S, int lane){ + vec.putlane(_S,lane); +} + + +//////////////////////////////////////////////////////////////////// +// General rotate +//////////////////////////////////////////////////////////////////// + +template +accelerator_inline void vbroadcast(Grid_simd2 &ret,const Grid_simd2 &src,int lane){ + S* typepun =(S*) &src; + vsplat(ret,typepun[lane]); +} +template =0> +accelerator_inline void rbroadcast(Grid_simd2 &ret,const Grid_simd2 &src,int lane){ + typedef typename V::vector_type vector_type; + S* typepun =(S*) &src; + ret.v[0].v = unary(real(typepun[lane]), VsplatSIMD()); + ret.v[1].v = unary(real(typepun[lane]), VsplatSIMD()); +} + + +/////////////////////// +// Splat +/////////////////////// + +// this is only for the complex version +template = 0, class ABtype> +accelerator_inline void vsplat(Grid_simd2 &ret, ABtype a, ABtype b) { + vsplat(ret.v[0],a,b); + vsplat(ret.v[1],a,b); +} + +// overload if complex +template +accelerator_inline void vsplat(Grid_simd2 &ret, EnableIf, S> c) { + vsplat(ret, real(c), imag(c)); +} +template +accelerator_inline void rsplat(Grid_simd2 &ret, EnableIf, S> c) { + vsplat(ret, real(c), real(c)); +} + +// if real fill with a, if complex fill with a in the real part (first function +// above) +template +accelerator_inline void vsplat(Grid_simd2 &ret, NotEnableIf, S> a) +{ + vsplat(ret.v[0],a); + vsplat(ret.v[1],a); +} +////////////////////////// + +/////////////////////////////////////////////// +// Initialise to 1,0,i for the correct types +/////////////////////////////////////////////// +// For complex types +template = 0> +accelerator_inline void vone(Grid_simd2 &ret) { + vsplat(ret, S(1.0, 0.0)); +} +template = 0> +accelerator_inline void vzero(Grid_simd2 &ret) { + vsplat(ret, S(0.0, 0.0)); +} // use xor? 
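// --- Editor's illustrative sketch (not part of the patch) ----------------
// Grid_simd2 above doubles the SIMD width by carrying two hardware vectors
// and routing lane indices below the half-width to v[0] and the rest to
// v[1]. A minimal standalone sketch of that lane-dispatch idea, with the
// hypothetical names ToyVec/ToyVec2 standing in for Grid_simd/Grid_simd2:
#include <array>
#include <complex>
#include <cstdio>
template <class S, int W> struct ToyVec {          // stands in for Grid_simd
  std::array<S, W> s;
  static constexpr int Nsimd() { return W; }
  S getlane(int lane) const { return s[lane]; }
  void putlane(const S &x, int lane) { s[lane] = x; }
};
template <class S, class V> struct ToyVec2 {       // stands in for Grid_simd2
  V v[2];                                          // two half-width vectors
  static constexpr int Nsimd() { return 2 * V::Nsimd(); }
  S getlane(int lane) const {
    return lane < V::Nsimd() ? v[0].getlane(lane)
                             : v[1].getlane(lane % V::Nsimd());
  }
  void putlane(const S &x, int lane) {
    if (lane < V::Nsimd()) v[0].putlane(x, lane);
    else                   v[1].putlane(x, lane % V::Nsimd());
  }
};
int main() {
  using C = std::complex<double>;
  ToyVec2<C, ToyVec<C, 2> > z;                     // 4 logical lanes from 2x2
  for (int l = 0; l < z.Nsimd(); l++) z.putlane(C(l, -l), l);
  for (int l = 0; l < z.Nsimd(); l++)
    std::printf("lane %d = (%g,%g)\n", l, z.getlane(l).real(), z.getlane(l).imag());
  return 0;
}
// --------------------------------------------------------------------------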
+template = 0> +accelerator_inline void vcomplex_i(Grid_simd2 &ret) { + vsplat(ret, S(0.0, 1.0)); +} + +template = 0> +accelerator_inline void visign(Grid_simd2 &ret) { + vsplat(ret, S(1.0, -1.0)); +} +template = 0> +accelerator_inline void vrsign(Grid_simd2 &ret) { + vsplat(ret, S(-1.0, 1.0)); +} + +// if not complex overload here +template = 0> +accelerator_inline void vone(Grid_simd2 &ret) { + vsplat(ret, S(1.0)); +} +template = 0> +accelerator_inline void vzero(Grid_simd2 &ret) { + vsplat(ret, S(0.0)); +} + +// For integral types +template = 0> +accelerator_inline void vone(Grid_simd2 &ret) { + vsplat(ret, 1); +} +template = 0> +accelerator_inline void vzero(Grid_simd2 &ret) { + vsplat(ret, 0); +} +template = 0> +accelerator_inline void vtrue(Grid_simd2 &ret) { + vsplat(ret, 0xFFFFFFFF); +} +template = 0> +accelerator_inline void vfalse(Grid_simd2 &ret) { + vsplat(ret, 0); +} +template +accelerator_inline void zeroit(Grid_simd2 &z) { + vzero(z); +} + +/////////////////////// +// Vstream +/////////////////////// +template = 0> +accelerator_inline void vstream(Grid_simd2 &out, const Grid_simd2 &in) { + vstream(out.v[0],in.v[0]); + vstream(out.v[1],in.v[1]); +} +template = 0> +accelerator_inline void vstream(Grid_simd2 &out, const Grid_simd2 &in) { + vstream(out.v[0],in.v[0]); + vstream(out.v[1],in.v[1]); +} +template = 0> +accelerator_inline void vstream(Grid_simd2 &out, const Grid_simd2 &in) { + vstream(out.v[0],in.v[0]); + vstream(out.v[1],in.v[1]); +} + +//////////////////////////////////// +// Arithmetic operator overloads +,-,* +//////////////////////////////////// +template +accelerator_inline Grid_simd2 operator+(Grid_simd2 a, Grid_simd2 b) { + Grid_simd2 ret; + ret.v[0] = a.v[0]+b.v[0]; + ret.v[1] = a.v[1]+b.v[1]; + return ret; +}; + +template +accelerator_inline Grid_simd2 operator-(Grid_simd2 a, Grid_simd2 b) { + Grid_simd2 ret; + ret.v[0] = a.v[0]-b.v[0]; + ret.v[1] = a.v[1]-b.v[1]; + return ret; +}; + +// Distinguish between complex types and others +template = 0> +accelerator_inline Grid_simd2 real_mult(Grid_simd2 a, Grid_simd2 b) { + Grid_simd2 ret; + ret.v[0] =real_mult(a.v[0],b.v[0]); + ret.v[1] =real_mult(a.v[1],b.v[1]); + return ret; +}; +template = 0> +accelerator_inline Grid_simd2 real_madd(Grid_simd2 a, Grid_simd2 b, Grid_simd2 c) { + Grid_simd2 ret; + ret.v[0] =real_madd(a.v[0],b.v[0],c.v[0]); + ret.v[1] =real_madd(a.v[1],b.v[1],c.v[1]); + return ret; +}; + + +// Distinguish between complex types and others +template +accelerator_inline Grid_simd2 operator*(Grid_simd2 a, Grid_simd2 b) { + Grid_simd2 ret; + ret.v[0] = a.v[0]*b.v[0]; + ret.v[1] = a.v[1]*b.v[1]; + return ret; +}; + +// Distinguish between complex types and others +template +accelerator_inline Grid_simd2 operator/(Grid_simd2 a, Grid_simd2 b) { + Grid_simd2 ret; + ret.v[0] = a.v[0]/b.v[0]; + ret.v[1] = a.v[1]/b.v[1]; + return ret; +}; + +/////////////////////// +// Conjugate +/////////////////////// +template +accelerator_inline Grid_simd2 conjugate(const Grid_simd2 &in) { + Grid_simd2 ret; + ret.v[0] = conjugate(in.v[0]); + ret.v[1] = conjugate(in.v[1]); + return ret; +} +template = 0> +accelerator_inline Grid_simd2 adj(const Grid_simd2 &in) { + return conjugate(in); +} + +/////////////////////// +// timesMinusI +/////////////////////// +template +accelerator_inline void timesMinusI(Grid_simd2 &ret, const Grid_simd2 &in) { + timesMinusI(ret.v[0],in.v[0]); + timesMinusI(ret.v[1],in.v[1]); +} +template +accelerator_inline Grid_simd2 timesMinusI(const Grid_simd2 &in) { + Grid_simd2 ret; + 
timesMinusI(ret.v[0],in.v[0]); + timesMinusI(ret.v[1],in.v[1]); + return ret; +} + +/////////////////////// +// timesI +/////////////////////// +template +accelerator_inline void timesI(Grid_simd2 &ret, const Grid_simd2 &in) { + timesI(ret.v[0],in.v[0]); + timesI(ret.v[1],in.v[1]); +} +template +accelerator_inline Grid_simd2 timesI(const Grid_simd2 &in) { + Grid_simd2 ret; + timesI(ret.v[0],in.v[0]); + timesI(ret.v[1],in.v[1]); + return ret; +} + +///////////////////// +// Inner, outer +///////////////////// +template +accelerator_inline Grid_simd2 innerProduct(const Grid_simd2 &l,const Grid_simd2 &r) { + return conjugate(l) * r; +} +template +accelerator_inline Grid_simd2 outerProduct(const Grid_simd2 &l,const Grid_simd2 &r) { + return l * conjugate(r); +} + +template +accelerator_inline Grid_simd2 trace(const Grid_simd2 &arg) { + return arg; +} + +//////////////////////////////////////////////////////////// +// copy/splat complex real parts into real; +// insert real into complex and zero imag; +//////////////////////////////////////////////////////////// +accelerator_inline void precisionChange(vComplexD2 &out,const vComplexF &in){ + Optimization::PrecisionChange::StoD(in.v,out.v[0].v,out.v[1].v); +} +accelerator_inline void precisionChange(vComplexF &out,const vComplexD2 &in){ + out.v=Optimization::PrecisionChange::DtoS(in.v[0].v,in.v[1].v); +} +accelerator_inline void precisionChange(vComplexD2 *out,const vComplexF *in,int nvec){ + for(int m=0;m - accelerator_inline vec operator()(vec a, vec b){ + accelerator_inline vec operator()(vec a){ vec out; VECTOR_FOR(i, W::c, 1) @@ -265,7 +265,7 @@ struct TimesMinusI{ struct TimesI{ // Complex template - accelerator_inline vec operator()(vec a, vec b){ + accelerator_inline vec operator()(vec a){ vec out; VECTOR_FOR(i, W::c, 1) diff --git a/Grid/simd/Grid_gpu_rrii.h b/Grid/simd/Grid_gpu_rrii.h new file mode 100644 index 00000000..36f343e4 --- /dev/null +++ b/Grid/simd/Grid_gpu_rrii.h @@ -0,0 +1,878 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/simd/Grid_gpu.h + + Copyright (C) 2021 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +//---------------------------------------------------------------------- +/*! 
@file Grid_gpu_rrii.h*/ +//---------------------------------------------------------------------- + +////////////////////////////// +// fp16 +////////////////////////////// +#ifdef GRID_CUDA +#include +#endif +#ifdef GRID_HIP +#include +#endif +#if !defined(GRID_HIP) && !defined(GRID_CUDA) +namespace Grid { + typedef struct { uint16_t x;} half; +} +#endif +namespace Grid { + accelerator_inline float half2float(half h) + { + float f; +#if defined(GRID_CUDA) || defined(GRID_HIP) + f = __half2float(h); +#else + Grid_half hh; + hh.x = h.x; + f= sfw_half_to_float(hh); +#endif + return f; + } + accelerator_inline half float2half(float f) + { + half h; +#if defined(GRID_CUDA) || defined(GRID_HIP) + h = __float2half(f); +#else + Grid_half hh = sfw_float_to_half(f); + h.x = hh.x; +#endif + return h; + } +} + + +#define COALESCE_GRANULARITY ( GEN_SIMD_WIDTH ) + +namespace Grid { + +//////////////////////////////////////////////////////////////////////// +// Real vector +//////////////////////////////////////////////////////////////////////// +template +struct GpuVector { + _datum rrrr[_N]; + static const int N = _N; + typedef _datum datum; +}; +template +inline accelerator GpuVector operator*(const GpuVector l,const GpuVector r) { + GpuVector ret; + for(int i=0;i +inline accelerator GpuVector operator-(const GpuVector l,const GpuVector r) { + GpuVector ret; + for(int i=0;i +inline accelerator GpuVector operator+(const GpuVector l,const GpuVector r) { + GpuVector ret; + for(int i=0;i +inline accelerator GpuVector operator/(const GpuVector l,const GpuVector r) { + GpuVector ret; + for(int i=0;i +struct GpuComplexVector { + _datum rrrr[_N]; + _datum iiii[_N]; + static const int N = _N; + typedef _datum datum; +}; +template +inline accelerator GpuComplexVector operator*(const GpuComplexVector l,const GpuComplexVector r) { + GpuComplexVector ret; + for(int i=0;i +inline accelerator GpuComplexVector operator-(const GpuComplexVector l,const GpuComplexVector r) { + GpuComplexVector ret; + for(int i=0;i +inline accelerator GpuComplexVector operator+(const GpuComplexVector l,const GpuComplexVector r) { + GpuComplexVector ret; + for(int i=0;i +inline accelerator GpuComplexVector operator/(const GpuComplexVector l,const GpuComplexVector r) { + GpuComplexVector ret; + for(int i=0;i GpuVectorRH; +typedef GpuComplexVector GpuVectorCH; +typedef GpuVector GpuVectorRF; +typedef GpuComplexVector GpuVectorCF; +typedef GpuVector GpuVectorRD; +typedef GpuComplexVector GpuVectorCD; +typedef GpuVector GpuVectorI; + +namespace Optimization { + + struct Vsplat{ + //Complex float + accelerator_inline GpuVectorCF operator()(float a, float b){ + GpuVectorCF ret; + for(int i=0;i + accelerator_inline void operator()(GpuVector a, P* Fp){ + GpuVector *vF = (GpuVector *)Fp; + *vF = a; + } + template + accelerator_inline void operator()(GpuComplexVector a, P* Fp){ + GpuComplexVector *vF = (GpuComplexVector *)Fp; + *vF = a; + } + }; + + struct Vstream{ + template + accelerator_inline void operator()(P* F,GpuVector a){ + GpuVector *vF = (GpuVector *)F; + *vF = a; + } + template + accelerator_inline void operator()(P* F,GpuComplexVector a){ + GpuComplexVector *vF = (GpuComplexVector *)F; + *vF = a; + } + }; + + struct Vset{ + // Complex float + accelerator_inline GpuVectorCF operator()(Grid::ComplexF *a){ + typedef GpuVectorCF vec; + vec ret; + for(int i=0;i + struct Reduce{ + //Need templated class to overload output type + //General form must generate error if compiled + accelerator_inline Out_type operator()(In_type in){ + 
printf("Error, using wrong Reduce function\n"); + exit(1); + return 0; + } + }; + + ///////////////////////////////////////////////////// + // Arithmetic operations + ///////////////////////////////////////////////////// + struct Sum{ + //Real float + accelerator_inline GpuVectorRF operator()(GpuVectorRF a,GpuVectorRF b){ + return a+b; + } + accelerator_inline GpuVectorRD operator()(GpuVectorRD a,GpuVectorRD b){ + return a+b; + } + accelerator_inline GpuVectorCF operator()(GpuVectorCF a,GpuVectorCF b){ + return a+b; + } + accelerator_inline GpuVectorCD operator()(GpuVectorCD a,GpuVectorCD b){ + return a+b; + } + accelerator_inline GpuVectorI operator()(GpuVectorI a,GpuVectorI b){ + return a+b; + } + }; + + struct Sub{ + accelerator_inline GpuVectorRF operator()(GpuVectorRF a,GpuVectorRF b){ + return a-b; + } + accelerator_inline GpuVectorRD operator()(GpuVectorRD a,GpuVectorRD b){ + return a-b; + } + accelerator_inline GpuVectorCF operator()(GpuVectorCF a,GpuVectorCF b){ + return a-b; + } + accelerator_inline GpuVectorCD operator()(GpuVectorCD a,GpuVectorCD b){ + return a-b; + } + accelerator_inline GpuVectorI operator()(GpuVectorI a,GpuVectorI b){ + return a-b; + } + }; + + struct MultRealPart{ + accelerator_inline GpuVectorCF operator()(GpuVectorCF a,GpuVectorCF b){ + typedef GpuVectorCF vec; + vec ret; + for(int i=0;i + static accelerator_inline GpuVector<_N,_datum> PermuteN(GpuVector<_N,_datum> &in) { + typedef GpuVector<_N,_datum> vec; + vec out; + unsigned int _mask = vec::N >> (n + 1); + for(int i=0;i + static accelerator_inline GpuComplexVector<_N,_datum> PermuteN(GpuComplexVector<_N,_datum> &in) { + typedef GpuComplexVector<_N,_datum> vec; + vec out; + unsigned int _mask = vec::N >> (n + 1); + for(int i=0;i static accelerator_inline vec Permute0(vec in) { return PermuteN<0,vec::N,typename vec::datum>(in); } + template static accelerator_inline vec Permute1(vec in) { return PermuteN<1,vec::N,typename vec::datum>(in); } + template static accelerator_inline vec Permute2(vec in) { return PermuteN<2,vec::N,typename vec::datum>(in); } + template static accelerator_inline vec Permute3(vec in) { return PermuteN<3,vec::N,typename vec::datum>(in); } + + }; + + struct PrecisionChange { + + //////////////////////////////////////////////////////////////////////////////////// + // Single / Half + //////////////////////////////////////////////////////////////////////////////////// + static accelerator_inline GpuVectorCH StoH (GpuVectorCF a,GpuVectorCF b) { + int N = GpuVectorCF::N; + GpuVectorCH h; + for(int i=0;i + static accelerator_inline void ExchangeN(GpuVector<_N,_datum> &out1, + GpuVector<_N,_datum> &out2, + GpuVector<_N,_datum> &in1, + GpuVector<_N,_datum> &in2 ) + { + typedef GpuVector<_N,_datum> vec; + unsigned int mask = vec::N >> (n + 1); + for(int i=0;i + static accelerator_inline void ExchangeN(GpuComplexVector<_N,_datum> &out1, + GpuComplexVector<_N,_datum> &out2, + GpuComplexVector<_N,_datum> &in1, + GpuComplexVector<_N,_datum> &in2 ) + { + typedef GpuComplexVector<_N,_datum> vec; + unsigned int mask = vec::N >> (n + 1); + for(int i=0;i + static accelerator_inline void Exchange0(vec &out1,vec &out2,vec &in1,vec &in2){ + ExchangeN<0>(out1,out2,in1,in2); + }; + template + static accelerator_inline void Exchange1(vec &out1,vec &out2,vec &in1,vec &in2){ + ExchangeN<1>(out1,out2,in1,in2); + }; + template + static accelerator_inline void Exchange2(vec &out1,vec &out2,vec &in1,vec &in2){ + ExchangeN<2>(out1,out2,in1,in2); + }; + template + static accelerator_inline void Exchange3(vec 
&out1,vec &out2,vec &in1,vec &in2){ + ExchangeN<3>(out1,out2,in1,in2); + }; + +}; + +struct Rotate{ + + template static accelerator_inline vec tRotate(vec in){ + return rotate(in, n); + } + + template + static accelerator_inline GpuComplexVector<_N,_datum> rotate_template(GpuComplexVector<_N,_datum> &in, int n) + { + typedef GpuComplexVector<_N,_datum> vec; + vec out; + for(int i=0;i + static accelerator_inline GpuVector<_N,_datum> rotate_template(GpuVector<_N,_datum> &in, int n) + { + typedef GpuVector<_N,_datum> vec; + vec out; + for(int i=0;i + accelerator_inline Grid::ComplexF + Reduce::operator()(GpuVectorCF in) + { + Grid::ComplexF greduce(in.rrrr[0],in.iiii[0]); + for(int i=1;i + accelerator_inline Grid::ComplexD + Reduce::operator()(GpuVectorCD in) + { + Grid::ComplexD greduce(in.rrrr[0],in.iiii[0]); + for(int i=1;i + accelerator_inline Grid::RealF + Reduce::operator()(GpuVectorRF in) + { + RealF ret = in.rrrr[0]; + for(int i=1;i + accelerator_inline Grid::RealD + Reduce::operator()(GpuVectorRD in) + { + RealD ret = in.rrrr[0]; + for(int i=1;i + accelerator_inline Integer + Reduce::operator()(GpuVectorI in) + { + Integer ret = in.rrrr[0]; + for(int i=1;i using ReduceSIMD = Optimization::Reduce; + + // Arithmetic operations + typedef Optimization::Sum SumSIMD; + typedef Optimization::Sub SubSIMD; + typedef Optimization::Div DivSIMD; + typedef Optimization::Mult MultSIMD; + typedef Optimization::MultComplex MultComplexSIMD; + typedef Optimization::MultRealPart MultRealPartSIMD; + typedef Optimization::MaddRealPart MaddRealPartSIMD; + typedef Optimization::Conj ConjSIMD; + typedef Optimization::TimesMinusI TimesMinusISIMD; + typedef Optimization::TimesI TimesISIMD; + +} diff --git a/Grid/simd/Grid_gpu_vec.h b/Grid/simd/Grid_gpu_vec.h index b2c7588f..6f4528c7 100644 --- a/Grid/simd/Grid_gpu_vec.h +++ b/Grid/simd/Grid_gpu_vec.h @@ -38,7 +38,7 @@ Author: Peter Boyle #ifdef GRID_HIP #include #endif -#ifdef GRID_SYCL +#if !defined(GRID_CUDA) && !defined(GRID_HIP) namespace Grid { typedef struct { uint16_t x;} half; typedef struct { half x; half y;} half2; @@ -486,7 +486,7 @@ namespace Optimization { struct TimesMinusI{ //Complex single - accelerator_inline GpuVectorCF operator()(GpuVectorCF in,GpuVectorCF dummy){ + accelerator_inline GpuVectorCF operator()(GpuVectorCF in){ typedef GpuVectorCF vec; vec ret; for(int i=0;i ai -ar ai -br float32x4_t r0, r1; r0 = vnegq_f32(in); // -ar -ai -br -bi @@ -328,7 +328,7 @@ struct TimesMinusI{ return vtrn1q_f32(r1, r0); // ar -ai br -bi } //Complex double - inline float64x2_t operator()(float64x2_t in, float64x2_t ret){ + inline float64x2_t operator()(float64x2_t in){ // a ib -> b -ia float64x2_t tmp; tmp = vnegq_f64(in); @@ -338,7 +338,7 @@ struct TimesMinusI{ struct TimesI{ //Complex single - inline float32x4_t operator()(float32x4_t in, float32x4_t ret){ + inline float32x4_t operator()(float32x4_t in){ // ar ai br bi -> -ai ar -bi br float32x4_t r0, r1; r0 = vnegq_f32(in); // -ar -ai -br -bi @@ -346,7 +346,7 @@ struct TimesI{ return vtrn1q_f32(r1, in); // -ai ar -bi br } //Complex double - inline float64x2_t operator()(float64x2_t in, float64x2_t ret){ + inline float64x2_t operator()(float64x2_t in){ // a ib -> -b ia float64x2_t tmp; tmp = vnegq_f64(in); diff --git a/Grid/simd/Grid_qpx.h b/Grid/simd/Grid_qpx.h index d93dbc5b..a1b08514 100644 --- a/Grid/simd/Grid_qpx.h +++ b/Grid/simd/Grid_qpx.h @@ -356,7 +356,7 @@ struct Conj{ struct TimesMinusI{ //Complex double - inline vector4double operator()(vector4double v, vector4double ret){ + inline 
vector4double operator()(vector4double v){ return vec_xxcpnmadd(v, (vector4double){1., 1., 1., 1.}, (vector4double){0., 0., 0., 0.}); } @@ -367,7 +367,7 @@ struct TimesMinusI{ struct TimesI{ //Complex double - inline vector4double operator()(vector4double v, vector4double ret){ + inline vector4double operator()(vector4double v){ return vec_xxcpnmadd(v, (vector4double){-1., -1., -1., -1.}, (vector4double){0., 0., 0., 0.}); } diff --git a/Grid/simd/Grid_sse4.h b/Grid/simd/Grid_sse4.h index eb76427e..cb93dd3e 100644 --- a/Grid/simd/Grid_sse4.h +++ b/Grid/simd/Grid_sse4.h @@ -35,7 +35,7 @@ Author: neo */ // Time-stamp: <2015-06-16 23:27:54 neo> //---------------------------------------------------------------------- - +#include #include NAMESPACE_BEGIN(Grid); @@ -273,27 +273,25 @@ struct Conj{ struct TimesMinusI{ //Complex single - inline __m128 operator()(__m128 in, __m128 ret){ + inline __m128 operator()(__m128 in){ __m128 tmp =_mm_addsub_ps(_mm_setzero_ps(),in); // r,-i return _mm_shuffle_ps(tmp,tmp,_MM_SELECT_FOUR_FOUR(2,3,0,1)); } //Complex double - inline __m128d operator()(__m128d in, __m128d ret){ + inline __m128d operator()(__m128d in){ __m128d tmp =_mm_addsub_pd(_mm_setzero_pd(),in); // r,-i return _mm_shuffle_pd(tmp,tmp,0x1); } - - }; struct TimesI{ //Complex single - inline __m128 operator()(__m128 in, __m128 ret){ + inline __m128 operator()(__m128 in){ __m128 tmp =_mm_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1)); return _mm_addsub_ps(_mm_setzero_ps(),tmp); // r,-i } //Complex double - inline __m128d operator()(__m128d in, __m128d ret){ + inline __m128d operator()(__m128d in){ __m128d tmp = _mm_shuffle_pd(in,in,0x1); return _mm_addsub_pd(_mm_setzero_pd(),tmp); // r,-i } diff --git a/Grid/simd/Grid_vector_types.h b/Grid/simd/Grid_vector_types.h index 4f952bb2..daf41cae 100644 --- a/Grid/simd/Grid_vector_types.h +++ b/Grid/simd/Grid_vector_types.h @@ -110,11 +110,10 @@ accelerator_inline Grid_half sfw_float_to_half(float ff) { #ifdef GPU_VEC #include "Grid_gpu_vec.h" #endif -/* -#ifdef GEN -#include "Grid_generic.h" + +#ifdef GPU_RRII +#include "Grid_gpu_rrii.h" #endif -*/ #ifdef GEN #if defined(A64FX) || defined(A64FXFIXEDSIZE) // breakout A64FX SVE ACLE here @@ -131,7 +130,6 @@ accelerator_inline Grid_half sfw_float_to_half(float ff) { #include "Grid_a64fx-fixedsize.h" #endif #else - //#pragma message("building GEN") // generic #include "Grid_generic.h" #endif #endif @@ -150,23 +148,6 @@ accelerator_inline Grid_half sfw_float_to_half(float ff) { #endif #endif -/* -#ifdef A64FXVLA -#pragma message("building A64FX VLA") -#if defined(ARMCLANGCOMPAT) - #pragma message("applying data types patch") -#endif -#include -#include "Grid_a64fx-2.h" -#endif - -#ifdef A64FXVLS -#pragma message("building A64FX VLS") -#include -#include "Grid_a64fx-fixedsize.h" -#endif -*/ - #ifdef SSE4 #include "Grid_sse4.h" #endif @@ -270,12 +251,14 @@ public: typedef Vector_type vector_type; typedef Scalar_type scalar_type; + /* typedef union conv_t_union { Vector_type v; Scalar_type s[sizeof(Vector_type) / sizeof(Scalar_type)]; accelerator_inline conv_t_union(){}; } conv_t; - + */ + Vector_type v; static accelerator_inline constexpr int Nsimd(void) { @@ -555,15 +538,13 @@ public: template friend accelerator_inline Grid_simd SimdApply(const functor &func, const Grid_simd &v) { Grid_simd ret; - Grid_simd::conv_t conv; Grid_simd::scalar_type s; - conv.v = v.v; for (int i = 0; i < Nsimd(); i++) { - s = conv.s[i]; - conv.s[i] = func(s); + s = v.getlane(i); + s = func(s); + ret.putlane(s,i); } - ret.v = conv.v; 
return ret; } template @@ -571,18 +552,14 @@ public: const Grid_simd &x, const Grid_simd &y) { Grid_simd ret; - Grid_simd::conv_t cx; - Grid_simd::conv_t cy; Grid_simd::scalar_type sx,sy; - cx.v = x.v; - cy.v = y.v; for (int i = 0; i < Nsimd(); i++) { - sx = cx.s[i]; - sy = cy.s[i]; - cx.s[i] = func(sx,sy); + sx = x.getlane(i); + sy = y.getlane(i); + sx = func(sx,sy); + ret.putlane(sx,i); } - ret.v = cx.v; return ret; } /////////////////////// @@ -645,15 +622,36 @@ public: /////////////////////////////// // Getting single lanes /////////////////////////////// - accelerator_inline Scalar_type getlane(int lane) { +#ifdef GPU_RRII + template = 0> + accelerator_inline Scalar_type getlane(int lane) const { + return Scalar_type(v.rrrr[lane],v.iiii[lane]); + } + template = 0> + accelerator_inline void putlane(const Scalar_type &_S, int lane){ + v.rrrr[lane] = real(_S); + v.iiii[lane] = imag(_S); + } + template = 0> + accelerator_inline Scalar_type getlane(int lane) const { + return ((S*)&v)[lane]; + } + template = 0> + accelerator_inline void putlane(const S &_S, int lane){ + ((Scalar_type*)&v)[lane] = _S; + } +#else // Can pun to an array of complex + accelerator_inline Scalar_type getlane(int lane) const { return ((Scalar_type*)&v)[lane]; } - accelerator_inline void putlane(const Scalar_type &S, int lane){ ((Scalar_type*)&v)[lane] = S; } +#endif + }; // end of Grid_simd class definition + /////////////////////////////// // Define available types /////////////////////////////// @@ -663,7 +661,7 @@ typedef Grid_simd vRealD; typedef Grid_simd vInteger; typedef Grid_simd vRealH; -#ifdef GPU_VEC +#if defined(GPU_VEC) || defined(GPU_RRII) typedef Grid_simd, SIMD_CHtype> vComplexH; typedef Grid_simd , SIMD_CFtype> vComplexF; typedef Grid_simd , SIMD_CDtype> vComplexD; @@ -763,6 +761,7 @@ accelerator_inline void vsplat(Grid_simd &ret, NotEnableIf, } ////////////////////////// + /////////////////////////////////////////////// // Initialise to 1,0,i for the correct types /////////////////////////////////////////////// @@ -907,34 +906,6 @@ accelerator_inline Grid_simd fxmac(Grid_simd a, Grid_simd b, G // ---------------------------------------------- -// Distinguish between complex types and others -template = 0> -accelerator_inline Grid_simd operator/(Grid_simd a, Grid_simd b) { - typedef Grid_simd simd; - - simd ret; - simd den; - typename simd::conv_t conv; - - ret = a * conjugate(b) ; - den = b * conjugate(b) ; - - // duplicates real part - auto real_den = toReal(den); - simd zden; - memcpy((void *)&zden.v,(void *)&real_den.v,sizeof(zden)); - ret.v=binary(ret.v, zden.v, DivSIMD()); - return ret; -}; - -// Real/Integer types -template = 0> -accelerator_inline Grid_simd operator/(Grid_simd a, Grid_simd b) { - Grid_simd ret; - ret.v = binary(a.v, b.v, DivSIMD()); - return ret; -}; - /////////////////////// // Conjugate /////////////////////// @@ -959,30 +930,29 @@ accelerator_inline Grid_simd adj(const Grid_simd &in) { /////////////////////// template = 0> accelerator_inline void timesMinusI(Grid_simd &ret, const Grid_simd &in) { - ret.v = binary(in.v, ret.v, TimesMinusISIMD()); + ret.v = unary(in.v, TimesMinusISIMD()); } template = 0> accelerator_inline Grid_simd timesMinusI(const Grid_simd &in) { Grid_simd ret; - timesMinusI(ret, in); + ret.v=unary(in.v, TimesMinusISIMD()); return ret; } template = 0> accelerator_inline Grid_simd timesMinusI(const Grid_simd &in) { return in; } - /////////////////////// // timesI /////////////////////// template = 0> accelerator_inline void timesI(Grid_simd &ret, const 
Grid_simd &in) { - ret.v = binary(in.v, ret.v, TimesISIMD()); + ret.v = unary(in.v, TimesISIMD()); } template = 0> accelerator_inline Grid_simd timesI(const Grid_simd &in) { Grid_simd ret; - timesI(ret, in); + ret.v= unary(in.v, TimesISIMD()); return ret; } template = 0> @@ -990,6 +960,35 @@ accelerator_inline Grid_simd timesI(const Grid_simd &in) { return in; } + +// Distinguish between complex types and others +template = 0> +accelerator_inline Grid_simd operator/(Grid_simd a, Grid_simd b) { + typedef Grid_simd simd; + + simd ret; + simd den; + + ret = a * conjugate(b) ; + den = b * conjugate(b) ; + + // duplicates real part + auto real_den = toReal(den); + simd zden; + memcpy((void *)&zden.v,(void *)&real_den.v,sizeof(zden)); + ret.v=binary(ret.v, zden.v, DivSIMD()); + return ret; +}; + +// Real/Integer types +template = 0> +accelerator_inline Grid_simd operator/(Grid_simd a, Grid_simd b) { + Grid_simd ret; + ret.v = binary(a.v, b.v, DivSIMD()); + return ret; +}; + + ///////////////////// // Inner, outer ///////////////////// @@ -1021,12 +1020,12 @@ template // must be a real arg accelerator_inline typename toRealMapper::Realified toReal(const Csimd &in) { typedef typename toRealMapper::Realified Rsimd; Rsimd ret; - typename Rsimd::conv_t conv; - memcpy((void *)&conv.v,(void *)&in.v,sizeof(conv.v)); + int j=0; for (int i = 0; i < Rsimd::Nsimd(); i += 2) { - conv.s[i + 1] = conv.s[i]; // duplicate (r,r);(r,r);(r,r); etc... + auto s = real(in.getlane(j++)); + ret.putlane(s,i); + ret.putlane(s,i+1); } - memcpy((void *)&ret.v,(void *)&conv.v,sizeof(ret.v)); return ret; } @@ -1039,23 +1038,24 @@ template // must be a real arg accelerator_inline typename toComplexMapper::Complexified toComplex(const Rsimd &in) { typedef typename toComplexMapper::Complexified Csimd; - typename Rsimd::conv_t conv; // address as real - - conv.v = in.v; + typedef typename Csimd::scalar_type scalar_type; + int j=0; + Csimd ret; for (int i = 0; i < Rsimd::Nsimd(); i += 2) { - assert(conv.s[i + 1] == conv.s[i]); + auto rr = in.getlane(i); + auto ri = in.getlane(i+1); + assert(rr==ri); // trap any cases where real was not duplicated // indicating the SIMD grids of real and imag assignment did not correctly // match - conv.s[i + 1] = 0.0; // zero imaginary parts + scalar_type s(rr,0.0); + ret.putlane(s,j++); } - Csimd ret; - memcpy((void *)&ret.v,(void *)&conv.v,sizeof(ret.v)); return ret; } -accelerator_inline void precisionChange(vRealF *out,vRealD *in,int nvec) +accelerator_inline void precisionChange(vRealF *out,const vRealD *in,int nvec) { assert((nvec&0x1)==0); for(int m=0;m*2 -struct is_simd : public std::false_type {}; -template <> struct is_simd : public std::true_type {}; -template <> struct is_simd : public std::true_type {}; -template <> struct is_simd : public std::true_type {}; -template <> struct is_simd : public std::true_type {}; -template <> struct is_simd : public std::true_type {}; -template <> struct is_simd : public std::true_type {}; -template <> struct is_simd : public std::true_type {}; - -template using IfSimd = Invoke::value, int> >; -template using IfNotSimd = Invoke::value, unsigned> >; NAMESPACE_END(Grid); diff --git a/Grid/simd/Grid_vector_unops.h b/Grid/simd/Grid_vector_unops.h index b89bb785..35f1721c 100644 --- a/Grid/simd/Grid_vector_unops.h +++ b/Grid/simd/Grid_vector_unops.h @@ -29,8 +29,7 @@ See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ 
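// --- Editor's illustrative sketch (not part of the patch) ----------------
// The TimesI/TimesMinusI functors above become unary because multiplying by
// +/- i never needed the old dummy "ret" operand: it is just a swap of each
// (re,im) pair with one sign flip, which the SIMD headers express as a
// shuffle followed by addsub or a masked negate. A scalar rendering of the
// same operation on an interleaved complex array:
#include <cstdio>
// (a+ib) * i  = -b + ia : swap each pair, negate the new real part
void timesI(float *out, const float *in, int ncomplex) {
  for (int k = 0; k < ncomplex; k++) {
    out[2*k]   = -in[2*k+1];
    out[2*k+1] =  in[2*k];
  }
}
// (a+ib) * -i =  b - ia : swap each pair, negate the new imaginary part
void timesMinusI(float *out, const float *in, int ncomplex) {
  for (int k = 0; k < ncomplex; k++) {
    out[2*k]   =  in[2*k+1];
    out[2*k+1] = -in[2*k];
  }
}
int main() {
  float z[4] = {1, 2, 3, 4}, r[4];   // 1+2i, 3+4i
  timesI(r, z, 2);                   // expect -2+1i, -4+3i
  std::printf("timesI     : (%g,%g) (%g,%g)\n", r[0], r[1], r[2], r[3]);
  timesMinusI(r, z, 2);              // expect  2-1i,  4-3i
  std::printf("timesMinusI: (%g,%g) (%g,%g)\n", r[0], r[1], r[2], r[3]);
  return 0;
}
// --------------------------------------------------------------------------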
-#ifndef GRID_VECTOR_UNOPS -#define GRID_VECTOR_UNOPS +#pragma once #include @@ -112,6 +111,9 @@ template struct ImagFunctor { accelerator scalar operator()(const scalar &a) const { return imag(a); } }; +///////////// +// Unary operations +///////////// template accelerator_inline Grid_simd real(const Grid_simd &r) { return SimdApply(RealFunctor(), r); @@ -168,6 +170,65 @@ template accelerator_inline Grid_simd div(const Grid_simd &r, Integer y) { return SimdApply(DivIntFunctor(y), r); } +/// Double 2 cases +template +accelerator_inline Grid_simd2 real(const Grid_simd2 &r) { + return SimdApply(RealFunctor(), r); +} +template +accelerator_inline Grid_simd2 imag(const Grid_simd2 &r) { + return SimdApply(ImagFunctor(), r); +} +template +accelerator_inline Grid_simd2 sqrt(const Grid_simd2 &r) { + return SimdApply(SqrtRealFunctor(), r); +} +template +accelerator_inline Grid_simd2 cos(const Grid_simd2 &r) { + return SimdApply(CosRealFunctor(), r); +} +template +accelerator_inline Grid_simd2 sin(const Grid_simd2 &r) { + return SimdApply(SinRealFunctor(), r); +} +template +accelerator_inline Grid_simd2 acos(const Grid_simd2 &r) { + return SimdApply(AcosRealFunctor(), r); +} +template +accelerator_inline Grid_simd2 asin(const Grid_simd2 &r) { + return SimdApply(AsinRealFunctor(), r); +} +template +accelerator_inline Grid_simd2 log(const Grid_simd2 &r) { + return SimdApply(LogRealFunctor(), r); +} +template +accelerator_inline Grid_simd2 abs(const Grid_simd2 &r) { + return SimdApply(AbsRealFunctor(), r); +} +template +accelerator_inline Grid_simd2 exp(const Grid_simd2 &r) { + return SimdApply(ExpFunctor(), r); +} +template +accelerator_inline Grid_simd2 Not(const Grid_simd2 &r) { + return SimdApply(NotFunctor(), r); +} +template +accelerator_inline Grid_simd2 pow(const Grid_simd2 &r, double y) { + return SimdApply(PowRealFunctor(y), r); +} +template +accelerator_inline Grid_simd2 mod(const Grid_simd2 &r, Integer y) { + return SimdApply(ModIntFunctor(y), r); +} +template +accelerator_inline Grid_simd2 div(const Grid_simd2 &r, Integer y) { + return SimdApply(DivIntFunctor(y), r); +} + + //////////////////////////////////////////////////////////////////////////// // Allows us to assign into **conformable** real vectors from complex //////////////////////////////////////////////////////////////////////////// @@ -193,23 +254,22 @@ struct OrOrFunctor { //////////////////////////////// template accelerator_inline Grid_simd operator&(const Grid_simd &x, - const Grid_simd &y) { + const Grid_simd &y) { return SimdApplyBinop(AndFunctor(), x, y); } template accelerator_inline Grid_simd operator&&(const Grid_simd &x, - const Grid_simd &y) { + const Grid_simd &y) { return SimdApplyBinop(AndAndFunctor(), x, y); } template accelerator_inline Grid_simd operator|(const Grid_simd &x, - const Grid_simd &y) { + const Grid_simd &y) { return SimdApplyBinop(OrFunctor(), x, y); } template accelerator_inline Grid_simd operator||(const Grid_simd &x, - const Grid_simd &y) { + const Grid_simd &y) { return SimdApplyBinop(OrOrFunctor(), x, y); } NAMESPACE_END(Grid); -#endif diff --git a/Grid/simd/Simd.h b/Grid/simd/Simd.h index 76ca3bef..78fae298 100644 --- a/Grid/simd/Simd.h +++ b/Grid/simd/Simd.h @@ -69,6 +69,7 @@ typedef RealF Real; typedef thrust::complex ComplexF; typedef thrust::complex ComplexD; typedef thrust::complex Complex; +typedef thrust::complex ComplexH; template using complex = thrust::complex; accelerator_inline ComplexD pow(const ComplexD& r,RealD y){ return(thrust::pow(r,(double)y)); } @@ -77,6 +78,7 @@ 
accelerator_inline ComplexF pow(const ComplexF& r,RealF y){ return(thrust::pow(r typedef std::complex ComplexF; typedef std::complex ComplexD; typedef std::complex Complex; +typedef std::complex ComplexH; // Hack template using complex = std::complex; accelerator_inline ComplexD pow(const ComplexD& r,RealD y){ return(std::pow(r,y)); } @@ -224,18 +226,14 @@ accelerator_inline void Gpermute(VectorSIMD &y,const VectorSIMD &b,int perm); NAMESPACE_END(Grid); #include +#include #include NAMESPACE_BEGIN(Grid); -// Default precision -#ifdef GRID_DEFAULT_PRECISION_DOUBLE + +// Default precision is wired to double typedef vRealD vReal; typedef vComplexD vComplex; -#else -typedef vRealF vReal; -typedef vComplexF vComplex; -#endif - inline std::ostream& operator<< (std::ostream& stream, const vComplexF &o){ int nn=vComplexF::Nsimd(); @@ -262,6 +260,13 @@ inline std::ostream& operator<< (std::ostream& stream, const vComplexD &o){ stream<<">"; return stream; } +inline std::ostream& operator<< (std::ostream& stream, const vComplexD2 &o){ + stream<<"<"; + stream<"; + return stream; +} inline std::ostream& operator<< (std::ostream& stream, const vRealF &o){ int nn=vRealF::Nsimd(); diff --git a/Grid/stencil/SimpleCompressor.h b/Grid/stencil/SimpleCompressor.h index 1150b234..dabd70a6 100644 --- a/Grid/stencil/SimpleCompressor.h +++ b/Grid/stencil/SimpleCompressor.h @@ -3,26 +3,108 @@ NAMESPACE_BEGIN(Grid); -template -accelerator_inline void exchangeSIMT(vobj &mp0,vobj &mp1,const vobj &vp0,const vobj &vp1,Integer type) -{ - typedef decltype(coalescedRead(mp0)) sobj; - unsigned int Nsimd = vobj::Nsimd(); - unsigned int mask = Nsimd >> (type + 1); - int lane = acceleratorSIMTlane(Nsimd); - int j0 = lane &(~mask); // inner coor zero - int j1 = lane |(mask) ; // inner coor one - const vobj *vpa = &vp0; - const vobj *vpb = &vp1; - const vobj *vp = (lane&mask) ? 
(vpb) : (vpa); - auto sa = coalescedRead(vp[0],j0); - auto sb = coalescedRead(vp[0],j1); - coalescedWrite(mp0,sa); - coalescedWrite(mp1,sb); -} +class SimpleStencilParams{ +public: + Coordinate dirichlet; + int partialDirichlet; + SimpleStencilParams() { partialDirichlet = 0; }; +}; -template -class SimpleCompressor { + +// Compressors will inherit buffer management policies +// Standard comms buffer management +class FaceGatherSimple +{ +public: + static int PartialCompressionFactor(GridBase *grid) {return 1;}; + // Decompress is after merge so ok + template + static void Gather_plane_simple (commVector >& table, + const Lattice &rhs, + cobj *buffer, + compressor &compress, + int off,int so,int partial) + { + int num=table.size(); + std::pair *table_v = & table[0]; + + auto rhs_v = rhs.View(AcceleratorRead); + accelerator_forNB( i,num, vobj::Nsimd(), { + compress.Compress(buffer[off+table_v[i].first],rhs_v[so+table_v[i].second]); + }); + rhs_v.ViewClose(); + } + template + static void Gather_plane_exchange(commVector >& table,const Lattice &rhs, + std::vector pointers,int dimension,int plane,int cbmask, + compressor &compress,int type,int partial) + { + assert( (table.size()&0x1)==0); + int num=table.size()/2; + int so = plane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane + + auto rhs_v = rhs.View(AcceleratorRead); + auto p0=&pointers[0][0]; + auto p1=&pointers[1][0]; + auto tp=&table[0]; + auto rhs_p = &rhs_v[0]; + accelerator_forNB(j, num, vobj::Nsimd(), { + compress.CompressExchange(p0[j],p1[j], + rhs_p[so+tp[2*j ].second], + rhs_p[so+tp[2*j+1].second], + type); + }); + rhs_v.ViewClose(); + } + + template + static void DecompressFace(decompressor decompress,Decompression &dd) + { + auto kp = dd.kernel_p; + auto mp = dd.mpi_p; + accelerator_forNB(o,dd.buffer_size,1,{ + decompress.Decompress(kp[o],mp[o]); + }); + } + template + static void MergeFace(decompressor decompress,Merger &mm) + { + auto mp = &mm.mpointer[0]; + auto vp0= &mm.vpointers[0][0]; + auto vp1= &mm.vpointers[1][0]; + auto type= mm.type; + accelerator_forNB(o,mm.buffer_size/2,Merger::Nsimd,{ + decompress.Exchange(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type); + }); + } +}; + +//////////////////////////////////// +// Wilson compressor will add alternate policies for Dirichlet +// and possibly partial Dirichlet for DWF +//////////////////////////////////// +/* +class FaceGatherDirichlet +{ + // If it's dirichlet we don't assemble comms buffers + // + // Rely on zeroes in gauge field to drive the correct result + // NAN propagation: field will locally wrap, so fermion should NOT contain NAN and just permute + template + static void Gather_plane_simple (commVector >& table,const Lattice &rhs,cobj *buffer,compressor &compress, int off,int so){}; + template + static void Gather_plane_exchange(commVector >& table,const Lattice &rhs, + Vector pointers,int dimension,int plane,int cbmask, + compressor &compress,int type) {} + template + static void Merge(decompressor decompress,Merge &mm) { } + template + static void Decompress(decompressor decompress,Decompression &dd) {} +}; +*/ + +template +class SimpleCompressorGather : public FaceGather { public: void Point(int) {}; accelerator_inline int CommDatumSize(void) const { return sizeof(vobj); } @@ -30,20 +112,19 @@ public: accelerator_inline void Compress(vobj &buf,const vobj &in) const { coalescedWrite(buf,coalescedRead(in)); } - accelerator_inline void Exchange(vobj *mp,vobj *vp0,vobj *vp1,Integer type,Integer o) const { + accelerator_inline void Exchange(vobj 
&mp0,vobj &mp1,vobj &vp0,vobj &vp1,Integer type) const { #ifdef GRID_SIMT - exchangeSIMT(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type); + exchangeSIMT(mp0,mp1,vp0,vp1,type); #else - exchange(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type); + exchange(mp0,mp1,vp0,vp1,type); #endif } - accelerator_inline void Decompress(vobj *out,vobj *in, int o) const { assert(0); } - accelerator_inline void CompressExchange(vobj *out0,vobj *out1,const vobj *in, - int j,int k, int m,int type) const { + accelerator_inline void Decompress(vobj &out,vobj &in) const { }; + accelerator_inline void CompressExchange(vobj &out0,vobj &out1,const vobj &in0,const vobj &in1,int type) const { #ifdef GRID_SIMT - exchangeSIMT(out0[j],out1[j],in[k],in[m],type); + exchangeSIMT(out0,out1,in0,in1,type); #else - exchange(out0[j],out1[j],in[k],in[m],type); + exchange(out0,out1,in0,in1,type); #endif } // For cshift. Cshift should drop compressor coupling altogether @@ -53,6 +134,18 @@ public: } }; +// Standard compressor never needs dirichlet. +// +// Get away with a local periodic wrap and rely on dirac operator to use a zero gauge link as it is faster +// +// Compressors that inherit Dirichlet and Non-dirichlet behaviour. +// +// Currently run-time behaviour through StencilParameters parameters, p.dirichlet +// combined with the FaceGatherSimple behaviour + +template using SimpleCompressor = SimpleCompressorGather; +//template using SimpleCompressorDirichlet = SimpleCompressorGather; + NAMESPACE_END(Grid); #endif diff --git a/Grid/stencil/Stencil.cc b/Grid/stencil/Stencil.cc index c1b33baa..27dc75ed 100644 --- a/Grid/stencil/Stencil.cc +++ b/Grid/stencil/Stencil.cc @@ -29,6 +29,27 @@ NAMESPACE_BEGIN(Grid); +uint64_t DslashFullCount; +uint64_t DslashPartialCount; +uint64_t DslashDirichletCount; + +void DslashResetCounts(void) +{ + DslashFullCount=0; + DslashPartialCount=0; + DslashDirichletCount=0; +} +void DslashGetCounts(uint64_t &dirichlet,uint64_t &partial,uint64_t &full) +{ + dirichlet = DslashDirichletCount; + partial = DslashPartialCount; + full = DslashFullCount; +} +void DslashLogFull(void) { DslashFullCount++;} +void DslashLogPartial(void) { DslashPartialCount++;} +void DslashLogDirichlet(void){ DslashDirichletCount++;} + + void Gather_plane_table_compute (GridBase *grid,int dimension,int plane,int cbmask, int off,std::vector > & table) { diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index 65d878cb..40f224e6 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -52,6 +52,16 @@ NAMESPACE_BEGIN(Grid); +// These can move into a params header and be given MacroMagic serialisation +struct DefaultImplParams { + Coordinate dirichlet; // Blocksize of dirichlet BCs + int partialDirichlet; + DefaultImplParams() { + dirichlet.resize(0); + partialDirichlet=0; + }; +}; + /////////////////////////////////////////////////////////////////// // Gather for when there *is* need to SIMD split with compression /////////////////////////////////////////////////////////////////// @@ -59,6 +69,7 @@ NAMESPACE_BEGIN(Grid); void Gather_plane_table_compute (GridBase *grid,int dimension,int plane,int cbmask, int off,std::vector > & table); +/* template void Gather_plane_simple_table (commVector >& table,const Lattice &rhs,cobj *buffer,compressor &compress, int off,int so) __attribute__((noinline)); @@ -107,6 +118,13 @@ void Gather_plane_exchange_table(commVector >& table, }); rhs_v.ViewClose(); } +*/ + +void DslashResetCounts(void); +void DslashGetCounts(uint64_t &dirichlet,uint64_t &partial,uint64_t &full); +void DslashLogFull(void); 
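// A minimal usage sketch of this counter API (hypothetical call site; the
// local variable names are illustrative, not part of the patch):
//
//   uint64_t dirichlet, partial, full;
//   DslashResetCounts();                        // zero all three counters
//   /* ... apply the Dslash operator repeatedly ... */
//   DslashGetCounts(dirichlet,partial,full);    // per-call breakdown
//   std::cout << "Dslash halos: full=" << full << " partial=" << partial
//             << " dirichlet=" << dirichlet << std::endl;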
+void DslashLogPartial(void); +void DslashLogDirichlet(void); struct StencilEntry { #ifdef GRID_CUDA @@ -137,6 +155,18 @@ class CartesianStencilAccelerator { int _osites; StencilVector _directions; StencilVector _distances; + /////////////////////////////////////////////////// + // If true, this is FULLY communicated per face + // Otherwise will either be full or partial dirichlet + /////////////////////////////////////////////////// + StencilVector _comms_send; + StencilVector _comms_recv; // this is FULLY communicated per face + /////////////////////////////////////////////////// + // If true, this is partially communicated per face + /////////////////////////////////////////////////// + StencilVector _comms_partial_send; + StencilVector _comms_partial_recv; + // StencilVector _comm_buf_size; StencilVector _permute_type; StencilVector same_node; @@ -183,7 +213,7 @@ class CartesianStencilAccelerator { template class CartesianStencilView : public CartesianStencilAccelerator { - private: +public: int *closed; StencilEntry *cpu_ptr; ViewMode mode; @@ -218,7 +248,6 @@ class CartesianStencil : public CartesianStencilAccelerator View_type; typedef typename View_type::StencilVector StencilVector; @@ -230,27 +259,50 @@ public: void * recv_buf; Integer to_rank; Integer from_rank; - Integer bytes; + Integer do_send; + Integer do_recv; + Integer xbytes; + Integer rbytes; }; struct Merge { + static constexpr int Nsimd = vobj::Nsimd(); cobj * mpointer; // std::vector rpointers; std::vector vpointers; Integer buffer_size; Integer type; + Integer partial; // partial dirichlet BCs + Coordinate dims; }; struct Decompress { + static constexpr int Nsimd = vobj::Nsimd(); cobj * kernel_p; cobj * mpi_p; Integer buffer_size; + Integer partial; // partial dirichlet BCs + Coordinate dims; + }; + struct CopyReceiveBuffer { + void * from_p; + void * to_p; + Integer bytes; + }; + struct CachedTransfer { + Integer direction; + Integer OrthogPlane; + Integer DestProc; + Integer xbytes; + Integer rbytes; + Integer lane; + Integer cb; + void *recv_buf; }; - protected: GridBase * _grid; - public: GridBase *Grid(void) const { return _grid; } + LebesgueOrder *lo; //////////////////////////////////////////////////////////////////////// // Needed to conveniently communicate gparity parameters into GPU memory @@ -265,6 +317,8 @@ public: } int face_table_computed; + int partialDirichlet; + int fullDirichlet; std::vector > > face_table ; Vector surface_list; @@ -275,45 +329,26 @@ public: std::vector MergersSHM; std::vector Decompressions; std::vector DecompressionsSHM; - + std::vector CopyReceiveBuffers ; + std::vector CachedTransfers; + std::vector MpiReqs; + /////////////////////////////////////////////////////////// // Unified Comms buffers for all directions /////////////////////////////////////////////////////////// // Vectors that live on the symmetric heap in case of SHMEM // These are used; either SHM objects or refs to the above symmetric heap vectors // depending on comms target - Vector u_simd_send_buf; - Vector u_simd_recv_buf; + std::vector u_simd_send_buf; + std::vector u_simd_recv_buf; int u_comm_offset; int _unified_buffer_size; - ///////////////////////////////////////// - // Timing info; ugly; possibly temporary - ///////////////////////////////////////// - double commtime; - double mpi3synctime; - double mpi3synctime_g; - double shmmergetime; - double gathertime; - double gathermtime; - double halogtime; - double mergetime; - double decompresstime; - double comms_bytes; - double shm_bytes; - double 
splicetime; - double nosplicetime; - double calls; - std::vector comm_bytes_thr; - std::vector shm_bytes_thr; - std::vector comm_time_thr; - std::vector comm_enter_thr; - std::vector comm_leave_thr; - //////////////////////////////////////// // Stencil query //////////////////////////////////////// +#if 1 inline int SameNode(int point) { int dimension = this->_directions[point]; @@ -333,15 +368,49 @@ public: if ( displacement == 0 ) return 1; return 0; } +#else + // fancy calculation for shm code + inline int SameNode(int point) { + int dimension = this->_directions[point]; + int displacement = this->_distances[point]; + + int pd = _grid->_processors[dimension]; + int fd = _grid->_fdimensions[dimension]; + int ld = _grid->_ldimensions[dimension]; + int rd = _grid->_rdimensions[dimension]; + int simd_layout = _grid->_simd_layout[dimension]; + int comm_dim = _grid->_processors[dimension] >1 ; + + int recv_from_rank; + int xmit_to_rank; + + if ( ! comm_dim ) return 1; + + int nbr_proc; + if (displacement>0) nbr_proc = 1; + else nbr_proc = pd-1; + + // FIXME this logic needs to be sorted for three link term + // assert( (displacement==1) || (displacement==-1)); + // Present hack only works for >= 4^4 subvol per node + _grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank); + + void *shm = (void *) _grid->ShmBufferTranslate(recv_from_rank,this->u_recv_buf_p); + + if ( shm==NULL ) return 0; + return 1; + } +#endif ////////////////////////////////////////// // Comms packet queue for asynch thread // Use OpenMP Tasks for cleaner ??? + // must be called *inside* parallel region ////////////////////////////////////////// + /* void CommunicateThreaded() { #ifdef GRID_OMP - // must be called in parallel region int mythread = omp_get_thread_num(); int nthreads = CartesianCommunicator::nCommThreads; #else @@ -350,108 +419,52 @@ public: #endif if (nthreads == -1) nthreads = 1; if (mythread < nthreads) { - comm_enter_thr[mythread] = usecond(); for (int i = mythread; i < Packets.size(); i += nthreads) { uint64_t bytes = _grid->StencilSendToRecvFrom(Packets[i].send_buf, Packets[i].to_rank, Packets[i].recv_buf, Packets[i].from_rank, Packets[i].bytes,i); - comm_bytes_thr[mythread] += bytes; - shm_bytes_thr[mythread] += 2*Packets[i].bytes-bytes; // Send + Recv. - } - comm_leave_thr[mythread]= usecond(); - comm_time_thr[mythread] += comm_leave_thr[mythread] - comm_enter_thr[mythread]; } } - - void CollateThreads(void) - { - int nthreads = CartesianCommunicator::nCommThreads; - double first=0.0; - double last =0.0; - - for(int t=0;t 0.0) && ( t0 < first ) ) first = t0; // min time seen - - if ( t1 > last ) last = t1; // max time seen - - } - commtime+= last-first; - } + */ //////////////////////////////////////////////////////////////////////// // Non blocking send and receive. Necessarily parallel. 
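// The intended calling pattern, shown schematically (a sketch, not part of
// the patch; `stencil` names an illustrative CartesianStencil instance):
//
//   std::vector<std::vector<CommsRequest_t> > reqs;
//   stencil.CommunicateBegin(reqs);    // queue sends/recvs for all Packets into MpiReqs
//   /* ... overlap interior compute with the halo transfer ... */
//   stencil.CommunicateComplete(reqs); // wait, log full/partial/dirichlet, barrier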
//////////////////////////////////////////////////////////////////////// void CommunicateBegin(std::vector > &reqs) { - reqs.resize(Packets.size()); - commtime-=usecond(); for(int i=0;iStencilSendToRecvFromBegin(reqs[i], - Packets[i].send_buf, - Packets[i].to_rank, - Packets[i].recv_buf, - Packets[i].from_rank, - Packets[i].bytes,i); - comms_bytes+=bytes; - shm_bytes +=2*Packets[i].bytes-bytes; + _grid->StencilSendToRecvFromBegin(MpiReqs, + Packets[i].send_buf, + Packets[i].to_rank,Packets[i].do_send, + Packets[i].recv_buf, + Packets[i].from_rank,Packets[i].do_recv, + Packets[i].xbytes,Packets[i].rbytes,i); } - _grid->StencilBarrier();// Synch shared memory on a single nodes } void CommunicateComplete(std::vector > &reqs) { - for(int i=0;iStencilSendToRecvFromComplete(reqs[i],i); - } - commtime+=usecond(); + _grid->StencilSendToRecvFromComplete(MpiReqs,0); + if ( this->partialDirichlet ) DslashLogPartial(); + else if ( this->fullDirichlet ) DslashLogDirichlet(); + else DslashLogFull(); + acceleratorCopySynchronise(); + // Everyone agrees we are all done + _grid->StencilBarrier(); } //////////////////////////////////////////////////////////////////////// // Blocking send and receive. Either sequential or parallel. //////////////////////////////////////////////////////////////////////// void Communicate(void) { - if ( 0 ){ - thread_region { - // must be called in parallel region - int mythread = thread_num(); - int maxthreads= thread_max(); - int nthreads = CartesianCommunicator::nCommThreads; - assert(nthreads <= maxthreads); - if (nthreads == -1) nthreads = 1; - if (mythread < nthreads) { - for (int i = mythread; i < Packets.size(); i += nthreads) { - double start = usecond(); - uint64_t bytes= _grid->StencilSendToRecvFrom(Packets[i].send_buf, - Packets[i].to_rank, - Packets[i].recv_buf, - Packets[i].from_rank, - Packets[i].bytes,i); - comm_bytes_thr[mythread] += bytes; - shm_bytes_thr[mythread] += Packets[i].bytes - bytes; - comm_time_thr[mythread] += usecond() - start; - } - } - } - } else { // Concurrent and non-threaded asynch calls to MPI - std::vector > reqs; - this->CommunicateBegin(reqs); - this->CommunicateComplete(reqs); - } + ///////////////////////////////////////////////////////// + // Concurrent and non-threaded asynch calls to MPI + ///////////////////////////////////////////////////////// + std::vector > reqs; + this->CommunicateBegin(reqs); + this->CommunicateComplete(reqs); } template void HaloExchange(const Lattice &source,compressor &compress) @@ -489,31 +502,23 @@ public: sshift[1] = _grid->CheckerBoardShiftForCB(this->_checkerboard,dimension,shift,Odd); if ( sshift[0] == sshift[1] ) { if (splice_dim) { - splicetime-=usecond(); - auto tmp = GatherSimd(source,dimension,shift,0x3,compress,face_idx); + auto tmp = GatherSimd(source,dimension,shift,0x3,compress,face_idx,point); is_same_node = is_same_node && tmp; - splicetime+=usecond(); } else { - nosplicetime-=usecond(); - auto tmp = Gather(source,dimension,shift,0x3,compress,face_idx); + auto tmp = Gather(source,dimension,shift,0x3,compress,face_idx,point); is_same_node = is_same_node && tmp; - nosplicetime+=usecond(); } } else { if(splice_dim){ - splicetime-=usecond(); // if checkerboard is unfavourable take two passes // both with block stride loop iteration - auto tmp1 = GatherSimd(source,dimension,shift,0x1,compress,face_idx); - auto tmp2 = GatherSimd(source,dimension,shift,0x2,compress,face_idx); + auto tmp1 = GatherSimd(source,dimension,shift,0x1,compress,face_idx,point); + auto tmp2 = 
GatherSimd(source,dimension,shift,0x2,compress,face_idx,point); is_same_node = is_same_node && tmp1 && tmp2; - splicetime+=usecond(); } else { - nosplicetime-=usecond(); - auto tmp1 = Gather(source,dimension,shift,0x1,compress,face_idx); - auto tmp2 = Gather(source,dimension,shift,0x2,compress,face_idx); + auto tmp1 = Gather(source,dimension,shift,0x1,compress,face_idx,point); + auto tmp2 = Gather(source,dimension,shift,0x2,compress,face_idx,point); is_same_node = is_same_node && tmp1 && tmp2; - nosplicetime+=usecond(); } } } @@ -523,13 +528,9 @@ public: template void HaloGather(const Lattice &source,compressor &compress) { - mpi3synctime_g-=usecond(); _grid->StencilBarrier();// Synch shared memory on a single nodes - mpi3synctime_g+=usecond(); - // conformable(source.Grid(),_grid); assert(source.Grid()==_grid); - halogtime-=usecond(); u_comm_offset=0; @@ -542,8 +543,6 @@ public: face_table_computed=1; assert(u_comm_offset==_unified_buffer_size); - accelerator_barrier(); - halogtime+=usecond(); } ///////////////////////// @@ -556,19 +555,86 @@ public: Mergers.resize(0); MergersSHM.resize(0); Packets.resize(0); - calls++; + CopyReceiveBuffers.resize(0); + CachedTransfers.resize(0); + MpiReqs.resize(0); } - void AddPacket(void *xmit,void * rcv, Integer to,Integer from,Integer bytes){ + void AddCopy(void *from,void * to, Integer bytes) + { + CopyReceiveBuffer obj; + obj.from_p = from; + obj.to_p = to; + obj.bytes= bytes; + CopyReceiveBuffers.push_back(obj); + } + void CommsCopy() + { + // These are device resident MPI buffers. + for(int i=0;i &dv) { Decompress d; + d.partial = this->partialDirichlet; + d.dims = _grid->_fdimensions; d.kernel_p = k_p; d.mpi_p = m_p; d.buffer_size = buffer_size; @@ -576,6 +642,8 @@ public: } void AddMerge(cobj *merge_p,std::vector &rpointers,Integer buffer_size,Integer type,std::vector &mv) { Merge m; + m.partial = this->partialDirichlet; + m.dims = _grid->_fdimensions; m.type = type; m.mpointer = merge_p; m.vpointers= rpointers; @@ -583,43 +651,23 @@ public: mv.push_back(m); } template void CommsMerge(decompressor decompress) { + CommsCopy(); CommsMerge(decompress,Mergers,Decompressions); } template void CommsMergeSHM(decompressor decompress) { - mpi3synctime-=usecond(); - accelerator_barrier(); - _grid->StencilBarrier();// Synch shared memory on a single nodes - mpi3synctime+=usecond(); - shmmergetime-=usecond(); - CommsMerge(decompress,MergersSHM,DecompressionsSHM); - shmmergetime+=usecond(); + assert(MergersSHM.size()==0); + assert(DecompressionsSHM.size()==0); } template - void CommsMerge(decompressor decompress,std::vector &mm,std::vector &dd) { - - - mergetime-=usecond(); + void CommsMerge(decompressor decompress,std::vector &mm,std::vector &dd) + { for(int i=0;i_npoints;ii++){ + int dimension = this->_directions[ii]; + int displacement = this->_distances[ii]; + int gd = _grid->_gdimensions[dimension]; + int fd = _grid->_fdimensions[dimension]; + int pd = _grid->_processors [dimension]; + int pc = _grid->_processor_coor[dimension]; + int ld = fd/pd; + /////////////////////////////////////////// + // Figure out dirichlet send and receive + // on this leg of stencil. 
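// Worked example with illustrative numbers: ld=8 sites per rank, pd=4 ranks,
// dirichlet block=16. Rank pc owns [ld*pc, ld*(pc+1)); for displacement>0 the
// receive is quiesced when the high face sits on a block boundary,
// (ld*(pc+1))%block==0, and the send when the low face does, (ld*pc)%block==0:
//
//   pc  : 0    1    2    3
//   recv: on   off  on   off     (  8%16, 16%16, 24%16, 32%16 )
//   send: off  on   off  on      (  0%16,  8%16, 16%16, 24%16 )
//
// so traffic is cut only across block boundaries; with partialDirichlet set,
// each quiesced leg becomes a partial transfer instead.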
+ /////////////////////////////////////////// + int comm_dim = _grid->_processors[dimension] >1 ; + int block = dirichlet_block[dimension]; + this->_comms_send[ii] = comm_dim; + this->_comms_recv[ii] = comm_dim; + this->_comms_partial_send[ii] = 0; + this->_comms_partial_recv[ii] = 0; + if ( block && comm_dim ) { + assert(abs(displacement) < ld ); + // Quiesce communication across block boundaries + if( displacement > 0 ) { + // High side, low side + // | <--B--->| + // | | | + // noR + // noS + if ( ( (ld*(pc+1) ) % block ) == 0 ) this->_comms_recv[ii] = 0; + if ( ( (ld*pc ) % block ) == 0 ) this->_comms_send[ii] = 0; + } else { + // High side, low side + // | <--B--->| + // | | | + // noS + // noR + if ( ( (ld*(pc+1) ) % block ) == 0 ) this->_comms_send[ii] = 0; + if ( ( (ld*pc ) % block ) == 0 ) this->_comms_recv[ii] = 0; + } + if ( partialDirichlet ) { + this->_comms_partial_send[ii] = !this->_comms_send[ii]; + this->_comms_partial_recv[ii] = !this->_comms_recv[ii]; + } } } } - CartesianStencil(GridBase *grid, int npoints, int checkerboard, const std::vector &directions, const std::vector &distances, - Parameters p) - : shm_bytes_thr(npoints), - comm_bytes_thr(npoints), - comm_enter_thr(npoints), - comm_leave_thr(npoints), - comm_time_thr(npoints) + Parameters p=Parameters()) { face_table_computed=0; _grid = grid; @@ -681,8 +774,18 @@ public: this->_simd_layout = _grid->_simd_layout; // copy simd_layout to give access to Accelerator Kernels this->_directions = StencilVector(directions); this->_distances = StencilVector(distances); + this->_comms_send.resize(npoints); + this->_comms_recv.resize(npoints); this->same_node.resize(npoints); + if ( p.dirichlet.size() ==0 ) p.dirichlet.resize(grid->Nd(),0); + partialDirichlet = p.partialDirichlet; + DirichletBlock(p.dirichlet); // comms send/recv set up + fullDirichlet=0; + for(int d=0;d_gdimensions[dimension]; int fd = _grid->_fdimensions[dimension]; + int pd = _grid->_processors [dimension]; + // int ld = gd/pd; int rd = _grid->_rdimensions[dimension]; + int pc = _grid->_processor_coor[dimension]; this->_permute_type[point]=_grid->PermuteType(dimension); this->_checkerboard = checkerboard; - ////////////////////////// - // the permute type - ////////////////////////// int simd_layout = _grid->_simd_layout[dimension]; int comm_dim = _grid->_processors[dimension] >1 ; int splice_dim = _grid->_simd_layout[dimension]>1 && (comm_dim); @@ -716,7 +820,6 @@ public: assert ( (rotate_dim && comm_dim) == false) ; // Do not think spread out is supported int sshift[2]; - ////////////////////////// // Underlying approach. 
For each local site build // up a table containing the npoint "neighbours" and whether they @@ -763,7 +866,6 @@ public: u_simd_recv_buf[l] = (cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj)); u_simd_send_buf[l] = (cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj)); } - PrecomputeByteOffsets(); } @@ -817,6 +919,7 @@ public: GridBase *grid=_grid; const int Nsimd = grid->Nsimd(); + int comms_recv = this->_comms_recv[point] || this->_comms_partial_recv[point] ; int fd = _grid->_fdimensions[dimension]; int ld = _grid->_ldimensions[dimension]; int rd = _grid->_rdimensions[dimension]; @@ -842,12 +945,14 @@ public: for(int x=0;xPermuteType(dimension); + int permute_slice; int sx = (x+sshift)%rd; int offnode = 0; if ( simd_layout > 1 ) { + permute_slice=1; for(int i=0;i>(permute_type+1)); @@ -864,6 +969,7 @@ public: } else { int comm_proc = ((x+sshift)/rd)%pd; offnode = (comm_proc!= 0); + permute_slice=0; } int wraparound=0; @@ -873,25 +979,31 @@ public: if ( (shiftpm== 1) && (sx_processor_coor[dimension]==grid->_processors[dimension]-1) ) { wraparound = 1; } - if (!offnode) { - int permute_slice=0; + // Wrap locally dirichlet support case OR node local + if ( offnode==0 ) { + + permute_slice=0; CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound); - + } else { + if ( comms_recv ) { + + ScatterPlane(point,dimension,x,cbmask,_unified_buffer_size,wraparound); // permute/extract/merge is done in comms phase + + } else { + + CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound); + + } + + } + + if ( offnode ) { int words = buffer_size; if (cbmask != 0x3) words=words>>1; - - // int rank = grid->_processor; - // int recv_from_rank; - // int xmit_to_rank; - - int unified_buffer_offset = _unified_buffer_size; _unified_buffer_size += words; - - ScatterPlane(point,dimension,x,cbmask,unified_buffer_offset,wraparound); // permute/extract/merge is done in comms phase - } } } @@ -990,11 +1102,15 @@ public: } template - int Gather(const Lattice &rhs,int dimension,int shift,int cbmask,compressor & compress,int &face_idx) + int Gather(const Lattice &rhs,int dimension,int shift,int cbmask,compressor & compress,int &face_idx, int point) { typedef typename cobj::vector_type vector_type; - typedef typename cobj::scalar_type scalar_type; + int comms_send = this->_comms_send[point]; + int comms_recv = this->_comms_recv[point]; + int comms_partial_send = this->_comms_partial_send[point] ; + int comms_partial_recv = this->_comms_partial_recv[point] ; + assert(rhs.Grid()==_grid); // conformable(_grid,rhs.Grid()); @@ -1017,78 +1133,126 @@ public: int sx = (x+sshift)%rd; int comm_proc = ((x+sshift)/rd)%pd; - + if (comm_proc) { - + int words = buffer_size; if (cbmask != 0x3) words=words>>1; int bytes = words * compress.CommDatumSize(); + int xbytes; + int rbytes; + if ( comms_send ) xbytes = bytes; // Full send + else if ( comms_partial_send ) xbytes = bytes/compressor::PartialCompressionFactor(_grid); + else xbytes = 0; // full dirichlet + + if ( comms_recv ) rbytes = bytes; + else if ( comms_partial_recv ) rbytes = bytes/compressor::PartialCompressionFactor(_grid); + else rbytes = 0; + int so = sx*rhs.Grid()->_ostride[dimension]; // base offset for start of plane - if ( !face_table_computed ) { - face_table.resize(face_idx+1); - std::vector > face_table_host ; - Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset,face_table_host); - face_table[face_idx].resize(face_table_host.size()); - acceleratorCopyToDevice(&face_table_host[0], - 
&face_table[face_idx][0], - face_table[face_idx].size()*sizeof(face_table_host[0])); - } + int comm_off = u_comm_offset; - // int rank = _grid->_processor; int recv_from_rank; int xmit_to_rank; + cobj *recv_buf; + cobj *send_buf; _grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank); assert (xmit_to_rank != _grid->ThisRank()); assert (recv_from_rank != _grid->ThisRank()); - cobj *recv_buf; - if ( compress.DecompressionStep() ) { + if ( !face_table_computed ) { + face_table.resize(face_idx+1); + std::vector > face_table_host ; + Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,comm_off,face_table_host); + // std::cout << "bytes expect "<< bytes << " " << face_table_host.size()* compress.CommDatumSize()<u_recv_buf_p; } - cobj *send_buf; - send_buf = this->u_send_buf_p; // Gather locally, must send - + // potential SHM fast path for intranode + int shm_send=0; + int shm_recv=0; +#ifdef SHM_FAST_PATH + // Put directly in place if we can + send_buf = (cobj *)_grid->ShmBufferTranslate(xmit_to_rank,recv_buf); + if ( (send_buf==NULL) ) { + shm_send=0; + send_buf = this->u_send_buf_p; + } else { + shm_send=1; + } + void *test_ptr = _grid->ShmBufferTranslate(recv_from_rank,recv_buf); + if ( test_ptr != NULL ) shm_recv = 1; + // static int printed; + // if (!printed){ + // std::cout << " GATHER FAST PATH SHM "<u_send_buf_p; // Gather locally, must send assert(send_buf!=NULL); - Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,u_comm_offset,so); face_idx++; - gathertime+=usecond(); +#endif - /////////////////////////////////////////////////////////// - // Build a list of things to do after we synchronise GPUs - // Start comms now??? - /////////////////////////////////////////////////////////// - AddPacket((void *)&send_buf[u_comm_offset], - (void *)&recv_buf[u_comm_offset], - xmit_to_rank, - recv_from_rank, - bytes); + // std::cout << " GatherPlaneSimple partial send "<< comms_partial_send<u_recv_buf_p[u_comm_offset], - &recv_buf[u_comm_offset], + int duplicate = CheckForDuplicate(dimension,sx,comm_proc,(void *)&recv_buf[comm_off],0,xbytes,rbytes,cbmask); + if ( !duplicate ) { // Force comms for now + + /////////////////////////////////////////////////////////// + // Build a list of things to do after we synchronise GPUs + // Start comms now??? 
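// The three per-leg traffic regimes feeding AddPacket below, restated
// (bytes = words * CommDatumSize() for this face):
//   full comms        : xbytes = rbytes = bytes
//   partial dirichlet : bytes / compressor::PartialCompressionFactor(_grid)
//   full dirichlet    : 0, no MPI traffic; the local wrap supplies the data
// FaceGatherSimple::PartialCompressionFactor() returns 1, so partial and full
// coincide until a compressor (e.g. the Wilson one) overrides it.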
+ /////////////////////////////////////////////////////////// + int do_send = (comms_send|comms_partial_send) && (!shm_send ); + int do_recv = (comms_send|comms_partial_send) && (!shm_recv ); + + AddPacket((void *)&send_buf[comm_off], + (void *)&recv_buf[comm_off], + xmit_to_rank, do_send, + recv_from_rank, do_recv, + xbytes,rbytes); + } + + if ( (compress.DecompressionStep() && comms_recv) || comms_partial_recv ) { + AddDecompress(&this->u_recv_buf_p[comm_off], + &recv_buf[comm_off], words,Decompressions); } + u_comm_offset+=words; + face_idx++; } } return 0; } template - int GatherSimd(const Lattice &rhs,int dimension,int shift,int cbmask,compressor &compress,int & face_idx) + int GatherSimd(const Lattice &rhs,int dimension,int shift,int cbmask,compressor &compress,int & face_idx,int point) { const int Nsimd = _grid->Nsimd(); const int maxl =2;// max layout in a direction + + int comms_send = this->_comms_send[point]; + int comms_recv = this->_comms_recv[point]; + int comms_partial_send = this->_comms_partial_send[point] ; + int comms_partial_recv = this->_comms_partial_recv[point] ; + int fd = _grid->_fdimensions[dimension]; int rd = _grid->_rdimensions[dimension]; int ld = _grid->_ldimensions[dimension]; @@ -1103,7 +1267,6 @@ public: int permute_type=_grid->PermuteType(dimension); - // std::cout << "SimdNew permute type "< rpointers(maxl); @@ -1137,8 +1305,9 @@ public: if ( any_offnode ) { + int comm_off = u_comm_offset; for(int i=0;i > face_table_host ; - - Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset,face_table_host); + + Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,comm_off,face_table_host); face_table[face_idx].resize(face_table_host.size()); acceleratorCopyToDevice(&face_table_host[0], &face_table[face_idx][0], face_table[face_idx].size()*sizeof(face_table_host[0])); - } - gathermtime-=usecond(); - Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type); + } + + + if ( comms_send ) xbytes = bytes; + else if ( comms_partial_send ) xbytes = bytes/compressor::PartialCompressionFactor(_grid); + else xbytes = 0; + + if ( comms_recv ) rbytes = bytes; + else if ( comms_partial_recv ) rbytes = bytes/compressor::PartialCompressionFactor(_grid); + else rbytes = 0; + + // Gathers SIMD lanes for send and merge + // Different faces can be full comms or partial comms with multiple ranks per node + if ( comms_send || comms_recv||comms_partial_send||comms_partial_recv ) { + + int partial = partialDirichlet; + compressor::Gather_plane_exchange(face_table[face_idx],rhs, + spointers,dimension,sx,cbmask, + compress,permute_type,partial ); + } face_idx++; - gathermtime+=usecond(); - //spointers[0] -- low - //spointers[1] -- high - + //spointers[0] -- low simd coor + //spointers[1] -- high simd coor for(int i=0;iShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank); - +#ifdef SHM_FAST_PATH + #warning STENCIL SHM FAST PATH SELECTED + int shm_recv=0; + // shm == receive pointer if offnode + // shm == Translate[send pointer] if on node -- my view of his send pointer + cobj *shm = (cobj *) _grid->ShmBufferTranslate(recv_from_rank,sp); + if (shm==NULL) { + shm = rp; + // we found a packet that comes from MPI and contributes to this shift. + // is_same_node is only used in the WilsonStencil, and gets set for this point in the stencil. + // Kernel will add the exterior_terms except if is_same_node. 
+ // leg of stencil + shm_recv=0; + } else { + shm_recv=1; + } + rpointers[i] = shm; + // Test send side + void *test_ptr = (void *) _grid->ShmBufferTranslate(xmit_to_rank,sp); + if ( test_ptr != NULL ) shm_send = 1; + // static int printed; + // if (!printed){ + // std::cout << " GATHERSIMD FAST PATH SHM "<u_recv_buf_p[u_comm_offset],rpointers,reduced_buffer_size,permute_type,Mergers); + // rpointer may be doing a remote read in the gather over SHM + if ( comms_recv|comms_partial_recv ) { + AddMerge(&this->u_recv_buf_p[comm_off],rpointers,reduced_buffer_size,permute_type,Mergers); + } u_comm_offset +=buffer_size; + } } return 0; } - void ZeroCounters(void) { }; - - void Report(void) { }; - }; NAMESPACE_END(Grid); diff --git a/Grid/tensors/Tensor_SIMT.h b/Grid/tensors/Tensor_SIMT.h index 0a7d3382..8015d74c 100644 --- a/Grid/tensors/Tensor_SIMT.h +++ b/Grid/tensors/Tensor_SIMT.h @@ -31,6 +31,27 @@ Author: Peter Boyle NAMESPACE_BEGIN(Grid); +//////////////////////////////////////////////// +// Inside a GPU thread +//////////////////////////////////////////////// +template +accelerator_inline void exchangeSIMT(vobj &mp0,vobj &mp1,const vobj &vp0,const vobj &vp1,Integer type) +{ + typedef decltype(coalescedRead(mp0)) sobj; + unsigned int Nsimd = vobj::Nsimd(); + unsigned int mask = Nsimd >> (type + 1); + int lane = acceleratorSIMTlane(Nsimd); + int j0 = lane &(~mask); // inner coor zero + int j1 = lane |(mask) ; // inner coor one + const vobj *vpa = &vp0; + const vobj *vpb = &vp1; + const vobj *vp = (lane&mask) ? (vpb) : (vpa); + auto sa = coalescedRead(vp[0],j0); + auto sb = coalescedRead(vp[0],j1); + coalescedWrite(mp0,sa); + coalescedWrite(mp1,sb); +} + #ifndef GRID_SIMT ////////////////////////////////////////// diff --git a/Grid/tensors/Tensor_class.h b/Grid/tensors/Tensor_class.h index be045ede..f3114cb5 100644 --- a/Grid/tensors/Tensor_class.h +++ b/Grid/tensors/Tensor_class.h @@ -178,6 +178,7 @@ public: stream << "S {" << o._internal << "}"; return stream; }; + // FIXME These will break with change of data layout strong_inline const scalar_type * begin() const { return reinterpret_cast(&_internal); } strong_inline scalar_type * begin() { return reinterpret_cast< scalar_type *>(&_internal); } strong_inline const scalar_type * end() const { return begin() + Traits::count; } @@ -288,6 +289,7 @@ public: // return _internal[i]; // } + // FIXME These will break with change of data layout strong_inline const scalar_type * begin() const { return reinterpret_cast(_internal); } strong_inline scalar_type * begin() { return reinterpret_cast< scalar_type *>(_internal); } strong_inline const scalar_type * end() const { return begin() + Traits::count; } @@ -430,6 +432,7 @@ public: // return _internal[i][j]; // } + // FIXME These will break with change of data layout strong_inline const scalar_type * begin() const { return reinterpret_cast(_internal[0]); } strong_inline scalar_type * begin() { return reinterpret_cast< scalar_type *>(_internal[0]); } strong_inline const scalar_type * end() const { return begin() + Traits::count; } diff --git a/Grid/tensors/Tensor_exp.h b/Grid/tensors/Tensor_exp.h index 1f637d5f..e35467d4 100644 --- a/Grid/tensors/Tensor_exp.h +++ b/Grid/tensors/Tensor_exp.h @@ -55,7 +55,7 @@ template accelerator_inline iVector Exponentiate(c // Specialisation: Cayley-Hamilton exponential for SU(3) -#ifndef GRID_CUDA +#ifndef GRID_ACCELERATED template::TensorLevel == 0>::type * =nullptr> accelerator_inline iMatrix Exponentiate(const iMatrix &arg, RealD alpha , Integer Nexp = 
DEFAULT_MAT_EXP ) { diff --git a/Grid/tensors/Tensor_extract_merge.h b/Grid/tensors/Tensor_extract_merge.h index ea619d0f..a92a02e3 100644 --- a/Grid/tensors/Tensor_extract_merge.h +++ b/Grid/tensors/Tensor_extract_merge.h @@ -1,5 +1,5 @@ /************************************************************************************* -n + Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/tensors/Tensor_extract_merge.h @@ -62,8 +62,18 @@ void extract(const vobj &vec,ExtractBuffer &extracted) const int words=sizeof(vobj)/sizeof(vector_type); const int Nsimd=vector_type::Nsimd(); const int Nextr=extracted.size(); + vector_type * vp = (vector_type *)&vec; const int s=Nsimd/Nextr; sobj_scalar_type *sp = (sobj_scalar_type *) &extracted[0]; + sobj_scalar_type stmp; + for(int w=0;w &extracted) memcpy((char *)&sp[i*words+w],(char *)&stmp,sizeof(stmp)); } } + */ + return; } @@ -93,7 +105,7 @@ void merge(vobj &vec,ExtractBuffer &extracted) const int s=Nsimd/Nextr; sobj_scalar_type *sp = (sobj_scalar_type *)&extracted[0]; - scalar_type *vp = (scalar_type *)&vec; + vector_type *vp = (vector_type *)&vec; scalar_type vtmp; sobj_scalar_type stmp; for(int w=0;w &extracted) for(int ii=0;ii::extract_type extract_type; - typedef extract_type * pointer; + typedef scalar_type * pointer; constexpr int words=sizeof(vobj)/sizeof(vector_type); - constexpr int Nsimd=vector_type::Nsimd(); scalar_object extracted; pointer __restrict__ sp = (pointer)&extracted; // Type pun - pointer __restrict__ vp = (pointer)&vec; + vector_type *vp = (vector_type *)&vec; for(int w=0;w::extract_type extract_type; - typedef extract_type * pointer; + typedef scalar_type * pointer; constexpr int words=sizeof(vobj)/sizeof(vector_type); - constexpr int Nsimd=vector_type::Nsimd(); pointer __restrict__ sp = (pointer)&extracted; - pointer __restrict__ vp = (pointer)&vec; + vector_type *vp = (vector_type *)&vec; for(int w=0;w &extracted, int off const int Nextr=extracted.size(); const int s = Nsimd/Nextr; - scalar_type * vp = (scalar_type *)&vec; - scalar_type vtmp; - sobj_scalar_type stmp; + vector_type * vp = (vector_type *)&vec; for(int w=0;w &extracted, int offset) const int Nextr=extracted.size(); const int s = Nsimd/Nextr; - scalar_type * vp = (scalar_type *)&vec; + vector_type * vp = (vector_type *)&vec; scalar_type vtmp; - sobj_scalar_type stmp; for(int w=0;w +accelerator_inline +void copyLane(vobjOut & __restrict__ vecOut, int lane_out, const vobjIn & __restrict__ vecIn, int lane_in) +{ + static_assert( std::is_same::value == 1, "copyLane: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same + + typedef typename vobjOut::vector_type ovector_type; + typedef typename vobjIn::vector_type ivector_type; + constexpr int owords=sizeof(vobjOut)/sizeof(ovector_type); + constexpr int iwords=sizeof(vobjIn)/sizeof(ivector_type); + static_assert( owords == iwords, "copyLane: Expected number of vector words in input and output objects to be equal" ); + + typedef typename vobjOut::scalar_type oscalar_type; + typedef typename vobjIn::scalar_type iscalar_type; + typedef typename ExtractTypeMap::extract_type oextract_type; + typedef typename ExtractTypeMap::extract_type iextract_type; + + typedef oextract_type * opointer; + typedef iextract_type * ipointer; + + iscalar_type itmp; + oscalar_type otmp; + + ovector_type * __restrict__ op = (ovector_type *)&vecOut; + ivector_type * __restrict__ ip = (ivector_type *)&vecIn; + for(int w=0;w struct isGridScalar : public std::false_type { static 
constexpr bool notvalue = true; }; template struct isGridScalar> : public std::true_type { static constexpr bool notvalue = false; }; - // Store double-precision data in single-precision grids for precision promoted localInnerProductD - template - class TypePair { - public: - T _internal[2]; - accelerator TypePair& operator=(const Grid::Zero& o) { - _internal[0] = Zero(); - _internal[1] = Zero(); - return *this; - } - - accelerator TypePair operator+(const TypePair& o) const { - TypePair r; - r._internal[0] = _internal[0] + o._internal[0]; - r._internal[1] = _internal[1] + o._internal[1]; - return r; - } - - accelerator TypePair& operator+=(const TypePair& o) { - _internal[0] += o._internal[0]; - _internal[1] += o._internal[1]; - return *this; - } - - friend accelerator_inline void add(TypePair* ret, const TypePair* a, const TypePair* b) { - add(&ret->_internal[0],&a->_internal[0],&b->_internal[0]); - add(&ret->_internal[1],&a->_internal[1],&b->_internal[1]); - } - }; - typedef TypePair ComplexD2; - typedef TypePair RealD2; - typedef TypePair vComplexD2; - typedef TypePair vRealD2; // Traits to identify fundamental data types template struct isGridFundamental : public std::false_type { static constexpr bool notvalue = true; }; @@ -88,8 +55,6 @@ NAMESPACE_BEGIN(Grid); template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; - template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; - template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; ////////////////////////////////////////////////////////////////////////////////// @@ -136,7 +101,7 @@ NAMESPACE_BEGIN(Grid); typedef ComplexF Complexified; typedef RealF Realified; typedef RealD DoublePrecision; - typedef RealD2 DoublePrecision2; + typedef RealD DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef RealD scalar_type; @@ -151,19 +116,6 @@ NAMESPACE_BEGIN(Grid); typedef RealD DoublePrecision; typedef RealD DoublePrecision2; }; - template<> struct GridTypeMapper : public GridTypeMapper_Base { - typedef RealD2 scalar_type; - typedef RealD2 scalar_typeD; - typedef RealD2 vector_type; - typedef RealD2 vector_typeD; - typedef RealD2 tensor_reduced; - typedef RealD2 scalar_object; - typedef RealD2 scalar_objectD; - typedef ComplexD2 Complexified; - typedef RealD2 Realified; - typedef RealD2 DoublePrecision; - typedef RealD2 DoublePrecision2; - }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef ComplexF scalar_type; typedef ComplexD scalar_typeD; @@ -175,7 +127,7 @@ NAMESPACE_BEGIN(Grid); typedef ComplexF Complexified; typedef RealF Realified; typedef ComplexD DoublePrecision; - typedef ComplexD2 DoublePrecision2; + typedef ComplexD DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef ComplexD scalar_type; @@ -191,7 +143,7 @@ NAMESPACE_BEGIN(Grid); typedef ComplexD DoublePrecision2; }; -#ifdef GRID_CUDA +#if defined(GRID_CUDA) || defined(GRID_HIP) template<> struct GridTypeMapper > : public GridTypeMapper_Base { typedef std::complex scalar_type; typedef std::complex scalar_typeD; @@ -220,19 +172,6 @@ NAMESPACE_BEGIN(Grid); }; #endif - template<> struct GridTypeMapper : public GridTypeMapper_Base { 
- typedef ComplexD2 scalar_type; - typedef ComplexD2 scalar_typeD; - typedef ComplexD2 vector_type; - typedef ComplexD2 vector_typeD; - typedef ComplexD2 tensor_reduced; - typedef ComplexD2 scalar_object; - typedef ComplexD2 scalar_objectD; - typedef ComplexD2 Complexified; - typedef RealD2 Realified; - typedef ComplexD2 DoublePrecision; - typedef ComplexD2 DoublePrecision2; - }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef Integer scalar_type; typedef Integer scalar_typeD; @@ -274,13 +213,13 @@ NAMESPACE_BEGIN(Grid); typedef vRealD DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { - typedef RealD2 scalar_type; - typedef RealD2 scalar_typeD; + typedef RealD scalar_type; + typedef RealD scalar_typeD; typedef vRealD2 vector_type; typedef vRealD2 vector_typeD; typedef vRealD2 tensor_reduced; - typedef RealD2 scalar_object; - typedef RealD2 scalar_objectD; + typedef RealD scalar_object; + typedef RealD scalar_objectD; typedef vComplexD2 Complexified; typedef vRealD2 Realified; typedef vRealD2 DoublePrecision; @@ -341,13 +280,13 @@ NAMESPACE_BEGIN(Grid); typedef vComplexD DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { - typedef ComplexD2 scalar_type; - typedef ComplexD2 scalar_typeD; + typedef ComplexD scalar_type; + typedef ComplexD scalar_typeD; typedef vComplexD2 vector_type; typedef vComplexD2 vector_typeD; typedef vComplexD2 tensor_reduced; - typedef ComplexD2 scalar_object; - typedef ComplexD2 scalar_objectD; + typedef ComplexD scalar_object; + typedef ComplexD scalar_objectD; typedef vComplexD2 Complexified; typedef vRealD2 Realified; typedef vComplexD2 DoublePrecision; diff --git a/Grid/threads/Accelerator.cc b/Grid/threads/Accelerator.cc index 163e4ac4..70f469b0 100644 --- a/Grid/threads/Accelerator.cc +++ b/Grid/threads/Accelerator.cc @@ -1,14 +1,23 @@ #include NAMESPACE_BEGIN(Grid); +int world_rank; // Use to control world rank for print guarding int acceleratorAbortOnGpuError=1; uint32_t accelerator_threads=2; uint32_t acceleratorThreads(void) {return accelerator_threads;}; void acceleratorThreads(uint32_t t) {accelerator_threads = t;}; +#define ENV_LOCAL_RANK_OMPI "OMPI_COMM_WORLD_LOCAL_RANK" +#define ENV_RANK_OMPI "OMPI_COMM_WORLD_RANK" +#define ENV_LOCAL_RANK_SLURM "SLURM_LOCALID" +#define ENV_RANK_SLURM "SLURM_PROCID" +#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK" +#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK" + #ifdef GRID_CUDA cudaDeviceProp *gpu_props; cudaStream_t copyStream; +cudaStream_t computeStream; void acceleratorInit(void) { int nDevices = 1; @@ -16,13 +25,8 @@ void acceleratorInit(void) gpu_props = new cudaDeviceProp[nDevices]; char * localRankStr = NULL; - int rank = 0, world_rank=0; -#define ENV_LOCAL_RANK_OMPI "OMPI_COMM_WORLD_LOCAL_RANK" -#define ENV_RANK_OMPI "OMPI_COMM_WORLD_RANK" -#define ENV_LOCAL_RANK_SLURM "SLURM_LOCALID" -#define ENV_RANK_SLURM "SLURM_PROCID" -#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK" -#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK" + int rank = 0; + world_rank=0; if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);} if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);} if ((localRankStr = getenv(ENV_RANK_SLURM )) != NULL) { world_rank = atoi(localRankStr);} @@ -97,6 +101,7 @@ void acceleratorInit(void) cudaSetDevice(device); cudaStreamCreate(©Stream); + cudaStreamCreate(&computeStream); const int len=64; char busid[len]; if( rank == 
world_rank ) { @@ -111,6 +116,7 @@ void acceleratorInit(void) #ifdef GRID_HIP hipDeviceProp_t *gpu_props; hipStream_t copyStream; +hipStream_t computeStream; void acceleratorInit(void) { int nDevices = 1; @@ -118,11 +124,8 @@ void acceleratorInit(void) gpu_props = new hipDeviceProp_t[nDevices]; char * localRankStr = NULL; - int rank = 0, world_rank=0; -#define ENV_LOCAL_RANK_OMPI "OMPI_COMM_WORLD_LOCAL_RANK" -#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK" -#define ENV_RANK_OMPI "OMPI_COMM_WORLD_RANK" -#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK" + int rank = 0; + world_rank=0; // We extract the local rank initialization using an environment variable if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL) { @@ -134,8 +137,10 @@ void acceleratorInit(void) } if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);} if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);} + if ((localRankStr = getenv(ENV_RANK_SLURM )) != NULL) { world_rank = atoi(localRankStr);} - printf("world_rank %d has %d devices\n",world_rank,nDevices); + if ( world_rank == 0 ) + printf("world_rank %d has %d devices\n",world_rank,nDevices); size_t totalDeviceMem=0; for (int i = 0; i < nDevices; i++) { @@ -181,6 +186,7 @@ void acceleratorInit(void) #endif hipSetDevice(device); hipStreamCreate(©Stream); + hipStreamCreate(&computeStream); const int len=64; char busid[len]; if( rank == world_rank ) { @@ -210,11 +216,9 @@ void acceleratorInit(void) #endif char * localRankStr = NULL; - int rank = 0, world_rank=0; -#define ENV_LOCAL_RANK_OMPI "OMPI_COMM_WORLD_LOCAL_RANK" -#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK" -#define ENV_RANK_OMPI "OMPI_COMM_WORLD_RANK" -#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK" + int rank = 0; + world_rank=0; + // We extract the local rank initialization using an environment variable if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL) { diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index e17e85d1..2aeb9fa7 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -107,6 +107,7 @@ void acceleratorInit(void); extern int acceleratorAbortOnGpuError; extern cudaStream_t copyStream; +extern cudaStream_t computeStream; accelerator_inline int acceleratorSIMTlane(int Nsimd) { #ifdef GRID_SIMT @@ -134,7 +135,7 @@ inline void cuda_mem(void) }; \ dim3 cu_threads(nsimd,acceleratorThreads(),1); \ dim3 cu_blocks ((num1+nt-1)/nt,num2,1); \ - LambdaApply<<>>(num1,num2,nsimd,lambda); \ + LambdaApply<<>>(num1,num2,nsimd,lambda); \ } #define accelerator_for6dNB(iter1, num1, \ @@ -153,7 +154,7 @@ inline void cuda_mem(void) }; \ dim3 cu_blocks (num1,num2,num3); \ dim3 cu_threads(num4,num5,num6); \ - Lambda6Apply<<>>(num1,num2,num3,num4,num5,num6,lambda); \ + Lambda6Apply<<>>(num1,num2,num3,num4,num5,num6,lambda); \ } template __global__ @@ -189,7 +190,7 @@ void Lambda6Apply(uint64_t num1, uint64_t num2, uint64_t num3, #define accelerator_barrier(dummy) \ { \ - cudaDeviceSynchronize(); \ + cudaStreamSynchronize(computeStream); \ cudaError err = cudaGetLastError(); \ if ( cudaSuccess != err ) { \ printf("accelerator_barrier(): Cuda error %s \n", \ @@ -247,17 +248,23 @@ inline int acceleratorIsCommunicable(void *ptr) ////////////////////////////////////////////// // SyCL acceleration ////////////////////////////////////////////// -#ifdef GRID_SYCL -NAMESPACE_END(Grid); -#include -#include +#ifdef GRID_SYCL #define GRID_SYCL_LEVEL_ZERO_IPC -#ifdef GRID_SYCL_LEVEL_ZERO_IPC 
+NAMESPACE_END(Grid); +#if 0 +#include +#include #include #include +#else +#include +#include +#include +#include #endif + NAMESPACE_BEGIN(Grid); extern cl::sycl::queue *theGridAccelerator; @@ -339,6 +346,7 @@ NAMESPACE_BEGIN(Grid); #define accelerator_inline __host__ __device__ inline extern hipStream_t copyStream; +extern hipStream_t computeStream; /*These routines define mapping from thread grid to loop & vector lane indexing */ accelerator_inline int acceleratorSIMTlane(int Nsimd) { #ifdef GRID_SIMT @@ -360,16 +368,15 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) { dim3 hip_blocks ((num1+nt-1)/nt,num2,1); \ if(hip_threads.x * hip_threads.y * hip_threads.z <= 64){ \ hipLaunchKernelGGL(LambdaApply64,hip_blocks,hip_threads, \ - 0,0, \ - num1,num2,nsimd, lambda); \ + 0,computeStream, \ + num1,num2,nsimd, lambda); \ } else { \ hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads, \ - 0,0, \ - num1,num2,nsimd, lambda); \ + 0,computeStream, \ + num1,num2,nsimd, lambda); \ } \ } - template __global__ __launch_bounds__(64,1) void LambdaApply64(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda) @@ -398,7 +405,7 @@ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda) #define accelerator_barrier(dummy) \ { \ - hipDeviceSynchronize(); \ + hipStreamSynchronize(computeStream); \ auto err = hipGetLastError(); \ if ( err != hipSuccess ) { \ printf("After hipDeviceSynchronize() : HIP error %s \n", hipGetErrorString( err )); \ @@ -441,7 +448,7 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(bas inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch { - hipMemcpyAsync(to,from,bytes, hipMemcpyDeviceToDevice,copyStream); + hipMemcpyDtoDAsync(to,from,bytes, copyStream); } inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); }; @@ -451,7 +458,8 @@ inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); // Common on all GPU targets ////////////////////////////////////////////// #if defined(GRID_SYCL) || defined(GRID_CUDA) || defined(GRID_HIP) -#define accelerator_forNB( iter1, num1, nsimd, ... ) accelerator_for2dNB( iter1, num1, iter2, 1, nsimd, {__VA_ARGS__} ); +// FIXME -- the non-blocking nature got broken March 30 2023 by PAB +#define accelerator_forNB( iter1, num1, nsimd, ... ) accelerator_for2dNB( iter1, num1, iter2, 1, nsimd, {__VA_ARGS__} ); #define accelerator_for( iter, num, nsimd, ... 
) \ accelerator_forNB(iter, num, nsimd, { __VA_ARGS__ } ); \ @@ -461,6 +469,8 @@ inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); accelerator_for2dNB(iter1, num1, iter2, num2, nsimd, { __VA_ARGS__ } ); \ accelerator_barrier(dummy); +#define GRID_ACCELERATED + #endif ////////////////////////////////////////////// @@ -516,7 +526,7 @@ inline void acceleratorFreeCpu (void *ptr){free(ptr);}; ////////////////////////////////////////////// #ifdef GRID_SYCL -inline void acceleratorFenceComputeStream(void){ accelerator_barrier();}; +inline void acceleratorFenceComputeStream(void){ theGridAccelerator->ext_oneapi_submit_barrier(); }; #else // Ordering within a stream guaranteed on Nvidia & AMD inline void acceleratorFenceComputeStream(void){ }; diff --git a/Grid/util/Init.cc b/Grid/util/Init.cc index 36854d9c..d013763a 100644 --- a/Grid/util/Init.cc +++ b/Grid/util/Init.cc @@ -167,14 +167,13 @@ void GridCmdOptionInt(std::string &str,int & val) return; } -void GridCmdOptionFloat(std::string &str,float & val) +void GridCmdOptionFloat(std::string &str,double & val) { std::stringstream ss(str); ss>>val; return; } - void GridParseLayout(char **argv,int argc, Coordinate &latt_c, Coordinate &mpi_c) @@ -356,6 +355,11 @@ void Grid_init(int *argc,char ***argv) ////////////////////////////////////////////////////////// CartesianCommunicator::Init(argc,argv); + GridLogger::GlobalStopWatch.Stop(); + CartesianCommunicator::BarrierWorld(); + GridLogger::GlobalStopWatch.Reset();// Back to zero with synchronised clock + GridLogger::GlobalStopWatch.Start(); + //////////////////////////////////// // Banner after MPI (unless GPU) //////////////////////////////////// diff --git a/Grid/util/Init.h b/Grid/util/Init.h index 585660a1..bdf0bcac 100644 --- a/Grid/util/Init.h +++ b/Grid/util/Init.h @@ -57,7 +57,7 @@ void GridCmdOptionCSL(std::string str,std::vector & vec); template void GridCmdOptionIntVector(const std::string &str,VectorInt & vec); void GridCmdOptionInt(std::string &str,int & val); -void GridCmdOptionFloat(std::string &str,float & val); +void GridCmdOptionFloat(std::string &str,double & val); void GridParseLayout(char **argv,int argc, diff --git a/HMC/Mobius2p1f.cc b/HMC/Mobius2p1f.cc index 5f82e0e7..4ab1f20f 100644 --- a/HMC/Mobius2p1f.cc +++ b/HMC/Mobius2p1f.cc @@ -39,7 +39,7 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef WilsonImplR FermionImplPolicy; - typedef MobiusFermionR FermionAction; + typedef MobiusFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; typedef Grid::XmlReader Serialiser; @@ -133,8 +133,8 @@ int main(int argc, char **argv) { //////////////////////////////////// // FermionAction StrangeOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_mass,M5,b,c, Params); - // DomainWallEOFAFermionR Strange_Op_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5); - // DomainWallEOFAFermionR Strange_Op_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5); + // DomainWallEOFAFermionD Strange_Op_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5); + // DomainWallEOFAFermionD Strange_Op_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5); // ExactOneFlavourRatioPseudoFermionAction EOFA(Strange_Op_L,Strange_Op_R,CG,ofp, false); FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params); diff --git a/HMC/Mobius2p1fEOFA.cc b/HMC/Mobius2p1fEOFA.cc index b1294da5..c961cbc9 100644 --- a/HMC/Mobius2p1fEOFA.cc +++ 
b/HMC/Mobius2p1fEOFA.cc @@ -175,9 +175,9 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef WilsonImplR FermionImplPolicy; - typedef MobiusFermionR FermionAction; + typedef MobiusFermionD FermionAction; typedef MobiusFermionF FermionActionF; - typedef MobiusEOFAFermionR FermionEOFAAction; + typedef MobiusEOFAFermionD FermionEOFAAction; typedef MobiusEOFAFermionF FermionEOFAActionF; typedef typename FermionAction::FermionField FermionField; typedef typename FermionActionF::FermionField FermionFieldF; @@ -293,9 +293,9 @@ int main(int argc, char **argv) { OFRp.precision= 50; - MobiusEOFAFermionR Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); + MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); MobiusEOFAFermionF Strange_Op_LF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); - MobiusEOFAFermionR Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); + MobiusEOFAFermionD Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); MobiusEOFAFermionF Strange_Op_RF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); ConjugateGradient ActionCG(ActionStoppingCondition,MaxCGIterations); diff --git a/HMC/Mobius2p1fEOFA_F1.cc b/HMC/Mobius2p1fEOFA_F1.cc index 3f0a7bf6..f910d69e 100644 --- a/HMC/Mobius2p1fEOFA_F1.cc +++ b/HMC/Mobius2p1fEOFA_F1.cc @@ -159,9 +159,9 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef WilsonImplR FermionImplPolicy; - typedef MobiusFermionR FermionAction; + typedef MobiusFermionD FermionAction; typedef MobiusFermionF FermionActionF; - typedef MobiusEOFAFermionR FermionEOFAAction; + typedef MobiusEOFAFermionD FermionEOFAAction; typedef MobiusEOFAFermionF FermionEOFAActionF; typedef typename FermionAction::FermionField FermionField; typedef typename FermionActionF::FermionField FermionFieldF; @@ -281,9 +281,9 @@ int main(int argc, char **argv) { OFRp.precision= 50; - MobiusEOFAFermionR Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); + MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); MobiusEOFAFermionF Strange_Op_LF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); - MobiusEOFAFermionR Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); + MobiusEOFAFermionD Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); MobiusEOFAFermionF Strange_Op_RF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); ConjugateGradient ActionCG(ActionStoppingCondition,MaxCGIterations); diff --git a/HMC/Mobius2p1fIDSDRGparityEOFA_40ID.cc b/HMC/Mobius2p1fIDSDRGparityEOFA_40ID.cc new file mode 100644 index 00000000..35ec2246 --- /dev/null +++ b/HMC/Mobius2p1fIDSDRGparityEOFA_40ID.cc @@ -0,0 +1,920 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./HMC/Mobius2p1fIDSDRGparityEOFA.cc + +Copyright (C) 
2015-2016 + +Author: Christopher Kelly +Author: Peter Boyle + + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +using namespace Grid; + +//Production binary for the 40ID G-parity ensemble + +struct RatQuoParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(RatQuoParameters, + double, bnd_lo, + double, bnd_hi, + Integer, action_degree, + double, action_tolerance, + Integer, md_degree, + double, md_tolerance, + Integer, reliable_update_freq, + Integer, bnd_check_freq); + RatQuoParameters() { + bnd_lo = 1e-2; + bnd_hi = 30; + action_degree = 10; + action_tolerance = 1e-10; + md_degree = 10; + md_tolerance = 1e-8; + bnd_check_freq = 20; + reliable_update_freq = 50; + } + + void Export(RationalActionParams &into) const{ + into.lo = bnd_lo; + into.hi = bnd_hi; + into.action_degree = action_degree; + into.action_tolerance = action_tolerance; + into.md_degree = md_degree; + into.md_tolerance = md_tolerance; + into.BoundsCheckFreq = bnd_check_freq; + } +}; + +struct EOFAparameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(EOFAparameters, + OneFlavourRationalParams, rat_params, + double, action_tolerance, + double, action_mixcg_inner_tolerance, + double, md_tolerance, + double, md_mixcg_inner_tolerance); + + EOFAparameters() { + action_mixcg_inner_tolerance = 1e-8; + action_tolerance = 1e-10; + md_tolerance = 1e-8; + md_mixcg_inner_tolerance = 1e-8; + + rat_params.lo = 1.0; + rat_params.hi = 25.0; + rat_params.MaxIter = 50000; + rat_params.tolerance= 1.0e-9; + rat_params.degree = 14; + rat_params.precision= 50; + } +}; + +struct EvolParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(EvolParameters, + Integer, StartTrajectory, + Integer, Trajectories, + Integer, SaveInterval, + Integer, Steps, + RealD, TrajectoryLength, + bool, MetropolisTest, + std::string, StartingType, + std::vector, GparityDirs, + std::vector, eofa_l, + RatQuoParameters, rat_quo_s, + RatQuoParameters, rat_quo_DSDR); + + EvolParameters() { + //For initial thermalization; afterwards user should switch Metropolis on and use StartingType=CheckpointStart + MetropolisTest = false; + StartTrajectory = 0; + Trajectories = 50; + SaveInterval = 5; + StartingType = "ColdStart"; + GparityDirs.resize(3, 1); //1 for G-parity, 0 for periodic + Steps = 5; + TrajectoryLength = 1.0; + } +}; + +bool fileExists(const std::string &fn){ + std::ifstream f(fn); + return f.good(); +} + + + + +struct LanczosParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters, + double, alpha, + double, beta, + double, mu, + int, ord, + int, n_stop, + int, n_want, + int, n_use, + double, tolerance); + + LanczosParameters() { + alpha = 35; + beta = 5; + mu = 0; + ord = 100; + n_stop = 10; 
+ n_want = 10; + n_use = 15; + tolerance = 1e-6; + } +}; + + + +template +void computeEigenvalues(std::string param_file, + GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt, //expect lattice to have been initialized to something + FermionActionD &action, GridParallelRNG &rng){ + + LanczosParameters params; + if(fileExists(param_file)){ + std::cout << GridLogMessage << " Reading " << param_file << std::endl; + Grid::XmlReader rd(param_file); + read(rd, "LanczosParameters", params); + }else if(!GlobalSharedMemory::WorldRank){ + std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl; + std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl; + Grid::XmlWriter wr(param_file + ".templ"); + write(wr, "LanczosParameters", params); + } + + FermionFieldD gauss_o(rbGrid); + FermionFieldD gauss(Grid); + gaussian(rng, gauss); + pickCheckerboard(Odd, gauss_o, gauss); + + action.ImportGauge(latt); + + SchurDiagMooeeOperator hermop(action); + PlainHermOp hermop_wrap(hermop); + //ChebyshevLanczos Cheb(params.alpha, params.beta, params.mu, params.ord); + assert(params.mu == 0.0); + + Chebyshev Cheb(params.beta*params.beta, params.alpha*params.alpha, params.ord+1); + FunctionHermOp Cheb_wrap(Cheb, hermop); + + std::cout << "IRL: alpha=" << params.alpha << " beta=" << params.beta << " mu=" << params.mu << " ord=" << params.ord << std::endl; + ImplicitlyRestartedLanczos IRL(Cheb_wrap, hermop_wrap, params.n_stop, params.n_want, params.n_use, params.tolerance, 50000); + + std::vector eval(params.n_use); + std::vector evec(params.n_use, rbGrid); + int Nconv; + IRL.calc(eval, evec, gauss_o, Nconv); + + std::cout << "Eigenvalues:" << std::endl; + for(int i=0;i +void checkRHMC(GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt, //expect lattice to have been initialized to something + FermionActionD &numOp, FermionActionD &denOp, RHMCtype &rhmc, GridParallelRNG &rng, + int inv_pow, const std::string &quark_descr, int action_or_md){ + assert(action_or_md == 0 || action_or_md == 1 || action_or_md == 2); + + FermionFieldD gauss_o(rbGrid); + FermionFieldD gauss(Grid); + gaussian(rng, gauss); + pickCheckerboard(Odd, gauss_o, gauss); + + numOp.ImportGauge(latt); + denOp.ImportGauge(latt); + + typedef typename FermionActionD::Impl_t FermionImplPolicyD; + SchurDifferentiableOperator MdagM(numOp); + SchurDifferentiableOperator VdagV(denOp); + + PowerMethod power_method; + RealD lambda_max; + + std::cout << "Starting: Get RHMC high bound approx for " << quark_descr << " numerator" << std::endl; + + lambda_max = power_method(MdagM,gauss_o); + std::cout << GridLogMessage << "Got lambda_max "< +void checkEOFA(ExactOneFlavourRatioPseudoFermionAction &EOFA, + GridCartesian* FGrid, GridParallelRNG &rng, const LatticeGaugeFieldD &latt){ + std::cout << GridLogMessage << "Starting EOFA action/bounds check" << std::endl; + typename FermionImplPolicy::FermionField eta(FGrid); + RealD scale = std::sqrt(0.5); + gaussian(rng,eta); eta = eta * scale; + + //Use the inbuilt check + EOFA.refresh(latt, eta); + EOFA.S(latt); + std::cout << GridLogMessage << "Finished EOFA upper action/bounds check" << std::endl; +} + + +template +class EOFAlinop: public LinearOperatorBase{ + ExactOneFlavourRatioPseudoFermionAction &EOFA; + LatticeGaugeFieldD &U; +public: + EOFAlinop(ExactOneFlavourRatioPseudoFermionAction &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){} + + typedef typename FermionImplPolicy::FermionField 
Field; + void OpDiag (const Field &in, Field &out){ assert(0); } + void OpDir (const Field &in, Field &out,int dir,int disp){ assert(0); } + void OpDirAll (const Field &in, std::vector &out){ assert(0); } + + void Op (const Field &in, Field &out){ assert(0); } + void AdjOp (const Field &in, Field &out){ assert(0); } + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); } + void HermOp(const Field &in, Field &out){ EOFA.Meofa(U, in, out); } +}; + +template +void upperBoundEOFA(ExactOneFlavourRatioPseudoFermionAction &EOFA, + GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){ + std::cout << GridLogMessage << "Starting EOFA upper bound compute" << std::endl; + EOFAlinop linop(EOFA, latt); + typename FermionImplPolicy::FermionField eta(FGrid); + gaussian(rng,eta); + PowerMethod power_method; + auto lambda_max = power_method(linop,eta); + std::cout << GridLogMessage << "Upper bound of EOFA operator " << lambda_max << std::endl; +} + +//Applications of M^{-1} cost the same as M for EOFA! +template +class EOFAinvLinop: public LinearOperatorBase{ + ExactOneFlavourRatioPseudoFermionAction &EOFA; + LatticeGaugeFieldD &U; +public: + EOFAinvLinop(ExactOneFlavourRatioPseudoFermionAction &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){} + + typedef typename FermionImplPolicy::FermionField Field; + void OpDiag (const Field &in, Field &out){ assert(0); } + void OpDir (const Field &in, Field &out,int dir,int disp){ assert(0); } + void OpDirAll (const Field &in, std::vector &out){ assert(0); } + + void Op (const Field &in, Field &out){ assert(0); } + void AdjOp (const Field &in, Field &out){ assert(0); } + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); } + void HermOp(const Field &in, Field &out){ EOFA.MeofaInv(U, in, out); } +}; + +template +void lowerBoundEOFA(ExactOneFlavourRatioPseudoFermionAction &EOFA, + GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){ + std::cout << GridLogMessage << "Starting EOFA lower bound compute using power method on M^{-1}. Inverse of highest eigenvalue is the lowest eigenvalue of M" << std::endl; + EOFAinvLinop linop(EOFA, latt); + typename FermionImplPolicy::FermionField eta(FGrid); + gaussian(rng,eta); + PowerMethod power_method; + auto lambda_max = power_method(linop,eta); + std::cout << GridLogMessage << "Lower bound of EOFA operator " << 1./lambda_max << std::endl; +} + + +NAMESPACE_BEGIN(Grid); + + template + class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction { + public: + typedef typename FermionOperatorD::FermionField FieldD; + typedef typename FermionOperatorF::FermionField FieldF; + + using OperatorFunction::operator(); + + RealD Tolerance; + RealD InnerTolerance; //Initial tolerance for inner CG. 
Defaults to Tolerance but can be changed + Integer MaxInnerIterations; + Integer MaxOuterIterations; + GridBase* SinglePrecGrid4; //Grid for single-precision fields + GridBase* SinglePrecGrid5; //Grid for single-precision fields + RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance + + FermionOperatorF &FermOpF; + FermionOperatorD &FermOpD;; + SchurOperatorF &LinOpF; + SchurOperatorD &LinOpD; + + Integer TotalInnerIterations; //Number of inner CG iterations + Integer TotalOuterIterations; //Number of restarts + Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step + + MixedPrecisionConjugateGradientOperatorFunction(RealD tol, + Integer maxinnerit, + Integer maxouterit, + GridBase* _sp_grid4, + GridBase* _sp_grid5, + FermionOperatorF &_FermOpF, + FermionOperatorD &_FermOpD, + SchurOperatorF &_LinOpF, + SchurOperatorD &_LinOpD): + LinOpF(_LinOpF), + LinOpD(_LinOpD), + FermOpF(_FermOpF), + FermOpD(_FermOpD), + Tolerance(tol), + InnerTolerance(tol), + MaxInnerIterations(maxinnerit), + MaxOuterIterations(maxouterit), + SinglePrecGrid4(_sp_grid4), + SinglePrecGrid5(_sp_grid5), + OuterLoopNormMult(100.) + { + }; + + void operator()(LinearOperatorBase &LinOpU, const FieldD &src, FieldD &psi) { + + std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<(&LinOpU); + assert(&(SchurOpU->_Mat)==&(LinOpD._Mat)); + + precisionChange(FermOpF.Umu, FermOpD.Umu); + + pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu); + pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu); + + //////////////////////////////////////////////////////////////////////////////////// + // Make a mixed precision conjugate gradient + //////////////////////////////////////////////////////////////////////////////////// + MixedPrecisionConjugateGradient MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD); + MPCG.InnerTolerance = InnerTolerance; + std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" < + class MixedPrecisionReliableUpdateConjugateGradientOperatorFunction : public OperatorFunction { + public: + typedef typename FermionOperatorD::FermionField FieldD; + typedef typename FermionOperatorF::FermionField FieldF; + + using OperatorFunction::operator(); + + RealD Tolerance; + Integer MaxIterations; + + RealD Delta; //reliable update parameter + + GridBase* SinglePrecGrid4; //Grid for single-precision fields + GridBase* SinglePrecGrid5; //Grid for single-precision fields + + FermionOperatorF &FermOpF; + FermionOperatorD &FermOpD;; + SchurOperatorF &LinOpF; + SchurOperatorD &LinOpD; + + MixedPrecisionReliableUpdateConjugateGradientOperatorFunction(RealD tol, + RealD delta, + Integer maxit, + GridBase* _sp_grid4, + GridBase* _sp_grid5, + FermionOperatorF &_FermOpF, + FermionOperatorD &_FermOpD, + SchurOperatorF &_LinOpF, + SchurOperatorD &_LinOpD): + LinOpF(_LinOpF), + LinOpD(_LinOpD), + FermOpF(_FermOpF), + FermOpD(_FermOpD), + Tolerance(tol), + Delta(delta), + MaxIterations(maxit), + SinglePrecGrid4(_sp_grid4), + SinglePrecGrid5(_sp_grid5) + { + }; + + void operator()(LinearOperatorBase &LinOpU, const FieldD &src, FieldD &psi) { + + std::cout << GridLogMessage << " Mixed precision reliable CG update wrapper operator() "<(&LinOpU); + assert(&(SchurOpU->_Mat)==&(LinOpD._Mat)); + + precisionChange(FermOpF.Umu, FermOpD.Umu); + + pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu); + pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu); + + 
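+      // Reliable-update CG in outline (a schematic, not the library code in
+      // ConjugateGradientReliableUpdate): iterate CG entirely in single
+      // precision, but remember the norm of the last *true* residual computed
+      // in double precision. Once the iterated single-precision residual has
+      // fallen by more than the factor Delta (the member above) relative to
+      // it, recompute the true residual in double to flush rounding error:
+      //
+      //   if (norm2(r_f) < Delta * last_true_norm) {
+      //     LinOpD.HermOp(x_d, mmp_d);     // true A.x in double precision
+      //     r_d = src - mmp_d;             // true residual
+      //     precisionChange(r_f, r_d);     // restart single-precision CG
+      //     last_true_norm = norm2(r_d);
+      //   }
+      //
+      // r_f, x_d, mmp_d and last_true_norm are illustrative names only.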
//////////////////////////////////////////////////////////////////////////////////// + // Make a mixed precision conjugate gradient + //////////////////////////////////////////////////////////////////////////////////// + + ConjugateGradientReliableUpdate MPCG(Tolerance,MaxIterations,Delta,SinglePrecGrid5,LinOpF,LinOpD); + std::cout << GridLogMessage << "Calling mixed precision reliable update Conjugate Gradient" < tmp; + GridCmdOptionIntVector(argv[i+1],tmp); + { + std::stringstream ss; + for(int j=0;j MixedPrecRHMC; + typedef GeneralEvenOddRatioRationalPseudoFermionAction DoublePrecRHMC; + + //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + IntegratorParameters MD; + typedef ConjugateHMCRunnerD HMCWrapper; //NB: This is the "Omelyan integrator" + MD.name = std::string("MinimumNorm2"); + + // typedef ConjugateHMCRunnerD HMCWrapper; + // MD.name = std::string("ForceGradient"); + + MD.MDsteps = user_params.Steps; + MD.trajL = user_params.TrajectoryLength; + + typedef HMCWrapper::ImplPolicy GaugeImplPolicy; + + HMCparameters HMCparams; + HMCparams.StartTrajectory = user_params.StartTrajectory; + HMCparams.Trajectories = user_params.Trajectories; + HMCparams.NoMetropolisUntil= 0; + HMCparams.StartingType = user_params.StartingType; + HMCparams.MetropolisTest = user_params.MetropolisTest; + HMCparams.MD = MD; + HMCWrapper TheHMC(HMCparams); + + // Grid from the command line arguments --grid and --mpi + TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition + + CheckpointerParameters CPparams; + CPparams.config_prefix = "ckpoint_lat"; + CPparams.rng_prefix = "ckpoint_rng"; + CPparams.saveInterval = user_params.SaveInterval; + CPparams.format = "IEEE64BIG"; + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + + //Note that checkpointing saves the RNG state so that this initialization is required only for the very first configuration + RNGModuleParameters RNGpar; + RNGpar.serial_seeds = serial_seeds; + RNGpar.parallel_seeds = parallel_seeds; + TheHMC.Resources.SetRNGSeeds(RNGpar); + + typedef PlaquetteMod PlaqObs; + TheHMC.Resources.AddObservable(); + ////////////////////////////////////////////// + //aiming for ainv=1.723 GeV + // me bob + //Estimated a(ml+mres) [40ID] = 0.001305 0.00131 + // a(mh+mres) [40ID] = 0.035910 0.03529 + //Estimate Ls=12, b+c=2 mres~0.0011 + + //1/24/2022 initial mres measurement gives mres=0.001, adjusted light quark mass to 0.0003 from 0.0001 + + const int Ls = 12; + Real beta = 1.848; + Real light_mass = 0.0003; + Real strange_mass = 0.0342; + Real pv_mass = 1.0; + RealD M5 = 1.8; + RealD mobius_scale = 2.; //b+c + + RealD mob_bmc = 1.0; + RealD mob_b = (mobius_scale + mob_bmc)/2.; + RealD mob_c = (mobius_scale - mob_bmc)/2.; + + std::cout << GridLogMessage + << "Ensemble parameters:" << std::endl + << "Ls=" << Ls << std::endl + << "beta=" << beta << std::endl + << "light_mass=" << light_mass << std::endl + << "strange_mass=" << strange_mass << std::endl + << "mobius_scale=" << mobius_scale << std::endl; + + //Setup the Grids + auto UGridD = TheHMC.Resources.GetCartesian(); + auto UrbGridD = TheHMC.Resources.GetRBCartesian(); + auto FGridD = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridD); + auto FrbGridD = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridD); + + GridCartesian* UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi()); + GridRedBlackCartesian* UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF); + auto FGridF = 
SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF); + auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF); + + ConjugateIwasakiGaugeActionD GaugeAction(beta); + + // temporarily need a gauge field + LatticeGaugeFieldD Ud(UGridD); + LatticeGaugeFieldF Uf(UGridF); + + //Setup the BCs + FermionActionD::ImplParams Params; + for(int i=0;i dirs4(Nd); + for(int i=0;i Level1(1); //light quark + strange quark + ActionLevel Level2(4); //DSDR + ActionLevel Level3(2); //gauge + + + ///////////////////////////////////////////////////////////// + // Light EOFA action + // have to be careful with the parameters, cf. Test_dwf_gpforce_eofa.cc + ///////////////////////////////////////////////////////////// + typedef SchurDiagMooeeOperator EOFAschuropD; + typedef SchurDiagMooeeOperator EOFAschuropF; + typedef ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction EOFAmixPrecPFaction; + typedef MixedPrecisionConjugateGradientOperatorFunction EOFA_mxCG; + typedef MixedPrecisionReliableUpdateConjugateGradientOperatorFunction EOFA_relupCG; + + + std::vector eofa_light_masses = { light_mass , 0.004, 0.016, 0.064, 0.256 }; + std::vector eofa_pv_masses = { 0.004 , 0.016, 0.064, 0.256, 1.0 }; + int n_light_hsb = 5; + assert(user_params.eofa_l.size() == n_light_hsb); + + EOFAmixPrecPFaction* EOFA_pfactions[n_light_hsb]; + + for(int i=0;iInnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance; + + EOFA_mxCG* ActionMCG_R = new EOFA_mxCG(user_params.eofa_l[i].action_tolerance, 50000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D); + ActionMCG_R->InnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance; + + EOFA_mxCG* DerivMCG_L = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 50000, 1000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D); + DerivMCG_L->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance; + + EOFA_mxCG* DerivMCG_R = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 50000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D); + DerivMCG_R->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance; + + std::cout << GridLogMessage << "Set EOFA action solver action tolerance outer=" << ActionMCG_L->Tolerance << " inner=" << ActionMCG_L->InnerTolerance << std::endl; + std::cout << GridLogMessage << "Set EOFA MD solver tolerance outer=" << DerivMCG_L->Tolerance << " inner=" << DerivMCG_L->InnerTolerance << std::endl; +#endif + + EOFAmixPrecPFaction* EOFA = new EOFAmixPrecPFaction(*LopF, *RopF, + *LopD, *RopD, + *ActionMCG_L, *ActionMCG_R, + *ActionMCG_L, *ActionMCG_R, + *DerivMCG_L, *DerivMCG_R, + user_params.eofa_l[i].rat_params, true); + EOFA_pfactions[i] = EOFA; + Level1.push_back(EOFA); + } + + //////////////////////////////////// + // Strange action + //////////////////////////////////// + FermionActionD Numerator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD,strange_mass,M5,mob_b,mob_c,Params); + FermionActionD Denominator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD, pv_mass,M5,mob_b,mob_c,Params); + + FermionActionF Numerator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF,strange_mass,M5,mob_b,mob_c,Params); + FermionActionF Denominator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF, pv_mass,M5,mob_b,mob_c,Params); + + RationalActionParams rat_act_params_s; + rat_act_params_s.inv_pow = 4; // (M^dag M)^{1/4} + rat_act_params_s.precision= 60; + rat_act_params_s.MaxIter = 50000; + user_params.rat_quo_s.Export(rat_act_params_s); + std::cout << GridLogMessage << " Heavy quark bounds check every " << rat_act_params_s.BoundsCheckFreq << " trajectories 
(avg)" << std::endl; + + //MixedPrecRHMC Quotient_s(Denominator_sD, Numerator_sD, Denominator_sF, Numerator_sF, rat_act_params_s, user_params.rat_quo_s.reliable_update_freq); + DoublePrecRHMC Quotient_s(Denominator_sD, Numerator_sD, rat_act_params_s); + Level1.push_back(&Quotient_s); + + /////////////////////////////////// + // DSDR action + /////////////////////////////////// + RealD dsdr_mass=-1.8; + //Use same DSDR twists as https://arxiv.org/pdf/1208.4412.pdf + RealD dsdr_epsilon_f = 0.02; //numerator (in determinant) + RealD dsdr_epsilon_b = 0.5; + GparityWilsonTMFermionD Numerator_DSDR_D(Ud, *UGridD, *UrbGridD, dsdr_mass, dsdr_epsilon_f, Params); + GparityWilsonTMFermionF Numerator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_f, Params); + + GparityWilsonTMFermionD Denominator_DSDR_D(Ud, *UGridD, *UrbGridD, dsdr_mass, dsdr_epsilon_b, Params); + GparityWilsonTMFermionF Denominator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_b, Params); + + RationalActionParams rat_act_params_DSDR; + rat_act_params_DSDR.inv_pow = 2; // (M^dag M)^{1/2} + rat_act_params_DSDR.precision= 60; + rat_act_params_DSDR.MaxIter = 50000; + user_params.rat_quo_DSDR.Export(rat_act_params_DSDR); + std::cout << GridLogMessage << "DSDR quark bounds check every " << rat_act_params_DSDR.BoundsCheckFreq << " trajectories (avg)" << std::endl; + + DoublePrecRHMC Quotient_DSDR(Denominator_DSDR_D, Numerator_DSDR_D, rat_act_params_DSDR); + Level2.push_back(&Quotient_DSDR); + + ///////////////////////////////////////////////////////////// + // Gauge action + ///////////////////////////////////////////////////////////// + Level3.push_back(&GaugeAction); + + TheHMC.TheAction.push_back(Level1); + TheHMC.TheAction.push_back(Level2); + TheHMC.TheAction.push_back(Level3); + std::cout << GridLogMessage << " Action complete "<< std::endl; + + + //Action tuning + bool + tune_rhmc_s=false, eigenrange_s=false, + tune_rhmc_DSDR=false, eigenrange_DSDR=false, + check_eofa=false, + upper_bound_eofa=false, lower_bound_eofa(false); + + std::string lanc_params_s; + std::string lanc_params_DSDR; + int tune_rhmc_s_action_or_md; + int tune_rhmc_DSDR_action_or_md; + int eofa_which_hsb; + + for(int i=1;i= 0 && eofa_which_hsb < n_light_hsb) ); + } + else if(sarg == "--upper_bound_eofa"){ + assert(i < argc-1); + upper_bound_eofa = true; + eofa_which_hsb = std::stoi(argv[i+1]); + assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb); + } + else if(sarg == "--lower_bound_eofa"){ + assert(i < argc-1); + lower_bound_eofa = true; + eofa_which_hsb = std::stoi(argv[i+1]); + assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb); + } + } + if(tune_rhmc_s || eigenrange_s || tune_rhmc_DSDR || eigenrange_DSDR ||check_eofa || upper_bound_eofa || lower_bound_eofa) { + std::cout << GridLogMessage << "Running checks" << std::endl; + TheHMC.initializeGaugeFieldAndRNGs(Ud); + + //std::cout << GridLogMessage << "EOFA action solver action tolerance outer=" << ActionMCG_L.Tolerance << " inner=" << ActionMCG_L.InnerTolerance << std::endl; + //std::cout << GridLogMessage << "EOFA MD solver tolerance outer=" << DerivMCG_L.Tolerance << " inner=" << DerivMCG_L.InnerTolerance << std::endl; + + if(check_eofa){ + if(eofa_which_hsb >= 0){ + std::cout << GridLogMessage << "Starting checking EOFA Hasenbusch " << eofa_which_hsb << std::endl; + checkEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud); + std::cout << GridLogMessage << "Finished checking EOFA Hasenbusch " << eofa_which_hsb << std::endl; + }else{ + for(int 
i=0;i(lanc_params_s, FGridD, FrbGridD, Ud, Numerator_sD, TheHMC.Resources.GetParallelRNG()); + if(tune_rhmc_s) checkRHMC(FGridD, FrbGridD, Ud, Numerator_sD, Denominator_sD, Quotient_s, TheHMC.Resources.GetParallelRNG(), 4, "strange", tune_rhmc_s_action_or_md); + if(eigenrange_DSDR) computeEigenvalues(lanc_params_DSDR, UGridD, UrbGridD, Ud, Numerator_DSDR_D, TheHMC.Resources.GetParallelRNG()); + if(tune_rhmc_DSDR) checkRHMC(UGridD, UrbGridD, Ud, Numerator_DSDR_D, Denominator_DSDR_D, Quotient_DSDR, TheHMC.Resources.GetParallelRNG(), 2, "DSDR", tune_rhmc_DSDR_action_or_md); + + + std::cout << GridLogMessage << " Done" << std::endl; + Grid_finalize(); + return 0; + } + + + //Run the HMC + std::cout << GridLogMessage << " Running the HMC "<< std::endl; + TheHMC.Run(); + + std::cout << GridLogMessage << " Done" << std::endl; + Grid_finalize(); + return 0; +#endif +} // main diff --git a/HMC/Mobius2p1fIDSDRGparityEOFA_48ID.cc b/HMC/Mobius2p1fIDSDRGparityEOFA_48ID.cc new file mode 100644 index 00000000..004a0953 --- /dev/null +++ b/HMC/Mobius2p1fIDSDRGparityEOFA_48ID.cc @@ -0,0 +1,875 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./HMC/Mobius2p1fIDSDRGparityEOFA.cc + +Copyright (C) 2015-2016 + +Author: Christopher Kelly +Author: Peter Boyle + + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include <Grid/Grid.h> + +using namespace Grid; + +//Production binary for the 48ID G-parity ensemble + +struct RatQuoParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(RatQuoParameters, + double, bnd_lo, + double, bnd_hi, + Integer, action_degree, + double, action_tolerance, + Integer, md_degree, + double, md_tolerance, + Integer, reliable_update_freq, + Integer, bnd_check_freq); + RatQuoParameters() { + bnd_lo = 1e-2; + bnd_hi = 30; + action_degree = 10; + action_tolerance = 1e-10; + md_degree = 10; + md_tolerance = 1e-8; + bnd_check_freq = 20; + reliable_update_freq = 50; + } + + void Export(RationalActionParams &into) const{ + into.lo = bnd_lo; + into.hi = bnd_hi; + into.action_degree = action_degree; + into.action_tolerance = action_tolerance; + into.md_degree = md_degree; + into.md_tolerance = md_tolerance; + into.BoundsCheckFreq = bnd_check_freq; + } +}; + +struct EOFAparameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(EOFAparameters, + OneFlavourRationalParams, rat_params, + double, action_tolerance, + double, action_mixcg_inner_tolerance, + double, md_tolerance, + double, md_mixcg_inner_tolerance); + + EOFAparameters() { + action_mixcg_inner_tolerance = 1e-8; + action_tolerance = 1e-10; + md_tolerance = 1e-8; + md_mixcg_inner_tolerance = 1e-8; + + rat_params.lo = 1.0; + rat_params.hi = 25.0; + rat_params.MaxIter = 10000; + rat_params.tolerance= 1.0e-9; + rat_params.degree = 14; + rat_params.precision= 50; + } +}; + +struct EvolParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(EvolParameters, + Integer, StartTrajectory, + Integer, Trajectories, + Integer, SaveInterval, + Integer, Steps, + RealD, TrajectoryLength, + bool, MetropolisTest, + std::string, StartingType, + std::vector<int>, GparityDirs, + std::vector<EOFAparameters>, eofa_l, + RatQuoParameters, rat_quo_s, + RatQuoParameters, rat_quo_DSDR); + + EvolParameters() { + //For initial thermalization; afterwards the user should switch Metropolis on and use StartingType=CheckpointStart + MetropolisTest = false; + StartTrajectory = 0; + Trajectories = 50; + SaveInterval = 5; + StartingType = "ColdStart"; + GparityDirs.resize(3, 1); //1 for G-parity, 0 for periodic + Steps = 5; + TrajectoryLength = 1.0; + } +}; + +bool fileExists(const std::string &fn){ + std::ifstream f(fn); + return f.good(); +} + + + + +struct LanczosParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters, + double, alpha, + double, beta, + double, mu, + int, ord, + int, n_stop, + int, n_want, + int, n_use, + double, tolerance); + + LanczosParameters() { + alpha = 35; + beta = 5; + mu = 0; + ord = 100; + n_stop = 10; + n_want = 10; + n_use = 15; + tolerance = 1e-6; + } +}; + + + +template<typename FermionActionD, typename FermionFieldD> +void computeEigenvalues(std::string param_file, + GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt, //expect lattice to have been initialized to something + FermionActionD &action, GridParallelRNG &rng){ + + LanczosParameters params; + if(fileExists(param_file)){ + std::cout << GridLogMessage << " Reading " << param_file << std::endl; + Grid::XmlReader rd(param_file); + read(rd, "LanczosParameters", params); + }else if(!GlobalSharedMemory::WorldRank){ + std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl; + std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
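+    // Only the world rank-0 process reaches this branch (note the
+    // !GlobalSharedMemory::WorldRank guard above); it writes a template the
+    // user can edit and rename to the expected parameter file. Assuming the
+    // XmlWriter serialises the members declared in LanczosParameters above,
+    // the template looks schematically like:
+    //
+    //   <LanczosParameters>
+    //     <alpha>35</alpha>
+    //     <beta>5</beta>
+    //     <mu>0</mu>
+    //     <ord>100</ord>
+    //     <n_stop>10</n_stop>
+    //     <n_want>10</n_want>
+    //     <n_use>15</n_use>
+    //     <tolerance>1e-06</tolerance>
+    //   </LanczosParameters>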
+ Grid::XmlWriter wr(param_file + ".templ"); + write(wr, "LanczosParameters", params); + } + + FermionFieldD gauss_o(rbGrid); + FermionFieldD gauss(Grid); + gaussian(rng, gauss); + pickCheckerboard(Odd, gauss_o, gauss); + + action.ImportGauge(latt); + + SchurDiagMooeeOperator hermop(action); + PlainHermOp hermop_wrap(hermop); + //ChebyshevLanczos Cheb(params.alpha, params.beta, params.mu, params.ord); + assert(params.mu == 0.0); + + Chebyshev Cheb(params.beta*params.beta, params.alpha*params.alpha, params.ord+1); + FunctionHermOp Cheb_wrap(Cheb, hermop); + + std::cout << "IRL: alpha=" << params.alpha << " beta=" << params.beta << " mu=" << params.mu << " ord=" << params.ord << std::endl; + ImplicitlyRestartedLanczos IRL(Cheb_wrap, hermop_wrap, params.n_stop, params.n_want, params.n_use, params.tolerance, 10000); + + std::vector eval(params.n_use); + std::vector evec(params.n_use, rbGrid); + int Nconv; + IRL.calc(eval, evec, gauss_o, Nconv); + + std::cout << "Eigenvalues:" << std::endl; + for(int i=0;i +void checkRHMC(GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt, //expect lattice to have been initialized to something + FermionActionD &numOp, FermionActionD &denOp, RHMCtype &rhmc, GridParallelRNG &rng, + int inv_pow, const std::string &quark_descr, int action_or_md){ + assert(action_or_md == 0 || action_or_md == 1 || action_or_md == 2); + + FermionFieldD gauss_o(rbGrid); + FermionFieldD gauss(Grid); + gaussian(rng, gauss); + pickCheckerboard(Odd, gauss_o, gauss); + + numOp.ImportGauge(latt); + denOp.ImportGauge(latt); + + typedef typename FermionActionD::Impl_t FermionImplPolicyD; + SchurDifferentiableOperator MdagM(numOp); + SchurDifferentiableOperator VdagV(denOp); + + PowerMethod power_method; + RealD lambda_max; + + std::cout << "Starting: Get RHMC high bound approx for " << quark_descr << " numerator" << std::endl; + + lambda_max = power_method(MdagM,gauss_o); + std::cout << GridLogMessage << "Got lambda_max "< +void checkEOFA(ExactOneFlavourRatioPseudoFermionAction &EOFA, + GridCartesian* FGrid, GridParallelRNG &rng, const LatticeGaugeFieldD &latt){ + std::cout << GridLogMessage << "Starting EOFA action/bounds check" << std::endl; + typename FermionImplPolicy::FermionField eta(FGrid); + RealD scale = std::sqrt(0.5); + gaussian(rng,eta); eta = eta * scale; + + //Use the inbuilt check + EOFA.refresh(latt, eta); + EOFA.S(latt); + std::cout << GridLogMessage << "Finished EOFA upper action/bounds check" << std::endl; +} + + +template +class EOFAlinop: public LinearOperatorBase{ + ExactOneFlavourRatioPseudoFermionAction &EOFA; + LatticeGaugeFieldD &U; +public: + EOFAlinop(ExactOneFlavourRatioPseudoFermionAction &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){} + + typedef typename FermionImplPolicy::FermionField Field; + void OpDiag (const Field &in, Field &out){ assert(0); } + void OpDir (const Field &in, Field &out,int dir,int disp){ assert(0); } + void OpDirAll (const Field &in, std::vector &out){ assert(0); } + + void Op (const Field &in, Field &out){ assert(0); } + void AdjOp (const Field &in, Field &out){ assert(0); } + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); } + void HermOp(const Field &in, Field &out){ EOFA.Meofa(U, in, out); } +}; + +template +void upperBoundEOFA(ExactOneFlavourRatioPseudoFermionAction &EOFA, + GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){ + std::cout << GridLogMessage << "Starting EOFA upper bound compute" << std::endl; + EOFAlinop linop(EOFA, latt); + typename 
FermionImplPolicy::FermionField eta(FGrid); + gaussian(rng,eta); + PowerMethod power_method; + auto lambda_max = power_method(linop,eta); + std::cout << GridLogMessage << "Upper bound of EOFA operator " << lambda_max << std::endl; +} + +//Applications of M^{-1} cost the same as M for EOFA! +template +class EOFAinvLinop: public LinearOperatorBase{ + ExactOneFlavourRatioPseudoFermionAction &EOFA; + LatticeGaugeFieldD &U; +public: + EOFAinvLinop(ExactOneFlavourRatioPseudoFermionAction &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){} + + typedef typename FermionImplPolicy::FermionField Field; + void OpDiag (const Field &in, Field &out){ assert(0); } + void OpDir (const Field &in, Field &out,int dir,int disp){ assert(0); } + void OpDirAll (const Field &in, std::vector &out){ assert(0); } + + void Op (const Field &in, Field &out){ assert(0); } + void AdjOp (const Field &in, Field &out){ assert(0); } + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); } + void HermOp(const Field &in, Field &out){ EOFA.MeofaInv(U, in, out); } +}; + +template +void lowerBoundEOFA(ExactOneFlavourRatioPseudoFermionAction &EOFA, + GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){ + std::cout << GridLogMessage << "Starting EOFA lower bound compute using power method on M^{-1}. Inverse of highest eigenvalue is the lowest eigenvalue of M" << std::endl; + EOFAinvLinop linop(EOFA, latt); + typename FermionImplPolicy::FermionField eta(FGrid); + gaussian(rng,eta); + PowerMethod power_method; + auto lambda_max = power_method(linop,eta); + std::cout << GridLogMessage << "Lower bound of EOFA operator " << 1./lambda_max << std::endl; +} + + +NAMESPACE_BEGIN(Grid); + + template + class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction { + public: + typedef typename FermionOperatorD::FermionField FieldD; + typedef typename FermionOperatorF::FermionField FieldF; + + using OperatorFunction::operator(); + + RealD Tolerance; + RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed + Integer MaxInnerIterations; + Integer MaxOuterIterations; + GridBase* SinglePrecGrid4; //Grid for single-precision fields + GridBase* SinglePrecGrid5; //Grid for single-precision fields + RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance + + FermionOperatorF &FermOpF; + FermionOperatorD &FermOpD;; + SchurOperatorF &LinOpF; + SchurOperatorD &LinOpD; + + Integer TotalInnerIterations; //Number of inner CG iterations + Integer TotalOuterIterations; //Number of restarts + Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step + + MixedPrecisionConjugateGradientOperatorFunction(RealD tol, + Integer maxinnerit, + Integer maxouterit, + GridBase* _sp_grid4, + GridBase* _sp_grid5, + FermionOperatorF &_FermOpF, + FermionOperatorD &_FermOpD, + SchurOperatorF &_LinOpF, + SchurOperatorD &_LinOpD): + LinOpF(_LinOpF), + LinOpD(_LinOpD), + FermOpF(_FermOpF), + FermOpD(_FermOpD), + Tolerance(tol), + InnerTolerance(tol), + MaxInnerIterations(maxinnerit), + MaxOuterIterations(maxouterit), + SinglePrecGrid4(_sp_grid4), + SinglePrecGrid5(_sp_grid5), + OuterLoopNormMult(100.) 
+ { + }; + + void operator()(LinearOperatorBase &LinOpU, const FieldD &src, FieldD &psi) { + + std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<(&LinOpU); + assert(&(SchurOpU->_Mat)==&(LinOpD._Mat)); + + precisionChange(FermOpF.Umu, FermOpD.Umu); + + pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu); + pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu); + + //////////////////////////////////////////////////////////////////////////////////// + // Make a mixed precision conjugate gradient + //////////////////////////////////////////////////////////////////////////////////// + MixedPrecisionConjugateGradient MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD); + MPCG.InnerTolerance = InnerTolerance; + std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" < + class MixedPrecisionReliableUpdateConjugateGradientOperatorFunction : public OperatorFunction { + public: + typedef typename FermionOperatorD::FermionField FieldD; + typedef typename FermionOperatorF::FermionField FieldF; + + using OperatorFunction::operator(); + + RealD Tolerance; + Integer MaxIterations; + + RealD Delta; //reliable update parameter + + GridBase* SinglePrecGrid4; //Grid for single-precision fields + GridBase* SinglePrecGrid5; //Grid for single-precision fields + + FermionOperatorF &FermOpF; + FermionOperatorD &FermOpD;; + SchurOperatorF &LinOpF; + SchurOperatorD &LinOpD; + + MixedPrecisionReliableUpdateConjugateGradientOperatorFunction(RealD tol, + RealD delta, + Integer maxit, + GridBase* _sp_grid4, + GridBase* _sp_grid5, + FermionOperatorF &_FermOpF, + FermionOperatorD &_FermOpD, + SchurOperatorF &_LinOpF, + SchurOperatorD &_LinOpD): + LinOpF(_LinOpF), + LinOpD(_LinOpD), + FermOpF(_FermOpF), + FermOpD(_FermOpD), + Tolerance(tol), + Delta(delta), + MaxIterations(maxit), + SinglePrecGrid4(_sp_grid4), + SinglePrecGrid5(_sp_grid5) + { + }; + + void operator()(LinearOperatorBase &LinOpU, const FieldD &src, FieldD &psi) { + + std::cout << GridLogMessage << " Mixed precision reliable CG update wrapper operator() "<(&LinOpU); + assert(&(SchurOpU->_Mat)==&(LinOpD._Mat)); + + precisionChange(FermOpF.Umu, FermOpD.Umu); + + pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu); + pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu); + + //////////////////////////////////////////////////////////////////////////////////// + // Make a mixed precision conjugate gradient + //////////////////////////////////////////////////////////////////////////////////// + + ConjugateGradientReliableUpdate MPCG(Tolerance,MaxIterations,Delta,SinglePrecGrid5,LinOpF,LinOpD); + std::cout << GridLogMessage << "Calling mixed precision reliable update Conjugate Gradient" < MixedPrecRHMC; + typedef GeneralEvenOddRatioRationalPseudoFermionAction DoublePrecRHMC; + + //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + IntegratorParameters MD; + typedef ConjugateHMCRunnerD HMCWrapper; //NB: This is the "Omelyan integrator" + typedef HMCWrapper::ImplPolicy GaugeImplPolicy; + MD.name = std::string("MinimumNorm2"); + MD.MDsteps = user_params.Steps; + MD.trajL = user_params.TrajectoryLength; + + HMCparameters HMCparams; + HMCparams.StartTrajectory = user_params.StartTrajectory; + HMCparams.Trajectories = user_params.Trajectories; + HMCparams.NoMetropolisUntil= 0; + HMCparams.StartingType = user_params.StartingType; + HMCparams.MetropolisTest = user_params.MetropolisTest; + HMCparams.MD = MD; + HMCWrapper TheHMC(HMCparams); + + // Grid from the command line arguments --grid 
and --mpi + TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition + + CheckpointerParameters CPparams; + CPparams.config_prefix = "ckpoint_lat"; + CPparams.rng_prefix = "ckpoint_rng"; + CPparams.saveInterval = user_params.SaveInterval; + CPparams.format = "IEEE64BIG"; + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + + //Note that checkpointing saves the RNG state so that this initialization is required only for the very first configuration + RNGModuleParameters RNGpar; + RNGpar.serial_seeds = "1 2 3 4 5"; + RNGpar.parallel_seeds = "6 7 8 9 10"; + TheHMC.Resources.SetRNGSeeds(RNGpar); + + typedef PlaquetteMod PlaqObs; + TheHMC.Resources.AddObservable(); + ////////////////////////////////////////////// + + //aiming for ainv=2.068 me Bob + //Estimated a(ml+mres) [48ID] = 0.001048 0.00104 + // a(mh+mres) [48ID] = 0.028847 0.02805 + //Estimate Ls=12, b+c=2 mres~0.0003 + + const int Ls = 12; + Real beta = 1.946; + Real light_mass = 0.00074; //0.00104 - mres_approx; + Real strange_mass = 0.02775; //0.02805 - mres_approx + Real pv_mass = 1.0; + RealD M5 = 1.8; + RealD mobius_scale = 2.; //b+c + + RealD mob_bmc = 1.0; + RealD mob_b = (mobius_scale + mob_bmc)/2.; + RealD mob_c = (mobius_scale - mob_bmc)/2.; + + //Setup the Grids + auto UGridD = TheHMC.Resources.GetCartesian(); + auto UrbGridD = TheHMC.Resources.GetRBCartesian(); + auto FGridD = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridD); + auto FrbGridD = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridD); + + GridCartesian* UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi()); + GridRedBlackCartesian* UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF); + auto FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF); + auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF); + + ConjugateIwasakiGaugeActionD GaugeAction(beta); + + // temporarily need a gauge field + LatticeGaugeFieldD Ud(UGridD); + LatticeGaugeFieldF Uf(UGridF); + + //Setup the BCs + FermionActionD::ImplParams Params; + for(int i=0;i dirs4(Nd); + for(int i=0;i Level1(1); //light quark + strange quark + ActionLevel Level2(4); //DSDR + ActionLevel Level3(2); //gauge + + + ///////////////////////////////////////////////////////////// + // Light EOFA action + // have to be careful with the parameters, cf. 
Test_dwf_gpforce_eofa.cc + ///////////////////////////////////////////////////////////// + typedef SchurDiagMooeeOperator EOFAschuropD; + typedef SchurDiagMooeeOperator EOFAschuropF; + typedef ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction EOFAmixPrecPFaction; + typedef MixedPrecisionConjugateGradientOperatorFunction EOFA_mxCG; + typedef MixedPrecisionReliableUpdateConjugateGradientOperatorFunction EOFA_relupCG; + + std::vector eofa_light_masses = { light_mass , 0.004, 0.016, 0.064, 0.256 }; + std::vector eofa_pv_masses = { 0.004 , 0.016, 0.064, 0.256, 1.0 }; + int n_light_hsb = 5; + assert(user_params.eofa_l.size() == n_light_hsb); + + EOFAmixPrecPFaction* EOFA_pfactions[n_light_hsb]; + + for(int i=0;iInnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance; + + EOFA_mxCG* ActionMCG_R = new EOFA_mxCG(user_params.eofa_l[i].action_tolerance, 10000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D); + ActionMCG_R->InnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance; + + EOFA_mxCG* DerivMCG_L = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 10000, 1000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D); + DerivMCG_L->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance; + + EOFA_mxCG* DerivMCG_R = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 10000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D); + DerivMCG_R->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance; + + std::cout << GridLogMessage << "Set EOFA action solver action tolerance outer=" << ActionMCG_L->Tolerance << " inner=" << ActionMCG_L->InnerTolerance << std::endl; + std::cout << GridLogMessage << "Set EOFA MD solver tolerance outer=" << DerivMCG_L->Tolerance << " inner=" << DerivMCG_L->InnerTolerance << std::endl; +#endif + + + EOFAmixPrecPFaction* EOFA = new EOFAmixPrecPFaction(*LopF, *RopF, + *LopD, *RopD, + *ActionMCG_L, *ActionMCG_R, + *ActionMCG_L, *ActionMCG_R, + *DerivMCG_L, *DerivMCG_R, + user_params.eofa_l[i].rat_params, true); + EOFA_pfactions[i] = EOFA; + Level1.push_back(EOFA); + } + + //////////////////////////////////// + // Strange action + //////////////////////////////////// + FermionActionD Numerator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD,strange_mass,M5,mob_b,mob_c,Params); + FermionActionD Denominator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD, pv_mass,M5,mob_b,mob_c,Params); + + FermionActionF Numerator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF,strange_mass,M5,mob_b,mob_c,Params); + FermionActionF Denominator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF, pv_mass,M5,mob_b,mob_c,Params); + + RationalActionParams rat_act_params_s; + rat_act_params_s.inv_pow = 4; // (M^dag M)^{1/4} + rat_act_params_s.precision= 60; + rat_act_params_s.MaxIter = 10000; + user_params.rat_quo_s.Export(rat_act_params_s); + std::cout << GridLogMessage << " Heavy quark bounds check every " << rat_act_params_s.BoundsCheckFreq << " trajectories (avg)" << std::endl; + + //MixedPrecRHMC Quotient_s(Denominator_sD, Numerator_sD, Denominator_sF, Numerator_sF, rat_act_params_s, user_params.rat_quo_s.reliable_update_freq); + DoublePrecRHMC Quotient_s(Denominator_sD, Numerator_sD, rat_act_params_s); + Level1.push_back(&Quotient_s); + + /////////////////////////////////// + // DSDR action + /////////////////////////////////// + RealD dsdr_mass=-1.8; + //Use same DSDR twists as https://arxiv.org/pdf/1208.4412.pdf + RealD dsdr_epsilon_f = 0.02; //numerator (in determinant) + RealD dsdr_epsilon_b = 0.5; + GparityWilsonTMFermionD Numerator_DSDR_D(Ud, *UGridD, 
*UrbGridD, dsdr_mass, dsdr_epsilon_f, Params); + GparityWilsonTMFermionF Numerator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_f, Params); + + GparityWilsonTMFermionD Denominator_DSDR_D(Ud, *UGridD, *UrbGridD, dsdr_mass, dsdr_epsilon_b, Params); + GparityWilsonTMFermionF Denominator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_b, Params); + + RationalActionParams rat_act_params_DSDR; + rat_act_params_DSDR.inv_pow = 2; // (M^dag M)^{1/2} + rat_act_params_DSDR.precision= 60; + rat_act_params_DSDR.MaxIter = 10000; + user_params.rat_quo_DSDR.Export(rat_act_params_DSDR); + std::cout << GridLogMessage << "DSDR quark bounds check every " << rat_act_params_DSDR.BoundsCheckFreq << " trajectories (avg)" << std::endl; + + DoublePrecRHMC Quotient_DSDR(Denominator_DSDR_D, Numerator_DSDR_D, rat_act_params_DSDR); + Level2.push_back(&Quotient_DSDR); + + ///////////////////////////////////////////////////////////// + // Gauge action + ///////////////////////////////////////////////////////////// + Level3.push_back(&GaugeAction); + + TheHMC.TheAction.push_back(Level1); + TheHMC.TheAction.push_back(Level2); + TheHMC.TheAction.push_back(Level3); + std::cout << GridLogMessage << " Action complete "<< std::endl; + + + //Action tuning + bool + tune_rhmc_s=false, eigenrange_s=false, + tune_rhmc_DSDR=false, eigenrange_DSDR=false, + check_eofa=false, + upper_bound_eofa=false, lower_bound_eofa(false); + + std::string lanc_params_s; + std::string lanc_params_DSDR; + int tune_rhmc_s_action_or_md; + int tune_rhmc_DSDR_action_or_md; + int eofa_which_hsb; + + for(int i=1;i= 0 && eofa_which_hsb < n_light_hsb) ); + } + else if(sarg == "--upper_bound_eofa"){ + assert(i < argc-1); + upper_bound_eofa = true; + eofa_which_hsb = std::stoi(argv[i+1]); + assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb); + } + else if(sarg == "--lower_bound_eofa"){ + assert(i < argc-1); + lower_bound_eofa = true; + eofa_which_hsb = std::stoi(argv[i+1]); + assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb); + } + } + if(tune_rhmc_s || eigenrange_s || tune_rhmc_DSDR || eigenrange_DSDR ||check_eofa || upper_bound_eofa || lower_bound_eofa) { + std::cout << GridLogMessage << "Running checks" << std::endl; + TheHMC.initializeGaugeFieldAndRNGs(Ud); + + //std::cout << GridLogMessage << "EOFA action solver action tolerance outer=" << ActionMCG_L.Tolerance << " inner=" << ActionMCG_L.InnerTolerance << std::endl; + //std::cout << GridLogMessage << "EOFA MD solver tolerance outer=" << DerivMCG_L.Tolerance << " inner=" << DerivMCG_L.InnerTolerance << std::endl; + + + if(check_eofa){ + if(eofa_which_hsb >= 0){ + std::cout << GridLogMessage << "Starting checking EOFA Hasenbusch " << eofa_which_hsb << std::endl; + checkEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud); + std::cout << GridLogMessage << "Finished checking EOFA Hasenbusch " << eofa_which_hsb << std::endl; + }else{ + for(int i=0;i(lanc_params_s, FGridD, FrbGridD, Ud, Numerator_sD, TheHMC.Resources.GetParallelRNG()); + if(tune_rhmc_s) checkRHMC(FGridD, FrbGridD, Ud, Numerator_sD, Denominator_sD, Quotient_s, TheHMC.Resources.GetParallelRNG(), 4, "strange", tune_rhmc_s_action_or_md); + if(eigenrange_DSDR) computeEigenvalues(lanc_params_DSDR, UGridD, UrbGridD, Ud, Numerator_DSDR_D, TheHMC.Resources.GetParallelRNG()); + if(tune_rhmc_DSDR) checkRHMC(UGridD, UrbGridD, Ud, Numerator_DSDR_D, Denominator_DSDR_D, Quotient_DSDR, TheHMC.Resources.GetParallelRNG(), 2, "DSDR", tune_rhmc_DSDR_action_or_md); + + + std::cout << GridLogMessage 
<< " Done" << std::endl; + Grid_finalize(); + return 0; + } + + + //Run the HMC + std::cout << GridLogMessage << " Running the HMC "<< std::endl; + TheHMC.Run(); + + std::cout << GridLogMessage << " Done" << std::endl; + Grid_finalize(); + return 0; +#endif +} // main diff --git a/HMC/Mobius2p1fRHMC.cc b/HMC/Mobius2p1fRHMC.cc index b958d548..288a6c54 100644 --- a/HMC/Mobius2p1fRHMC.cc +++ b/HMC/Mobius2p1fRHMC.cc @@ -39,7 +39,7 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef WilsonImplR FermionImplPolicy; - typedef MobiusFermionR FermionAction; + typedef MobiusFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; typedef Grid::XmlReader Serialiser; @@ -137,8 +137,8 @@ int main(int argc, char **argv) { //////////////////////////////////// // FermionAction StrangeOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_mass,M5,b,c, Params); - // DomainWallEOFAFermionR Strange_Op_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5); - // DomainWallEOFAFermionR Strange_Op_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5); + // DomainWallEOFAFermionD Strange_Op_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5); + // DomainWallEOFAFermionD Strange_Op_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5); // ExactOneFlavourRatioPseudoFermionAction EOFA(Strange_Op_L,Strange_Op_R,CG,ofp, false); FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params); diff --git a/HMC/Mobius2p1f_DD_EOFA_96I_3level.cc b/HMC/Mobius2p1f_DD_EOFA_96I_3level.cc new file mode 100644 index 00000000..c305567c --- /dev/null +++ b/HMC/Mobius2p1f_DD_EOFA_96I_3level.cc @@ -0,0 +1,516 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_hmc_EODWFRatio.cc + +Copyright (C) 2015-2016 + +Author: Peter Boyle +Author: Guido Cossu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +NAMESPACE_BEGIN(Grid); + +template + class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction { + public: + typedef typename FermionOperatorD::FermionField FieldD; + typedef typename FermionOperatorF::FermionField FieldF; + + using OperatorFunction::operator(); + + RealD Tolerance; + RealD InnerTolerance; //Initial tolerance for inner CG. 
Defaults to Tolerance but can be changed + Integer MaxInnerIterations; + Integer MaxOuterIterations; + GridBase* SinglePrecGrid4; //Grid for single-precision fields + GridBase* SinglePrecGrid5; //Grid for single-precision fields + RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance + + FermionOperatorF &FermOpF; + FermionOperatorD &FermOpD;; + SchurOperatorF &LinOpF; + SchurOperatorD &LinOpD; + + Integer TotalInnerIterations; //Number of inner CG iterations + Integer TotalOuterIterations; //Number of restarts + Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step + + MixedPrecisionConjugateGradientOperatorFunction(RealD tol, + Integer maxinnerit, + Integer maxouterit, + GridBase* _sp_grid4, + GridBase* _sp_grid5, + FermionOperatorF &_FermOpF, + FermionOperatorD &_FermOpD, + SchurOperatorF &_LinOpF, + SchurOperatorD &_LinOpD): + LinOpF(_LinOpF), + LinOpD(_LinOpD), + FermOpF(_FermOpF), + FermOpD(_FermOpD), + Tolerance(tol), + InnerTolerance(tol), + MaxInnerIterations(maxinnerit), + MaxOuterIterations(maxouterit), + SinglePrecGrid4(_sp_grid4), + SinglePrecGrid5(_sp_grid5), + OuterLoopNormMult(100.) + { + /* Debugging instances of objects; references are stored + std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " < &LinOpU, const FieldD &src, FieldD &psi) { + + std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<(&LinOpU); + + // std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <_Mat)<_Mat)==&(LinOpD._Mat)); + + //////////////////////////////////////////////////////////////////////////////////// + // Must snarf a single precision copy of the gauge field in Linop_d argument + //////////////////////////////////////////////////////////////////////////////////// + typedef typename FermionOperatorF::GaugeField GaugeFieldF; + typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF; + typedef typename FermionOperatorD::GaugeField GaugeFieldD; + typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD; + + GridBase * GridPtrF = SinglePrecGrid4; + GridBase * GridPtrD = FermOpD.Umu.Grid(); + GaugeFieldF U_f (GridPtrF); + GaugeLinkFieldF Umu_f(GridPtrF); + // std::cout << " Dim gauge field "<Nd()<Nd()<(FermOpD.Umu, mu); + precisionChange(Umu_f,Umu_d); + PokeIndex(FermOpF.Umu, Umu_f, mu); + } + pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu); + pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu); + + //////////////////////////////////////////////////////////////////////////////////// + // Make a mixed precision conjugate gradient + //////////////////////////////////////////////////////////////////////////////////// +#if 1 + RealD delta=1.e-4; + std::cout << GridLogMessage << "Calling reliable update Conjugate Gradient" < MPCG(Tolerance,MaxInnerIterations*MaxOuterIterations,delta,SinglePrecGrid5,LinOpF,LinOpD); +#else + std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" < MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD); +#endif + MPCG(src,psi); + } + }; + +NAMESPACE_END(Grid); + + +int main(int argc, char **argv) { + using namespace Grid; + + Grid_init(&argc, &argv); + + CartesianCommunicator::BarrierWorld(); + std::cout << GridLogMessage << " Clock skew check" < HMCWrapper; + // MD.name = std::string("Leap Frog"); + // typedef GenericHMCRunner HMCWrapper; + // MD.name = std::string("Force Gradient"); + typedef GenericHMCRunner HMCWrapper; + 
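+  // MinimumNorm2 is the second-order minimum-norm (Omelyan-type) integrator:
+  // roughly two force evaluations per step instead of leapfrog's one, but
+  // with a much smaller coefficient in the O(dt^2) error, so fewer steps are
+  // needed at fixed acceptance. With the settings below the top-level step
+  // size is trajL/MDsteps = 0.5/4 = 0.125; the nested action levels declared
+  // later subdivide it further. Switching integrator would follow the
+  // commented alternatives above, e.g. (sketch, assuming the usual Grid
+  // integrator typedef):
+  //
+  //   typedef GenericHMCRunner<ForceGradient> HMCWrapper;
+  //   MD.name = std::string("Force Gradient");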
MD.name = std::string("MinimumNorm2"); + MD.MDsteps = 4; + MD.trajL = 0.5; + + HMCparameters HMCparams; + HMCparams.StartTrajectory = 1077; + HMCparams.Trajectories = 1; + HMCparams.NoMetropolisUntil= 0; + // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; + // HMCparams.StartingType =std::string("ColdStart"); + HMCparams.StartingType =std::string("CheckpointStart"); + HMCparams.MD = MD; + HMCWrapper TheHMC(HMCparams); + + // Grid from the command line arguments --grid and --mpi + TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition + + CheckpointerParameters CPparams; + CPparams.config_prefix = "ckpoint_DDHMC_lat"; + CPparams.rng_prefix = "ckpoint_DDHMC_rng"; + CPparams.saveInterval = 1; + CPparams.format = "IEEE64BIG"; + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + std::cout << "loaded NERSC checkpointer"< PlaqObs; + TheHMC.Resources.AddObservable(); + ////////////////////////////////////////////// + + const int Ls = 12; + RealD M5 = 1.8; + RealD b = 1.5; + RealD c = 0.5; + Real beta = 2.13; + // Real light_mass = 5.4e-4; + Real light_mass = 7.8e-4; + Real strange_mass = 0.0362; + Real pv_mass = 1.0; + // std::vector<Real> hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); + std::vector<Real> hasenbusch({ light_mass, 5e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass }); + + + OneFlavourRationalParams OFRp; // Up/down + OFRp.lo = 2.0e-5; + OFRp.hi = 90.0; + OFRp.MaxIter = 60000; + OFRp.tolerance= 1.0e-5; + OFRp.mdtolerance= 1.0e-3; + // OFRp.degree = 20; converges + // OFRp.degree = 16; + OFRp.degree = 12; + OFRp.precision= 80; + OFRp.BoundsCheckFreq=0; + std::vector<RealD> ActionTolByPole({ + 1.0e-6,1.0e-6,1.0e-7,1.0e-8, + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-10,1.0e-10,1.0e-10,1.0e-10 + }); + std::vector<RealD> MDTolByPole({ + 3.0e-4,3.0e-4,3.0e-5,1.0e-5, + 1.0e-5,1.0e-5,1.0e-5,1.0e-5, + 1.0e-8,1.0e-10,1.0e-10,1.0e-10 + }); + + auto GridPtr = TheHMC.Resources.GetCartesian(); + auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); + + typedef SchurDiagMooeeOperator LinearOperatorF; + typedef SchurDiagMooeeOperator LinearOperatorD; + typedef SchurDiagMooeeOperator LinearOperatorEOFAF; + typedef SchurDiagMooeeOperator LinearOperatorEOFAD; + typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG; + typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG_EOFA; + + //////////////////////////////////////////////////////////////// + // Domain decomposed + //////////////////////////////////////////////////////////////// + Coordinate latt4 = GridPtr->GlobalDimensions(); + Coordinate mpi = GridPtr->ProcessorGrid(); + Coordinate shm; + + GlobalSharedMemory::GetShmDims(mpi,shm); + + Coordinate CommDim(Nd); + for(int d=0;d<Nd;d++) CommDim[d] = mpi[d]>1 ? 
1 : 0;
+
+  Coordinate NonDirichlet(Nd+1,0);
+  Coordinate Dirichlet(Nd+1,0);
+  Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0];
+  Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1];
+  Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2];
+  Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3];
+
+  Coordinate Block4(Nd);
+  Block4[0] = Dirichlet[1];
+  Block4[1] = Dirichlet[2];
+  Block4[2] = Dirichlet[3];
+  Block4[3] = Dirichlet[4];
+
+  int Width=3;
+  TheHMC.Resources.SetMomentumFilter(new DDHMCFilter(Block4,Width));
+
+  //////////////////////////
+  // Fermion Grids
+  //////////////////////////
+  auto FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
+  auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
+
+  Coordinate simdF = GridDefaultSimd(Nd,vComplexF::Nsimd());
+  auto GridPtrF   = SpaceTimeGrid::makeFourDimGrid(latt4,simdF,mpi);
+  auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF);
+  auto FGridF     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF);
+  auto FrbGridF   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF);
+
+  IwasakiGaugeActionR GaugeAction(beta);
+
+  // temporarily need a gauge field
+  LatticeGaugeField U(GridPtr);
+  LatticeGaugeFieldF UF(GridPtrF);
+
+  std::cout << GridLogMessage << " Running the HMC "<< std::endl;
+  TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
+  TheHMC.initializeGaugeFieldAndRNGs(U);
+  std::cout << "loaded NERSC gauge field"<<std::endl;
+
+  // These lines are unnecessary if BC are all periodic
+  std::vector<Complex> boundary = {1,1,1,-1};
+  FermionAction::ImplParams Params(boundary);
+  Params.dirichlet=NonDirichlet;
+  FermionAction::ImplParams ParamsDir(boundary);
+  ParamsDir.dirichlet=Dirichlet;
+
+  // double StoppingCondition = 1e-14;
+  // double MDStoppingCondition = 1e-9;
+  double StoppingCondition = 1e-8;
+  double MDStoppingCondition = 1e-6;
+  double MDStoppingConditionLoose = 1e-6;
+  double MDStoppingConditionStrange = 1e-8;
+  double MaxCGIterations = 300000;
+  ConjugateGradient<FermionField>  CG(StoppingCondition,MaxCGIterations);
+  ConjugateGradient<FermionField>  MDCG(MDStoppingCondition,MaxCGIterations);
+
+  ////////////////////////////////////
+  // Collect actions
+  ////////////////////////////////////
+  ActionLevel<HMCWrapper::Field> Level1(1);
+  ActionLevel<HMCWrapper::Field> Level2(8); // 6 x 20 = 120 = 8 x 15
+  ActionLevel<HMCWrapper::Field> Level3(15);
+
+  ////////////////////////////////////
+  // Strange action
+  ////////////////////////////////////
+  FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
+  FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass,  M5,b,c, Params);
+
+  // Probably dominates the force - back to EOFA.
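The Dirichlet extents computed above are just the node-local sublattice: latt4[d]/mpi[d] is the per-rank extent, the shm[d] factor merges the ranks that share a node, and CommDim[d] switches blocking off in any direction with no decomposition at all. A self-contained check of that arithmetic; the 48^3 x 96 lattice and the 2.2.2.4 / 2.2.2.1 decompositions are hypothetical values chosen only to make the numbers concrete:

// Self-contained check of the Dirichlet block arithmetic above
// (example geometry is hypothetical, not read from this run).
#include <cstdio>

int main() {
  const int Nd = 4;
  int latt4[Nd] = {48, 48, 48, 96};
  int mpi[Nd]   = { 2,  2,  2,  4};
  int shm[Nd]   = { 2,  2,  2,  1};

  int Dirichlet[Nd + 1] = {0};   // slot 0 is the fifth (s) direction, unblocked
  for (int d = 0; d < Nd; d++) {
    int CommDim = (mpi[d] * shm[d] > 1) ? 1 : 0;             // decomposed at all?
    Dirichlet[d + 1] = CommDim * latt4[d] / mpi[d] * shm[d]; // node-local extent
  }
  for (int d = 0; d < Nd; d++)
    std::printf("dim %d : block extent %d\n", d, Dirichlet[d + 1]);
  return 0;
}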
+ OneFlavourRationalParams SFRp; + SFRp.lo = 0.1; + SFRp.hi = 25.0; + SFRp.MaxIter = 10000; + SFRp.tolerance= 1.0e-5; + SFRp.mdtolerance= 1.0e-3; + SFRp.degree = 14; + SFRp.precision= 50; + + MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); + MobiusEOFAFermionF Strange_Op_LF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); + MobiusEOFAFermionD Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); + MobiusEOFAFermionF Strange_Op_RF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); + ConjugateGradient ActionCG(StoppingCondition,MaxCGIterations); + ConjugateGradient DerivativeCG(MDStoppingCondition,MaxCGIterations); + LinearOperatorEOFAD Strange_LinOp_L (Strange_Op_L); + LinearOperatorEOFAD Strange_LinOp_R (Strange_Op_R); + LinearOperatorEOFAF Strange_LinOp_LF(Strange_Op_LF); + LinearOperatorEOFAF Strange_LinOp_RF(Strange_Op_RF); + + const int MX_inner = 1000; + MxPCG_EOFA ActionCGL(StoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_LF,Strange_Op_L, + Strange_LinOp_LF,Strange_LinOp_L); + + MxPCG_EOFA DerivativeCGL(MDStoppingConditionStrange, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_LF,Strange_Op_L, + Strange_LinOp_LF,Strange_LinOp_L); + + MxPCG_EOFA ActionCGR(StoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_RF,Strange_Op_R, + Strange_LinOp_RF,Strange_LinOp_R); + + MxPCG_EOFA DerivativeCGR(MDStoppingConditionStrange, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_RF,Strange_Op_R, + Strange_LinOp_RF,Strange_LinOp_R); + + ExactOneFlavourRatioPseudoFermionAction + EOFA(Strange_Op_L, Strange_Op_R, + ActionCG, + ActionCGL, ActionCGR, + DerivativeCGL, DerivativeCGR, + SFRp, true); + Level2.push_back(&EOFA); + + //////////////////////////////////// + // up down action + //////////////////////////////////// + std::vector light_den; + std::vector light_num; + std::vector dirichlet_den; + std::vector dirichlet_num; + + int n_hasenbusch = hasenbusch.size(); + light_den.push_back(light_mass); dirichlet_den.push_back(0); + for(int h=0;h Numerators; + std::vector Denominators; + std::vector DenominatorsF; + std::vector *> Quotients; + std::vector *> Bdys; + std::vector ActionMPCG; + std::vector MPCG; + + typedef SchurDiagMooeeOperator LinearOperatorF; + typedef SchurDiagMooeeOperator LinearOperatorD; + std::vector LinOpD; + std::vector LinOpF; + + for(int h=0;h(*Numerators[h],*Denominators[h],MDCG,CG)); + Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],CG)); + } else { + Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + } + } + for(int h=0;hSetTolerances(ActionTolByPole,MDTolByPole); + } + int nquo=Quotients.size(); + // Level1.push_back(Bdys[0]); + // Level1.push_back(Bdys[1]); + // Level2.push_back(Bdys[0]); + // Level2.push_back(Bdys[1]); + for(int h=0;h +Author: Guido Cossu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your 
option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +NAMESPACE_BEGIN(Grid); + +template + class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction { + public: + typedef typename FermionOperatorD::FermionField FieldD; + typedef typename FermionOperatorF::FermionField FieldF; + + using OperatorFunction::operator(); + + RealD Tolerance; + RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed + Integer MaxInnerIterations; + Integer MaxOuterIterations; + GridBase* SinglePrecGrid4; //Grid for single-precision fields + GridBase* SinglePrecGrid5; //Grid for single-precision fields + RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance + + FermionOperatorF &FermOpF; + FermionOperatorD &FermOpD;; + SchurOperatorF &LinOpF; + SchurOperatorD &LinOpD; + + Integer TotalInnerIterations; //Number of inner CG iterations + Integer TotalOuterIterations; //Number of restarts + Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step + + MixedPrecisionConjugateGradientOperatorFunction(RealD tol, + Integer maxinnerit, + Integer maxouterit, + GridBase* _sp_grid4, + GridBase* _sp_grid5, + FermionOperatorF &_FermOpF, + FermionOperatorD &_FermOpD, + SchurOperatorF &_LinOpF, + SchurOperatorD &_LinOpD): + LinOpF(_LinOpF), + LinOpD(_LinOpD), + FermOpF(_FermOpF), + FermOpD(_FermOpD), + Tolerance(tol), + InnerTolerance(tol), + MaxInnerIterations(maxinnerit), + MaxOuterIterations(maxouterit), + SinglePrecGrid4(_sp_grid4), + SinglePrecGrid5(_sp_grid5), + OuterLoopNormMult(100.) 
+ { + /* Debugging instances of objects; references are stored + std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " < &LinOpU, const FieldD &src, FieldD &psi) { + + std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<(&LinOpU); + + // std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <_Mat)<_Mat)==&(LinOpD._Mat)); + + //////////////////////////////////////////////////////////////////////////////////// + // Must snarf a single precision copy of the gauge field in Linop_d argument + //////////////////////////////////////////////////////////////////////////////////// + typedef typename FermionOperatorF::GaugeField GaugeFieldF; + typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF; + typedef typename FermionOperatorD::GaugeField GaugeFieldD; + typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD; + + GridBase * GridPtrF = SinglePrecGrid4; + GridBase * GridPtrD = FermOpD.Umu.Grid(); + GaugeFieldF U_f (GridPtrF); + GaugeLinkFieldF Umu_f(GridPtrF); + // std::cout << " Dim gauge field "<Nd()<Nd()<(FermOpD.Umu, mu); + precisionChange(Umu_f,Umu_d); + PokeIndex(FermOpF.Umu, Umu_f, mu); + } + pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu); + pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu); + + //////////////////////////////////////////////////////////////////////////////////// + // Make a mixed precision conjugate gradient + //////////////////////////////////////////////////////////////////////////////////// +#if 1 + RealD delta=1.e-4; + std::cout << GridLogMessage << "Calling reliable update Conjugate Gradient" < MPCG(Tolerance,MaxInnerIterations*MaxOuterIterations,delta,SinglePrecGrid5,LinOpF,LinOpD); +#else + std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" < MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD); +#endif + MPCG(src,psi); + } + }; + +NAMESPACE_END(Grid); + + +int main(int argc, char **argv) { + using namespace Grid; + + Grid_init(&argc, &argv); + + CartesianCommunicator::BarrierWorld(); + std::cout << GridLogMessage << " Clock skew check" < HMCWrapper; + // MD.name = std::string("Leap Frog"); + typedef GenericHMCRunner HMCWrapper; + MD.name = std::string("Force Gradient"); + //typedef GenericHMCRunner HMCWrapper; + // MD.name = std::string("MinimumNorm2"); + // MD.MDsteps = 4; + MD.MDsteps = 4; + MD.trajL = 0.5; + + HMCparameters HMCparams; + HMCparams.StartTrajectory = 1077; + HMCparams.Trajectories = 1; + HMCparams.NoMetropolisUntil= 0; + // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; + // HMCparams.StartingType =std::string("ColdStart"); + HMCparams.StartingType =std::string("CheckpointStart"); + HMCparams.MD = MD; + HMCWrapper TheHMC(HMCparams); + + // Grid from the command line arguments --grid and --mpi + TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition + + CheckpointerParameters CPparams; + CPparams.config_prefix = "ckpoint_DDHMC_lat"; + CPparams.rng_prefix = "ckpoint_DDHMC_rng"; + CPparams.saveInterval = 1; + CPparams.format = "IEEE64BIG"; + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + std::cout << "loaded NERSC checpointer"< PlaqObs; + TheHMC.Resources.AddObservable(); + ////////////////////////////////////////////// + + const int Ls = 12; + RealD M5 = 1.8; + RealD b = 1.5; + RealD c = 0.5; + Real beta = 2.13; + // Real light_mass = 5.4e-4; + Real light_mass = 7.8e-4; + Real strange_mass = 0.0362; + Real pv_mass = 1.0; + // std::vector hasenbusch({ light_mass, 
3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); + std::vector hasenbusch({ light_mass, 5e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass }); + + + OneFlavourRationalParams OFRp; // Up/down + OFRp.lo = 2.0e-5; + OFRp.hi = 90.0; + OFRp.MaxIter = 60000; + OFRp.tolerance= 1.0e-5; + OFRp.mdtolerance= 1.0e-3; + // OFRp.degree = 20; converges + // OFRp.degree = 16; + OFRp.degree = 12; + OFRp.precision= 80; + OFRp.BoundsCheckFreq=0; + std::vector ActionTolByPole({ + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-10,1.0e-10,1.0e-10,1.0e-10, + 1.0e-10,1.0e-10,1.0e-10,1.0e-10 + }); + std::vector MDTolByPole({ + 3.0e-5,1.0e-5,3.0e-6,1.0e-6, + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-10,1.0e-10,1.0e-10,1.0e-10 + }); + + auto GridPtr = TheHMC.Resources.GetCartesian(); + auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); + + typedef SchurDiagMooeeOperator LinearOperatorF; + typedef SchurDiagMooeeOperator LinearOperatorD; + typedef SchurDiagMooeeOperator LinearOperatorEOFAF; + typedef SchurDiagMooeeOperator LinearOperatorEOFAD; + typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG; + typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG_EOFA; + + //////////////////////////////////////////////////////////////// + // Domain decomposed + //////////////////////////////////////////////////////////////// + Coordinate latt4 = GridPtr->GlobalDimensions(); + Coordinate mpi = GridPtr->ProcessorGrid(); + Coordinate shm; + + GlobalSharedMemory::GetShmDims(mpi,shm); + + Coordinate CommDim(Nd); + for(int d=0;d1 ? 1 : 0; + + Coordinate NonDirichlet(Nd+1,0); + Coordinate Dirichlet(Nd+1,0); + Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0]; + Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1]; + Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2]; + Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3]; + + Coordinate Block4(Nd); + Block4[0] = Dirichlet[1]; + Block4[1] = Dirichlet[2]; + Block4[2] = Dirichlet[3]; + Block4[3] = Dirichlet[4]; + + int Width=3; + TheHMC.Resources.SetMomentumFilter(new DDHMCFilter(Block4,Width)); + + ////////////////////////// + // Fermion Grids + ////////////////////////// + auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr); + auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr); + + Coordinate simdF = GridDefaultSimd(Nd,vComplexF::Nsimd()); + auto GridPtrF = SpaceTimeGrid::makeFourDimGrid(latt4,simdF,mpi); + auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF); + auto FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF); + auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF); + + IwasakiGaugeActionR GaugeAction(beta); + + // temporarily need a gauge field + LatticeGaugeField U(GridPtr); + LatticeGaugeFieldF UF(GridPtrF); + + std::cout << GridLogMessage << " Running the HMC "<< std::endl; + TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file + TheHMC.initializeGaugeFieldAndRNGs(U); + std::cout << "loaded NERSC gauge field"< boundary = {1,1,1,-1}; + FermionAction::ImplParams Params(boundary); + Params.dirichlet=NonDirichlet; + FermionAction::ImplParams ParamsDir(boundary); + ParamsDir.dirichlet=Dirichlet; + + // double StoppingCondition = 1e-14; + // double MDStoppingCondition = 1e-9; + double StoppingCondition = 1e-8; + double MDStoppingCondition = 1e-7; + double MDStoppingConditionLoose = 1e-7; + double MDStoppingConditionStrange = 1e-7; + double MaxCGIterations = 300000; + ConjugateGradient CG(StoppingCondition,MaxCGIterations); + ConjugateGradient MDCG(MDStoppingCondition,MaxCGIterations); + + 
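The OFRp rational action applies fractional powers of the Dirac operator through a partial-fraction (multishift) expansion, which is why the ActionTolByPole and MDTolByPole vectors above can tighten or relax convergence pole by pole: the small shifts are the ill-conditioned terms and dominate the cost. A toy evaluation of such an expansion; the a0, r_k, p_k below are placeholders, not the Remez coefficients this run actually generates:

// Toy partial-fraction evaluation of a rational approximation,
// R(x) = a0 + sum_k r_k / (x + p_k), the form a multishift solver inverts
// term by term. Coefficients are placeholders for illustration only.
#include <cstdio>

int main() {
  const int deg = 4;
  double a0 = 0.001;
  double r[deg] = {0.02, 0.1, 0.5, 2.5};     // placeholder residues
  double p[deg] = {1e-4, 1e-2, 1.0, 100.0};  // placeholder poles (shifts)

  for (double x = 1e-3; x <= 10.5; x *= 10.0) {
    double R = a0;
    for (int k = 0; k < deg; k++) R += r[k] / (x + p[k]);
    // For small x the smallest shift dominates R(x) and is the hardest term
    // to invert, which is why the per-pole tolerance vectors tighten the low
    // poles for the action and relax them for the MD force.
    std::printf("x = %8.3g   R(x) = %10.4g\n", x, R);
  }
  return 0;
}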
//////////////////////////////////// + // Collect actions + //////////////////////////////////// + ActionLevel Level1(1); + ActionLevel Level2(2); + ActionLevel Level3(30); + + //////////////////////////////////// + // Strange action + //////////////////////////////////// + FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params); + FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params); + + // Probably dominates the force - back to EOFA. + OneFlavourRationalParams SFRp; + SFRp.lo = 0.1; + SFRp.hi = 25.0; + SFRp.MaxIter = 10000; + SFRp.tolerance= 1.0e-5; + SFRp.mdtolerance= 2.0e-4; + SFRp.degree = 14; + SFRp.precision= 50; + + MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); + MobiusEOFAFermionF Strange_Op_LF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); + MobiusEOFAFermionD Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); + MobiusEOFAFermionF Strange_Op_RF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); + ConjugateGradient ActionCG(StoppingCondition,MaxCGIterations); + ConjugateGradient DerivativeCG(MDStoppingCondition,MaxCGIterations); + LinearOperatorEOFAD Strange_LinOp_L (Strange_Op_L); + LinearOperatorEOFAD Strange_LinOp_R (Strange_Op_R); + LinearOperatorEOFAF Strange_LinOp_LF(Strange_Op_LF); + LinearOperatorEOFAF Strange_LinOp_RF(Strange_Op_RF); + + const int MX_inner = 1000; + MxPCG_EOFA ActionCGL(StoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_LF,Strange_Op_L, + Strange_LinOp_LF,Strange_LinOp_L); + + MxPCG_EOFA DerivativeCGL(MDStoppingConditionStrange, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_LF,Strange_Op_L, + Strange_LinOp_LF,Strange_LinOp_L); + + MxPCG_EOFA ActionCGR(StoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_RF,Strange_Op_R, + Strange_LinOp_RF,Strange_LinOp_R); + + MxPCG_EOFA DerivativeCGR(MDStoppingConditionStrange, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_RF,Strange_Op_R, + Strange_LinOp_RF,Strange_LinOp_R); + + ExactOneFlavourRatioPseudoFermionAction + EOFA(Strange_Op_L, Strange_Op_R, + ActionCG, + ActionCGL, ActionCGR, + DerivativeCGL, DerivativeCGR, + SFRp, true); + // Level2.push_back(&EOFA); + + //////////////////////////////////// + // up down action + //////////////////////////////////// + std::vector light_den; + std::vector light_num; + std::vector dirichlet_den; + std::vector dirichlet_num; + + int n_hasenbusch = hasenbusch.size(); + light_den.push_back(light_mass); dirichlet_den.push_back(0); + for(int h=0;h Numerators; + std::vector Denominators; + std::vector DenominatorsF; + std::vector *> Quotients; + std::vector *> Bdys; + std::vector ActionMPCG; + std::vector MPCG; + + typedef SchurDiagMooeeOperator LinearOperatorF; + typedef SchurDiagMooeeOperator LinearOperatorD; + std::vector LinOpD; + std::vector LinOpF; + + for(int h=0;h(*Numerators[h],*Denominators[h],MDCG,CG)); + Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],CG)); + } else { + Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + Bdys.push_back( new 
OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + } + } + for(int h=0;hSetTolerances(ActionTolByPole,MDTolByPole); + } + int nquo=Quotients.size(); + Level2.push_back(Bdys[0]); + Level2.push_back(Bdys[1]); + for(int h=0;h +Author: Guido Cossu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +NAMESPACE_BEGIN(Grid); + +template + class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction { + public: + typedef typename FermionOperatorD::FermionField FieldD; + typedef typename FermionOperatorF::FermionField FieldF; + + using OperatorFunction::operator(); + + RealD Tolerance; + RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed + Integer MaxInnerIterations; + Integer MaxOuterIterations; + GridBase* SinglePrecGrid4; //Grid for single-precision fields + GridBase* SinglePrecGrid5; //Grid for single-precision fields + RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance + + FermionOperatorF &FermOpF; + FermionOperatorD &FermOpD;; + SchurOperatorF &LinOpF; + SchurOperatorD &LinOpD; + + Integer TotalInnerIterations; //Number of inner CG iterations + Integer TotalOuterIterations; //Number of restarts + Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step + + MixedPrecisionConjugateGradientOperatorFunction(RealD tol, + Integer maxinnerit, + Integer maxouterit, + GridBase* _sp_grid4, + GridBase* _sp_grid5, + FermionOperatorF &_FermOpF, + FermionOperatorD &_FermOpD, + SchurOperatorF &_LinOpF, + SchurOperatorD &_LinOpD): + LinOpF(_LinOpF), + LinOpD(_LinOpD), + FermOpF(_FermOpF), + FermOpD(_FermOpD), + Tolerance(tol), + InnerTolerance(tol), + MaxInnerIterations(maxinnerit), + MaxOuterIterations(maxouterit), + SinglePrecGrid4(_sp_grid4), + SinglePrecGrid5(_sp_grid5), + OuterLoopNormMult(100.) 
+ { + /* Debugging instances of objects; references are stored + std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " < &LinOpU, const FieldD &src, FieldD &psi) { + + std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<(&LinOpU); + + // std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <_Mat)<_Mat)==&(LinOpD._Mat)); + + //////////////////////////////////////////////////////////////////////////////////// + // Must snarf a single precision copy of the gauge field in Linop_d argument + //////////////////////////////////////////////////////////////////////////////////// + typedef typename FermionOperatorF::GaugeField GaugeFieldF; + typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF; + typedef typename FermionOperatorD::GaugeField GaugeFieldD; + typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD; + + GridBase * GridPtrF = SinglePrecGrid4; + GridBase * GridPtrD = FermOpD.Umu.Grid(); + GaugeFieldF U_f (GridPtrF); + GaugeLinkFieldF Umu_f(GridPtrF); + // std::cout << " Dim gauge field "<Nd()<Nd()<(FermOpD.Umu, mu); + precisionChange(Umu_f,Umu_d); + PokeIndex(FermOpF.Umu, Umu_f, mu); + } + pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu); + pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu); + + //////////////////////////////////////////////////////////////////////////////////// + // Make a mixed precision conjugate gradient + //////////////////////////////////////////////////////////////////////////////////// +#if 1 + RealD delta=1.e-4; + std::cout << GridLogMessage << "Calling reliable update Conjugate Gradient" < MPCG(Tolerance,MaxInnerIterations*MaxOuterIterations,delta,SinglePrecGrid5,LinOpF,LinOpD); +#else + std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" < MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD); +#endif + MPCG(src,psi); + } + }; + +NAMESPACE_END(Grid); + + +int main(int argc, char **argv) { + using namespace Grid; + + Grid_init(&argc, &argv); + + CartesianCommunicator::BarrierWorld(); + std::cout << GridLogMessage << " Clock skew check" < HMCWrapper; + // MD.name = std::string("Leap Frog"); + typedef GenericHMCRunner HMCWrapper; + MD.name = std::string("Force Gradient"); + //typedef GenericHMCRunner HMCWrapper; + // MD.name = std::string("MinimumNorm2"); + // TrajL = 2 + // 4/2 => 0.6 dH + // 3/3 => 0.8 dH .. 
depth 3, slower
+  //MD.MDsteps = 4;
+  MD.MDsteps = 3;
+  MD.trajL   = 0.5;
+
+  HMCparameters HMCparams;
+  HMCparams.StartTrajectory  = 1077;
+  HMCparams.Trajectories     = 1;
+  HMCparams.NoMetropolisUntil=  0;
+  // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
+  // HMCparams.StartingType     =std::string("ColdStart");
+  HMCparams.StartingType     =std::string("CheckpointStart");
+  HMCparams.MD = MD;
+  HMCWrapper TheHMC(HMCparams);
+
+  // Grid from the command line arguments --grid and --mpi
+  TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
+
+  CheckpointerParameters CPparams;
+  CPparams.config_prefix = "ckpoint_DDHMC_lat";
+  CPparams.rng_prefix    = "ckpoint_DDHMC_rng";
+  CPparams.saveInterval  = 1;
+  CPparams.format        = "IEEE64BIG";
+  TheHMC.Resources.LoadNerscCheckpointer(CPparams);
+  std::cout << "loaded NERSC checkpointer"<<std::endl;
+
+  RNGModuleParameters RNGpar;
+  RNGpar.serial_seeds = "1 2 3 4 5";
+  RNGpar.parallel_seeds = "6 7 8 9 10";
+  TheHMC.Resources.SetRNGSeeds(RNGpar);
+
+  // Construct observables
+  // here there is too much indirection
+  typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
+  TheHMC.Resources.AddObservable<PlaqObs>();
+  //////////////////////////////////////////////
+
+  const int Ls      = 12;
+  RealD M5  = 1.8;
+  RealD b   = 1.5;
+  RealD c   = 0.5;
+  Real beta         = 2.13;
+  // Real light_mass   = 5.4e-4;
+  Real light_mass     = 7.8e-4;
+  Real light_mass_dir = 0.01;
+  Real strange_mass = 0.0362;
+  Real pv_mass      = 1.0;
+  std::vector<Real> hasenbusch({ 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
+  // std::vector<Real> hasenbusch({ light_mass, 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
+  // std::vector<Real> hasenbusch({ light_mass, 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
+  // std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
+
+  int SP_iters=9000;
+
+  RationalActionParams OFRp; // Up/down
+  OFRp.lo       = 6.0e-5;
+  OFRp.hi       = 90.0;
+  OFRp.inv_pow  = 2;
+  OFRp.MaxIter  = SP_iters; // get most shifts by 2000, stop sharing space
+  OFRp.action_tolerance= 1.0e-8;
+  OFRp.action_degree   = 18;
+  OFRp.md_tolerance= 1.0e-5;
+  OFRp.md_degree   = 14;
+  // OFRp.degree   = 20; converges
+  // OFRp.degree   = 16;
+  OFRp.precision= 80;
+  OFRp.BoundsCheckFreq=0;
+  std::vector<RealD> ActionTolByPole({
+      1.0e-7,1.0e-8,1.0e-8,1.0e-8,
+      1.0e-8,1.0e-8,1.0e-8,1.0e-8,
+      1.0e-8,1.0e-8,1.0e-8,1.0e-8,
+      1.0e-8,1.0e-8,1.0e-8,1.0e-8,
+      1.0e-8,1.0e-8
+  });
+  std::vector<RealD> MDTolByPole({
+      1.6e-5,5.0e-6,1.0e-6,3.0e-7, // soften convergence even more
+      // 1.0e-6,3.0e-7,1.0e-7,1.0e-7,
+      // 3.0e-6,1.0e-6,1.0e-7,1.0e-7, // soften convergence
+      1.0e-8,1.0e-8,1.0e-8,1.0e-8,
+      1.0e-8,1.0e-8,1.0e-8,1.0e-8,
+      1.0e-8,1.0e-8
+  });
+
+  auto GridPtr   = TheHMC.Resources.GetCartesian();
+  auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
+
+  typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
+  typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
+  typedef SchurDiagMooeeOperator<MobiusEOFAFermionF,FermionFieldF> LinearOperatorEOFAF;
+  typedef SchurDiagMooeeOperator<MobiusEOFAFermionD,FermionField > LinearOperatorEOFAD;
+  typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusFermionD,MobiusFermionF,LinearOperatorD,LinearOperatorF> MxPCG;
+  typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusEOFAFermionD,MobiusEOFAFermionF,LinearOperatorEOFAD,LinearOperatorEOFAF> MxPCG_EOFA;
+
+  ////////////////////////////////////////////////////////////////
+  // Domain decomposed
+  ////////////////////////////////////////////////////////////////
+  Coordinate latt4 = GridPtr->GlobalDimensions();
+  Coordinate mpi   = GridPtr->ProcessorGrid();
+  Coordinate shm;
+
+  GlobalSharedMemory::GetShmDims(mpi,shm);
+
+  Coordinate CommDim(Nd);
+  for(int d=0;d<Nd;d++) CommDim[d]= mpi[d]*shm[d]>1 ?
1 : 0; + + Coordinate NonDirichlet(Nd+1,0); + Coordinate Dirichlet(Nd+1,0); + Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0]; + Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1]; + Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2]; + Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3]; + //Dirichlet[1] = 0; + //Dirichlet[2] = 0; + //Dirichlet[3] = 0; + + // + Coordinate Block4(Nd); + Block4[0] = Dirichlet[1]; + Block4[1] = Dirichlet[2]; + Block4[2] = Dirichlet[3]; + Block4[3] = Dirichlet[4]; + + int Width=4; + TheHMC.Resources.SetMomentumFilter(new DDHMCFilter(Block4,Width)); + + ////////////////////////// + // Fermion Grids + ////////////////////////// + auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr); + auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr); + + Coordinate simdF = GridDefaultSimd(Nd,vComplexF::Nsimd()); + auto GridPtrF = SpaceTimeGrid::makeFourDimGrid(latt4,simdF,mpi); + auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF); + auto FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF); + auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF); + + IwasakiGaugeActionR GaugeAction(beta); + + // temporarily need a gauge field + LatticeGaugeFieldD U(GridPtr); U=Zero(); + LatticeGaugeFieldF UF(GridPtrF); UF=Zero(); + + std::cout << GridLogMessage << " Running the HMC "<< std::endl; + TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file + TheHMC.initializeGaugeFieldAndRNGs(U); + std::cout << "loaded NERSC gauge field"< boundary = {1,1,1,-1}; + FermionAction::ImplParams Params(boundary); + FermionAction::ImplParams ParamsDir(boundary); + FermionActionF::ImplParams ParamsF(boundary); + FermionActionF::ImplParams ParamsDirF(boundary); + Params.dirichlet=NonDirichlet; + ParamsF.dirichlet=NonDirichlet; + ParamsDir.dirichlet=Dirichlet; + ParamsDirF.dirichlet=Dirichlet; + ParamsDir.partialDirichlet=1; + ParamsDirF.partialDirichlet=1; + std::cout << GridLogMessage<< "Partial Dirichlet depth is "< CG(StoppingCondition,MaxCGIterations); + ConjugateGradient MDCG(MDStoppingCondition,MaxCGIterations); + + //////////////////////////////////// + // Collect actions + //////////////////////////////////// + ActionLevel Level1(1); + ActionLevel Level2(3); + ActionLevel Level3(15); + + //////////////////////////////////// + // Strange action + //////////////////////////////////// + FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params); + FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params); + + // Probably dominates the force - back to EOFA. 
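Earlier in this file, SetMomentumFilter(new DDHMCFilter(Block4,Width)) freezes the gauge links in a band of Width sites around each Block4 boundary by zeroing their conjugate momenta, so the boundary gauge field is exactly unchanged within a trajectory and the domain-decomposed determinant split holds configuration by configuration. A toy one-dimensional analogue of such a filter, illustrating the masking pattern only (Grid's DDHMCFilter acts on the 4-d gauge momenta):

// Toy 1-d momentum filter: zero the momenta within `width` sites of each
// block boundary so those links never evolve during the trajectory.
#include <cstdio>
#include <vector>

int main() {
  const int L = 32, block = 16, width = 4;
  std::vector<double> mom(L, 1.0);   // stand-in for conjugate momenta

  for (int x = 0; x < L; x++) {
    int xb = x % block;              // offset within the local block
    if (xb < width || xb >= block - width) mom[x] = 0.0;  // boundary band
  }
  for (int x = 0; x < L; x++) std::printf("%g ", mom[x]);
  std::printf("\n");
  return 0;
}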
+ OneFlavourRationalParams SFRp; + SFRp.lo = 0.1; + SFRp.hi = 25.0; + SFRp.MaxIter = 10000; + SFRp.tolerance= 1.0e-8; + SFRp.mdtolerance= 2.0e-4; + SFRp.degree = 12; + SFRp.precision= 50; + + MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); + MobiusEOFAFermionF Strange_Op_LF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); + MobiusEOFAFermionD Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); + MobiusEOFAFermionF Strange_Op_RF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); + ConjugateGradient ActionCG(StoppingCondition,MaxCGIterations); + ConjugateGradient DerivativeCG(MDStoppingCondition,MaxCGIterations); + LinearOperatorEOFAD Strange_LinOp_L (Strange_Op_L); + LinearOperatorEOFAD Strange_LinOp_R (Strange_Op_R); + LinearOperatorEOFAF Strange_LinOp_LF(Strange_Op_LF); + LinearOperatorEOFAF Strange_LinOp_RF(Strange_Op_RF); + + const int MX_inner = 1000; + MxPCG_EOFA ActionCGL(StoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_LF,Strange_Op_L, + Strange_LinOp_LF,Strange_LinOp_L); + + MxPCG_EOFA DerivativeCGL(MDStoppingConditionStrange, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_LF,Strange_Op_L, + Strange_LinOp_LF,Strange_LinOp_L); + + MxPCG_EOFA ActionCGR(StoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_RF,Strange_Op_R, + Strange_LinOp_RF,Strange_LinOp_R); + + MxPCG_EOFA DerivativeCGR(MDStoppingConditionStrange, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_RF,Strange_Op_R, + Strange_LinOp_RF,Strange_LinOp_R); + + ExactOneFlavourRatioPseudoFermionAction + EOFA(Strange_Op_L, Strange_Op_R, + ActionCG, + ActionCGL, ActionCGR, + DerivativeCGL, DerivativeCGR, + SFRp, true); + Level2.push_back(&EOFA); + + //////////////////////////////////// + // up down action + //////////////////////////////////// + std::vector light_den; + std::vector light_num; + std::vector dirichlet_den; + std::vector dirichlet_num; + + int n_hasenbusch = hasenbusch.size(); + light_den.push_back(light_mass); dirichlet_den.push_back(0); + for(int h=0;h Numerators; + std::vector Denominators; + std::vector NumeratorsF; + std::vector DenominatorsF; + std::vector *> Quotients; + std::vector ActionMPCG; + std::vector MPCG; + +#define MIXED_PRECISION +#ifdef MIXED_PRECISION + std::vector *> Bdys; +#else + std::vector *> Bdys; +#endif + + typedef SchurDiagMooeeOperator LinearOperatorF; + typedef SchurDiagMooeeOperator LinearOperatorD; + std::vector LinOpD; + std::vector LinOpF; + + for(int h=0;h(*Numerators[h],*Denominators[h],MDCG,CG)); + Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],CG)); + } else { +#ifdef MIXED_PRECISION + Bdys.push_back( new GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction( + *Numerators[h],*Denominators[h], + *NumeratorsF[h],*DenominatorsF[h], + OFRp, SP_iters) ); + Bdys.push_back( new GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction( + *Numerators[h],*Denominators[h], + *NumeratorsF[h],*DenominatorsF[h], + OFRp, SP_iters) ); +#else + Bdys.push_back( new GeneralEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + Bdys.push_back( new GeneralEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); 
+#endif + } + } + for(int h=0;hSetTolerances(ActionTolByPole,MDTolByPole); + } + int nquo=Quotients.size(); + Level1.push_back(Bdys[0]); + Level1.push_back(Bdys[1]); + Level2.push_back(Quotients[0]); + for(int h=1;h +Author: Guido Cossu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +int main(int argc, char **argv) { + using namespace Grid; + + Grid_init(&argc, &argv); + int threads = GridThread::GetThreads(); + + // Typedefs to simplify notation + typedef WilsonImplR FermionImplPolicy; + typedef MobiusFermionD FermionAction; + typedef typename FermionAction::FermionField FermionField; + + typedef Grid::XmlReader Serialiser; + + //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + IntegratorParameters MD; + // typedef GenericHMCRunner HMCWrapper; + // MD.name = std::string("Leap Frog"); + // typedef GenericHMCRunner HMCWrapper; + // MD.name = std::string("Force Gradient"); + typedef GenericHMCRunner HMCWrapper; + MD.name = std::string("MinimumNorm2"); + MD.MDsteps = 4; + MD.trajL = 1.0; + + HMCparameters HMCparams; + HMCparams.StartTrajectory = 17; + HMCparams.Trajectories = 200; + HMCparams.NoMetropolisUntil= 0; + // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; + // HMCparams.StartingType =std::string("ColdStart"); + HMCparams.StartingType =std::string("CheckpointStart"); + HMCparams.MD = MD; + HMCWrapper TheHMC(HMCparams); + + // Grid from the command line arguments --grid and --mpi + TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition + + CheckpointerParameters CPparams; + CPparams.config_prefix = "ckpoint_DDHMC_lat"; + CPparams.rng_prefix = "ckpoint_DDHMC_rng"; + CPparams.saveInterval = 1; + CPparams.format = "IEEE64BIG"; + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + + RNGModuleParameters RNGpar; + RNGpar.serial_seeds = "1 2 3 4 5"; + RNGpar.parallel_seeds = "6 7 8 9 10"; + TheHMC.Resources.SetRNGSeeds(RNGpar); + + // Construct observables + // here there is too much indirection + typedef PlaquetteMod PlaqObs; + TheHMC.Resources.AddObservable(); + ////////////////////////////////////////////// + + const int Ls = 16; + RealD M5 = 1.8; + RealD b = 1.0; + RealD c = 0.0; + Real beta = 2.13; + Real light_mass = 0.01; + Real strange_mass = 0.04; + Real pv_mass = 1.0; + std::vector hasenbusch({ light_mass, 0.04, 0.25, 0.4, 0.7 , pv_mass }); + + // FIXME: + // Same in MC and MD + // Need to mix precision too + OneFlavourRationalParams SFRp; + SFRp.lo = 4.0e-3; + SFRp.hi = 30.0; + SFRp.MaxIter = 10000; + SFRp.tolerance= 1.0e-8; + SFRp.mdtolerance= 1.0e-5; + SFRp.degree = 16; + SFRp.precision= 50; + SFRp.BoundsCheckFreq=5; + + OneFlavourRationalParams OFRp; + OFRp.lo = 1.0e-4; + OFRp.hi = 
30.0; + OFRp.MaxIter = 10000; + OFRp.tolerance= 1.0e-8; + OFRp.mdtolerance= 1.0e-5; + OFRp.degree = 16; + OFRp.precision= 50; + OFRp.BoundsCheckFreq=5; + + auto GridPtr = TheHMC.Resources.GetCartesian(); + auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); + + //////////////////////////////////////////////////////////////// + // Domain decomposed + //////////////////////////////////////////////////////////////// + Coordinate latt4 = GridPtr->GlobalDimensions(); + Coordinate mpi = GridPtr->ProcessorGrid(); + Coordinate shm; + + GlobalSharedMemory::GetShmDims(mpi,shm); + + Coordinate CommDim(Nd); + for(int d=0;d1 ? 1 : 0; + + Coordinate Dirichlet(Nd+1,0); + Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0]; + Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1]; + Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2]; + Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3]; + + Coordinate Block4(Nd); + Block4[0] = Dirichlet[1]; + Block4[1] = Dirichlet[2]; + Block4[2] = Dirichlet[3]; + Block4[3] = Dirichlet[4]; + int Width=3; + TheHMC.Resources.SetMomentumFilter(new DDHMCFilter(Block4,Width)); + + ////////////////////////// + // Fermion Grid + ////////////////////////// + auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr); + auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr); + + IwasakiGaugeActionR GaugeAction(beta); + + // temporarily need a gauge field + LatticeGaugeField U(GridPtr); + + // These lines are unecessary if BC are all periodic + std::vector boundary = {1,1,1,-1}; + FermionAction::ImplParams Params(boundary); + + double StoppingCondition = 1e-8; + double MDStoppingCondition = 1e-6; + double MaxCGIterations = 30000; + ConjugateGradient CG(StoppingCondition,MaxCGIterations); + ConjugateGradient MDCG(MDStoppingCondition,MaxCGIterations); + + //////////////////////////////////// + // Collect actions + //////////////////////////////////// + ActionLevel Level1(1); + ActionLevel Level2(4); + ActionLevel Level3(8); + + //////////////////////////////////// + // Strange action + //////////////////////////////////// + FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params); + FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params); + + FermionAction StrangeOpDir (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params); + FermionAction StrangePauliVillarsOpDir(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params); + StrangeOpDir.DirichletBlock(Dirichlet); + StrangePauliVillarsOpDir.DirichletBlock(Dirichlet); + + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionBdy(StrangeOpDir,StrangeOp,SFRp); + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionLocal(StrangePauliVillarsOpDir,StrangeOpDir,SFRp); + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionPVBdy(StrangePauliVillarsOp,StrangePauliVillarsOpDir,SFRp); + Level1.push_back(&StrangePseudoFermionBdy); + Level2.push_back(&StrangePseudoFermionLocal); + Level1.push_back(&StrangePseudoFermionPVBdy); + + //////////////////////////////////// + // up down action + //////////////////////////////////// + std::vector light_den; + std::vector light_num; + std::vector dirichlet_den; + std::vector dirichlet_num; + + int n_hasenbusch = hasenbusch.size(); + light_den.push_back(light_mass); dirichlet_den.push_back(0); + for(int h=0;h Numerators; + std::vector Denominators; + std::vector *> Quotients; + std::vector *> Bdys; + + for(int h=0;h(*Numerators[h],*Denominators[h],MDCG,CG)); + } 
else { + Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + } + if ( dirichlet_den[h]==1) Denominators[h]->DirichletBlock(Dirichlet); + if ( dirichlet_num[h]==1) Numerators[h]->DirichletBlock(Dirichlet); + } + + int nquo=Quotients.size(); + Level1.push_back(Bdys[0]); + Level1.push_back(Bdys[1]); + for(int h=0;h +Author: Guido Cossu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +int main(int argc, char **argv) { + using namespace Grid; + + Grid_init(&argc, &argv); + int threads = GridThread::GetThreads(); + + // Typedefs to simplify notation + typedef WilsonImplR FermionImplPolicy; + typedef MobiusFermionD FermionAction; + typedef typename FermionAction::FermionField FermionField; + + typedef Grid::XmlReader Serialiser; + + //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + IntegratorParameters MD; + // typedef GenericHMCRunner HMCWrapper; + // MD.name = std::string("Leap Frog"); + // typedef GenericHMCRunner HMCWrapper; + // MD.name = std::string("Force Gradient"); + typedef GenericHMCRunner HMCWrapper; + MD.name = std::string("MinimumNorm2"); + MD.MDsteps = 6; + MD.trajL = 1.0; + + HMCparameters HMCparams; + HMCparams.StartTrajectory = 1077; + HMCparams.Trajectories = 1; + HMCparams.NoMetropolisUntil= 0; + // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; + // HMCparams.StartingType =std::string("ColdStart"); + HMCparams.StartingType =std::string("CheckpointStart"); + HMCparams.MD = MD; + HMCWrapper TheHMC(HMCparams); + + // Grid from the command line arguments --grid and --mpi + TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition + + CheckpointerParameters CPparams; + CPparams.config_prefix = "ckpoint_DDHMC_lat"; + CPparams.rng_prefix = "ckpoint_DDHMC_rng"; + CPparams.saveInterval = 1; + CPparams.format = "IEEE64BIG"; + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + + RNGModuleParameters RNGpar; + RNGpar.serial_seeds = "1 2 3 4 5"; + RNGpar.parallel_seeds = "6 7 8 9 10"; + TheHMC.Resources.SetRNGSeeds(RNGpar); + + // Construct observables + // here there is too much indirection + typedef PlaquetteMod PlaqObs; + TheHMC.Resources.AddObservable(); + ////////////////////////////////////////////// + + const int Ls = 12; + RealD M5 = 1.8; + RealD b = 1.5; + RealD c = 0.5; + // Real beta = 2.31; + // Real light_mass = 5.4e-4; + Real beta = 2.13; + Real light_mass = 7.8e-4; + Real strange_mass = 0.02132; + Real pv_mass = 1.0; + // std::vector hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); + 
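The commented declarations above record the tuning history of the Hasenbusch ladder; the active list defines a telescoping factorization of the light determinant, det M(m_l)/det M(m_pv) = prod_h det M(m_h)/det M(m_h+1), so each ratio action sees a better-conditioned operator than the full light/Pauli-Villars ratio. A sketch of the (den, num) mass pairing the light_den/light_num loop further down appears to build, as far as the collapsed diff shows:

// Sketch of the Hasenbusch ladder turned into (den, num) mass pairs
// (illustration of the pairing logic, not a verbatim copy of the loop).
#include <cstdio>
#include <vector>

int main() {
  double light_mass = 7.8e-4, pv_mass = 1.0;
  std::vector<double> hasenbusch =
      {light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51, pv_mass};

  // den = {m_l, h_0, ..., h_{n-1}},  num = {h_0, ..., h_{n-1}, m_pv}
  std::vector<double> den, num;
  den.push_back(light_mass);
  for (double h : hasenbusch) { den.push_back(h); num.push_back(h); }
  num.push_back(pv_mass);

  // Interior masses cancel pairwise, so the product of ratios telescopes to
  // the full light/Pauli-Villars ratio; pairs with equal masses differ only
  // by the Dirichlet flag in the domain-decomposed files.
  for (size_t h = 0; h < num.size(); h++)
    std::printf("pair %zu : den %g  num %g\n", h, den[h], num[h]);
  return 0;
}

The strange sector in these files applies the same telescoping across the Dirichlet boundary instead of in the mass: schematically, det M(m_s)/det M(m_PV) = [det M(m_s)/det M_D(m_s)] x [det M_D(m_s)/det M_D(m_PV)] x [det M_D(m_PV)/det M(m_PV)], with M_D the Dirichlet-blocked operator. The three factors are, up to the fractional powers the rational actions apply, the StrangePseudoFermionBdy, StrangePseudoFermionLocal and StrangePseudoFermionPVBdy terms, the two boundary factors sitting on the finest integration level.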
std::vector hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); + + // FIXME: + // Same in MC and MD + // Need to mix precision too + OneFlavourRationalParams SFRp; // Strange + SFRp.lo = 4.0e-3; + SFRp.hi = 90.0; + SFRp.MaxIter = 60000; + SFRp.tolerance= 1.0e-8; + SFRp.mdtolerance= 1.0e-4; + SFRp.degree = 12; + SFRp.precision= 50; + SFRp.BoundsCheckFreq=0; + + OneFlavourRationalParams OFRp; // Up/down + OFRp.lo = 2.0e-5; + OFRp.hi = 90.0; + OFRp.MaxIter = 60000; + OFRp.tolerance= 1.0e-7; + OFRp.mdtolerance= 1.0e-4; + // OFRp.degree = 20; converges + // OFRp.degree = 16; + OFRp.degree = 12; + OFRp.precision= 80; + OFRp.BoundsCheckFreq=0; + + auto GridPtr = TheHMC.Resources.GetCartesian(); + auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); + + //////////////////////////////////////////////////////////////// + // Domain decomposed + //////////////////////////////////////////////////////////////// + Coordinate latt4 = GridPtr->GlobalDimensions(); + Coordinate mpi = GridPtr->ProcessorGrid(); + Coordinate shm; + + GlobalSharedMemory::GetShmDims(mpi,shm); + + Coordinate CommDim(Nd); + for(int d=0;d1 ? 1 : 0; + + Coordinate NonDirichlet(Nd+1,0); + Coordinate Dirichlet(Nd+1,0); + Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0]; + Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1]; + Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2]; + Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3]; + + Coordinate Block4(Nd); + // Block4[0] = Dirichlet[1]; + // Block4[1] = Dirichlet[2]; + // Block4[2] = Dirichlet[3]; + Block4[0] = 0; + Block4[1] = 0; + Block4[2] = 0; + Block4[3] = Dirichlet[4]; + + int Width=3; + TheHMC.Resources.SetMomentumFilter(new DDHMCFilter(Block4,Width)); + + ////////////////////////// + // Fermion Grid + ////////////////////////// + auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr); + auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr); + + IwasakiGaugeActionR GaugeAction(beta); + + // temporarily need a gauge field + LatticeGaugeField U(GridPtr); + + std::cout << GridLogMessage << " Running the HMC "<< std::endl; + TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file + TheHMC.initializeGaugeFieldAndRNGs(U); + + + // These lines are unecessary if BC are all periodic + std::vector boundary = {1,1,1,-1}; + FermionAction::ImplParams Params(boundary); + Params.dirichlet=NonDirichlet; + FermionAction::ImplParams ParamsDir(boundary); + ParamsDir.dirichlet=Dirichlet; + + // double StoppingCondition = 1e-14; + // double MDStoppingCondition = 1e-9; + double StoppingCondition = 1e-8; + double MDStoppingCondition = 1e-6; + double MaxCGIterations = 300000; + ConjugateGradient CG(StoppingCondition,MaxCGIterations); + ConjugateGradient MDCG(MDStoppingCondition,MaxCGIterations); + + //////////////////////////////////// + // Collect actions + //////////////////////////////////// + ActionLevel Level1(1); + ActionLevel Level2(4); + ActionLevel Level3(8); + + //////////////////////////////////// + // Strange action + //////////////////////////////////// + FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params); + FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params); + + FermionAction StrangeOpDir (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, ParamsDir); + FermionAction StrangePauliVillarsOpDir(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, ParamsDir); + + OneFlavourEvenOddRatioRationalPseudoFermionAction 
StrangePseudoFermionBdy(StrangeOpDir,StrangeOp,SFRp); + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionLocal(StrangePauliVillarsOpDir,StrangeOpDir,SFRp); + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionPVBdy(StrangePauliVillarsOp,StrangePauliVillarsOpDir,SFRp); + Level1.push_back(&StrangePseudoFermionBdy); + Level2.push_back(&StrangePseudoFermionLocal); + Level1.push_back(&StrangePseudoFermionPVBdy); + + //////////////////////////////////// + // up down action + //////////////////////////////////// + std::vector light_den; + std::vector light_num; + std::vector dirichlet_den; + std::vector dirichlet_num; + + int n_hasenbusch = hasenbusch.size(); + light_den.push_back(light_mass); dirichlet_den.push_back(0); + for(int h=0;h Numerators; + std::vector Denominators; + std::vector *> Quotients; + std::vector *> Bdys; + + for(int h=0;h(*Numerators[h],*Denominators[h],MDCG,CG)); + } else { + Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + } + } + + int nquo=Quotients.size(); + Level1.push_back(Bdys[0]); + Level1.push_back(Bdys[1]); + for(int h=0;h SdagS(StrangeOp); + HighBoundCheck(SdagS,vec,SFRp.hi); + ChebyBoundsCheck(SdagS,vec,SFRp.lo,SFRp.hi); + std::cout << "Strange inversion"<Mass() < UdagU(*Denominators[0]); + HighBoundCheck(UdagU,vec,OFRp.hi); + ChebyBoundsCheck(UdagU,vec,OFRp.lo,OFRp.hi); + std::cout << "light inversion"< SddagSd(StrangeOpDir); + HighBoundCheck(SddagSd,vec,SFRp.hi); + ChebyBoundsCheck(SddagSd,vec,SFRp.lo,SFRp.hi); + std::cout << "strange dirichlet inversion"<Mass()< UddagUd(*Numerators[0]); + HighBoundCheck(UddagUd,vec,OFRp.hi); + ChebyBoundsCheck(UddagUd,vec,OFRp.lo,OFRp.hi); + std::cout << "light dirichlet inversion"< Cheby(bound,90.,order); + FunctionHermOp OpCheby(Cheby,UddagUd); + PlainHermOp Op (UddagUd); + ImplicitlyRestartedLanczos IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt); + std::vector eval(Nm); + std::vector evec(Nm,rbgrid); + FermionField src(rbgrid);src = 1.0; + IRL.calc(eval,evec,src,Nconv); + + FermionField tmp(rbgrid); + FermionField ftmp(grid); + FermionField ftmp4(grid4); + for(int ev=0;ev Cheby(bound,90.,order); + FunctionHermOp OpCheby(Cheby,UdagU); + PlainHermOp Op (UdagU); + ImplicitlyRestartedLanczos IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt); + std::vector eval(Nm); + std::vector evec(Nm,rbgrid); + FermionField src(rbgrid); src = 1.0; + IRL.calc(eval,evec,src,Nconv); + + FermionField tmp(rbgrid); + FermionField ftmp(grid); + FermionField ftmp4(grid4); + for(int e=0;e +Author: Guido Cossu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +NAMESPACE_BEGIN(Grid); + +template + class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction { + public: + typedef typename FermionOperatorD::FermionField FieldD; + typedef typename FermionOperatorF::FermionField FieldF; + + using OperatorFunction::operator(); + + RealD Tolerance; + RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed + Integer MaxInnerIterations; + Integer MaxOuterIterations; + GridBase* SinglePrecGrid4; //Grid for single-precision fields + GridBase* SinglePrecGrid5; //Grid for single-precision fields + RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance + + FermionOperatorF &FermOpF; + FermionOperatorD &FermOpD;; + SchurOperatorF &LinOpF; + SchurOperatorD &LinOpD; + + Integer TotalInnerIterations; //Number of inner CG iterations + Integer TotalOuterIterations; //Number of restarts + Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step + + MixedPrecisionConjugateGradientOperatorFunction(RealD tol, + Integer maxinnerit, + Integer maxouterit, + GridBase* _sp_grid4, + GridBase* _sp_grid5, + FermionOperatorF &_FermOpF, + FermionOperatorD &_FermOpD, + SchurOperatorF &_LinOpF, + SchurOperatorD &_LinOpD): + LinOpF(_LinOpF), + LinOpD(_LinOpD), + FermOpF(_FermOpF), + FermOpD(_FermOpD), + Tolerance(tol), + InnerTolerance(tol), + MaxInnerIterations(maxinnerit), + MaxOuterIterations(maxouterit), + SinglePrecGrid4(_sp_grid4), + SinglePrecGrid5(_sp_grid5), + OuterLoopNormMult(100.) 
+ { + /* Debugging instances of objects; references are stored + std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " < &LinOpU, const FieldD &src, FieldD &psi) { + + std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<(&LinOpU); + + // std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <_Mat)<_Mat)==&(LinOpD._Mat)); + + //////////////////////////////////////////////////////////////////////////////////// + // Must snarf a single precision copy of the gauge field in Linop_d argument + //////////////////////////////////////////////////////////////////////////////////// + typedef typename FermionOperatorF::GaugeField GaugeFieldF; + typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF; + typedef typename FermionOperatorD::GaugeField GaugeFieldD; + typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD; + + GridBase * GridPtrF = SinglePrecGrid4; + GridBase * GridPtrD = FermOpD.Umu.Grid(); + GaugeFieldF U_f (GridPtrF); + GaugeLinkFieldF Umu_f(GridPtrF); + // std::cout << " Dim gauge field "<Nd()<Nd()<(FermOpD.Umu, mu); + precisionChange(Umu_f,Umu_d); + PokeIndex(FermOpF.Umu, Umu_f, mu); + } + pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu); + pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu); + + //////////////////////////////////////////////////////////////////////////////////// + // Make a mixed precision conjugate gradient + //////////////////////////////////////////////////////////////////////////////////// +#if 1 + RealD delta=1.e-4; + std::cout << GridLogMessage << "Calling reliable update Conjugate Gradient" < MPCG(Tolerance,MaxInnerIterations*MaxOuterIterations,delta,SinglePrecGrid5,LinOpF,LinOpD); +#else + std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" < MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD); +#endif + MPCG(src,psi); + } + }; + +NAMESPACE_END(Grid); + + +int main(int argc, char **argv) { + using namespace Grid; + + Grid_init(&argc, &argv); + + CartesianCommunicator::BarrierWorld(); + std::cout << GridLogMessage << " Clock skew check" < HMCWrapper; + // MD.name = std::string("Leap Frog"); + // typedef GenericHMCRunner HMCWrapper; + // MD.name = std::string("Force Gradient"); + typedef GenericHMCRunner HMCWrapper; + MD.name = std::string("MinimumNorm2"); + MD.MDsteps = 6; + MD.trajL = 1.0; + + HMCparameters HMCparams; + HMCparams.StartTrajectory = 1077; + HMCparams.Trajectories = 1; + HMCparams.NoMetropolisUntil= 0; + // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; + // HMCparams.StartingType =std::string("ColdStart"); + HMCparams.StartingType =std::string("CheckpointStart"); + HMCparams.MD = MD; + HMCWrapper TheHMC(HMCparams); + + // Grid from the command line arguments --grid and --mpi + TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition + + CheckpointerParameters CPparams; + CPparams.config_prefix = "ckpoint_DDHMC_lat"; + CPparams.rng_prefix = "ckpoint_DDHMC_rng"; + CPparams.saveInterval = 1; + CPparams.format = "IEEE64BIG"; + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + std::cout << "loaded NERSC checpointer"< PlaqObs; + TheHMC.Resources.AddObservable(); + ////////////////////////////////////////////// + + const int Ls = 12; + RealD M5 = 1.8; + RealD b = 1.5; + RealD c = 0.5; + Real beta = 2.31; + // Real light_mass = 5.4e-4; + Real light_mass = 7.8e-4; + Real strange_mass = 0.02132; + Real pv_mass = 1.0; + // std::vector hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 
0.108, 0.25, 0.51 , pv_mass }); + std::vector hasenbusch({ light_mass, 5e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); + + // FIXME: + // Same in MC and MD + // Need to mix precision too + OneFlavourRationalParams SFRp; // Strange + SFRp.lo = 4.0e-3; + SFRp.hi = 90.0; + SFRp.MaxIter = 60000; + SFRp.tolerance= 1.0e-8; + SFRp.mdtolerance= 1.0e-3; + SFRp.degree = 12; + SFRp.precision= 50; + SFRp.BoundsCheckFreq=0; + + OneFlavourRationalParams OFRp; // Up/down + OFRp.lo = 2.0e-5; + OFRp.hi = 90.0; + OFRp.MaxIter = 60000; + OFRp.tolerance= 1.0e-8; + OFRp.mdtolerance= 1.0e-3; + // OFRp.degree = 20; converges + // OFRp.degree = 16; + OFRp.degree = 12; + OFRp.precision= 80; + OFRp.BoundsCheckFreq=0; + + auto GridPtr = TheHMC.Resources.GetCartesian(); + auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); + + typedef SchurDiagMooeeOperator LinearOperatorF; + typedef SchurDiagMooeeOperator LinearOperatorD; + typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG; + + //////////////////////////////////////////////////////////////// + // Domain decomposed + //////////////////////////////////////////////////////////////// + Coordinate latt4 = GridPtr->GlobalDimensions(); + Coordinate mpi = GridPtr->ProcessorGrid(); + Coordinate shm; + + GlobalSharedMemory::GetShmDims(mpi,shm); + + Coordinate CommDim(Nd); + for(int d=0;d1 ? 1 : 0; + + Coordinate NonDirichlet(Nd+1,0); + Coordinate Dirichlet(Nd+1,0); + Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0]; + Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1]; + Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2]; + Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3]; + + Coordinate Block4(Nd); + Block4[0] = Dirichlet[1]; + Block4[1] = Dirichlet[2]; + Block4[2] = Dirichlet[3]; + Block4[3] = Dirichlet[4]; + + int Width=4; + TheHMC.Resources.SetMomentumFilter(new DDHMCFilter(Block4,Width)); + + ////////////////////////// + // Fermion Grids + ////////////////////////// + auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr); + auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr); + + Coordinate simdF = GridDefaultSimd(Nd,vComplexF::Nsimd()); + auto GridPtrF = SpaceTimeGrid::makeFourDimGrid(latt4,simdF,mpi); + auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF); + auto FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF); + auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF); + + IwasakiGaugeActionR GaugeAction(beta); + + // temporarily need a gauge field + LatticeGaugeField U(GridPtr); + LatticeGaugeFieldF UF(GridPtrF); + + std::cout << GridLogMessage << " Running the HMC "<< std::endl; + TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file + TheHMC.initializeGaugeFieldAndRNGs(U); + std::cout << "loaded NERSC gauge field"< boundary = {1,1,1,-1}; + FermionAction::ImplParams Params(boundary); + Params.dirichlet=NonDirichlet; + FermionAction::ImplParams ParamsDir(boundary); + ParamsDir.dirichlet=Dirichlet; + + // double StoppingCondition = 1e-14; + // double MDStoppingCondition = 1e-9; + double StoppingCondition = 1e-10; + double MDStoppingCondition = 1e-6; + double MDStoppingConditionLoose = 1e-6; + double MaxCGIterations = 300000; + ConjugateGradient CG(StoppingCondition,MaxCGIterations); + ConjugateGradient MDCG(MDStoppingCondition,MaxCGIterations); + + //////////////////////////////////// + // Collect actions + //////////////////////////////////// + ActionLevel Level1(1); + ActionLevel Level2(4); + ActionLevel Level3(8); + + //////////////////////////////////// + // Strange action + 
//////////////////////////////////// + FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params); + FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params); + + FermionAction StrangeOpDir (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, ParamsDir); + FermionAction StrangePauliVillarsOpDir(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, ParamsDir); + + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionBdy(StrangeOpDir,StrangeOp,SFRp); + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionLocal(StrangePauliVillarsOpDir,StrangeOpDir,SFRp); + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionPVBdy(StrangePauliVillarsOp,StrangePauliVillarsOpDir,SFRp); + Level1.push_back(&StrangePseudoFermionBdy); + Level2.push_back(&StrangePseudoFermionLocal); + Level1.push_back(&StrangePseudoFermionPVBdy); + + //////////////////////////////////// + // up down action + //////////////////////////////////// + std::vector light_den; + std::vector light_num; + std::vector dirichlet_den; + std::vector dirichlet_num; + + int n_hasenbusch = hasenbusch.size(); + light_den.push_back(light_mass); dirichlet_den.push_back(0); + for(int h=0;h Numerators; + std::vector Denominators; + std::vector DenominatorsF; + std::vector *> Quotients; + std::vector *> Bdys; + std::vector ActionMPCG; + std::vector MPCG; + + typedef SchurDiagMooeeOperator LinearOperatorF; + typedef SchurDiagMooeeOperator LinearOperatorD; + std::vector LinOpD; + std::vector LinOpF; + + for(int h=0;h(*Numerators[h],*Denominators[h],MDCG,CG)); + Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],CG)); + } else { + Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + } + } + + int nquo=Quotients.size(); + Level1.push_back(Bdys[0]); + Level1.push_back(Bdys[1]); + for(int h=0;h +Author: Guido Cossu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +NAMESPACE_BEGIN(Grid); + +template + class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction { + public: + typedef typename FermionOperatorD::FermionField FieldD; + typedef typename FermionOperatorF::FermionField FieldF; + + using OperatorFunction::operator(); + + RealD Tolerance; + RealD InnerTolerance; //Initial tolerance for inner CG. 
Defaults to Tolerance but can be changed + Integer MaxInnerIterations; + Integer MaxOuterIterations; + GridBase* SinglePrecGrid4; //Grid for single-precision fields + GridBase* SinglePrecGrid5; //Grid for single-precision fields + RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance + + FermionOperatorF &FermOpF; + FermionOperatorD &FermOpD;; + SchurOperatorF &LinOpF; + SchurOperatorD &LinOpD; + + Integer TotalInnerIterations; //Number of inner CG iterations + Integer TotalOuterIterations; //Number of restarts + Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step + + MixedPrecisionConjugateGradientOperatorFunction(RealD tol, + Integer maxinnerit, + Integer maxouterit, + GridBase* _sp_grid4, + GridBase* _sp_grid5, + FermionOperatorF &_FermOpF, + FermionOperatorD &_FermOpD, + SchurOperatorF &_LinOpF, + SchurOperatorD &_LinOpD): + LinOpF(_LinOpF), + LinOpD(_LinOpD), + FermOpF(_FermOpF), + FermOpD(_FermOpD), + Tolerance(tol), + InnerTolerance(tol), + MaxInnerIterations(maxinnerit), + MaxOuterIterations(maxouterit), + SinglePrecGrid4(_sp_grid4), + SinglePrecGrid5(_sp_grid5), + OuterLoopNormMult(100.) + { + /* Debugging instances of objects; references are stored + std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " < &LinOpU, const FieldD &src, FieldD &psi) { + + std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<(&LinOpU); + + // std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <_Mat)<_Mat)==&(LinOpD._Mat)); + + //////////////////////////////////////////////////////////////////////////////////// + // Must snarf a single precision copy of the gauge field in Linop_d argument + //////////////////////////////////////////////////////////////////////////////////// + typedef typename FermionOperatorF::GaugeField GaugeFieldF; + typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF; + typedef typename FermionOperatorD::GaugeField GaugeFieldD; + typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD; + + GridBase * GridPtrF = SinglePrecGrid4; + GridBase * GridPtrD = FermOpD.Umu.Grid(); + GaugeFieldF U_f (GridPtrF); + GaugeLinkFieldF Umu_f(GridPtrF); + // std::cout << " Dim gauge field "<Nd()<Nd()<(FermOpD.Umu, mu); + precisionChange(Umu_f,Umu_d); + PokeIndex(FermOpF.Umu, Umu_f, mu); + } + pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu); + pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu); + + //////////////////////////////////////////////////////////////////////////////////// + // Make a mixed precision conjugate gradient + //////////////////////////////////////////////////////////////////////////////////// +#if 1 + RealD delta=1.e-4; + std::cout << GridLogMessage << "Calling reliable update Conjugate Gradient" < MPCG(Tolerance,MaxInnerIterations*MaxOuterIterations,delta,SinglePrecGrid5,LinOpF,LinOpD); +#else + std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" < MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD); +#endif + MPCG(src,psi); + } + }; + +NAMESPACE_END(Grid); + + +int main(int argc, char **argv) { + using namespace Grid; + + Grid_init(&argc, &argv); + + CartesianCommunicator::BarrierWorld(); + std::cout << GridLogMessage << " Clock skew check" < HMCWrapper; + // MD.name = std::string("Leap Frog"); + typedef GenericHMCRunner HMCWrapper; + MD.name = std::string("Force Gradient"); + //typedef GenericHMCRunner HMCWrapper; + // 
MD.name = std::string("MinimumNorm2");
+  // TrajL = 2
+  // 4/2 => 0.6 dH
+  // 3/3 => 0.8 dH .. depth 3, slower
+  //MD.MDsteps = 4;
+  MD.MDsteps = 12;
+  MD.trajL = 0.5;
+
+  HMCparameters HMCparams;
+  HMCparams.StartTrajectory = 1077;
+  HMCparams.Trajectories = 1;
+  HMCparams.NoMetropolisUntil= 0;
+  // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
+  // HMCparams.StartingType =std::string("ColdStart");
+  HMCparams.StartingType =std::string("CheckpointStart");
+  HMCparams.MD = MD;
+  HMCWrapper TheHMC(HMCparams);
+
+  // Grid from the command line arguments --grid and --mpi
+  TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
+
+  CheckpointerParameters CPparams;
+  CPparams.config_prefix = "ckpoint_HMC_lat";
+  CPparams.rng_prefix = "ckpoint_HMC_rng";
+  CPparams.saveInterval = 1;
+  CPparams.format = "IEEE64BIG";
+  TheHMC.Resources.LoadNerscCheckpointer(CPparams);
+  std::cout << "loaded NERSC checkpointer"< PlaqObs;
+  TheHMC.Resources.AddObservable();
+  //////////////////////////////////////////////
+
+  const int Ls = 12;
+  RealD M5 = 1.8;
+  RealD b = 1.5;
+  RealD c = 0.5;
+  Real beta = 2.13;
+  // Real light_mass = 5.4e-4;
+  Real light_mass = 7.8e-4;
+  Real strange_mass = 0.0362;
+  Real pv_mass = 1.0;
+  // std::vector hasenbusch({ 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
+  // std::vector hasenbusch({ light_mass, 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
+  std::vector hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
+  // std::vector hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
+
+  auto GridPtr = TheHMC.Resources.GetCartesian();
+  auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
+
+  typedef SchurDiagMooeeOperator LinearOperatorF;
+  typedef SchurDiagMooeeOperator LinearOperatorD;
+  typedef SchurDiagMooeeOperator LinearOperatorEOFAF;
+  typedef SchurDiagMooeeOperator LinearOperatorEOFAD;
+  typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG;
+  typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG_EOFA;
+
+  ////////////////////////////////////////////////////////////////
+  // Domain decomposed
+  ////////////////////////////////////////////////////////////////
+  Coordinate latt4 = GridPtr->GlobalDimensions();
+  Coordinate mpi = GridPtr->ProcessorGrid();
+  Coordinate shm;
+
+  GlobalSharedMemory::GetShmDims(mpi,shm);
+
+  //////////////////////////
+  // Fermion Grids
+  //////////////////////////
+  auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
+  auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
+
+  Coordinate simdF = GridDefaultSimd(Nd,vComplexF::Nsimd());
+  auto GridPtrF = SpaceTimeGrid::makeFourDimGrid(latt4,simdF,mpi);
+  auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF);
+  auto FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF);
+  auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF);
+
+  IwasakiGaugeActionR GaugeAction(beta);
+
+  // temporarily need a gauge field
+  LatticeGaugeFieldD U(GridPtr); U=Zero();
+  LatticeGaugeFieldF UF(GridPtrF); UF=Zero();
+
+  std::cout << GridLogMessage << " Running the HMC "<< std::endl;
+  TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
+  TheHMC.initializeGaugeFieldAndRNGs(U);
+  std::cout << "loaded NERSC gauge field"< boundary = {1,1,1,-1};
+  FermionAction::ImplParams Params(boundary);
+  FermionActionF::ImplParams ParamsF(boundary);
+
+  // double StoppingCondition = 1e-14;
+  // double MDStoppingCondition = 1e-9;
+  double StoppingCondition = 1e-8;
+  double
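+  // Illustrative note (assumed rationale): the accept/reject solves keep
+  // the tight 1e-8 stopping condition while the MD force solves below use
+  // looser 1e-7 tolerances; the Metropolis step corrects the Hamiltonian
+  // error, so the force only needs to be accurate enough to keep the
+  // integrator reversible.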
MDStoppingCondition = 1e-7; + double MDStoppingConditionLoose = 1e-7; + double MDStoppingConditionStrange = 1e-7; + double MaxCGIterations = 300000; + ConjugateGradient CG(StoppingCondition,MaxCGIterations); + ConjugateGradient MDCG(MDStoppingCondition,MaxCGIterations); + + //////////////////////////////////// + // Collect actions + //////////////////////////////////// + // ActionLevel Level1(1); + ActionLevel Level2(1); + ActionLevel Level3(15); + + //////////////////////////////////// + // Strange action + //////////////////////////////////// + FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params); + FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params); + + // Probably dominates the force - back to EOFA. + OneFlavourRationalParams SFRp; + SFRp.lo = 0.1; + SFRp.hi = 30.0; + SFRp.MaxIter = 10000; + SFRp.tolerance= 1.0e-8; + SFRp.mdtolerance= 2.0e-6; + SFRp.degree = 10; + SFRp.precision= 50; + + MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); + MobiusEOFAFermionF Strange_Op_LF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); + MobiusEOFAFermionD Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); + MobiusEOFAFermionF Strange_Op_RF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); + ConjugateGradient ActionCG(StoppingCondition,MaxCGIterations); + ConjugateGradient DerivativeCG(MDStoppingCondition,MaxCGIterations); + LinearOperatorEOFAD Strange_LinOp_L (Strange_Op_L); + LinearOperatorEOFAD Strange_LinOp_R (Strange_Op_R); + LinearOperatorEOFAF Strange_LinOp_LF(Strange_Op_LF); + LinearOperatorEOFAF Strange_LinOp_RF(Strange_Op_RF); + + const int MX_inner = 1000; + MxPCG_EOFA ActionCGL(StoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_LF,Strange_Op_L, + Strange_LinOp_LF,Strange_LinOp_L); + + MxPCG_EOFA DerivativeCGL(MDStoppingConditionStrange, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_LF,Strange_Op_L, + Strange_LinOp_LF,Strange_LinOp_L); + + MxPCG_EOFA ActionCGR(StoppingCondition, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_RF,Strange_Op_R, + Strange_LinOp_RF,Strange_LinOp_R); + + MxPCG_EOFA DerivativeCGR(MDStoppingConditionStrange, + MX_inner, + MaxCGIterations, + GridPtrF, + FrbGridF, + Strange_Op_RF,Strange_Op_R, + Strange_LinOp_RF,Strange_LinOp_R); + + ExactOneFlavourRatioPseudoFermionAction + EOFA(Strange_Op_L, Strange_Op_R, + ActionCG, + ActionCGL, ActionCGR, + DerivativeCGL, DerivativeCGR, + SFRp, true); + Level2.push_back(&EOFA); + + //////////////////////////////////// + // up down action + //////////////////////////////////// + std::vector light_den; + std::vector light_num; + + int n_hasenbusch = hasenbusch.size(); + light_den.push_back(light_mass); + for(int h=0;h Numerators; + std::vector Denominators; + std::vector NumeratorsF; + std::vector DenominatorsF; + std::vector *> Quotients; + std::vector ActionMPCG; + std::vector MPCG; + +#define MIXED_PRECISION +#ifdef MIXED_PRECISION + std::vector *> Bdys; +#else + std::vector *> Bdys; +#endif + + typedef SchurDiagMooeeOperator LinearOperatorF; + typedef SchurDiagMooeeOperator LinearOperatorD; + std::vector LinOpD; + std::vector LinOpF; + + for(int h=0;h(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],CG)); + 
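+  // Illustrative note: each Hasenbusch ratio gets its own double/single
+  // operator pair (LinOpD[h]/LinOpF[h]) and two mixed-precision CG
+  // wrappers; MPCG[h] drives the MD force at the loose tolerance and
+  // ActionMPCG[h] the action evaluation at the tight one, both restarting
+  // single-precision inner solves against a double-precision residual.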
} + int nquo=Quotients.size(); + for(int h=0;h + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +int main(int argc, char **argv) +{ + using namespace Grid; + + Grid_init(&argc, &argv); + + Coordinate latt4 = GridDefaultLatt(); + Coordinate mpi = GridDefaultMpi(); + Coordinate simd = GridDefaultSimd(Nd,vComplexD::Nsimd()); + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4,simd,mpi); + + GridSerialRNG sRNG; sRNG.SeedUniqueString(std::string("The Serial RNG")); + GridParallelRNG pRNG(UGrid); pRNG.SeedUniqueString(std::string("The 4D RNG")); + + std::string rngfile("ckpoint_rng.0"); + NerscIO::writeRNGState(sRNG, pRNG, rngfile); + + Grid_finalize(); +} + + + diff --git a/README.md b/README.md index 4af52d78..29b99671 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ -# Grid [![Teamcity status](http://ci.cliath.ph.ed.ac.uk/app/rest/builds/aggregated/strob:(buildType:(affectedProject(id:GridBasedSoftware_Grid)),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=GridBasedSoftware_Grid&tab=projectOverview) - +# Grid **Data parallel C++ mathematical object library.** +[![Teamcity status](https://ci.dev.dirac.ed.ac.uk/guestAuth/app/rest/builds/aggregated/strob:(buildType:(affectedProject(id:GridBasedSoftware_Grid)),branch:default:true)/statusIcon.svg)](https://ci.dev.dirac.ed.ac.uk/project/GridBasedSoftware_Grid?mode=builds) + License: GPL v2. Last update June 2017. diff --git a/TODO b/TODO index e23e040d..750deb55 100644 --- a/TODO +++ b/TODO @@ -1,3 +1,24 @@ +- - Slice sum optimisation & A2A - atomic addition +- - Also faster non-atomic reduction +- - Remaining PRs +- - DDHMC + - - MixedPrec is the action eval, high precision + - - MixedPrecCleanup is the force eval, low precision + +================= +================= +Lattice_basis.h -- > HIP and SYCL GPU code + + +====== +DDHMC +====== +-- Reliable Update CG - DONE +-- Multishift Mixed Precision - DONE +-- Pole dependent residual - DONE + + +======= -- comms threads issue?? -- Part done: Staggered kernel performance on GPU @@ -8,7 +29,7 @@ General - Make representations code take Gimpl - Simplify the HMCand remove modules - Lattice_arith - are the mult, mac etc.. still needed after ET engine? 
-- Lattice_rng - faster local only loop in init +- Lattice_rng - faster local only loop in init -- DDHMC - Audit: accelerate A2Autils -- off critical path for HMC ========================================================= diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index 2f7acb56..2b1f6261 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -420,7 +420,6 @@ public: FGrid->Broadcast(0,&ncall,sizeof(ncall)); // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"< t_time(ncall); @@ -589,7 +588,6 @@ public: FGrid->Broadcast(0,&ncall,sizeof(ncall)); // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"< t_time(ncall); diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc index ccffb564..00526893 100644 --- a/benchmarks/Benchmark_comms.cc +++ b/benchmarks/Benchmark_comms.cc @@ -217,10 +217,10 @@ int main (int argc, char ** argv) dbytes+= Grid.StencilSendToRecvFromBegin(requests, (void *)&xbuf[mu][0], - xmit_to_rank, + xmit_to_rank,1, (void *)&rbuf[mu][0], - recv_from_rank, - bytes,mu); + recv_from_rank,1, + bytes,bytes,mu); comm_proc = mpi_layout[mu]-1; @@ -228,10 +228,10 @@ int main (int argc, char ** argv) dbytes+= Grid.StencilSendToRecvFromBegin(requests, (void *)&xbuf[mu+4][0], - xmit_to_rank, + xmit_to_rank,1, (void *)&rbuf[mu+4][0], - recv_from_rank, - bytes,mu+4); + recv_from_rank,1, + bytes,bytes,mu+4); } } @@ -309,10 +309,10 @@ int main (int argc, char ** argv) dbytes+= Grid.StencilSendToRecvFromBegin(requests, (void *)&xbuf[mu][0], - xmit_to_rank, + xmit_to_rank,1, (void *)&rbuf[mu][0], - recv_from_rank, - bytes,mu); + recv_from_rank,1, + bytes,bytes,mu); Grid.StencilSendToRecvFromComplete(requests,mu); requests.resize(0); @@ -322,10 +322,10 @@ int main (int argc, char ** argv) dbytes+= Grid.StencilSendToRecvFromBegin(requests, (void *)&xbuf[mu+4][0], - xmit_to_rank, + xmit_to_rank,1, (void *)&rbuf[mu+4][0], - recv_from_rank, - bytes,mu+4); + recv_from_rank,1, + bytes,bytes,mu+4); Grid.StencilSendToRecvFromComplete(requests,mu+4); requests.resize(0); @@ -411,8 +411,8 @@ int main (int argc, char ** argv) Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); } int tid = omp_get_thread_num(); - tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, - (void *)&rbuf[dir][0], recv_from_rank, bytes,tid); + tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,1, + (void *)&rbuf[dir][0], recv_from_rank,1, bytes,tid); thread_critical { dbytes+=tbytes; } } diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc index 707f330c..55135322 100644 --- a/benchmarks/Benchmark_dwf.cc +++ b/benchmarks/Benchmark_dwf.cc @@ -167,7 +167,7 @@ int main (int argc, char ** argv) std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <Barrier(); - Dw.ZeroCounters(); Dw.Dhop(src,result,0); std::cout<Barrier(); @@ -233,7 +230,6 @@ int main (int argc, char ** argv) exit(-1); } assert (norm2(err)< 1.0e-4 ); - Dw.Report(); } if (1) @@ -295,7 +291,7 @@ int main (int argc, char ** argv) // S-direction is INNERMOST and takes no part in the parity. 
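+ // Illustrative note: a recurring change in these benchmarks is that the
+ // old per-operator profiling hooks (Dw.ZeroCounters()/Dw.Report() and
+ // their Cayley counterparts) are deleted outright; timing is now taken
+ // directly with usecond() around the kernel loops.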
std::cout << GridLogMessage<< "*********************************************************" <Barrier(); Dw.DhopEO(src_o,r_e,DaggerNo); double t0=usecond(); @@ -330,7 +325,6 @@ int main (int argc, char ** argv) std::cout< -struct scal { - d internal; +//////////////////////// +/// Move to domains //// +//////////////////////// + +Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT }; - Gamma::Algebra Gmu [] = { - Gamma::Algebra::GammaX, - Gamma::Algebra::GammaY, - Gamma::Algebra::GammaZ, - Gamma::Algebra::GammaT - }; - +void Benchmark(int Ls, Coordinate Dirichlet); int main (int argc, char ** argv) { @@ -52,39 +52,115 @@ int main (int argc, char ** argv) int threads = GridThread::GetThreads(); - Coordinate latt4 = GridDefaultLatt(); int Ls=16; - for(int i=0;i> Ls; } + } + ////////////////// + // With comms + ////////////////// + Coordinate Dirichlet(Nd+1,0); + + std::cout << "\n\n\n\n\n\n" <1 ? 1 : 0; + Dirichlet[0] = 0; + Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0]; + Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1]; + Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2]; + Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3]; + + Benchmark(Ls,Dirichlet); + + std::cout << "\n\n\n\n\n\n" <1 ? 1 : 0; + Dirichlet[0] = 0; + Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0]; + Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1]; + Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2]; + Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3]; + + Benchmark(Ls,Dirichlet); + + Grid_finalize(); + exit(0); +} +void Benchmark(int Ls, Coordinate Dirichlet) +{ + Coordinate latt4 = GridDefaultLatt(); GridLogLayout(); long unsigned int single_site_flops = 8*Nc*(7+16*Nc); - - GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); +#define SINGLE +#ifdef SINGLE + typedef vComplexF Simd; + typedef LatticeFermionF FermionField; + typedef LatticeGaugeFieldF GaugeField; + typedef LatticeColourMatrixF ColourMatrixField; + typedef DomainWallFermionF FermionAction; +#endif +#ifdef DOUBLE + typedef vComplexD Simd; + typedef LatticeFermionD FermionField; + typedef LatticeGaugeFieldD GaugeField; + typedef LatticeColourMatrixD ColourMatrixField; + typedef DomainWallFermionD FermionAction; +#endif +#ifdef DOUBLE2 + typedef vComplexD2 Simd; + typedef LatticeFermionD2 FermionField; + typedef LatticeGaugeFieldD2 GaugeField; + typedef LatticeColourMatrixD2 ColourMatrixField; + typedef DomainWallFermionD2 FermionAction; +#endif + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,Simd::Nsimd()),GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); - std::cout << GridLogMessage << "Making s innermost grids"< seeds4({1,2,3,4}); - std::vector seeds5({5,6,7,8}); - std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl; GridParallelRNG RNG4(UGrid); RNG4.SeedUniqueString(std::string("The 4D RNG")); + std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl; GridParallelRNG RNG5(FGrid); RNG5.SeedUniqueString(std::string("The 5D RNG")); - std::cout << GridLogMessage << "Initialised RNGs" << std::endl; - LatticeFermionF src (FGrid); random(RNG5,src); + + FermionField src (FGrid); random(RNG5,src); 
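+ // Illustrative note: the SINGLE/DOUBLE/DOUBLE2 blocks above fix the
+ // Simd, field and fermion-action types for the whole benchmark at
+ // compile time; switching '#define SINGLE' to '#define DOUBLE' (or
+ // 'DOUBLE2' for the vComplexD2 variant) rebuilds it in double precision.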
#if 0 src = Zero(); { @@ -100,37 +176,38 @@ int main (int argc, char ** argv) src = src*N2; #endif - - LatticeFermionF result(FGrid); result=Zero(); - LatticeFermionF ref(FGrid); ref=Zero(); - LatticeFermionF tmp(FGrid); - LatticeFermionF err(FGrid); + FermionField result(FGrid); result=Zero(); + FermionField ref(FGrid); ref=Zero(); + FermionField tmp(FGrid); + FermionField err(FGrid); std::cout << GridLogMessage << "Drawing gauge field" << std::endl; - LatticeGaugeFieldF Umu(UGrid); + GaugeField Umu(UGrid); + GaugeField UmuCopy(UGrid); SU::HotConfiguration(RNG4,Umu); + UmuCopy=Umu; std::cout << GridLogMessage << "Random gauge initialised " << std::endl; -#if 0 - Umu=1.0; - for(int mu=0;mu(Umu,mu); - // if (mu !=2 ) ttmp = 0; - // ttmp = ttmp* pow(10.0,mu); - PokeIndex(Umu,ttmp,mu); - } - std::cout << GridLogMessage << "Forced to diagonal " << std::endl; -#endif + //////////////////////////////////// + // Apply BCs + //////////////////////////////////// + Coordinate Block(4); + for(int d=0;d<4;d++) Block[d]= Dirichlet[d+1]; + + std::cout << GridLogMessage << "Applying BCs for Dirichlet Block5 " << Dirichlet << std::endl; + std::cout << GridLogMessage << "Applying BCs for Dirichlet Block4 " << Block << std::endl; + + DirichletFilter Filter(Block); + Filter.applyFilter(Umu); + //////////////////////////////////// // Naive wilson implementation //////////////////////////////////// - // replicate across fifth dimension - // LatticeGaugeFieldF Umu5d(FGrid); - std::vector U(4,UGrid); + std::vector U(4,UGrid); for(int mu=0;mu(Umu,mu); } + std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl; if (1) @@ -177,10 +254,8 @@ int main (int argc, char ** argv) std::cout << GridLogMessage<< "*****************************************************************" <Barrier(); - Dw.ZeroCounters(); Dw.Dhop(src,result,0); std::cout<Barrier(); @@ -210,8 +287,8 @@ int main (int argc, char ** argv) double volume=Ls; for(int mu=0;mu1.0e-4) ) { - /* - std::cout << "RESULT\n " << result<1.0e-4) ) { std::cout<Barrier(); exit(-1); } - assert (norm2(err)< 1.0e-4 ); - Dw.Report(); + assert (n2e< 1.0e-4 ); } if (1) @@ -286,26 +353,27 @@ int main (int argc, char ** argv) } ref = -0.5*ref; } - // dump=1; - Dw.Dhop(src,result,1); + + Dw.Dhop(src,result,DaggerYes); + + std::cout << GridLogMessage << "----------------------------------------------------------------" << std::endl; std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl; + std::cout << GridLogMessage << "----------------------------------------------------------------" << std::endl; + std::cout<1.0e-4)){ -/* - std::cout<< "DAG RESULT\n " <Barrier(); Dw.DhopEO(src_o,r_e,DaggerNo); double t0=usecond(); for(int i=0;iBarrier(); @@ -352,7 +411,6 @@ int main (int argc, char ** argv) std::cout<1.0e-4)){ - /* - std::cout<< "Deo RESULT\n " < + Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#ifdef GRID_CUDA +#define CUDA_PROFILE +#endif + +#ifdef CUDA_PROFILE +#include +#endif + +using namespace std; +using namespace Grid; + +template +struct scal { + d internal; +}; + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT + }; + + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + + int threads = GridThread::GetThreads(); + + Coordinate latt4 = GridDefaultLatt(); + int Ls=16; + for(int i=0;i> Ls; + } + + GridLogLayout(); + + long unsigned int single_site_flops = 8*Nc*(7+16*Nc); + + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::cout << GridLogMessage << "Making s innermost grids"< seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + + std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl; + GridParallelRNG RNG4(UGrid); RNG4.SeedUniqueString(std::string("The 4D RNG")); + std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl; + GridParallelRNG RNG5(FGrid); RNG5.SeedUniqueString(std::string("The 5D RNG")); + std::cout << GridLogMessage << "Initialised RNGs" << std::endl; + + LatticeFermionF src (FGrid); random(RNG5,src); + LatticeFermionF src1 (FGrid); random(RNG5,src1); +#if 0 + src = Zero(); + { + Coordinate origin({0,0,0,latt4[2]-1,0}); + SpinColourVectorF tmp; + tmp=Zero(); + tmp()(0)(0)=Complex(-2.0,0.0); + std::cout << " source site 0 " << tmp<::HotConfiguration(RNG4,Umu); + std::cout << GridLogMessage << "Random gauge initialised " << std::endl; +#if 0 + Umu=1.0; + for(int mu=0;mu(Umu,mu); + // if (mu !=2 ) ttmp = 0; + // ttmp = ttmp* pow(10.0,mu); + PokeIndex(Umu,ttmp,mu); + } + std::cout << GridLogMessage << "Forced to diagonal " << std::endl; +#endif + + //////////////////////////////////// + // Naive wilson implementation + //////////////////////////////////// + // replicate across fifth dimension + // LatticeGaugeFieldF Umu5d(FGrid); + std::vector U(4,UGrid); + for(int mu=0;mu(Umu,mu); + } + std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl; + + if (1) + { + ref = Zero(); + for(int mu=0;muoSites();ss++){ + for(int s=0;soSites();ss++){ + for(int s=0;s_Nprocessors; + RealD NN = UGrid->NodeCount(); + + std::cout << GridLogMessage<< "*****************************************************************" <Barrier(); + Dw.Dhop(src,result,0); + std::cout<Barrier(); + + double volume=Ls; for(int mu=0;mu1.0e-4) ) { + + /* + std::cout << "RESULT\n " << result<Barrier(); + exit(-1); + } + assert (norm2(err)< 1.0e-4 ); + } + + if (1) + { // Naive wilson dag implementation + ref = Zero(); + for(int mu=0;muoSites();ss++){ + for(int s=0;soSites();ss++){ + for(int s=0;s1.0e-4)){ +/* + std::cout<< "DAG RESULT\n " <Barrier(); + Dw.DhopEO(src_o,r_e,DaggerNo); + double t0=usecond(); + for(int i=0;iBarrier(); + + 
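+ // Illustrative arithmetic: with Nc=3 the single_site_flops figure is
+ // 8*Nc*(7+16*Nc) = 8*3*55 = 1320 flops per site, the standard Wilson
+ // dslash count; 'volume' below accumulates Ls times the 4d volume, and
+ // the even-odd kernel acts on half of the sites per call.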
double volume=Ls; for(int mu=0;mu1.0e-4)){ + /* + std::cout<< "Deo RESULT\n " < + Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#ifdef GRID_CUDA +#define CUDA_PROFILE +#endif + +#ifdef CUDA_PROFILE +#include +#endif + +using namespace std; +using namespace Grid; + +//////////////////////// +/// Move to domains //// +//////////////////////// + +Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT +}; + +void Benchmark(int Ls, Coordinate Dirichlet, int partial); + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + + int threads = GridThread::GetThreads(); + + int Ls=8; + for(int i=0;i> Ls; + } + } + + ////////////////// + // With comms + ////////////////// + Coordinate Dirichlet(Nd+1,0); + + for(auto partial : {0}) { + std::cout << "\n\n\n\n\n\n" <1 ? 1 : 0; + // for(int d=0;d1 ? 1 : 0; + Dirichlet[0] = 0; + Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0]; + Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1]; + Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2]; + Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3]; + + for(auto partial : {0,1}) { + std::cout << "\n\n\n\n\n\n" < seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); +#define SINGLE +#ifdef SINGLE + typedef vComplexF Simd; + typedef LatticeFermionF FermionField; + typedef LatticeGaugeFieldF GaugeField; + typedef LatticeColourMatrixF ColourMatrixField; + typedef DomainWallFermionF FermionAction; +#endif +#ifdef DOUBLE + typedef vComplexD Simd; + typedef LatticeFermionD FermionField; + typedef LatticeGaugeFieldD GaugeField; + typedef LatticeColourMatrixD ColourMatrixField; + typedef DomainWallFermionD FermionAction; +#endif +#ifdef DOUBLE2 + typedef vComplexD2 Simd; + typedef LatticeFermionD2 FermionField; + typedef LatticeGaugeFieldD2 GaugeField; + typedef LatticeColourMatrixD2 ColourMatrixField; + typedef DomainWallFermionD2 FermionAction; +#endif + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,Simd::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl; + GridParallelRNG RNG4(UGrid); RNG4.SeedUniqueString(std::string("The 4D RNG")); + + std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl; + GridParallelRNG RNG5(FGrid); RNG5.SeedUniqueString(std::string("The 5D RNG")); + + + FermionField src (FGrid); random(RNG5,src); +#if 0 + src = Zero(); + { + Coordinate 
origin({0,0,0,latt4[2]-1,0}); + SpinColourVectorF tmp; + tmp=Zero(); + tmp()(0)(0)=Complex(-2.0,0.0); + std::cout << " source site 0 " << tmp<::HotConfiguration(RNG4,Umu); + UmuCopy=Umu; + UmuFull=Umu; + std::cout << GridLogMessage << "Random gauge initialised " << std::endl; + + //////////////////////////////////// + // Apply BCs + //////////////////////////////////// + Coordinate Block(4); + for(int d=0;d<4;d++) Block[d]= Dirichlet[d+1]; + + std::cout << GridLogMessage << "Applying BCs for Dirichlet Block5 " << Dirichlet << std::endl; + std::cout << GridLogMessage << "Applying BCs for Dirichlet Block4 " << Block << std::endl; + + DirichletFilter Filter(Block); + Filter.applyFilter(Umu); + if(!partial) Filter.applyFilter(UmuCopy); + + //////////////////////////////////// + // Naive wilson implementation + //////////////////////////////////// + std::vector U(4,UGrid); + std::vector Ucopy(4,UGrid); + for(int mu=0;mu(Umu,mu); + Ucopy[mu] = PeekIndex(UmuCopy,mu); + } + + std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl; + + if (1) + { + ref = Zero(); + for(int mu=0;muoSites();ss++){ + for(int s=0;s=Ls-depth)){ + tmp_v[Ls*ss+s] = Ucopy_v[ss]*tmp_v[Ls*ss+s]; + } else { + tmp_v[Ls*ss+s] = U_v[ss]*tmp_v[Ls*ss+s]; + } + } + } + } + ref=ref + tmp - Gamma(Gmu[mu])*tmp; + { + autoView( tmp_v , tmp , CpuWrite); + autoView( U_v , U[mu] , CpuRead); + autoView( Ucopy_v, Ucopy[mu] , CpuRead); + autoView( src_v, src , CpuRead); + for(int ss=0;ssoSites();ss++){ + for(int s=0;s=Ls-depth)){ + tmp_v[Ls*ss+s] = adj(Ucopy_v[ss])*src_v[Ls*ss+s]; + } else { + tmp_v[Ls*ss+s] = adj(U_v[ss])*src_v[Ls*ss+s]; + } + } + } + } + tmp =Cshift(tmp,mu+1,-1); + ref=ref + tmp + Gamma(Gmu[mu])*tmp; + } + ref = -0.5*ref; + } + + RealD mass=0.1; + RealD M5 =1.8; + + RealD NP = UGrid->_Nprocessors; + RealD NN = UGrid->NodeCount(); + + std::cout << GridLogMessage<< "*****************************************************************" <Barrier(); + Dw.Dhop(src,result,0); + std::cout<Barrier(); + + double volume=Ls; for(int mu=0;mu1.0e-4) ) { + std::cout<Barrier(); + + DumpSliceNorm("s-slice ref ",ref,1); + DumpSliceNorm("s-slice res ",result,1); + DumpSliceNorm("s-slice error ",err,1); + exit(-1); + } + assert (n2e< 1.0e-4 ); + } + + if (1) + { // Naive wilson dag implementation + + ref = Zero(); + for(int mu=0;muoSites();ss++){ + for(int s=0;s=Ls-depth)){ + tmp_v[Ls*ss+s] = Ucopy_v[ss]*tmp_v[Ls*ss+s]; + } else { + tmp_v[Ls*ss+s] = U_v[ss]*tmp_v[Ls*ss+s]; + } + } + } + } + ref=ref + tmp + Gamma(Gmu[mu])*tmp; + { + autoView( tmp_v , tmp , CpuWrite); + autoView( U_v , U[mu] , CpuRead); + autoView( Ucopy_v, Ucopy[mu] , CpuRead); + autoView( src_v, src , CpuRead); + for(int ss=0;ssoSites();ss++){ + for(int s=0;s=Ls-depth)){ + tmp_v[Ls*ss+s] = adj(Ucopy_v[ss])*src_v[Ls*ss+s]; + } else { + tmp_v[Ls*ss+s] = adj(U_v[ss])*src_v[Ls*ss+s]; + } + } + } + } + tmp =Cshift(tmp,mu+1,-1); + ref=ref + tmp - Gamma(Gmu[mu])*tmp; + } + ref = -0.5*ref; + } + + Dw.Dhop(src,result,DaggerYes); + + std::cout << GridLogMessage << "----------------------------------------------------------------" << std::endl; + std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl; + std::cout << GridLogMessage << "----------------------------------------------------------------" << std::endl; + + std::cout<Barrier(); + Dw.DhopEO(src_o,r_e,DaggerNo); + double t0=usecond(); + for(int i=0;iBarrier(); + + double volume=Ls; for(int mu=0;mu & latt4, int Ls, int threads,int report ) RealD M5 
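+ // Illustrative note: the DomainWallFermionR -> DomainWallFermionD change
+ // below, like the WilsonFermionR and ImprovedStaggeredFermionR renames
+ // later in this patch, retires the old default-precision 'R' aliases in
+ // favour of explicit D (double) / F (single) suffixed types.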
=1.8; RealD NP = UGrid->_Nprocessors; - DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + DomainWallFermionD Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); double t0=usecond(); Dw.Dhop(src,result,0); diff --git a/benchmarks/Benchmark_gparity.cc b/benchmarks/Benchmark_gparity.cc index ce84ecbc..421dd3cd 100644 --- a/benchmarks/Benchmark_gparity.cc +++ b/benchmarks/Benchmark_gparity.cc @@ -93,14 +93,11 @@ int main (int argc, char ** argv) int ncall =1000; if (1) { FGrid->Barrier(); - Dw.ZeroCounters(); Dw.Dhop(src,result,0); std::cout<Barrier(); @@ -114,7 +111,6 @@ int main (int argc, char ** argv) std::cout<Barrier(); @@ -157,7 +150,6 @@ int main (int argc, char ** argv) std::cout< diag = Dw.bs; Vector upper= Dw.cs; Vector lower= Dw.cs; @@ -103,35 +103,30 @@ int main (int argc, char ** argv) #define BENCH_DW(A,...) \ Dw. A (__VA_ARGS__); \ FGrid->Barrier(); \ - Dw.CayleyZeroCounters(); \ t0=usecond(); \ for(int i=0;iBarrier(); \ - Dw.CayleyReport(); \ std::cout<Barrier(); \ - zDw.CayleyZeroCounters(); \ t0=usecond(); \ for(int i=0;iBarrier(); \ - zDw.CayleyReport(); \ std::cout<Barrier(); \ - Dw.CayleyZeroCounters(); \ t0=usecond(); \ for(int i=0;iBarrier(); \ - Dw.CayleyReport(); \ std::cout< +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + int Ls = 12; + Coordinate latt4 = GridDefaultLatt(); + + GridCartesian * UGridD = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGridD = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridD); + GridCartesian * FGridD = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridD); + GridRedBlackCartesian * FrbGridD = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridD); + + GridCartesian * UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF); + GridCartesian * FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF); + GridRedBlackCartesian * FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF); + + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + + std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl; + GridParallelRNG RNG4(UGridD); RNG4.SeedFixedIntegers(seeds4); + std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl; + GridParallelRNG RNG5(FGridD); RNG5.SeedFixedIntegers(seeds5); + std::cout << GridLogMessage << "Initialised RNGs" << std::endl; + + LatticeFermionD field_d(FGridD), tmp_d(FGridD); + random(RNG5,field_d); tmp_d = field_d; + + LatticeFermionD2 field_d2(FGridF), tmp_d2(FGridF); + precisionChange(field_d2, field_d); tmp_d2 = field_d2; + + LatticeFermionF field_f(FGridF), tmp_f(FGridF); + precisionChange(field_f, field_d); tmp_f = field_f; + + int N = 500; + + double time_ds = 0, time_sd = 0; + + std::cout<double original implementation (fields initially device-resident)" << std::endl; + for(int i=0;is " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl; + + + precisionChangeWorkspace wk_sp_to_dp(field_d.Grid(),field_f.Grid()); + precisionChangeWorkspace wk_dp_to_sp(field_f.Grid(),field_d.Grid()); + + std::cout<double with pregenerated workspace(fields initially device-resident)" << std::endl; + time_sd = time_ds = 0; + for(int i=0;is " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl; + + std::cout<double with workspace generated on-the-fly (fields initially device-resident)" << std::endl; + time_sd = time_ds = 0; + for(int i=0;is " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl; + + + std::cout<double2 (fields initially device-resident)" << std::endl; + time_sd = time_ds = 0; + for(int i=0;is " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl; + + + std::cout<double2 through standard precisionChange call(fields initially device-resident) [NB: perf should be the same as the previous test!]" << std::endl; + time_sd = time_ds = 0; + for(int i=0;is " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl; + + Grid_finalize(); +} diff --git a/benchmarks/Benchmark_staggered.cc b/benchmarks/Benchmark_staggered.cc index 34e1e470..a2be7f62 100644 --- a/benchmarks/Benchmark_staggered.cc +++ b/benchmarks/Benchmark_staggered.cc @@ -53,8 +53,8 @@ int main (int argc, char ** argv) pRNG.SeedFixedIntegers(seeds); // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); - typedef typename ImprovedStaggeredFermionR::FermionField FermionField; - 
typename ImprovedStaggeredFermionR::ImplParams params; + typedef typename ImprovedStaggeredFermionD::FermionField FermionField; + typename ImprovedStaggeredFermionD::ImplParams params; FermionField src (&Grid); random(pRNG,src); FermionField result(&Grid); result=Zero(); @@ -93,7 +93,7 @@ int main (int argc, char ** argv) RealD c1=9.0/8.0; RealD c2=-1.0/24.0; RealD u0=1.0; - ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0,params); + ImprovedStaggeredFermionD Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0,params); std::cout<()); - WilsonFermionR Dw(Umu,Grid,RBGrid,mass,params); + WilsonFermionD Dw(Umu,Grid,RBGrid,mass,params); // Full operator bench_wilson(src,result,Dw,volume,DaggerNo); @@ -130,7 +130,7 @@ int main (int argc, char ** argv) void bench_wilson ( LatticeFermion & src, LatticeFermion & result, - WilsonFermionR & Dw, + WilsonFermionD & Dw, double const volume, int const dag ) { @@ -149,7 +149,7 @@ void bench_wilson ( void bench_wilson_eo ( LatticeFermion & src, LatticeFermion & result, - WilsonFermionR & Dw, + WilsonFermionD & Dw, double const volume, int const dag ) { diff --git a/configure.ac b/configure.ac index 9ab0595a..fedca3fe 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ -AC_PREREQ([2.63]) -AC_INIT([Grid], [0.7.0], [https://github.com/paboyle/Grid], [Grid]) +AC_PREREQ([2.69]) +AC_INIT([Grid],[0.7.0],[https://github.com/paboyle/Grid],[Grid]) AC_CANONICAL_BUILD AC_CANONICAL_HOST AC_CANONICAL_TARGET @@ -20,7 +20,7 @@ m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) ################ Set flags # do not move! -AC_ARG_ENABLE([debug],[AC_HELP_STRING([--enable-debug=yes|no], [enable debug compilation ])], +AC_ARG_ENABLE([debug],[AS_HELP_STRING([--enable-debug=yes|no],[enable debug compilation ])], [ac_DEBUG=${enable_debug}], [ac_DEBUG=no]) case ${ac_DEBUG} in yes) @@ -114,7 +114,7 @@ AC_ARG_WITH([openssl], ############### lapack AC_ARG_ENABLE([lapack], - [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])], + [AS_HELP_STRING([--enable-lapack=yes|no|prefix],[enable LAPACK])], [ac_LAPACK=${enable_lapack}], [ac_LAPACK=no]) case ${ac_LAPACK} in @@ -128,21 +128,41 @@ case ${ac_LAPACK} in AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);; esac +############### tracing +AC_ARG_ENABLE([tracing], + [AS_HELP_STRING([--enable-tracing=none|nvtx|roctx|timer],[enable tracing])], + [ac_TRACING=${enable_tracing}], [ac_TRACING=none]) + +case ${ac_TRACING} in + nvtx) + AC_DEFINE([GRID_TRACING_NVTX],[1],[use NVTX]) + LIBS="${LIBS} -lnvToolsExt64_1" + ;; + roctx) + AC_DEFINE([GRID_TRACING_ROCTX],[1],[use ROCTX]) + LIBS="${LIBS} -lroctx64" + ;; + timer) + AC_DEFINE([GRID_TRACING_TIMER],[1],[use TIMER]);; + *) + AC_DEFINE([GRID_TRACING_NONE],[1],[no tracing]);; +esac + ############### fermions AC_ARG_ENABLE([fermion-reps], - [AC_HELP_STRING([--enable-fermion-reps=yes|no], [enable extra fermion representation support])], + [AS_HELP_STRING([--enable-fermion-reps=yes|no],[enable extra fermion representation support])], [ac_FERMION_REPS=${enable_fermion_reps}], [ac_FERMION_REPS=yes]) AM_CONDITIONAL(BUILD_FERMION_REPS, [ test "${ac_FERMION_REPS}X" == "yesX" ]) AC_ARG_ENABLE([gparity], - [AC_HELP_STRING([--enable-gparity=yes|no], [enable G-parity support])], + [AS_HELP_STRING([--enable-gparity=yes|no],[enable G-parity support])], [ac_GPARITY=${enable_gparity}], [ac_GPARITY=yes]) AM_CONDITIONAL(BUILD_GPARITY, [ test "${ac_GPARITY}X" == "yesX" ]) AC_ARG_ENABLE([zmobius], - [AC_HELP_STRING([--enable-zmobius=yes|no], [enable Zmobius support])], + 
[AS_HELP_STRING([--enable-zmobius=yes|no],[enable Zmobius support])], [ac_ZMOBIUS=${enable_zmobius}], [ac_ZMOBIUS=yes]) AM_CONDITIONAL(BUILD_ZMOBIUS, [ test "${ac_ZMOBIUS}X" == "yesX" ]) @@ -159,7 +179,7 @@ case ${ac_ZMOBIUS} in esac ############### Nc AC_ARG_ENABLE([Nc], - [AC_HELP_STRING([--enable-Nc=2|3|4|5], [enable number of colours])], + [AS_HELP_STRING([--enable-Nc=2|3|4|5],[enable number of colours])], [ac_Nc=${enable_Nc}], [ac_Nc=3]) case ${ac_Nc} in @@ -177,7 +197,7 @@ esac ############### FP16 conversions AC_ARG_ENABLE([sfw-fp16], - [AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])], + [AS_HELP_STRING([--enable-sfw-fp16=yes|no],[enable software fp16 comms])], [ac_SFW_FP16=${enable_sfw_fp16}], [ac_SFW_FP16=yes]) case ${ac_SFW_FP16} in yes) @@ -189,11 +209,11 @@ esac ############### Default to accelerator cshift, but revert to host if UCX is buggy or other reasons AC_ARG_ENABLE([accelerator-cshift], - [AC_HELP_STRING([--enable-accelerator-cshift=yes|no], [run cshift on the device])], + [AS_HELP_STRING([--enable-accelerator-cshift=yes|no],[run cshift on the device])], [ac_ACC_CSHIFT=${enable_accelerator_cshift}], [ac_ACC_CSHIFT=yes]) AC_ARG_ENABLE([ucx-buggy], - [AC_HELP_STRING([--enable-ucx-buggy=yes|no], [enable workaround for UCX device buffer bugs])], + [AS_HELP_STRING([--enable-ucx-buggy=yes|no],[enable workaround for UCX device buffer bugs])], [ac_UCXBUGGY=${enable_ucx_buggy}], [ac_UCXBUGGY=no]) case ${ac_UCXBUGGY} in @@ -211,7 +231,7 @@ esac ############### SYCL/CUDA/HIP/none AC_ARG_ENABLE([accelerator], - [AC_HELP_STRING([--enable-accelerator=cuda|sycl|hip|none], [enable none,cuda,sycl,hip acceleration])], + [AS_HELP_STRING([--enable-accelerator=cuda|sycl|hip|none],[enable none,cuda,sycl,hip acceleration])], [ac_ACCELERATOR=${enable_accelerator}], [ac_ACCELERATOR=none]) case ${ac_ACCELERATOR} in cuda) @@ -234,7 +254,7 @@ esac ############### UNIFIED MEMORY AC_ARG_ENABLE([unified], - [AC_HELP_STRING([--enable-unified=yes|no], [enable unified address space for accelerator loops])], + [AS_HELP_STRING([--enable-unified=yes|no],[enable unified address space for accelerator loops])], [ac_UNIFIED=${enable_unified}], [ac_UNIFIED=yes]) case ${ac_UNIFIED} in yes) @@ -248,10 +268,10 @@ esac ############### Intel libraries AC_ARG_ENABLE([mkl], - [AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])], + [AS_HELP_STRING([--enable-mkl=yes|no|prefix],[enable Intel MKL for LAPACK & FFTW])], [ac_MKL=${enable_mkl}], [ac_MKL=no]) AC_ARG_ENABLE([ipp], - [AC_HELP_STRING([--enable-ipp=yes|no|prefix], [enable Intel IPP for fast CRC32C])], + [AS_HELP_STRING([--enable-ipp=yes|no|prefix],[enable Intel IPP for fast CRC32C])], [ac_IPP=${enable_ipp}], [ac_IPP=no]) case ${ac_MKL} in @@ -349,8 +369,7 @@ CXXFLAGS=$CXXFLAGS_CPY LDFLAGS=$LDFLAGS_CPY ############### SIMD instruction selection -AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=code], - [select SIMD target (cf. README.md)])], [ac_SIMD=${enable_simd}], [ac_SIMD=GEN]) +AC_ARG_ENABLE([simd],[AS_HELP_STRING([--enable-simd=code],[select SIMD target (cf. 
README.md)])], [ac_SIMD=${enable_simd}], [ac_SIMD=GEN]) AC_ARG_ENABLE([gen-simd-width], [AS_HELP_STRING([--enable-gen-simd-width=size], @@ -394,11 +413,10 @@ case ${CXXTEST} in fi ;; hipcc) -# CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr" CXXFLAGS="$CXXFLAGS -fno-strict-aliasing" CXXLD=${CXX} if test $ac_openmp = yes; then - CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp" + CXXFLAGS="$CXXFLAGS -fopenmp" fi ;; dpcpp) @@ -416,7 +434,13 @@ case ${ax_cv_cxx_compiler_vendor} in clang|gnu) case ${ac_SIMD} in GPU) - AC_DEFINE([GPU_VEC],[1],[GPU vectorised 512bit]) + AC_DEFINE([GPU_VEC],[1],[GPU vectorised]) + AC_DEFINE_UNQUOTED([GEN_SIMD_WIDTH],[$ac_gen_simd_width], + [generic SIMD vector width (in bytes)]) + SIMD_GEN_WIDTH_MSG=" (width= $ac_gen_simd_width)" + SIMD_FLAGS='';; + GPU-RRII) + AC_DEFINE([GPU_RRII],[1],[GPU vectorised with RRRR / IIII layout]) AC_DEFINE_UNQUOTED([GEN_SIMD_WIDTH],[$ac_gen_simd_width], [generic SIMD vector width (in bytes)]) SIMD_GEN_WIDTH_MSG=" (width= $ac_gen_simd_width)" @@ -485,6 +509,12 @@ case ${ax_cv_cxx_compiler_vendor} in GPU) AC_DEFINE([GPU_VEC],[1],[GPU vectorised ]) SIMD_FLAGS='';; + GPU-RRII) + AC_DEFINE([GPU_RRII],[1],[GPU vectorised with RRRR / IIII layout]) + AC_DEFINE_UNQUOTED([GEN_SIMD_WIDTH],[$ac_gen_simd_width], + [generic SIMD vector width (in bytes)]) + SIMD_GEN_WIDTH_MSG=" (width= $ac_gen_simd_width)" + SIMD_FLAGS='';; SSE4) AC_DEFINE([SSE4],[1],[SSE4 intrinsics]) SIMD_FLAGS='-msse4.2 -xsse4.2';; @@ -532,8 +562,7 @@ AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] ######################################################### ###################### GRID ALLOCATOR ALIGNMENT ## ######################################################### -AC_ARG_ENABLE([alloc-align],[AC_HELP_STRING([--enable-alloc-align=2MB|4k], - [Alignment in bytes of GRID Allocator ])],[ac_ALLOC_ALIGN=${enable_alloc_align}],[ac_ALLOC_ALIGN=2MB]) +AC_ARG_ENABLE([alloc-align],[AS_HELP_STRING([--enable-alloc-align=2MB|4k],[Alignment in bytes of GRID Allocator ])],[ac_ALLOC_ALIGN=${enable_alloc_align}],[ac_ALLOC_ALIGN=2MB]) case ${ac_ALLOC_ALIGN} in 4k) AC_DEFINE([GRID_ALLOC_ALIGN],[(4096)],[GRID_ALLOC_ALIGN]);; @@ -542,8 +571,7 @@ case ${ac_ALLOC_ALIGN} in *);; esac -AC_ARG_ENABLE([alloc-cache],[AC_HELP_STRING([--enable-alloc-cache ], - [Cache a pool of recent "frees" to reuse])],[ac_ALLOC_CACHE=${enable_alloc_cache}],[ac_ALLOC_CACHE=yes]) +AC_ARG_ENABLE([alloc-cache],[AS_HELP_STRING([--enable-alloc-cache ],[Cache a pool of recent "frees" to reuse])],[ac_ALLOC_CACHE=${enable_alloc_cache}],[ac_ALLOC_CACHE=yes]) case ${ac_ALLOC_CACHE} in yes) AC_DEFINE([ALLOCATION_CACHE],[1],[ALLOCATION_CACHE]);; @@ -554,20 +582,21 @@ esac ######################################################### ###################### set GPU device to rank in node ## ######################################################### -AC_ARG_ENABLE([setdevice],[AC_HELP_STRING([--enable-setdevice | --disable-setdevice], - [Set GPU to rank in node with cudaSetDevice or similar])],[ac_SETDEVICE=${enable_SETDEVICE}],[ac_SETDEVICE=no]) +AC_ARG_ENABLE([setdevice],[AS_HELP_STRING([--enable-setdevice | --disable-setdevice],[Set GPU to rank in node with cudaSetDevice or similar])],[ac_SETDEVICE=${enable_SETDEVICE}],[ac_SETDEVICE=no]) case ${ac_SETDEVICE} in - yes);; - no) + yes) + echo ENABLE SET DEVICE + ;; + *) AC_DEFINE([GRID_DEFAULT_GPU],[1],[GRID_DEFAULT_GPU] ) + echo DISABLE SET DEVICE ;; esac ######################################################### 
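# Illustrative example only (flags taken from the options defined in this
# file): a GPU build might combine the new switches with existing ones as
#   ../configure --enable-comms=mpi-auto --enable-accelerator=cuda \
#                --enable-shm=nvlink --enable-setdevice --enable-tracing=nvtx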
###################### Shared memory intranode ######### ######################################################### -AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|shmget|hugetlbfs|shmnone|nvlink|no], - [Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=no]) +AC_ARG_ENABLE([shm],[AS_HELP_STRING([--enable-shm=shmopen|shmget|hugetlbfs|shmnone|nvlink|no|none],[Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=no]) case ${ac_SHM} in @@ -586,7 +615,7 @@ case ${ac_SHM} in AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] ) ;; - shmnone | no) + shmnone | no | none) AC_DEFINE([GRID_MPI3_SHM_NONE],[1],[GRID_MPI3_SHM_NONE] ) ;; @@ -604,25 +633,30 @@ case ${ac_SHM} in esac ###################### Shared base path for SHMMMAP -AC_ARG_ENABLE([shmpath],[AC_HELP_STRING([--enable-shmpath=path], - [Select SHM mmap base path for hugetlbfs])], +AC_ARG_ENABLE([shmpath],[AS_HELP_STRING([--enable-shmpath=path],[Select SHM mmap base path for hugetlbfs])], [ac_SHMPATH=${enable_shmpath}], [ac_SHMPATH=/var/lib/hugetlbfs/global/pagesize-2MB/]) AC_DEFINE_UNQUOTED([GRID_SHM_PATH],["$ac_SHMPATH"],[Path to a hugetlbfs filesystem for MMAPing]) ############### force MPI in SMP -AC_ARG_ENABLE([shm-force-mpi],[AC_HELP_STRING([--enable-shm-force-mpi], - [Force MPI within shared memory])],[ac_SHM_FORCE_MPI=${enable_shm_force_mpi}],[ac_SHM_FORCE_MPI=no]) +AC_ARG_ENABLE([shm-force-mpi],[AS_HELP_STRING([--enable-shm-force-mpi],[Force MPI within shared memory])],[ac_SHM_FORCE_MPI=${enable_shm_force_mpi}],[ac_SHM_FORCE_MPI=no]) case ${ac_SHM_FORCE_MPI} in yes) AC_DEFINE([GRID_SHM_FORCE_MPI],[1],[GRID_SHM_FORCE_MPI] ) ;; *) ;; esac +############### force MPI in SMP +AC_ARG_ENABLE([shm-fast-path],[AS_HELP_STRING([--enable-shm-fast-path],[Allow kernels to remote copy over intranode])],[ac_SHM_FAST_PATH=${enable_shm_fast_path}],[ac_SHM_FAST_PATH=no]) +case ${ac_SHM_FAST_PATH} in + yes) + AC_DEFINE([SHM_FAST_PATH],[1],[SHM_FAST_PATH] ) + ;; + *) ;; +esac ############### communication type selection -AC_ARG_ENABLE([comms-threads],[AC_HELP_STRING([--enable-comms-threads | --disable-comms-threads], - [Use multiple threads in MPI calls])],[ac_COMMS_THREADS=${enable_comms_threads}],[ac_COMMS_THREADS=yes]) +AC_ARG_ENABLE([comms-threads],[AS_HELP_STRING([--enable-comms-threads | --disable-comms-threads],[Use multiple threads in MPI calls])],[ac_COMMS_THREADS=${enable_comms_threads}],[ac_COMMS_THREADS=yes]) case ${ac_COMMS_THREADS} in yes) @@ -632,8 +666,7 @@ case ${ac_COMMS_THREADS} in esac ############### communication type selection -AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto], - [Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) +AC_ARG_ENABLE([comms],[AS_HELP_STRING([--enable-comms=none|mpi|mpi-auto],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) case ${ac_COMMS} in @@ -667,8 +700,8 @@ AM_CONDITIONAL(BUILD_COMMS_MPI3, [ test "${comms_type}X" == "mpi3X" ] ) AM_CONDITIONAL(BUILD_COMMS_NONE, [ test "${comms_type}X" == "noneX" ]) ############### RNG selection -AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937|sitmo],\ - [Select Random Number Generator to be used])],\ +AC_ARG_ENABLE([rng],[AS_HELP_STRING([--enable-rng=ranlux48|mt19937|sitmo],[\ + Select Random Number Generator to be used])],\ [ac_RNG=${enable_rng}],[ac_RNG=sitmo]) case ${ac_RNG} in @@ -687,8 +720,8 @@ case ${ac_RNG} in esac ############### Timer option -AC_ARG_ENABLE([timers],[AC_HELP_STRING([--enable-timers],\ - [Enable system dependent high res 
timers])],\ +AC_ARG_ENABLE([timers],[AS_HELP_STRING([--enable-timers],[\ + Enable system dependent high res timers])],\ [ac_TIMERS=${enable_timers}],[ac_TIMERS=yes]) case ${ac_TIMERS} in @@ -704,8 +737,7 @@ case ${ac_TIMERS} in esac ############### Chroma regression test -AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma], - [Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no) +AC_ARG_ENABLE([chroma],[AS_HELP_STRING([--enable-chroma],[Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no) case ${ac_CHROMA} in yes|no) diff --git a/examples/Example_Laplacian.cc b/examples/Example_Laplacian.cc index fa8466cf..587bc66b 100644 --- a/examples/Example_Laplacian.cc +++ b/examples/Example_Laplacian.cc @@ -93,14 +93,14 @@ template class FreeLaplacianStencil : public SparseMatrixBase StencilImpl; + typedef CartesianStencil StencilImpl; GridBase *grid; StencilImpl Stencil; SimpleCompressor Compressor; FreeLaplacianStencil(GridBase *_grid) - : Stencil (_grid,6,Even,directions,displacements,0), grid(_grid) + : Stencil (_grid,6,Even,directions,displacements,SimpleStencilParams()), grid(_grid) { }; virtual GridBase *Grid(void) { return grid; }; @@ -168,7 +168,8 @@ public: typedef iImplDoubledGaugeField SiteDoubledGaugeField; typedef Lattice DoubledGaugeField; - typedef CartesianStencil StencilImpl; + typedef CartesianStencil StencilImpl; + SimpleStencilParams p; GridBase *grid; StencilImpl Stencil; @@ -177,7 +178,7 @@ public: CovariantLaplacianStencil(GaugeField &Umu) : grid(Umu.Grid()), - Stencil (grid,6,Even,directions,displacements,0), + Stencil (grid,6,Even,directions,displacements,p), Uds(grid) { for (int mu = 0; mu < Nd; mu++) { @@ -324,7 +325,7 @@ int main(int argc, char ** argv) U_GT = U; // Make a random xform to the gauge field - SU::RandomGaugeTransform(RNG,U_GT,g); // Unit gauge + SU::RandomGaugeTransform(RNG,U_GT,g); // Unit gauge Field in_GT(&Grid); Field out_GT(&Grid); diff --git a/examples/Example_Mobius_spectrum.cc b/examples/Example_Mobius_spectrum.cc index f4cd3335..b604eec4 100644 --- a/examples/Example_Mobius_spectrum.cc +++ b/examples/Example_Mobius_spectrum.cc @@ -253,7 +253,7 @@ int main (int argc, char ** argv) int nmass = masses.size(); - std::vector FermActs; + std::vector FermActs; std::cout< + +using namespace std; +using namespace Grid; + +RealD LLscale =1.0; +RealD LCscale =1.0; + +template class CovariantLaplacianCshift : public SparseMatrixBase +{ +public: + INHERIT_GIMPL_TYPES(Gimpl); + + GridBase *grid; + GaugeField U; + + CovariantLaplacianCshift(GaugeField &_U) : + grid(_U.Grid()), + U(_U) { }; + + virtual GridBase *Grid(void) { return grid; }; + + virtual void M (const Field &in, Field &out) + { + out=Zero(); + for(int mu=0;mu(U, mu); // NB: Inefficient + out = out - Gimpl::CovShiftForward(Umu,mu,in); + out = out - Gimpl::CovShiftBackward(Umu,mu,in); + out = out + 2.0*in; + } + }; + virtual void Mdag (const Field &in, Field &out) { M(in,out);}; // Laplacian is hermitian + virtual void Mdiag (const Field &in, Field &out) {assert(0);}; // Unimplemented need only for multigrid + virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);}; // Unimplemented need only for multigrid + virtual void MdirAll (const Field &in, std::vector &out) {assert(0);}; // Unimplemented need only for multigrid +}; + +void MakePhase(Coordinate mom,LatticeComplex &phase) +{ + GridBase *grid = phase.Grid(); + auto latt_size = grid->GlobalDimensions(); + ComplexD ci(0.0,1.0); + phase=Zero(); + + LatticeComplex coor(phase.Grid()); + for(int
mu=0;mu +void GaussianSmear(LatticeGaugeField &U,Field &unsmeared,Field &smeared) +{ + typedef CovariantLaplacianCshift Laplacian_t; + Laplacian_t Laplacian(U); + + Integer Iterations = 40; + Real width = 2.0; + Real coeff = (width*width) / Real(4*Iterations); + + Field tmp(U.Grid()); + smeared=unsmeared; + // chi = (1-p^2/2N)^N kronecker + for(int n = 0; n < Iterations; ++n) { + Laplacian.M(smeared,tmp); + smeared = smeared - coeff*tmp; + std::cout << " smear iter " << n<<" " < +void MasslessFreePropagator(Action &D,LatticePropagator &source,LatticePropagator &propagator) +{ + GridBase *UGrid = source.Grid(); + GridBase *FGrid = D.FermionGrid(); + bool fiveD = true; //calculate 5d free propagator + RealD mass = D.Mass(); + LatticeFermion src4 (UGrid); + LatticeFermion result4 (UGrid); + LatticeFermion result5(FGrid); + LatticeFermion src5(FGrid); + LatticePropagator prop5(FGrid); + for(int s=0;s(src4,source,s,c); + + D.ImportPhysicalFermionSource(src4,src5); + D.FreePropagator(src5,result5,mass,true); + std::cout<(prop5,result5,s,c); + FermToProp(propagator,result4,s,c); + } + } + + LatticePropagator Vector_mu(UGrid); + LatticeComplex VV (UGrid); + std::vector sumVV; + Gamma::Algebra GammaV[3] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ + }; + for( int mu=0;mu<3;mu++ ) { + Gamma gV(GammaV[mu]); + D.ContractConservedCurrent(prop5,prop5,Vector_mu,source,Current::Vector,mu); + VV = trace(gV*Vector_mu); // (local) Vector-Vector conserved current + sliceSum(VV,sumVV,Tdir); + int Nt = sumVV.size(); + for(int t=0;t +void MasslessFreePropagator1(Action &D,LatticePropagator &source,LatticePropagator &propagator) +{ + bool fiveD = false; //calculate 4d free propagator + RealD mass = D.Mass(); + GridBase *UGrid = source.Grid(); + LatticeFermion src4 (UGrid); + LatticeFermion result4 (UGrid); + for(int s=0;s(src4,source,s,c); + D.FreePropagator(src4,result4,mass,false); + FermToProp(propagator,result4,s,c); + } + } +} + +template +void Solve(Action &D,LatticePropagator &source,LatticePropagator &propagator) +{ + GridBase *UGrid = D.GaugeGrid(); + GridBase *FGrid = D.FermionGrid(); + + LatticeFermion src4 (UGrid); + LatticeFermion src5 (FGrid); + LatticeFermion result5(FGrid); + LatticeFermion result4(UGrid); + LatticePropagator prop5(FGrid); + + ConjugateGradient CG(1.0e-7,100000); + SchurRedBlackDiagMooeeSolve schur(CG); + ZeroGuesser ZG; // Could be a DeflatedGuesser if have eigenvectors + for(int s=0;s(src4,source,s,c); + + D.ImportPhysicalFermionSource(src4,src5); + + result5=Zero(); + schur(D,src5,result5,ZG); + std::cout<(prop5,result5,s,c); + FermToProp(propagator,result4,s,c); + } + } + LatticePropagator Axial_mu(UGrid); + LatticePropagator Vector_mu(UGrid); + + LatticeComplex PA (UGrid); + LatticeComplex VV (UGrid); + LatticeComplex PJ5q(UGrid); + LatticeComplex PP (UGrid); + + std::vector sumPA; + std::vector sumVV; + std::vector sumPP; + std::vector sumPJ5q; + + Gamma g5(Gamma::Algebra::Gamma5); + D.ContractConservedCurrent(prop5,prop5,Axial_mu,source,Current::Axial,Tdir); + PA = trace(g5*Axial_mu); // Pseudoscalar-Axial conserved current + sliceSum(PA,sumPA,Tdir); + + int Nt{static_cast(sumPA.size())}; + + for(int t=0;t >, data); +}; + +void MesonTrace(std::string file,LatticePropagator &q1,LatticePropagator &q2,LatticeComplex &phase) +{ + const int nchannel=4; + Gamma::Algebra Gammas[nchannel][2] = { + {Gamma::Algebra::GammaXGamma5,Gamma::Algebra::GammaXGamma5}, + {Gamma::Algebra::GammaYGamma5,Gamma::Algebra::GammaYGamma5}, + 
{Gamma::Algebra::GammaZGamma5,Gamma::Algebra::GammaZGamma5}, + {Gamma::Algebra::Identity,Gamma::Algebra::Identity} + }; + + LatticeComplex meson_CF(q1.Grid()); + MesonFile MF; + + for(int ch=0;ch meson_T; + sliceSum(meson_CF,meson_T, Tdir); + + int nt=meson_T.size(); + + std::vector corr(nt); + for(int t=0;t seeds4({1,2,3,4}); + // GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeGaugeField Umu(UGrid); + std::string config; + RealD M5=atof(getenv("M5")); + RealD mq = atof(getenv("mass")); + int point_x = atoi(getenv("point_x")); + int point_y = atoi(getenv("point_y")); + int point_z = atoi(getenv("point_z")); + int point_t = atoi(getenv("point_t")); + std::vector masses({ mq} ); // u/d, s, c ?? + if( argc > 1 && argv[1][0] != '-' ) + { + std::cout< FermActs; + + std::cout< boundary = {1,1,1,-1}; + FermionActionD::ImplParams Params(boundary); + RealD b=1.5; + RealD c=0.5; + FermActs.push_back(new FermionActionD(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c)); + } + + LatticePropagator point_source(UGrid); + + Coordinate Origin({point_x,point_y,point_z,point_t}); + PointSource (Origin,point_source); + + std::vector PointProps(nmass,UGrid); + + for(int m=0;m + +using namespace std; +using namespace Grid; + +RealD LLscale =1.0; +RealD LCscale =1.0; + +template class CovariantLaplacianCshift : public SparseMatrixBase +{ +public: + INHERIT_GIMPL_TYPES(Gimpl); + + GridBase *grid; + GaugeField U; + + CovariantLaplacianCshift(GaugeField &_U) : + grid(_U.Grid()), + U(_U) { }; + + virtual GridBase *Grid(void) { return grid; }; + + virtual void M (const Field &in, Field &out) + { + out=Zero(); + for(int mu=0;mu(U, mu); // NB: Inefficient + out = out - Gimpl::CovShiftForward(Umu,mu,in); + out = out - Gimpl::CovShiftBackward(Umu,mu,in); + out = out + 2.0*in; + } + }; + virtual void Mdag (const Field &in, Field &out) { M(in,out);}; // Laplacian is hermitian + virtual void Mdiag (const Field &in, Field &out) {assert(0);}; // Unimplemented need only for multigrid + virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);}; // Unimplemented need only for multigrid + virtual void MdirAll (const Field &in, std::vector &out) {assert(0);}; // Unimplemented need only for multigrid +}; + +void MakePhase(Coordinate mom,LatticeComplex &phase) +{ + GridBase *grid = phase.Grid(); + auto latt_size = grid->GlobalDimensions(); + ComplexD ci(0.0,1.0); + phase=Zero(); + + LatticeComplex coor(phase.Grid()); + for(int mu=0;mu +void GaussianSmear(LatticeGaugeField &U,Field &unsmeared,Field &smeared) +{ + typedef CovariantLaplacianCshift Laplacian_t; + Laplacian_t Laplacian(U); + + Integer Iterations = 40; + Real width = 2.0; + Real coeff = (width*width) / Real(4*Iterations); + + Field tmp(U.Grid()); + smeared=unsmeared; + // chi = (1-p^2/2N)^N kronecker + for(int n = 0; n < Iterations; ++n) { + Laplacian.M(smeared,tmp); + smeared = smeared - coeff*tmp; + std::cout << " smear iter " << n<<" " < +void Solve(Action &D,LatticePropagator &source,LatticePropagator &propagator) +{ + GridBase *UGrid = D.GaugeGrid(); + GridBase *FGrid = D.FermionGrid(); + + LatticeFermion src4 (UGrid); + LatticeFermion src5 (FGrid); + LatticeFermion result5(FGrid); + LatticeFermion result4(UGrid); + LatticePropagator prop5(FGrid); + + ConjugateGradient CG(1.0e-8,100000); + SchurRedBlackDiagMooeeSolve schur(CG); + ZeroGuesser ZG; // Could be a DeflatedGuesser if have eigenvectors + for(int s=0;s(src4,source,s,c); + + D.ImportPhysicalFermionSource(src4,src5); + + result5=Zero(); + schur(D,src5,result5,ZG); +
std::cout<(prop5,result5,s,c); + FermToProp(propagator,result4,s,c); + } + } + LatticePropagator Axial_mu(UGrid); + LatticePropagator Vector_mu(UGrid); + + LatticeComplex PA (UGrid); + LatticeComplex VV (UGrid); + LatticeComplex PJ5q(UGrid); + LatticeComplex PP (UGrid); + + std::vector sumPA; + std::vector sumVV; + std::vector sumPP; + std::vector sumPJ5q; + + Gamma g5(Gamma::Algebra::Gamma5); + D.ContractConservedCurrent(prop5,prop5,Axial_mu,source,Current::Axial,Tdir); + PA = trace(g5*Axial_mu); // Pseudoscalar-Axial conserved current + sliceSum(PA,sumPA,Tdir); + + int Nt{static_cast(sumPA.size())}; + + for(int t=0;t >, data); +}; + +void MesonTrace(std::string file,LatticePropagator &q1,LatticePropagator &q2,LatticeComplex &phase) +{ + const int nchannel=3; + Gamma::Algebra Gammas[nchannel][2] = { + {Gamma::Algebra::GammaX,Gamma::Algebra::GammaX}, + {Gamma::Algebra::GammaY,Gamma::Algebra::GammaY}, + {Gamma::Algebra::GammaZ,Gamma::Algebra::GammaZ} + }; + + Gamma G5(Gamma::Algebra::Gamma5); + + LatticeComplex meson_CF(q1.Grid()); + MesonFile MF; + + for(int ch=0;ch meson_T; + sliceSum(meson_CF,meson_T, Tdir); + + int nt=meson_T.size(); + + std::vector corr(nt); + for(int t=0;t seeds4({1,2,3,4}); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeGaugeField Umu(UGrid); + std::string config; + RealD M5=1.8; + if( argc > 1 && argv[1][0] != '-' ) + { + std::cout<::ColdConfiguration(Umu); + config="ColdConfig"; + // RealD P=1.0; // Don't scale + RealD P=0.5871119; // 48I + // RealD P=0.6153342; // 64I + // RealD P=0.6388238 // 32Ifine + RealD u0 = sqrt(sqrt(P)); + RealD M5mf = M5 - 4.0*(1.0-u0); + RealD w0 = 1.0 - M5mf; +#if 0 + // M5=1.8 with U=u0 + Umu = Umu * u0; + LLscale = 1.0; + LCscale = 1.0; + std::cout< PointProps(nmass,UGrid); + // std::vector GaussProps(nmass,UGrid); + // std::vector Z2Props (nmass,UGrid); + + for(int m=0;m + +using namespace std; +using namespace Grid; + +RealD LLscale =1.0; +RealD LCscale =1.0; + +template class CovariantLaplacianCshift : public SparseMatrixBase +{ +public: + INHERIT_GIMPL_TYPES(Gimpl); + + GridBase *grid; + GaugeField U; + + CovariantLaplacianCshift(GaugeField &_U) : + grid(_U.Grid()), + U(_U) { }; + + virtual GridBase *Grid(void) { return grid; }; + + virtual void M (const Field &in, Field &out) + { + out=Zero(); + for(int mu=0;mu(U, mu); // NB: Inefficient + out = out - Gimpl::CovShiftForward(Umu,mu,in); + out = out - Gimpl::CovShiftBackward(Umu,mu,in); + out = out + 2.0*in; + } + }; + virtual void Mdag (const Field &in, Field &out) { M(in,out);}; // Laplacian is hermitian + virtual void Mdiag (const Field &in, Field &out) {assert(0);}; // Unimplemented need only for multigrid + virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);}; // Unimplemented need only for multigrid + virtual void MdirAll (const Field &in, std::vector &out) {assert(0);}; // Unimplemented need only for multigrid +}; + +void MakePhase(Coordinate mom,LatticeComplex &phase) +{ + GridBase *grid = phase.Grid(); + auto latt_size = grid->GlobalDimensions(); + ComplexD ci(0.0,1.0); + phase=Zero(); + + LatticeComplex coor(phase.Grid()); + for(int mu=0;mu +void GaussianSmear(LatticeGaugeField &U,Field &unsmeared,Field &smeared) +{ + typedef CovariantLaplacianCshift Laplacian_t; + Laplacian_t Laplacian(U); + + Integer Iterations = 40; + Real width = 2.0; + Real coeff = (width*width) / Real(4*Iterations); + + Field tmp(U.Grid()); + smeared=unsmeared; + // chi = (1-p^2/2N)^N kronecker + for(int n = 0; n < Iterations; ++n) { +
Laplacian.M(smeared,tmp); + smeared = smeared - coeff*tmp; + std::cout << " smear iter " << n<<" " < +void MasslessFreePropagator(Action &D,LatticePropagator &source,LatticePropagator &propagator) +{ + GridBase *UGrid = source.Grid(); + GridBase *FGrid = D.FermionGrid(); + bool fiveD = true; //calculate 5d free propagator + RealD mass = D.Mass(); + LatticeFermion src4 (UGrid); + LatticeFermion result4 (UGrid); + LatticeFermion result5(FGrid); + LatticeFermion src5(FGrid); + LatticePropagator prop5(FGrid); + for(int s=0;s(src4,source,s,c); + + D.ImportPhysicalFermionSource(src4,src5); + D.FreePropagator(src5,result5,mass,true); + std::cout<(prop5,result5,s,c); + FermToProp(propagator,result4,s,c); + } + } + + LatticePropagator Vector_mu(UGrid); + LatticeComplex VV (UGrid); + std::vector sumVV; + Gamma::Algebra GammaV[3] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ + }; + for( int mu=0;mu<3;mu++ ) { + Gamma gV(GammaV[mu]); + D.ContractConservedCurrent(prop5,prop5,Vector_mu,source,Current::Vector,mu); + VV = trace(gV*Vector_mu); // (local) Vector-Vector conserved current + sliceSum(VV,sumVV,Tdir); + int Nt = sumVV.size(); + for(int t=0;t +void MasslessFreePropagator1(Action &D,LatticePropagator &source,LatticePropagator &propagator) +{ + bool fiveD = false; //calculate 4d free propagator + RealD mass = D.Mass(); + GridBase *UGrid = source.Grid(); + LatticeFermion src4 (UGrid); + LatticeFermion result4 (UGrid); + for(int s=0;s(src4,source,s,c); + D.FreePropagator(src4,result4,mass,false); + FermToProp(propagator,result4,s,c); + } + } +} + +template +void Solve(Action &D,LatticePropagator &source,LatticePropagator &propagator) +{ + GridBase *UGrid = D.GaugeGrid(); + GridBase *FGrid = D.FermionGrid(); + + LatticeFermion src4 (UGrid); + LatticeFermion src5 (FGrid); + LatticeFermion result5(FGrid); + LatticeFermion result4(UGrid); + LatticePropagator prop5(FGrid); + + ConjugateGradient CG(1.0e-10,100000); + SchurRedBlackDiagMooeeSolve schur(CG); + ZeroGuesser ZG; // Could be a DeflatedGuesser if have eigenvectors + for(int s=0;s(src4,source,s,c); + + D.ImportPhysicalFermionSource(src4,src5); + + result5=Zero(); + schur(D,src5,result5,ZG); + std::cout<(prop5,result5,s,c); + FermToProp(propagator,result4,s,c); + } + } + LatticePropagator Axial_mu(UGrid); + LatticePropagator Vector_mu(UGrid); + + LatticeComplex PA (UGrid); + LatticeComplex VV (UGrid); + LatticeComplex PJ5q(UGrid); + LatticeComplex PP (UGrid); + + std::vector sumPA; + std::vector sumVV; + std::vector sumPP; + std::vector sumPJ5q; + + Gamma g5(Gamma::Algebra::Gamma5); + D.ContractConservedCurrent(prop5,prop5,Axial_mu,source,Current::Axial,Tdir); + PA = trace(g5*Axial_mu); // Pseudoscalar-Axial conserved current + sliceSum(PA,sumPA,Tdir); + + int Nt{static_cast(sumPA.size())}; + + for(int t=0;t >, data); +}; + +void MesonTrace(std::string file,LatticePropagator &q1,LatticePropagator &q2,LatticeComplex &phase) +{ + const int nchannel=4; + Gamma::Algebra Gammas[nchannel][2] = { + {Gamma::Algebra::GammaXGamma5,Gamma::Algebra::GammaXGamma5}, + {Gamma::Algebra::GammaYGamma5,Gamma::Algebra::GammaYGamma5}, + {Gamma::Algebra::GammaZGamma5,Gamma::Algebra::GammaZGamma5}, + {Gamma::Algebra::Identity,Gamma::Algebra::Identity} + }; + + LatticeComplex meson_CF(q1.Grid()); + MesonFile MF; + + for(int ch=0;ch meson_T; + sliceSum(meson_CF,meson_T, Tdir); + + int nt=meson_T.size(); + + std::vector corr(nt); + for(int t=0;t seeds4({1,2,3,4}); + // GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + 
LatticeGaugeField Umu(UGrid); + std::string config; + RealD M5=atof(getenv("M5")); + RealD mq = atof(getenv("mass")); + int tadpole = atoi(getenv("tadpole")); + std::vector masses({ mq} ); // u/d, s, c ?? + if( argc > 1 && argv[1][0] != '-' ) + { + std::cout<::ColdConfiguration(Umu); + config="ColdConfig"; + // RealD P=1.0; // Don't scale + // RealD P=0.6388238 // 32Ifine + // RealD P=0.6153342; // 64I + RealD P=0.5871119; // 48I + RealD u0 = sqrt(sqrt(P)); + RealD w0 = 1 - M5; + std::cout< boundary = {1,1,1,-1}; + FermionActionD::ImplParams Params(boundary); + RealD b=1.5; + RealD c=0.5; + std::cout< PointProps(nmass,UGrid); + // std::vector FreeProps(nmass,UGrid); + // LatticePropagator delta(UGrid); + + for(int m=0;m + +using namespace std; +using namespace Grid; + +RealD LLscale =1.0; +RealD LCscale =1.0; + +template class CovariantLaplacianCshift : public SparseMatrixBase +{ +public: + INHERIT_GIMPL_TYPES(Gimpl); + + GridBase *grid; + GaugeField U; + + CovariantLaplacianCshift(GaugeField &_U) : + grid(_U.Grid()), + U(_U) { }; + + virtual GridBase *Grid(void) { return grid; }; + + virtual void M (const Field &in, Field &out) + { + out=Zero(); + for(int mu=0;mu(U, mu); // NB: Inefficient + out = out - Gimpl::CovShiftForward(Umu,mu,in); + out = out - Gimpl::CovShiftBackward(Umu,mu,in); + out = out + 2.0*in; + } + }; + virtual void Mdag (const Field &in, Field &out) { M(in,out);}; // Laplacian is hermitian + virtual void Mdiag (const Field &in, Field &out) {assert(0);}; // Unimplemented need only for multigrid + virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);}; // Unimplemented need only for multigrid + virtual void MdirAll (const Field &in, std::vector &out) {assert(0);}; // Unimplemented need only for multigrid +}; + +void MakePhase(Coordinate mom,LatticeComplex &phase) +{ + GridBase *grid = phase.Grid(); + auto latt_size = grid->GlobalDimensions(); + ComplexD ci(0.0,1.0); + phase=Zero(); + + LatticeComplex coor(phase.Grid()); + for(int mu=0;mu +void GaussianSmear(LatticeGaugeField &U,Field &unsmeared,Field &smeared) +{ + typedef CovariantLaplacianCshift Laplacian_t; + Laplacian_t Laplacian(U); + + Integer Iterations = 40; + Real width = 2.0; + Real coeff = (width*width) / Real(4*Iterations); + + Field tmp(U.Grid()); + smeared=unsmeared; + // chi = (1-p^2/2N)^N kronecker + for(int n = 0; n < Iterations; ++n) { + Laplacian.M(smeared,tmp); + smeared = smeared - coeff*tmp; + std::cout << " smear iter " << n<<" " < +void MasslessFreePropagator(Action &D,LatticePropagator &source,LatticePropagator &propagator) +{ + GridBase *UGrid = source.Grid(); + GridBase *FGrid = D.FermionGrid(); + bool fiveD = true; //calculate 5d free propagator + RealD mass = D.Mass(); + LatticeFermion src4 (UGrid); + LatticeFermion result4 (UGrid); + LatticeFermion result5(FGrid); + LatticeFermion src5(FGrid); + LatticePropagator prop5(FGrid); + for(int s=0;s(src4,source,s,c); + + D.ImportPhysicalFermionSource(src4,src5); + D.FreePropagator(src5,result5,mass,true); + std::cout<(prop5,result5,s,c); + FermToProp(propagator,result4,s,c); + } + } + + LatticePropagator Vector_mu(UGrid); + LatticeComplex VV (UGrid); + std::vector sumVV; + Gamma::Algebra GammaV[3] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ + }; + for( int mu=0;mu<3;mu++ ) { + Gamma gV(GammaV[mu]); + D.ContractConservedCurrent(prop5,prop5,Vector_mu,source,Current::Vector,mu); + VV = trace(gV*Vector_mu); // (local) Vector-Vector conserved current + sliceSum(VV,sumVV,Tdir); + int Nt =
sumVV.size(); + for(int t=0;t +void Solve(Action &D,LatticePropagator &source,LatticePropagator &propagator) +{ + GridBase *UGrid = D.GaugeGrid(); + GridBase *FGrid = D.FermionGrid(); + + LatticeFermion src4 (UGrid); + LatticeFermion src5 (FGrid); + LatticeFermion result5(FGrid); + LatticeFermion result4(UGrid); + LatticePropagator prop5(FGrid); + + ConjugateGradient CG(1.0e-6,100000); + SchurRedBlackDiagMooeeSolve schur(CG); + ZeroGuesser ZG; // Could be a DeflatedGuesser if have eigenvectors + for(int s=0;s(src4,source,s,c); + + D.ImportPhysicalFermionSource(src4,src5); + + result5=Zero(); + schur(D,src5,result5,ZG); + std::cout<(prop5,result5,s,c); + FermToProp(propagator,result4,s,c); + } + } + LatticePropagator Axial_mu(UGrid); + LatticePropagator Vector_mu(UGrid); + + LatticeComplex PA (UGrid); + LatticeComplex VV (UGrid); + LatticeComplex PJ5q(UGrid); + LatticeComplex PP (UGrid); + + std::vector sumPA; + std::vector sumVV; + std::vector sumPP; + std::vector sumPJ5q; + + Gamma g5(Gamma::Algebra::Gamma5); + D.ContractConservedCurrent(prop5,prop5,Axial_mu,source,Current::Axial,Tdir); + PA = trace(g5*Axial_mu); // Pseudoscalar-Axial conserved current + sliceSum(PA,sumPA,Tdir); + + int Nt{static_cast(sumPA.size())}; + + for(int t=0;t >, data); +}; + +void MesonTrace(std::string file,LatticePropagator &q1,LatticePropagator &q2,LatticeComplex &phase) +{ + const int nchannel=3; + Gamma::Algebra Gammas[nchannel][2] = { + {Gamma::Algebra::GammaX,Gamma::Algebra::GammaX}, + {Gamma::Algebra::GammaY,Gamma::Algebra::GammaY}, + // {Gamma::Algebra::GammaZ,Gamma::Algebra::GammaZ} + {Gamma::Algebra::Gamma5,Gamma::Algebra::Gamma5} + }; + + Gamma G5(Gamma::Algebra::Gamma5); + + LatticeComplex meson_CF(q1.Grid()); + MesonFile MF; + + for(int ch=0;ch meson_T; + sliceSum(meson_CF,meson_T, Tdir); + + int nt=meson_T.size(); + + std::vector corr(nt); + for(int t=0;t seeds4({1,2,3,4}); + // GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeGaugeField Umu(UGrid); + std::string config; + RealD M5=atof(getenv("M5")); + RealD mq = atof(getenv("mass")); + std::vector masses({ mq} ); // u/d, s, c ?? 
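The slice-summed traces computed in Solve above (PA, PP, PJ5q) are the standard domain-wall Ward-identity ingredients; for reference, with conventions assumed rather than taken from this code, they combine as

\Delta_t \langle \mathcal{A}_0(t)\,P(0)\rangle = 2 m_q \langle P(t)\,P(0)\rangle + 2 \langle J_{5q}(t)\,P(0)\rangle ,
\qquad
m_{\rm res} \simeq \frac{\langle J_{5q}(t)\,P(0)\rangle}{\langle P(t)\,P(0)\rangle}\Big|_{t \gg 0}

where \mathcal{A}_\mu is the conserved (partially conserved) axial current from ContractConservedCurrent and J_{5q} is the midpoint pseudoscalar density.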
+ if( argc > 1 && argv[1][0] != '-' ) + { + std::cout<::ColdConfiguration(Umu); + config="ColdConfig"; + // RealD P=1.0; // Don't scale + // RealD P=0.6153342; // 64I + // RealD P=0.6388238 // 32Ifine + // RealD P=0.5871119; // 48I + // RealD u0 = sqrt(sqrt(P)); + // Umu = Umu * u0; + RealD w0 = 1 - M5; + LLscale = 1.0/(1-w0*w0)/(1-w0*w0); + LCscale = 1.0/(1-w0*w0)/(1-w0*w0); + std::cout< PointProps(nmass,UGrid); + std::vector FreeProps(nmass,UGrid); + LatticePropagator delta(UGrid); + + for(int m=0;m FermActs; + std::vector FermActs; std::cout< boundary = {1,1,1,-1}; - typedef MobiusFermionR FermionAction; + typedef MobiusFermionD FermionAction; FermionAction::ImplParams Params(boundary); for(int m=0;m FermActs; + std::vector FermActs; std::cout< boundary = {1,1,1,-1}; - typedef MobiusFermionR FermionAction; + typedef MobiusFermionD FermionAction; FermionAction::ImplParams Params(boundary); for(int m=0;m +#include +#include +#include +#include +#include +#include +#include +#include + +static int sock; +static const char *sock_path_fmt = "/tmp/GridUnixSocket.%d"; +static char sock_path[256]; + +class UnixSockets { +public: + static void Open(int rank) + { + int errnum; + + sock = socket(AF_UNIX, SOCK_DGRAM, 0); assert(sock>0); + printf("allocated socket %d\n",sock); + + struct sockaddr_un sa_un = { 0 }; + sa_un.sun_family = AF_UNIX; + snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,rank); + unlink(sa_un.sun_path); + if (bind(sock, (struct sockaddr *)&sa_un, sizeof(sa_un))) { + perror("bind failure"); + exit(EXIT_FAILURE); + } + printf("bound socket %d to %s\n",sock,sa_un.sun_path); + } + + static int RecvFileDescriptor(void) + { + int n; + int fd; + char buf[1]; + struct iovec iov; + struct msghdr msg; + struct cmsghdr *cmsg; + char cms[CMSG_SPACE(sizeof(int))]; + + iov.iov_base = buf; + iov.iov_len = 1; + + memset(&msg, 0, sizeof msg); + msg.msg_name = 0; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_control = (caddr_t)cms; + msg.msg_controllen = sizeof cms; + + if((n=recvmsg(sock, &msg, 0)) < 0) { + perror("recvmsg failed"); + return -1; + } + if(n == 0){ + perror("recvmsg returned 0"); + return -1; + } + cmsg = CMSG_FIRSTHDR(&msg); + memmove(&fd, CMSG_DATA(cmsg), sizeof(int)); + printf("received fd %d from socket %d\n",fd,sock); + return fd; + } + + static void SendFileDescriptor(int fildes,int xmit_to_rank) + { + struct msghdr msg; + struct iovec iov; + struct cmsghdr *cmsg = NULL; + char ctrl[CMSG_SPACE(sizeof(int))]; + char data = ' '; + + memset(&msg, 0, sizeof(struct msghdr)); + memset(ctrl, 0, CMSG_SPACE(sizeof(int))); + iov.iov_base = &data; + iov.iov_len = sizeof(data); + + sprintf(sock_path,sock_path_fmt,xmit_to_rank); + printf("sending FD %d over socket %d to rank %d AF_UNIX path %s\n",fildes,sock,xmit_to_rank,sock_path);fflush(stdout); + + struct sockaddr_un sa_un = { 0 }; + sa_un.sun_family = AF_UNIX; + snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,xmit_to_rank); + + msg.msg_name = (void *)&sa_un; + msg.msg_namelen = sizeof(sa_un); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_controllen = CMSG_SPACE(sizeof(int)); + msg.msg_control = ctrl; + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + + *((int *) CMSG_DATA(cmsg)) = fildes; + + if ( sendmsg(sock, &msg, 0) == -1 ) perror("sendmsg failed"); + }; +}; + +int main(int argc, char **argv) +{ + int me = fork()?0:1; + + UnixSockets::Open(me); + + // need MPI barrier + 
sleep(10); + const char * message = "Hello, World\n"; + if( me ) { + int fd = open("foo",O_RDWR|O_CREAT,0666); + if ( fd < 0 ) { + perror("failed to open file"); + exit(EXIT_FAILURE); + } + // rank 1 sends to rank 0 + UnixSockets::SendFileDescriptor(fd,0); + close(fd); + } else { + // rank 0 receives from rank 1 + int fd = UnixSockets::RecvFileDescriptor(); + write(fd,(const void *)message,strlen(message)); + close(fd); + } +} diff --git a/systems/Spock/comms.slurm b/systems/Crusher/comms.slurm similarity index 100% rename from systems/Spock/comms.slurm rename to systems/Crusher/comms.slurm diff --git a/systems/Crusher/config-command b/systems/Crusher/config-command index 90737808..2176fc6b 100644 --- a/systems/Crusher/config-command +++ b/systems/Crusher/config-command @@ -1,12 +1,22 @@ +CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-` ../../configure --enable-comms=mpi-auto \ +--with-lime=$CLIME \ --enable-unified=no \ --enable-shm=nvlink \ +--enable-tracing=timer \ --enable-accelerator=hip \ --enable-gen-simd-width=64 \ --enable-simd=GPU \ +--disable-accelerator-cshift \ +--with-gmp=$OLCF_GMP_ROOT \ +--with-fftw=$FFTW_DIR/.. \ +--with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \ --disable-fermion-reps \ ---disable-gparity \ CXX=hipcc MPICXX=mpicxx \ -CXXFLAGS="-fPIC -I/opt/rocm-4.5.0/include/ -std=c++14 -I${MPICH_DIR}/include " \ - LDFLAGS=" -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa " -HIPFLAGS = --amdgpu-target=gfx90a +CXXFLAGS="-fPIC -I${ROCM_PATH}/include/ -std=c++14 -I${MPICH_DIR}/include -L/lib64 " \ + LDFLAGS="-L/lib64 -L/opt/rocm-5.2.0/lib/ -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lamdhip64 " + + +#--enable-simd=GPU-RRII \ + + diff --git a/systems/Crusher/dwf.slurm b/systems/Crusher/dwf.slurm index 286615ef..20239e80 100644 --- a/systems/Crusher/dwf.slurm +++ b/systems/Crusher/dwf.slurm @@ -3,28 +3,33 @@ #SBATCH -A LGT104 #SBATCH -t 01:00:00 ##SBATCH -U openmpThu -##SBATCH -p ecp #SBATCH -J DWF #SBATCH -o DWF.%J #SBATCH -e DWF.%J #SBATCH -N 1 -#SBATCH -n 1 -#SBATCH --exclusive +#SBATCH -n 8 +#SBATCH --exclusive +#SBATCH --gpu-bind=map_gpu:0,1,2,3,7,6,5,4 +#export BIND="--cpu-bind=verbose,map_ldom:3,3,1,1,2,2,0,0" DIR=.
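The commented-out --enable-simd=GPU-RRII line in the config-command above refers to the RRRR / IIII layout added to configure.ac earlier in this patch: SIMD vectors of complex numbers stored as all real parts followed by all imaginary parts, so complex arithmetic becomes shuffle-free real arithmetic. A minimal sketch of the layout with illustrative types (not Grid's actual vComplex implementation):

// Sketch only: split real/imag ("RRII") storage for a width-N complex vector.
template<int N> struct vComplexRRII {
  float re[N];                        // RRRR...
  float im[N];                        // IIII...
};
template<int N>
vComplexRRII<N> mul(const vComplexRRII<N> &a, const vComplexRRII<N> &b)
{
  vComplexRRII<N> c;
  for(int i=0;i<N;i++){               // (ar + i ai)(br + i bi)
    c.re[i] = a.re[i]*b.re[i] - a.im[i]*b.im[i];
    c.im[i] = a.re[i]*b.im[i] + a.im[i]*b.re[i];
  }
  return c;
}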
-module list -#export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0 +source sourceme.sh +export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=16384 export MPICH_GPU_SUPPORT_ENABLED=1 -export MPICH_SMP_SINGLE_COPY_MODE=XPMEM -#export MPICH_SMP_SINGLE_COPY_MODE=NONE -#export MPICH_SMP_SINGLE_COPY_MODE=CMA -export OMP_NUM_THREADS=1 +export MPICH_SMP_SINGLE_COPY_MODE=CMA +export OMP_NUM_THREADS=4 +export MPICH_OFI_NIC_POLICY=GPU -AT=8 -echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE +#PARAMS=" --accelerator-threads 8 --grid 64.64.32.16 --mpi 2.2.2.1 --comms-sequential --shm 2048 --shm-mpi 1" +#srun --gpus-per-task 1 -n8 $BIND ./wrap.sh ./benchmarks/Benchmark_dwf_fp32 $PARAMS -PARAMS=" --accelerator-threads ${AT} --grid 24.24.24.24 --shm-mpi 0 --mpi 1.1.1.1" +PARAMS=" --accelerator-threads 8 --grid 16.16.32.32 --mpi 1.1.1.1 --comms-sequential --shm 2048 --shm-mpi 1" +srun --gpus-per-task 1 -n1 $BIND ./wrap.sh ./benchmarks/Benchmark_dwf_fp32 $PARAMS -srun --gpus-per-task 1 -n1 ./benchmarks/Benchmark_dwf_fp32 $PARAMS +PARAMS=" --accelerator-threads 8 --grid 32.16.32.32 --mpi 1.1.1.2 --comms-sequential --shm 2048 --shm-mpi 1" +srun --gpus-per-task 1 -n2 $BIND ./wrap.sh ./benchmarks/Benchmark_dwf_fp32 $PARAMS + +PARAMS=" --accelerator-threads 8 --grid 32.32.32.64 --mpi 1.2.2.2 --comms-sequential --shm 2048 --shm-mpi 1" +srun --gpus-per-task 1 -n8 $BIND ./wrap.sh ./benchmarks/Benchmark_dwf_fp32 $PARAMS diff --git a/systems/Crusher/dwf4.slurm b/systems/Crusher/dwf4.slurm index 6bb953c4..8e2a6e4c 100644 --- a/systems/Crusher/dwf4.slurm +++ b/systems/Crusher/dwf4.slurm @@ -7,21 +7,19 @@ #SBATCH -o DWF.%J #SBATCH -e DWF.%J #SBATCH -N 1 -#SBATCH -n 4 -#SBATCH --exclusive +#SBATCH -n 2 +#SBATCH --gpu-bind=map_gpu:0,1 DIR=. -module list +source setup.sh + +export MPICH_OFI_NIC_POLICY=GPU export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0 export MPICH_GPU_SUPPORT_ENABLED=1 -#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM -export MPICH_SMP_SINGLE_COPY_MODE=NONE -#export MPICH_SMP_SINGLE_COPY_MODE=CMA -export OMP_NUM_THREADS=4 +export OMP_NUM_THREADS=16 echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE -PARAMS=" --accelerator-threads 8 --grid 32.32.64.64 --mpi 1.1.2.2 --comms-overlap --shm 2048 --shm-mpi 0" -srun --gpus-per-task 1 -n4 ./mpiwrapper.sh ./benchmarks/Benchmark_dwf_fp32 $PARAMS +srun --gpus-per-task 1 -N1 -n2 ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 16.16.32.64 --shm-mpi 1 --shm 2048 --comms-sequential --accelerator-threads 8 diff --git a/systems/Crusher/dwf8.slurm b/systems/Crusher/dwf8.slurm index 30e83fff..4bc1917a 100644 --- a/systems/Crusher/dwf8.slurm +++ b/systems/Crusher/dwf8.slurm @@ -8,20 +8,21 @@ #SBATCH -e DWF.%J #SBATCH -N 1 #SBATCH -n 8 -#SBATCH --exclusive +##SBATCH --gpu-bind=map_gpu:0,1,2,3,7,6,5,4 +#SBATCH --gpu-bind=map_gpu:0,1,2,3,6,7,4,5 DIR=. 
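The --gpu-bind=map_gpu lists in the job scripts above and the mpiwrapper.sh change below encode the same idea: permute GCD indices so each local rank lands on the GPU closest to its NUMA domain. A C++ sketch of an equivalent in-process binding, assuming HIP and the permutation from mpiwrapper.sh (illustrative only; the real scripts do this via ROCR_VISIBLE_DEVICES before the binary starts, which also constrains the runtime's view of the devices):

// Hypothetical in-process analogue of mpiwrapper.sh's lgpu=(0 1 2 3 7 6 5 4).
#include <hip/hip_runtime.h>
#include <cstdlib>
void BindLocalRankToGcd(void)
{
  static const int lgpu[8] = {0,1,2,3,7,6,5,4};  // local rank -> GCD index
  const char *lr = std::getenv("SLURM_LOCALID");
  int lrank = lr ? std::atoi(lr) : 0;
  (void)hipSetDevice(lgpu[lrank % 8]);           // one GCD per MPI rank
}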
-module list +source setup.sh + +export MPICH_OFI_NIC_POLICY=GPU export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0 export MPICH_GPU_SUPPORT_ENABLED=1 -export MPICH_SMP_SINGLE_COPY_MODE=XPMEM -#export MPICH_SMP_SINGLE_COPY_MODE=NONE +#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM #export MPICH_SMP_SINGLE_COPY_MODE=CMA -export OMP_NUM_THREADS=1 +#export MPICH_SMP_SINGLE_COPY_MODE=NONE +export OMP_NUM_THREADS=16 echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE -PARAMS=" --accelerator-threads 8 --grid 32.64.64.64 --mpi 1.2.2.2 --comms-overlap --shm 2048 --shm-mpi 0" - -srun --gpus-per-task 1 -n8 ./mpiwrapper.sh ./benchmarks/Benchmark_dwf_fp32 $PARAMS +srun --gpus-per-task 1 -N1 -n8 ./benchmarks/Benchmark_comms_host_device --mpi 2.2.2.1 --shm-mpi 1 --shm 2048 --comms-sequential --accelerator-threads 8 diff --git a/systems/Crusher/mpiwrapper.sh b/systems/Crusher/mpiwrapper.sh index 76c4e364..f6a56698 100755 --- a/systems/Crusher/mpiwrapper.sh +++ b/systems/Crusher/mpiwrapper.sh @@ -1,10 +1,11 @@ #!/bin/bash lrank=$SLURM_LOCALID +lgpu=(0 1 2 3 7 6 5 4) -export ROCR_VISIBLE_DEVICES=$SLURM_LOCALID +export ROCR_VISIBLE_DEVICES=${lgpu[$lrank]} -echo "`hostname` - $lrank device=$ROCR_VISIBLE_DEVICES binding=$BINDING" +echo "`hostname` - $lrank device=$ROCR_VISIBLE_DEVICES " $* diff --git a/systems/Crusher/sourceme.sh b/systems/Crusher/sourceme.sh index 3f400ca4..3fc39c63 100644 --- a/systems/Crusher/sourceme.sh +++ b/systems/Crusher/sourceme.sh @@ -1,5 +1,15 @@ +. /autofs/nccs-svm1_home1/paboyle/Crusher/Grid/spack/share/spack/setup-env.sh +spack load c-lime +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/sw/crusher/spack-envs/base/opt/cray-sles15-zen3/gcc-11.2.0/gperftools-2.9.1-72ubwtuc5wcz2meqltbfdb76epufgzo2/lib +module load emacs +#module load gperftools module load PrgEnv-gnu -module load rocm/4.5.0 +module load rocm/5.3.0 +#module load cray-mpich/8.1.16 +module load cray-mpich/8.1.17 module load gmp module load cray-fftw module load craype-accel-amd-gfx90a +export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH +#Hack for lib +export LD_LIBRARY_PATH=`pwd`:$LD_LIBRARY_PATH diff --git a/systems/Crusher/wrap.sh b/systems/Crusher/wrap.sh new file mode 100755 index 00000000..eb58353c --- /dev/null +++ b/systems/Crusher/wrap.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +export HIP_VISIBLE_DEVICES=$ROCR_VISIBLE_DEVICES +unset ROCR_VISIBLE_DEVICES + +#rank=$SLURM_PROCID +#rocprof -d rocprof.$rank -o rocprof.$rank/results.rank$SLURM_PROCID.csv --sys-trace $@ + +$@ diff --git a/systems/PVC/benchmarks/run-1tile.sh b/systems/PVC/benchmarks/run-1tile.sh index 0fe80247..9a29b773 100755 --- a/systems/PVC/benchmarks/run-1tile.sh +++ b/systems/PVC/benchmarks/run-1tile.sh @@ -4,7 +4,7 @@ #SBATCH -p QZ1J-ICX-PVC ##SBATCH -p QZ1J-SPR-PVC-2C -source /nfs/site/home/paboylex/ATS/GridNew/Grid/systems/PVC-nightly/setup.sh +#source /nfs/site/home/paboylex/ATS/GridNew/Grid/systems/PVC-nightly/setup.sh export NT=8 diff --git a/systems/PVC/benchmarks/run-2tile-mpi.sh b/systems/PVC/benchmarks/run-2tile-mpi.sh index cefab776..1db67508 100755 --- a/systems/PVC/benchmarks/run-2tile-mpi.sh +++ b/systems/PVC/benchmarks/run-2tile-mpi.sh @@ -4,7 +4,7 @@ #SBATCH -p QZ1J-ICX-PVC -source /nfs/site/home/paboylex/ATS/GridNew/Grid/systems/PVC-nightly/setup.sh +#source /nfs/site/home/paboylex/ATS/GridNew/Grid/systems/PVC-nightly/setup.sh export NT=16 @@ -19,16 +19,15 @@ export SYCL_DEVICE_FILTER=gpu,level_zero export I_MPI_OFFLOAD_CELL=tile export EnableImplicitScaling=0 export EnableWalkerPartition=0 -export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=1 
-export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 +#export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=1 +#export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0 -for i in 0 +for i in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 do -mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads $NT --shm-mpi 1 --device-mem 32768 -mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --shm-mpi 1 --device-mem 32768 +mpiexec -launcher ssh -n 2 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads $NT --shm-mpi 0 --device-mem 32768 > 1.1.1.2.log$i +mpiexec -launcher ssh -n 2 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --shm-mpi 0 --device-mem 32768 > 2.1.1.1.log$i done -#mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_halo --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads $NT --shm-mpi 1 > halo.2tile.1x2.log -#mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_halo --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --shm-mpi 1 > halo.2tile.2x1.log +mpiexec -launcher ssh -n 2 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --comms-sequential --shm-mpi 0 diff --git a/systems/PVC/benchmarks/wrap.sh b/systems/PVC/benchmarks/wrap.sh index bb7b517d..b8806b30 100755 --- a/systems/PVC/benchmarks/wrap.sh +++ b/systems/PVC/benchmarks/wrap.sh @@ -5,10 +5,5 @@ export ZE_AFFINITY_MASK=0.$MPI_LOCALRANKID echo Rank $MPI_LOCALRANKID ZE_AFFINITY_MASK is $ZE_AFFINITY_MASK -if [ $MPI_LOCALRANKID = "0" ] -then -# ~psteinbr/build_pti/ze_tracer -h $@ - onetrace --chrome-device-timeline $@ -else $@ -fi + diff --git a/systems/PVC/config-command b/systems/PVC/config-command index cd7bba1d..c3523c2d 100644 --- a/systems/PVC/config-command +++ b/systems/PVC/config-command @@ -1,4 +1,4 @@ -INSTALL=/nfs/site/home/azusayax/install +INSTALL=/nfs/site/home/paboylx/prereqs/ ../../configure \ --enable-simd=GPU \ --enable-gen-simd-width=64 \ diff --git a/systems/PVC/setup.sh b/systems/PVC/setup.sh index 2a6f920b..c3b97ce0 100644 --- a/systems/PVC/setup.sh +++ b/systems/PVC/setup.sh @@ -1,9 +1,16 @@ export https_proxy=http://proxy-chain.intel.com:911 -export LD_LIBRARY_PATH=/nfs/site/home/azusayax/install/lib:$LD_LIBRARY_PATH +#export LD_LIBRARY_PATH=/nfs/site/home/azusayax/install/lib:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=$HOME/prereqs/lib/:$LD_LIBRARY_PATH module load intel-release -source /opt/intel/oneapi/PVC_setup.sh +module load intel-comp-rt/embargo-ci-neo + +#source /opt/intel/oneapi/PVC_setup.sh #source /opt/intel/oneapi/ATS_setup.sh +#module load intel-nightly/20230331 +#module load intel-comp-rt/ci-neo-master/026093 + +#module load intel/mpich module load intel/mpich/pvc45.3 export PATH=~/ATS/pti-gpu/tools/onetrace/:$PATH diff --git a/systems/Perlmutter/config-command b/systems/Perlmutter/config-command index b399c535..4f7ecee3 100644 --- a/systems/Perlmutter/config-command +++ b/systems/Perlmutter/config-command @@ -1,9 +1,14 @@ +DIR=`pwd` +PREFIX=$DIR/../Prequisites/install/ ../../configure \ --enable-comms=mpi \ --enable-simd=GPU \ --enable-shm=nvlink \ --enable-gen-simd-width=64 \ --enable-accelerator=cuda \ + --enable-setdevice \ + --disable-accelerator-cshift \ + --with-gmp=$PREFIX \
--disable-fermion-reps \ --disable-unified \ --disable-gparity \ diff --git a/systems/Perlmutter/dwf4.slurm b/systems/Perlmutter/dwf4.slurm index ba198595..426573d9 100644 --- a/systems/Perlmutter/dwf4.slurm +++ b/systems/Perlmutter/dwf4.slurm @@ -1,24 +1,27 @@ #!/bin/bash -#SBATCH -A mp13 +#SBATCH -A m3886_g #SBATCH -C gpu -#SBATCH -q regular +#SBATCH -q debug #SBATCH -t 0:20:00 -#SBATCH -n 16 -#SBATCH --ntasks-per-node=4 #SBATCH -c 32 -#SBATCH --exclusive +#SBATCH -N 1 +#SBATCH -n 4 +#SBATCH --ntasks-per-node=4 #SBATCH --gpus-per-task=1 -#SBATCH --gpu-bind=map_gpu:0,1,2,3 +#SBATCH --exclusive +#SBATCH --gpu-bind=none export SLURM_CPU_BIND="cores" -export MPICH_RDMA_ENABLED_CUDA=1 export MPICH_GPU_SUPPORT_ENABLED=1 -srun ./benchmarks/Benchmark_comms_host_device --mpi 2.2.2.2 --accelerator-threads 8 > comms.4node +export MPICH_RDMA_ENABLED_CUDA=1 +export MPICH_GPU_IPC_ENABLED=1 +export MPICH_GPU_EAGER_REGISTER_HOST_MEM=0 +export MPICH_GPU_NO_ASYNC_MEMCPY=0 +#export MPICH_SMP_SINGLE_COPY_MODE=CMA -OPT="--comms-overlap --comms-concurrent --shm-mpi 0" -srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 64.64.64.64 --accelerator-threads 8 --shm 2048 $OPT > dwf.64.64.64.64.4node.opt0 -srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 48.48.48.48 --accelerator-threads 8 --shm 2048 $OPT > dwf.48.48.48.48.4node.opt0 +OPT="--comms-sequential --shm-mpi 1" +VOL=64.64.64.64 +srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.1.1 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT +#srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.1.1.4 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT +#srun ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.8 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT -OPT="--comms-overlap --comms-concurrent --shm-mpi 1" -srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 64.64.64.64 --accelerator-threads 8 --shm 2048 $OPT > dwf.64.64.64.64.4node.opt1 -srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 48.48.48.48 --accelerator-threads 8 --shm 2048 $OPT > dwf.48.48.48.48.4node.opt1 diff --git a/systems/Perlmutter/sourceme.sh b/systems/Perlmutter/sourceme.sh index 9359dea9..6d09b1c9 100644 --- a/systems/Perlmutter/sourceme.sh +++ b/systems/Perlmutter/sourceme.sh @@ -1,4 +1,4 @@ export CRAY_ACCEL_TARGET=nvidia80 -module load PrgEnv-gnu cpe-cuda cuda +module load PrgEnv-gnu cpe-cuda cudatoolkit/11.4 diff --git a/systems/Spock/config-command b/systems/Spock/config-command deleted file mode 100644 index 70c97c37..00000000 --- a/systems/Spock/config-command +++ /dev/null @@ -1,12 +0,0 @@ -../../configure --enable-comms=mpi-auto \ ---enable-unified=no \ ---enable-shm=nvlink \ ---enable-accelerator=hip \ ---enable-gen-simd-width=64 \ ---enable-simd=GPU \ ---disable-fermion-reps \ ---disable-gparity \ -CXX=hipcc MPICXX=mpicxx \ -CXXFLAGS="-fPIC -I/opt/rocm-4.3.0/include/ -std=c++14 -I${MPICH_DIR}/include " \ ---prefix=/ccs/home/chulwoo/Grid \ - LDFLAGS=" -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa " diff --git a/systems/Spock/dwf.slurm b/systems/Spock/dwf.slurm deleted file mode 100644 index 7144a270..00000000 --- a/systems/Spock/dwf.slurm +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -# Begin LSF Directives -#SBATCH -A LGT104 -#SBATCH -t 01:00:00 -##SBATCH -U openmpThu -#SBATCH -p ecp -#SBATCH -J DWF -#SBATCH -o DWF.%J -#SBATCH -e DWF.%J -#SBATCH -N 1 -#SBATCH -n 1 - -DIR=. 
-module list -export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0 -export MPICH_GPU_SUPPORT_ENABLED=1 -#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM -#export MPICH_SMP_SINGLE_COPY_MODE=NONE -export MPICH_SMP_SINGLE_COPY_MODE=CMA -export OMP_NUM_THREADS=8 - -AT=8 -echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE -PARAMS=" --accelerator-threads ${AT} --grid 32.32.32.32 --mpi 1.1.1.1 --comms-overlap" -srun -n1 --label -c$OMP_NUM_THREADS --gpus-per-task=1 ./mpiwrapper.sh ./benchmarks/Benchmark_dwf_fp32 $PARAMS - diff --git a/systems/Spock/dwf4.slurm b/systems/Spock/dwf4.slurm deleted file mode 100644 index 261929ab..00000000 --- a/systems/Spock/dwf4.slurm +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -# Begin LSF Directives -#SBATCH -A LGT104 -#SBATCH -t 01:00:00 -##SBATCH -U openmpThu -#SBATCH -p ecp -#SBATCH -J DWF -#SBATCH -o DWF.%J -#SBATCH -e DWF.%J -#SBATCH -N 1 -#SBATCH -n 4 - -DIR=. -module list -export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0 -export MPICH_GPU_SUPPORT_ENABLED=1 -#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM -export MPICH_SMP_SINGLE_COPY_MODE=NONE -#export MPICH_SMP_SINGLE_COPY_MODE=CMA -export OMP_NUM_THREADS=8 - -AT=8 -echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE -PARAMS=" --accelerator-threads ${AT} --grid 32.32.64.64 --mpi 1.1.2.2 --comms-overlap --shm 2048 --shm-mpi 0" -srun -n4 --label -c$OMP_NUM_THREADS --gpus-per-task=1 ./mpiwrapper.sh ./benchmarks/Benchmark_dwf_fp32 $PARAMS - diff --git a/systems/Spock/dwf8.slurm b/systems/Spock/dwf8.slurm deleted file mode 100644 index c4672db0..00000000 --- a/systems/Spock/dwf8.slurm +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -# Begin LSF Directives -#SBATCH -A LGT104 -#SBATCH -t 01:00:00 -##SBATCH -U openmpThu -#SBATCH -p ecp -#SBATCH -J DWF -#SBATCH -o DWF.%J -#SBATCH -e DWF.%J -#SBATCH -N 2 -#SBATCH -n 8 - -DIR=. 
-module list -export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0 -export MPICH_GPU_SUPPORT_ENABLED=1 -#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM -export MPICH_SMP_SINGLE_COPY_MODE=NONE -#export MPICH_SMP_SINGLE_COPY_MODE=CMA -export OMP_NUM_THREADS=8 - -AT=8 -echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE -PARAMS=" --accelerator-threads ${AT} --grid 32.64.64.64 --mpi 1.2.2.2 --comms-overlap --shm 2048 --shm-mpi 0" -srun -n8 --label -c$OMP_NUM_THREADS --gpus-per-task=1 ./mpiwrapper.sh ./benchmarks/Benchmark_dwf_fp32 $PARAMS - diff --git a/systems/Spock/mpiwrapper.sh b/systems/Spock/mpiwrapper.sh deleted file mode 100755 index 76c4e364..00000000 --- a/systems/Spock/mpiwrapper.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -lrank=$SLURM_LOCALID - -export ROCR_VISIBLE_DEVICES=$SLURM_LOCALID - -echo "`hostname` - $lrank device=$ROCR_VISIBLE_DEVICES binding=$BINDING" - -$* - - - diff --git a/systems/Spock/sourceme.sh b/systems/Spock/sourceme.sh deleted file mode 100644 index 40d864b5..00000000 --- a/systems/Spock/sourceme.sh +++ /dev/null @@ -1,5 +0,0 @@ -module load PrgEnv-gnu -module load rocm/4.3.0 -module load gmp -module load cray-fftw -module load craype-accel-amd-gfx908 diff --git a/systems/Summit/comms.4node b/systems/Summit/comms.4node deleted file mode 100644 index b0df0801..00000000 --- a/systems/Summit/comms.4node +++ /dev/null @@ -1,179 +0,0 @@ -OPENMPI detected -AcceleratorCudaInit[0]: ======================== -AcceleratorCudaInit[0]: Device Number : 0 -AcceleratorCudaInit[0]: ======================== -AcceleratorCudaInit[0]: Device identifier: Tesla V100-SXM2-16GB -AcceleratorCudaInit[0]: totalGlobalMem: 16911433728 -AcceleratorCudaInit[0]: managedMemory: 1 -AcceleratorCudaInit[0]: isMultiGpuBoard: 0 -AcceleratorCudaInit[0]: warpSize: 32 -AcceleratorCudaInit[0]: pciBusID: 4 -AcceleratorCudaInit[0]: pciDeviceID: 0 -AcceleratorCudaInit[0]: maxGridSize (2147483647,65535,65535) -AcceleratorCudaInit: rank 0 setting device to node rank 0 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -local rank 0 device 0 bus id: 0004:04:00.0 -AcceleratorCudaInit: ================================================ -SharedMemoryMpi: World communicator of size 24 -SharedMemoryMpi: Node communicator of size 6 -0SharedMemoryMpi: SharedMemoryMPI.cc acceleratorAllocDevice 1073741824bytes at 0x200060000000 for comms buffers -Setting up IPC - -__|__|__|__|__|__|__|__|__|__|__|__|__|__|__ -__|__|__|__|__|__|__|__|__|__|__|__|__|__|__ -__|_ | | | | | | | | | | | | _|__ -__|_ _|__ -__|_ GGGG RRRR III DDDD _|__ -__|_ G R R I D D _|__ -__|_ G R R I D D _|__ -__|_ G GG RRRR I D D _|__ -__|_ G G R R I D D _|__ -__|_ GGGG R R III DDDD _|__ -__|_ _|__ -__|__|__|__|__|__|__|__|__|__|__|__|__|__|__ -__|__|__|__|__|__|__|__|__|__|__|__|__|__|__ - | | | | | | | | | | | | | | - - -Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
-Current Grid git commit hash=7cb1ff7395a5833ded6526c43891bd07a0436290: (HEAD -> develop, origin/develop, origin/HEAD) clean - -Grid : Message : ================================================ -Grid : Message : MPI is initialised and logging filters activated -Grid : Message : ================================================ -Grid : Message : Requested 1073741824 byte stencil comms buffers -AcceleratorCudaInit: rank 1 setting device to node rank 1 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -local rank 1 device 1 bus id: 0004:05:00.0 -AcceleratorCudaInit: rank 2 setting device to node rank 2 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -local rank 2 device 2 bus id: 0004:06:00.0 -AcceleratorCudaInit: rank 5 setting device to node rank 5 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -local rank 5 device 5 bus id: 0035:05:00.0 -AcceleratorCudaInit: rank 4 setting device to node rank 4 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -local rank 4 device 4 bus id: 0035:04:00.0 -AcceleratorCudaInit: rank 3 setting device to node rank 3 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -local rank 3 device 3 bus id: 0035:03:00.0 -Grid : Message : MemoryManager Cache 13529146982 bytes -Grid : Message : MemoryManager::Init() setting up -Grid : Message : MemoryManager::Init() cache pool for recent allocations: SMALL 8 LARGE 2 -Grid : Message : MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory -Grid : Message : MemoryManager::Init() Using cudaMalloc -Grid : Message : 2.137929 s : Grid is setup to use 6 threads -Grid : Message : 2.137941 s : Number of iterations to average: 250 -Grid : Message : 2.137950 s : ==================================================================================================== -Grid : Message : 2.137958 s : = Benchmarking sequential halo exchange from host memory -Grid : Message : 2.137966 s : ==================================================================================================== -Grid : Message : 2.137974 s : L Ls bytes MB/s uni MB/s bidi -AcceleratorCudaInit: rank 22 setting device to node rank 4 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 10 setting device to node rank 4 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 15 setting device to node rank 3 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 21 setting device to node rank 3 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 20 setting device to node rank 2 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 7 setting device to node rank 1 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 9 setting device to node rank 3 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 11 setting device to node rank 5 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 8 setting device to node rank 2 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 6 setting device to node rank 0 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 19 setting device to node rank 1 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 23 setting device to node rank 5 -AcceleratorCudaInit: 
Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 18 setting device to node rank 0 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 12 setting device to node rank 0 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 16 setting device to node rank 4 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 13 setting device to node rank 1 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 14 setting device to node rank 2 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 17 setting device to node rank 5 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -Grid : Message : 2.604949 s : 8 8 393216 89973.9 179947.8 -Grid : Message : 2.668249 s : 8 8 393216 18650.3 37300.5 -Grid : Message : 2.732288 s : 8 8 393216 18428.5 36857.1 -Grid : Message : 2.753565 s : 8 8 393216 55497.2 110994.4 -Grid : Message : 2.808960 s : 12 8 1327104 100181.5 200363.0 -Grid : Message : 3.226900 s : 12 8 1327104 20600.5 41201.0 -Grid : Message : 3.167459 s : 12 8 1327104 24104.6 48209.2 -Grid : Message : 3.227660 s : 12 8 1327104 66156.7 132313.5 -Grid : Message : 3.413570 s : 16 8 3145728 56174.4 112348.8 -Grid : Message : 3.802697 s : 16 8 3145728 24255.9 48511.7 -Grid : Message : 4.190498 s : 16 8 3145728 24336.7 48673.4 -Grid : Message : 4.385171 s : 16 8 3145728 48484.1 96968.2 -Grid : Message : 4.805284 s : 20 8 6144000 46380.5 92761.1 -Grid : Message : 5.562975 s : 20 8 6144000 24328.5 48656.9 -Grid : Message : 6.322562 s : 20 8 6144000 24266.7 48533.4 -Grid : Message : 6.773598 s : 20 8 6144000 40868.5 81736.9 -Grid : Message : 7.600999 s : 24 8 10616832 40198.3 80396.6 -Grid : Message : 8.912917 s : 24 8 10616832 24279.5 48559.1 -Grid : Message : 10.220961 s : 24 8 10616832 24350.2 48700.4 -Grid : Message : 11.728250 s : 24 8 10616832 37390.9 74781.8 -Grid : Message : 12.497258 s : 28 8 16859136 36792.2 73584.5 -Grid : Message : 14.585387 s : 28 8 16859136 24222.2 48444.3 -Grid : Message : 16.664783 s : 28 8 16859136 24323.4 48646.8 -Grid : Message : 17.955238 s : 28 8 16859136 39194.7 78389.4 -Grid : Message : 20.136479 s : 32 8 25165824 35718.3 71436.5 -Grid : Message : 23.241958 s : 32 8 25165824 24311.4 48622.9 -Grid : Message : 26.344810 s : 32 8 25165824 24331.9 48663.7 -Grid : Message : 28.384420 s : 32 8 25165824 37016.3 74032.7 -Grid : Message : 28.388879 s : ==================================================================================================== -Grid : Message : 28.388894 s : = Benchmarking sequential halo exchange from GPU memory -Grid : Message : 28.388909 s : ==================================================================================================== -Grid : Message : 28.388924 s : L Ls bytes MB/s uni MB/s bidi -Grid : Message : 28.553993 s : 8 8 393216 8272.4 16544.7 -Grid : Message : 28.679592 s : 8 8 393216 9395.4 18790.8 -Grid : Message : 28.811112 s : 8 8 393216 8971.0 17942.0 -Grid : Message : 28.843770 s : 8 8 393216 36145.6 72291.2 -Grid : Message : 28.981754 s : 12 8 1327104 49591.6 99183.2 -Grid : Message : 29.299764 s : 12 8 1327104 12520.8 25041.7 -Grid : Message : 29.620288 s : 12 8 1327104 12422.2 24844.4 -Grid : Message : 29.657645 s : 12 8 1327104 106637.5 213275.1 -Grid : Message : 29.952933 s : 16 8 3145728 43939.2 87878.5 -Grid : Message : 30.585411 s : 16 8 3145728 14922.1 29844.2 -Grid : Message : 31.219781 s : 16 8 3145728 14877.2 
29754.4 -Grid : Message : 31.285017 s : 16 8 3145728 144724.3 289448.7 -Grid : Message : 31.706443 s : 20 8 6144000 54676.2 109352.4 -Grid : Message : 32.739205 s : 20 8 6144000 17848.0 35696.1 -Grid : Message : 33.771852 s : 20 8 6144000 17849.9 35699.7 -Grid : Message : 33.871981 s : 20 8 6144000 184141.4 368282.8 -Grid : Message : 34.536808 s : 24 8 10616832 55784.3 111568.6 -Grid : Message : 36.275648 s : 24 8 10616832 18317.6 36635.3 -Grid : Message : 37.997181 s : 24 8 10616832 18501.7 37003.4 -Grid : Message : 38.140442 s : 24 8 10616832 222383.9 444767.9 -Grid : Message : 39.177222 s : 28 8 16859136 56609.7 113219.4 -Grid : Message : 41.874755 s : 28 8 16859136 18749.9 37499.8 -Grid : Message : 44.529381 s : 28 8 16859136 19052.9 38105.8 -Grid : Message : 44.742192 s : 28 8 16859136 237717.1 475434.2 -Grid : Message : 46.184000 s : 32 8 25165824 57091.2 114182.4 -Grid : Message : 50.734740 s : 32 8 25165824 19411.0 38821.9 -Grid : Message : 53.931228 s : 32 8 25165824 19570.6 39141.2 -Grid : Message : 54.238467 s : 32 8 25165824 245765.6 491531.2 -Grid : Message : 54.268664 s : ==================================================================================================== -Grid : Message : 54.268680 s : = All done; Bye Bye -Grid : Message : 54.268691 s : ==================================================================================================== diff --git a/systems/Summit/config-command b/systems/Summit/config-command index b565addc..2a856be0 100644 --- a/systems/Summit/config-command +++ b/systems/Summit/config-command @@ -2,11 +2,12 @@ --enable-simd=GPU \ --enable-gen-simd-width=32 \ --enable-unified=no \ - --enable-shm=nvlink \ - --disable-gparity \ - --enable-setdevice \ + --enable-shm=no \ + --enable-gparity \ + --disable-setdevice \ --disable-fermion-reps \ --enable-accelerator=cuda \ + --enable-accelerator-cshift \ --prefix /ccs/home/paboyle/prefix \ CXX=nvcc \ LDFLAGS=-L/ccs/home/paboyle/prefix/lib/ \ diff --git a/systems/Summit/dwf.24.4node b/systems/Summit/dwf.24.4node index 212e471c..d0ca9697 100644 --- a/systems/Summit/dwf.24.4node +++ b/systems/Summit/dwf.24.4node @@ -10,19 +10,16 @@ AcceleratorCudaInit[0]: warpSize: 32 AcceleratorCudaInit[0]: pciBusID: 4 AcceleratorCudaInit[0]: pciDeviceID: 0 AcceleratorCudaInit[0]: maxGridSize (2147483647,65535,65535) -AcceleratorCudaInit: rank 0 setting device to node rank 0 -AcceleratorCudaInit: Configure options --enable-setdevice=yes +AcceleratorCudaInit: using default device +AcceleratorCudaInit: assume user either uses +AcceleratorCudaInit: a) IBM jsrun, or +AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding +AcceleratorCudaInit: Configure options --enable-setdevice=no local rank 0 device 0 bus id: 0004:04:00.0 AcceleratorCudaInit: ================================================ SharedMemoryMpi: World communicator of size 24 -SharedMemoryMpi: Node communicator of size 6 -0SharedMemoryMpi: SharedMemoryMPI.cc acceleratorAllocDevice 2147483648bytes at 0x200080000000 for comms buffers -AcceleratorCudaInit: rank 3 setting device to node rank 3 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -local rank 3 device 3 bus id: 0035:03:00.0 -AcceleratorCudaInit: rank 5 setting device to node rank 5 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -local rank 5 device 5 bus id: 0035:05:00.0 +SharedMemoryMpi: Node communicator of size 1 +0SharedMemoryMpi: SharedMemoryMPI.cc acceleratorAllocDevice 1073741824bytes at 0x200080000000 
- 2000bfffffff for comms buffers Setting up IPC __|__|__|__|__|__|__|__|__|__|__|__|__|__|__ @@ -36,6 +33,11 @@ __|_ G GG RRRR I D D _|__ __|_ G G R R I D D _|__ __|_ GGGG R R III DDDD _|__ __|_ _|__ +local rank 5 device 0 bus id: 0035:05:00.0 +local rank 1 device 0 bus id: 0004:05:00.0 +local rank 2 device 0 bus id: 0004:06:00.0 +local rank 3 device 0 bus id: 0035:03:00.0 +local rank 4 device 0 bus id: 0035:04:00.0 __|__|__|__|__|__|__|__|__|__|__|__|__|__|__ __|__|__|__|__|__|__|__|__|__|__|__|__|__|__ | | | | | | | | | | | | | | @@ -45,15 +47,6 @@ Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli a This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -AcceleratorCudaInit: rank 4 setting device to node rank 4 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -local rank 4 device 4 bus id: 0035:04:00.0 -AcceleratorCudaInit: rank 1 setting device to node rank 1 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -local rank 1 device 1 bus id: 0004:05:00.0 -AcceleratorCudaInit: rank 2 setting device to node rank 2 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -local rank 2 device 2 bus id: 0004:06:00.0 the Free Software Foundation; either version 2 of the License, or (at your option) any later version. @@ -61,146 +54,63 @@ This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -Current Grid git commit hash=7cb1ff7395a5833ded6526c43891bd07a0436290: (HEAD -> develop, origin/develop, origin/HEAD) clean +Current Grid git commit hash=1713de35c0dc339564661dd7df8a72583f889e91: (HEAD -> feature/dirichlet) uncommited changes Grid : Message : ================================================ Grid : Message : MPI is initialised and logging filters activated Grid : Message : ================================================ -Grid : Message : Requested 2147483648 byte stencil comms buffers -Grid : Message : MemoryManager Cache 8388608000 bytes +Grid : Message : Requested 1073741824 byte stencil comms buffers +Grid : Message : MemoryManager Cache 4194304000 bytes Grid : Message : MemoryManager::Init() setting up Grid : Message : MemoryManager::Init() cache pool for recent allocations: SMALL 8 LARGE 2 Grid : Message : MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory Grid : Message : MemoryManager::Init() Using cudaMalloc -Grid : Message : 1.731905 s : Grid Layout -Grid : Message : 1.731915 s : Global lattice size : 48 48 48 72 -Grid : Message : 1.731928 s : OpenMP threads : 6 -Grid : Message : 1.731938 s : MPI tasks : 2 2 2 3 -AcceleratorCudaInit: rank 9 setting device to node rank 3 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 23 setting device to node rank 5 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 22 setting device to node rank 4 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 21 setting device to node rank 3 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 18 setting device to node rank 0 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 6 setting device to node rank 0 -AcceleratorCudaInit: Configure options --enable-setdevice=yes 
-AcceleratorCudaInit: rank 7 setting device to node rank 1 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 10 setting device to node rank 4 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 8 setting device to node rank 2 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 11 setting device to node rank 5 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 20 setting device to node rank 2 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 19 setting device to node rank 1 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 13 setting device to node rank 1 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 12 setting device to node rank 0 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 14 setting device to node rank 2 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 16 setting device to node rank 4 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 15 setting device to node rank 3 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 17 setting device to node rank 5 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -Grid : Message : 2.683494 s : Making s innermost grids -Grid : Message : 2.780034 s : Initialising 4d RNG -Grid : Message : 2.833099 s : Intialising parallel RNG with unique string 'The 4D RNG' -Grid : Message : 2.833121 s : Seed SHA256: 49db4542db694e3b1a74bf2592a8c1b83bfebbe18401693c2609a4c3af1 -Grid : Message : 2.916841 s : Initialising 5d RNG -Grid : Message : 3.762880 s : Intialising parallel RNG with unique string 'The 5D RNG' -Grid : Message : 3.762902 s : Seed SHA256: b6316f2fac44ce14111f93e0296389330b077bfd0a7b359f781c58589f8a -Grid : Message : 5.264345 s : Initialised RNGs -Grid : Message : 6.489904 s : Drawing gauge field -Grid : Message : 6.729262 s : Random gauge initialised -Grid : Message : 7.781273 s : Setting up Cshift based reference -Grid : Message : 8.725313 s : ***************************************************************** -Grid : Message : 8.725332 s : * Kernel options --dslash-generic, --dslash-unroll, --dslash-asm -Grid : Message : 8.725342 s : ***************************************************************** -Grid : Message : 8.725352 s : ***************************************************************** -Grid : Message : 8.725362 s : * Benchmarking DomainWallFermionR::Dhop -Grid : Message : 8.725372 s : * Vectorising space-time by 4 -Grid : Message : 8.725383 s : * VComplexF size is 32 B -Grid : Message : 8.725395 s : * SINGLE precision -Grid : Message : 8.725405 s : * Using Overlapped Comms/Compute -Grid : Message : 8.725415 s : * Using GENERIC Nc WilsonKernels -Grid : Message : 8.725425 s : ***************************************************************** -Grid : Message : 9.465229 s : Called warmup -Grid : Message : 58.646066 s : Called Dw 3000 times in 4.91764e+07 us -Grid : Message : 58.646121 s : mflop/s = 1.02592e+07 -Grid : Message : 58.646134 s : mflop/s per rank = 427468 -Grid : Message : 58.646145 s : mflop/s per node = 2.56481e+06 -Grid : Message : 58.646156 s : RF GiB/s (base 2) = 20846.5 -Grid : Message : 58.646166 s : mem GiB/s (base 2) = 13029.1 -Grid : Message : 58.648008 s : norm diff 1.04778e-13 
-Grid : Message : 58.734885 s : #### Dhop calls report -Grid : Message : 58.734897 s : WilsonFermion5D Number of DhopEO Calls : 6002 -Grid : Message : 58.734909 s : WilsonFermion5D TotalTime /Calls : 8217.71 us -Grid : Message : 58.734922 s : WilsonFermion5D CommTime /Calls : 7109.5 us -Grid : Message : 58.734933 s : WilsonFermion5D FaceTime /Calls : 446.623 us -Grid : Message : 58.734943 s : WilsonFermion5D ComputeTime1/Calls : 18.0558 us -Grid : Message : 58.734953 s : WilsonFermion5D ComputeTime2/Calls : 731.097 us -Grid : Message : 58.734979 s : Average mflops/s per call : 4.8157e+09 -Grid : Message : 58.734989 s : Average mflops/s per call per rank : 2.00654e+08 -Grid : Message : 58.734999 s : Average mflops/s per call per node : 1.20393e+09 -Grid : Message : 58.735008 s : Average mflops/s per call (full) : 1.04183e+07 -Grid : Message : 58.735017 s : Average mflops/s per call per rank (full): 434094 -Grid : Message : 58.735026 s : Average mflops/s per call per node (full): 2.60456e+06 -Grid : Message : 58.735035 s : WilsonFermion5D Stencil -Grid : Message : 58.735043 s : WilsonFermion5D StencilEven -Grid : Message : 58.735051 s : WilsonFermion5D StencilOdd -Grid : Message : 58.735059 s : WilsonFermion5D Stencil Reporti() -Grid : Message : 58.735067 s : WilsonFermion5D StencilEven Reporti() -Grid : Message : 58.735075 s : WilsonFermion5D StencilOdd Reporti() -Grid : Message : 64.934380 s : Compare to naive wilson implementation Dag to verify correctness -Grid : Message : 64.934740 s : Called DwDag -Grid : Message : 64.934870 s : norm dag result 12.0422 -Grid : Message : 64.120756 s : norm dag ref 12.0422 -Grid : Message : 64.149389 s : norm dag diff 7.6644e-14 -Grid : Message : 64.317786 s : Calling Deo and Doe and //assert Deo+Doe == Dunprec -Grid : Message : 64.465331 s : src_e0.499995 -Grid : Message : 64.524653 s : src_o0.500005 -Grid : Message : 64.558706 s : ********************************************************* -Grid : Message : 64.558717 s : * Benchmarking DomainWallFermionF::DhopEO -Grid : Message : 64.558727 s : * Vectorising space-time by 4 -Grid : Message : 64.558737 s : * SINGLE precision -Grid : Message : 64.558745 s : * Using Overlapped Comms/Compute -Grid : Message : 64.558753 s : * Using GENERIC Nc WilsonKernels -Grid : Message : 64.558761 s : ********************************************************* -Grid : Message : 92.702145 s : Deo mflop/s = 8.97692e+06 -Grid : Message : 92.702185 s : Deo mflop/s per rank 374038 -Grid : Message : 92.702198 s : Deo mflop/s per node 2.24423e+06 -Grid : Message : 92.702209 s : #### Dhop calls report -Grid : Message : 92.702223 s : WilsonFermion5D Number of DhopEO Calls : 3001 -Grid : Message : 92.702240 s : WilsonFermion5D TotalTime /Calls : 9377.88 us -Grid : Message : 92.702257 s : WilsonFermion5D CommTime /Calls : 8221.84 us -Grid : Message : 92.702277 s : WilsonFermion5D FaceTime /Calls : 543.548 us -Grid : Message : 92.702301 s : WilsonFermion5D ComputeTime1/Calls : 20.936 us -Grid : Message : 92.702322 s : WilsonFermion5D ComputeTime2/Calls : 732.33 us -Grid : Message : 92.702376 s : Average mflops/s per call : 4.13001e+09 -Grid : Message : 92.702387 s : Average mflops/s per call per rank : 1.72084e+08 -Grid : Message : 92.702397 s : Average mflops/s per call per node : 1.0325e+09 -Grid : Message : 92.702407 s : Average mflops/s per call (full) : 9.12937e+06 -Grid : Message : 92.702416 s : Average mflops/s per call per rank (full): 380391 -Grid : Message : 92.702426 s : Average mflops/s per call per node (full): 2.28234e+06 
-Grid : Message : 92.702435 s : WilsonFermion5D Stencil
-Grid : Message : 92.702443 s : WilsonFermion5D StencilEven
-Grid : Message : 92.702451 s : WilsonFermion5D StencilOdd
-Grid : Message : 92.702459 s : WilsonFermion5D Stencil Reporti()
-Grid : Message : 92.702467 s : WilsonFermion5D StencilEven Reporti()
-Grid : Message : 92.702475 s : WilsonFermion5D StencilOdd Reporti()
-Grid : Message : 92.772983 s : r_e6.02121
-Grid : Message : 92.786384 s : r_o6.02102
-Grid : Message : 92.799622 s : res12.0422
-Grid : Message : 93.860500 s : norm diff 0
-Grid : Message : 93.162026 s : norm diff even 0
-Grid : Message : 93.197529 s : norm diff odd 0
+
+
+
+
+
+
+
+Grid : Message : 0.179000 s : ++++++++++++++++++++++++++++++++++++++++++++++++
+Grid : Message : 0.196000 s : Testing with full communication
+Grid : Message : 0.211000 s : ++++++++++++++++++++++++++++++++++++++++++++++++
+Grid : Message : 0.225000 s : Grid Layout
+Grid : Message : 0.233000 s : Global lattice size : 48 48 48 72
+Grid : Message : 0.246000 s : OpenMP threads : 6
+Grid : Message : 0.255000 s : MPI tasks : 2 2 2 3
+Grid : Message : 0.182200 s : Initialising 4d RNG
+Grid : Message : 0.233863 s : Intialising parallel RNG with unique string 'The 4D RNG'
+Grid : Message : 0.233886 s : Seed SHA256: 49db4542db694e3b1a74bf2592a8c1b83bfebbe18401693c2609a4c3af1
+Grid : Message : 0.245805 s : Initialising 5d RNG
+Grid : Message : 1.710720 s : Intialising parallel RNG with unique string 'The 5D RNG'
+Grid : Message : 1.710950 s : Seed SHA256: b6316f2fac44ce14111f93e0296389330b077bfd0a7b359f781c58589f8a
+Grid : Message : 2.220272 s : Drawing gauge field
+Grid : Message : 2.418119 s : Random gauge initialised
+Grid : Message : 2.418142 s : Applying BCs for Dirichlet Block5 [0 0 0 0 0]
+Grid : Message : 2.418156 s : Applying BCs for Dirichlet Block4 [0 0 0 0]
+Grid : Message : 2.489588 s : Setting up Cshift based reference
+Grid : Message : 13.921239 s : *****************************************************************
+Grid : Message : 13.921261 s : * Kernel options --dslash-generic, --dslash-unroll, --dslash-asm
+Grid : Message : 13.921270 s : *****************************************************************
+Grid : Message : 13.921279 s : *****************************************************************
+Grid : Message : 13.921288 s : * Benchmarking DomainWallFermionR::Dhop
+Grid : Message : 13.921296 s : * Vectorising space-time by 4
+Grid : Message : 13.921305 s : * VComplexF size is 32 B
+Grid : Message : 13.921314 s : * SINGLE precision
+Grid : Message : 13.921321 s : * Using Overlapped Comms/Compute
+Grid : Message : 13.921328 s : * Using GENERIC Nc WilsonKernels
+Grid : Message : 13.921335 s : *****************************************************************
+Grid : Message : 14.821339 s : Called warmup
+Grid : Message : 23.975467 s : Called Dw 300 times in 9.15155e+06 us
+Grid : Message : 23.975528 s : mflop/s = 5.51286e+06
+Grid : Message : 23.975543 s : mflop/s per rank = 229702
+Grid : Message : 23.975557 s : mflop/s per node = 229702
+Grid : Message : 23.989684 s : norm diff 5.09279e-313
+Grid : Message : 39.450493 s : ----------------------------------------------------------------
+Grid : Message : 39.450517 s : Compare to naive wilson implementation Dag to verify correctness
+Grid : Message : 39.450526 s : ----------------------------------------------------------------
+Grid : Message : 39.450534 s : Called DwDag
+Grid : Message : 39.450542 s : norm dag result nan
+Grid : Message : 39.451564 s : norm dag ref nan
+Grid : Message : 39.455714 s : norm dag diff nan
diff --git a/systems/Summit/dwf.32.4node b/systems/Summit/dwf.32.4node
index eed54f2d..fe21bad8 100644
--- a/systems/Summit/dwf.32.4node
+++ b/systems/Summit/dwf.32.4node
@@ -10,14 +10,21 @@ AcceleratorCudaInit[0]: warpSize: 32
 AcceleratorCudaInit[0]: pciBusID: 4
 AcceleratorCudaInit[0]: pciDeviceID: 0
 AcceleratorCudaInit[0]: maxGridSize (2147483647,65535,65535)
-AcceleratorCudaInit: rank 0 setting device to node rank 0
-AcceleratorCudaInit: Configure options --enable-setdevice=yes
+AcceleratorCudaInit: using default device
+AcceleratorCudaInit: assume user either uses
+AcceleratorCudaInit: a) IBM jsrun, or
+AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding
+AcceleratorCudaInit: Configure options --enable-setdevice=no
 local rank 0 device 0 bus id: 0004:04:00.0
 AcceleratorCudaInit: ================================================
 SharedMemoryMpi: World communicator of size 24
-SharedMemoryMpi: Node communicator of size 6
-0SharedMemoryMpi: SharedMemoryMPI.cc acceleratorAllocDevice 2147483648bytes at 0x200080000000 for comms buffers
+SharedMemoryMpi: Node communicator of size 1
+local rank 3 device 0 bus id: 0004:04:00.0
+local rank 2 device 0 bus id: 0004:04:00.0
+local rank 1 device 0 bus id: 0004:04:00.0
+0SharedMemoryMpi: SharedMemoryMPI.cc acceleratorAllocDevice 1073741824bytes at 0x200080000000 - 2000bfffffff for comms buffers
 Setting up IPC
+local rank 5 device 0 bus id: 0004:04:00.0
 
 __|__|__|__|__|__|__|__|__|__|__|__|__|__|__
 __|__|__|__|__|__|__|__|__|__|__|__|__|__|__
@@ -39,168 +46,46 @@ Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli a
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
+local rank 4 device 0 bus id: 0004:04:00.0
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.
 
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-AcceleratorCudaInit: rank 2 setting device to node rank 2
-AcceleratorCudaInit: Configure options --enable-setdevice=yes
-local rank 2 device 2 bus id: 0004:06:00.0
-AcceleratorCudaInit: rank 1 setting device to node rank 1
-AcceleratorCudaInit: Configure options --enable-setdevice=yes
-local rank 1 device 1 bus id: 0004:05:00.0
-AcceleratorCudaInit: rank 4 setting device to node rank 4
-AcceleratorCudaInit: Configure options --enable-setdevice=yes
-local rank 4 device 4 bus id: 0035:04:00.0
-AcceleratorCudaInit: rank 3 setting device to node rank 3
-AcceleratorCudaInit: Configure options --enable-setdevice=yes
-local rank 3 device 3 bus id: 0035:03:00.0
-AcceleratorCudaInit: rank 5 setting device to node rank 5
-AcceleratorCudaInit: Configure options --enable-setdevice=yes
-local rank 5 device 5 bus id: 0035:05:00.0
 GNU General Public License for more details.
-Current Grid git commit hash=7cb1ff7395a5833ded6526c43891bd07a0436290: (HEAD -> develop, origin/develop, origin/HEAD) clean +Current Grid git commit hash=1713de35c0dc339564661dd7df8a72583f889e91: (HEAD -> feature/dirichlet) uncommited changes Grid : Message : ================================================ Grid : Message : MPI is initialised and logging filters activated Grid : Message : ================================================ -Grid : Message : Requested 2147483648 byte stencil comms buffers -Grid : Message : MemoryManager Cache 8388608000 bytes +Grid : Message : Requested 1073741824 byte stencil comms buffers Grid : Message : MemoryManager::Init() setting up Grid : Message : MemoryManager::Init() cache pool for recent allocations: SMALL 8 LARGE 2 -Grid : Message : MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory -Grid : Message : MemoryManager::Init() Using cudaMalloc -Grid : Message : 1.544984 s : Grid Layout -Grid : Message : 1.544992 s : Global lattice size : 64 64 64 96 -Grid : Message : 1.545003 s : OpenMP threads : 6 -Grid : Message : 1.545011 s : MPI tasks : 2 2 2 3 -AcceleratorCudaInit: rank 8 setting device to node rank 2 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 6 setting device to node rank 0 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 11 setting device to node rank 5 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 16 setting device to node rank 4 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 17 setting device to node rank 5 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 13 setting device to node rank 1 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 12 setting device to node rank 0 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 21 setting device to node rank 3 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 23 setting device to node rank 5 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 22 setting device to node rank 4 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 19 setting device to node rank 1 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 18 setting device to node rank 0 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 7 setting device to node rank 1 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 10 setting device to node rank 4 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 9 setting device to node rank 3 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 14 setting device to node rank 2 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 15 setting device to node rank 3 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -AcceleratorCudaInit: rank 20 setting device to node rank 2 -AcceleratorCudaInit: Configure options --enable-setdevice=yes -Grid : Message : 2.994920 s : Making s innermost grids -Grid : Message : 2.232502 s : Initialising 4d RNG -Grid : Message : 2.397047 s : Intialising parallel RNG with unique string 'The 4D RNG' -Grid : Message 
: 2.397069 s : Seed SHA256: 49db4542db694e3b1a74bf2592a8c1b83bfebbe18401693c2609a4c3af1 -Grid : Message : 2.653140 s : Initialising 5d RNG -Grid : Message : 5.285347 s : Intialising parallel RNG with unique string 'The 5D RNG' -Grid : Message : 5.285369 s : Seed SHA256: b6316f2fac44ce14111f93e0296389330b077bfd0a7b359f781c58589f8a -Grid : Message : 9.994738 s : Initialised RNGs -Grid : Message : 13.153426 s : Drawing gauge field -Grid : Message : 13.825697 s : Random gauge initialised -Grid : Message : 18.537657 s : Setting up Cshift based reference -Grid : Message : 22.296755 s : ***************************************************************** -Grid : Message : 22.296781 s : * Kernel options --dslash-generic, --dslash-unroll, --dslash-asm -Grid : Message : 22.296791 s : ***************************************************************** -Grid : Message : 22.296800 s : ***************************************************************** -Grid : Message : 22.296809 s : * Benchmarking DomainWallFermionR::Dhop -Grid : Message : 22.296818 s : * Vectorising space-time by 4 -Grid : Message : 22.296828 s : * VComplexF size is 32 B -Grid : Message : 22.296838 s : * SINGLE precision -Grid : Message : 22.296847 s : * Using Overlapped Comms/Compute -Grid : Message : 22.296855 s : * Using GENERIC Nc WilsonKernels -Grid : Message : 22.296863 s : ***************************************************************** -Grid : Message : 24.746452 s : Called warmup -Grid : Message : 137.525756 s : Called Dw 3000 times in 1.12779e+08 us -Grid : Message : 137.525818 s : mflop/s = 1.41383e+07 -Grid : Message : 137.525831 s : mflop/s per rank = 589097 -Grid : Message : 137.525843 s : mflop/s per node = 3.53458e+06 -Grid : Message : 137.525854 s : RF GiB/s (base 2) = 28728.7 -Grid : Message : 137.525864 s : mem GiB/s (base 2) = 17955.5 -Grid : Message : 137.693645 s : norm diff 1.04885e-13 -Grid : Message : 137.965585 s : #### Dhop calls report -Grid : Message : 137.965598 s : WilsonFermion5D Number of DhopEO Calls : 6002 -Grid : Message : 137.965612 s : WilsonFermion5D TotalTime /Calls : 18899.7 us -Grid : Message : 137.965624 s : WilsonFermion5D CommTime /Calls : 16041.4 us -Grid : Message : 137.965634 s : WilsonFermion5D FaceTime /Calls : 859.705 us -Grid : Message : 137.965644 s : WilsonFermion5D ComputeTime1/Calls : 70.5881 us -Grid : Message : 137.965654 s : WilsonFermion5D ComputeTime2/Calls : 2094.8 us -Grid : Message : 137.965682 s : Average mflops/s per call : 3.87638e+09 -Grid : Message : 137.965692 s : Average mflops/s per call per rank : 1.61516e+08 -Grid : Message : 137.965702 s : Average mflops/s per call per node : 9.69095e+08 -Grid : Message : 137.965712 s : Average mflops/s per call (full) : 1.43168e+07 -Grid : Message : 137.965721 s : Average mflops/s per call per rank (full): 596533 -Grid : Message : 137.965730 s : Average mflops/s per call per node (full): 3.5792e+06 -Grid : Message : 137.965740 s : WilsonFermion5D Stencil -Grid : Message : 137.965748 s : WilsonFermion5D StencilEven -Grid : Message : 137.965756 s : WilsonFermion5D StencilOdd -Grid : Message : 137.965764 s : WilsonFermion5D Stencil Reporti() -Grid : Message : 137.965772 s : WilsonFermion5D StencilEven Reporti() -Grid : Message : 137.965780 s : WilsonFermion5D StencilOdd Reporti() -Grid : Message : 156.554605 s : Compare to naive wilson implementation Dag to verify correctness -Grid : Message : 156.554632 s : Called DwDag -Grid : Message : 156.554642 s : norm dag result 12.0421 -Grid : Message : 156.639265 s : norm dag ref 12.0421 -Grid 
: Message : 156.888281 s : norm dag diff 7.62057e-14 -Grid : Message : 157.609797 s : Calling Deo and Doe and //assert Deo+Doe == Dunprec -Grid : Message : 158.208630 s : src_e0.499996 -Grid : Message : 158.162447 s : src_o0.500004 -Grid : Message : 158.267780 s : ********************************************************* -Grid : Message : 158.267791 s : * Benchmarking DomainWallFermionF::DhopEO -Grid : Message : 158.267801 s : * Vectorising space-time by 4 -Grid : Message : 158.267811 s : * SINGLE precision -Grid : Message : 158.267820 s : * Using Overlapped Comms/Compute -Grid : Message : 158.267828 s : * Using GENERIC Nc WilsonKernels -Grid : Message : 158.267836 s : ********************************************************* -Grid : Message : 216.487829 s : Deo mflop/s = 1.37283e+07 -Grid : Message : 216.487869 s : Deo mflop/s per rank 572011 -Grid : Message : 216.487881 s : Deo mflop/s per node 3.43206e+06 -Grid : Message : 216.487893 s : #### Dhop calls report -Grid : Message : 216.487903 s : WilsonFermion5D Number of DhopEO Calls : 3001 -Grid : Message : 216.487913 s : WilsonFermion5D TotalTime /Calls : 19399.6 us -Grid : Message : 216.487923 s : WilsonFermion5D CommTime /Calls : 16475.4 us -Grid : Message : 216.487933 s : WilsonFermion5D FaceTime /Calls : 972.393 us -Grid : Message : 216.487943 s : WilsonFermion5D ComputeTime1/Calls : 49.8474 us -Grid : Message : 216.487953 s : WilsonFermion5D ComputeTime2/Calls : 2089.93 us -Grid : Message : 216.488001 s : Average mflops/s per call : 5.39682e+09 -Grid : Message : 216.488011 s : Average mflops/s per call per rank : 2.24867e+08 -Grid : Message : 216.488020 s : Average mflops/s per call per node : 1.3492e+09 -Grid : Message : 216.488030 s : Average mflops/s per call (full) : 1.39479e+07 -Grid : Message : 216.488039 s : Average mflops/s per call per rank (full): 581162 -Grid : Message : 216.488048 s : Average mflops/s per call per node (full): 3.48697e+06 -Grid : Message : 216.488057 s : WilsonFermion5D Stencil -Grid : Message : 216.488065 s : WilsonFermion5D StencilEven -Grid : Message : 216.488073 s : WilsonFermion5D StencilOdd -Grid : Message : 216.488081 s : WilsonFermion5D Stencil Reporti() -Grid : Message : 216.488089 s : WilsonFermion5D StencilEven Reporti() -Grid : Message : 216.488097 s : WilsonFermion5D StencilOdd Reporti() -Grid : Message : 217.384495 s : r_e6.02113 -Grid : Message : 217.426121 s : r_o6.02096 -Grid : Message : 217.472636 s : res12.0421 -Grid : Message : 218.200068 s : norm diff 0 -Grid : Message : 218.645673 s : norm diff even 0 -Grid : Message : 218.816561 s : norm diff odd 0 +Grid : Message : MemoryManager::Init() Unified memory space +Grid : Message : MemoryManager::Init() Using cudaMallocManaged + + + + + + + +Grid : Message : 0.139000 s : ++++++++++++++++++++++++++++++++++++++++++++++++ +Grid : Message : 0.151000 s : Testing with full communication +Grid : Message : 0.158000 s : ++++++++++++++++++++++++++++++++++++++++++++++++ +Grid : Message : 0.165000 s : Grid Layout +Grid : Message : 0.171000 s : Global lattice size : 64 64 64 96 +Grid : Message : 0.181000 s : OpenMP threads : 6 +Grid : Message : 0.189000 s : MPI tasks : 2 2 2 3 +Grid : Message : 0.177717 s : Initialising 4d RNG +Grid : Message : 0.342461 s : Intialising parallel RNG with unique string 'The 4D RNG' +Grid : Message : 0.342483 s : Seed SHA256: 49db4542db694e3b1a74bf2592a8c1b83bfebbe18401693c2609a4c3af1 +Grid : Message : 0.370454 s : Initialising 5d RNG +Grid : Message : 3.174160 s : Intialising parallel RNG with unique string 'The 5D 
RNG' +Grid : Message : 3.174420 s : Seed SHA256: b6316f2fac44ce14111f93e0296389330b077bfd0a7b359f781c58589f8a +Grid : Message : 22.119339 s : Drawing gauge field +Grid : Message : 38.113060 s : Random gauge initialised +Grid : Message : 38.113320 s : Applying BCs for Dirichlet Block5 [0 0 0 0 0] +Grid : Message : 38.113470 s : Applying BCs for Dirichlet Block4 [0 0 0 0] +Grid : Message : 43.906786 s : Setting up Cshift based reference diff --git a/systems/Summit/dwf16.lsf b/systems/Summit/dwf16.lsf index ef8c21a5..3242fc86 100644 --- a/systems/Summit/dwf16.lsf +++ b/systems/Summit/dwf16.lsf @@ -1,25 +1,39 @@ #!/bin/bash #BSUB -P LGT104 -#BSUB -W 2:00 +#BSUB -W 0:20 #BSUB -nnodes 16 #BSUB -J DWF + export OMP_NUM_THREADS=6 export PAMI_IBV_ADAPTER_AFFINITY=1 export PAMI_ENABLE_STRIPING=1 -export OPT="--comms-concurrent --comms-overlap " -APP="./benchmarks/Benchmark_comms_host_device --mpi 4.4.4.3 " -jsrun --nrs 16 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > comms.16node.log +DIR=. +source sourceme.sh -APP="./benchmarks/Benchmark_dwf_fp32 --grid 96.96.96.72 --mpi 4.4.4.3 --shm 2048 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT " -jsrun --nrs 16 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > dwf.16node.24.log +echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE -APP="./benchmarks/Benchmark_dwf_fp32 --grid 128.128.128.96 --mpi 4.4.4.3 --shm 2048 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT " -jsrun --nrs 16 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > dwf.16node.32.log +VOLS=( 32.32.32.16 32.32.32.64 64.32.32.64 64.32.64.64 64.64.64.64 64.64.64.128 64.64.64.256 64.64.64.512 128.64.64.64.512) +MPI=( 1.1.1.1 1.1.1.4 2.1.1.4 2.1.2.4 2.2.2.4 2.2.2.8 2.2.2.16 2.2.2.32 4.4.2.32 ) +RANKS=( 1 4 8 16 32 64 128 256 1024) +NODES=( 1 1 2 4 8 16 32 64 128) +INTS=( 0 1 2 3 4 5 6 7 8) +for i in 5 +do + vol=${VOLS[$i]} + nodes=${NODES[$i]} + mpi=${MPI[$i]} + ranks=${RANKS[$i]} + JSRUN="jsrun --nrs $nodes -a4 -g4 -c42 -dpacked -b packed:10 --latency_priority gpu-cpu --smpiargs=-gpu" + PARAMS=" --accelerator-threads 8 --grid $vol --mpi $mpi --comms-sequential --shm 2048 --shm-mpi 0" + $JSRUN ./benchmarks/Benchmark_dwf_fp32 $PARAMS > run.v${vol}.n${nodes}.m${mpi}.seq.ker + PARAMS=" --accelerator-threads 8 --grid $vol --mpi $mpi --comms-overlap --shm 2048 --shm-mpi 0" + $JSRUN ./benchmarks/Benchmark_dwf_fp32 $PARAMS > run.v${vol}.n${nodes}.m${mpi}.over.ker +done diff --git a/systems/Summit/dwf4.lsf b/systems/Summit/dwf4.lsf index 7d940338..51560f71 100644 --- a/systems/Summit/dwf4.lsf +++ b/systems/Summit/dwf4.lsf @@ -7,16 +7,15 @@ export OMP_NUM_THREADS=6 export PAMI_IBV_ADAPTER_AFFINITY=1 export PAMI_ENABLE_STRIPING=1 +export PAMI_DISABLE_IPC=1 export OPT="--comms-concurrent --comms-overlap " -#export GRID_ALLOC_NCACHE_LARGE=1 -export APP="./benchmarks/Benchmark_comms_host_device --mpi 2.2.2.3 " -jsrun --nrs 4 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > comms.4node -APP="./benchmarks/Benchmark_dwf_fp32 --grid 48.48.48.72 --mpi 2.2.2.3 --shm 2048 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT " -jsrun --nrs 4 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > dwf.24.4node -APP="./benchmarks/Benchmark_dwf_fp32 --grid 64.64.64.96 --mpi 2.2.2.3 --shm 2048 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT " -jsrun --nrs 4 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority 
gpu-cpu --smpiargs=-gpu $APP > dwf.32.4node +APP="./wrap.sh ./benchmarks/Benchmark_dwf_fp32 --grid 48.48.48.72 --mpi 2.2.2.3 --shm 1024 --device-mem 4000 --shm-force-mpi 1 $OPT " +jsrun --nrs 24 -a1 -g1 -c6 -dpacked -b packed:6 --latency_priority gpu-cpu --smpiargs="-gpu" $APP > dwf.24.4node + +APP="./wrap.sh ./benchmarks/Benchmark_comms_host_device --grid 48.48.48.72 --mpi 2.2.2.3 --shm 1024 --device-mem 4000 --shm-force-mpi 1 $OPT " +jsrun --smpiargs="-gpu" --nrs 4 -a6 -g6 -c42 -dpacked -b packed:6 $APP > comms.24.4node diff --git a/systems/Tursa/dwf.4node.perf b/systems/Tursa/dwf.4node.perf index 9073969e..5d1c1bb8 100644 --- a/systems/Tursa/dwf.4node.perf +++ b/systems/Tursa/dwf.4node.perf @@ -1,25 +1,25 @@ -tu-c0r0n00 - 0 device=0 binding=--interleave=0,1 -tu-c0r0n00 - 1 device=1 binding=--interleave=2,3 -tu-c0r0n09 - 1 device=1 binding=--interleave=2,3 -tu-c0r0n00 - 2 device=2 binding=--interleave=4,5 -tu-c0r0n06 - 0 device=0 binding=--interleave=0,1 -tu-c0r0n06 - 1 device=1 binding=--interleave=2,3 -tu-c0r0n09 - 0 device=0 binding=--interleave=0,1 -tu-c0r0n09 - 2 device=2 binding=--interleave=4,5 -tu-c0r0n03 - 1 device=1 binding=--interleave=2,3 -tu-c0r0n06 - 2 device=2 binding=--interleave=4,5 -tu-c0r0n09 - 3 device=3 binding=--interleave=6,7 -tu-c0r0n00 - 3 device=3 binding=--interleave=6,7 -tu-c0r0n03 - 0 device=0 binding=--interleave=0,1 -tu-c0r0n03 - 2 device=2 binding=--interleave=4,5 -tu-c0r0n06 - 3 device=3 binding=--interleave=6,7 -tu-c0r0n03 - 3 device=3 binding=--interleave=6,7 +tu-c0r3n00 - 0 device=0 binding=--interleave=0,1 +tu-c0r3n00 - 1 device=1 binding=--interleave=2,3 +tu-c0r3n00 - 2 device=2 binding=--interleave=4,5 +tu-c0r3n00 - 3 device=3 binding=--interleave=6,7 +tu-c0r3n06 - 1 device=1 binding=--interleave=2,3 +tu-c0r3n06 - 3 device=3 binding=--interleave=6,7 +tu-c0r3n06 - 0 device=0 binding=--interleave=0,1 +tu-c0r3n06 - 2 device=2 binding=--interleave=4,5 +tu-c0r3n03 - 1 device=1 binding=--interleave=2,3 +tu-c0r3n03 - 2 device=2 binding=--interleave=4,5 +tu-c0r3n03 - 0 device=0 binding=--interleave=0,1 +tu-c0r3n03 - 3 device=3 binding=--interleave=6,7 +tu-c0r3n09 - 0 device=0 binding=--interleave=0,1 +tu-c0r3n09 - 1 device=1 binding=--interleave=2,3 +tu-c0r3n09 - 2 device=2 binding=--interleave=4,5 +tu-c0r3n09 - 3 device=3 binding=--interleave=6,7 OPENMPI detected AcceleratorCudaInit: using default device -AcceleratorCudaInit: assume user either uses a) IBM jsrun, or +AcceleratorCudaInit: assume user either uses +AcceleratorCudaInit: a) IBM jsrun, or AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding -AcceleratorCudaInit: Configure options --enable-summit, --enable-select-gpu=no -AcceleratorCudaInit: ================================================ +AcceleratorCudaInit: Configure options --enable-setdevice=no OPENMPI detected AcceleratorCudaInit[0]: ======================== AcceleratorCudaInit[0]: Device Number : 0 @@ -33,11 +33,41 @@ AcceleratorCudaInit[0]: pciBusID: 3 AcceleratorCudaInit[0]: pciDeviceID: 0 AcceleratorCudaInit[0]: maxGridSize (2147483647,65535,65535) AcceleratorCudaInit: using default device -AcceleratorCudaInit: assume user either uses a) IBM jsrun, or +AcceleratorCudaInit: assume user either uses +AcceleratorCudaInit: a) IBM jsrun, or AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding -AcceleratorCudaInit: Configure options --enable-summit, --enable-select-gpu=no -AcceleratorCudaInit: 
================================================ +AcceleratorCudaInit: Configure options --enable-setdevice=no OPENMPI detected +AcceleratorCudaInit: using default device +AcceleratorCudaInit: assume user either uses +AcceleratorCudaInit: a) IBM jsrun, or +AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding +AcceleratorCudaInit: Configure options --enable-setdevice=no +OPENMPI detected +AcceleratorCudaInit: using default device +AcceleratorCudaInit: assume user either uses +AcceleratorCudaInit: a) IBM jsrun, or +AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding +AcceleratorCudaInit: Configure options --enable-setdevice=no +OPENMPI detected +AcceleratorCudaInit: using default device +AcceleratorCudaInit: assume user either uses +AcceleratorCudaInit: a) IBM jsrun, or +AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding +AcceleratorCudaInit: Configure options --enable-setdevice=no +OPENMPI detected +OPENMPI detected +AcceleratorCudaInit: using default device +AcceleratorCudaInit: assume user either uses +AcceleratorCudaInit: a) IBM jsrun, or +AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding +AcceleratorCudaInit: Configure options --enable-setdevice=no +OPENMPI detected +AcceleratorCudaInit: using default device +AcceleratorCudaInit: assume user either uses +AcceleratorCudaInit: a) IBM jsrun, or +AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding +AcceleratorCudaInit: Configure options --enable-setdevice=no AcceleratorCudaInit[0]: ======================== AcceleratorCudaInit[0]: Device Number : 0 AcceleratorCudaInit[0]: ======================== @@ -50,43 +80,25 @@ AcceleratorCudaInit[0]: pciBusID: 3 AcceleratorCudaInit[0]: pciDeviceID: 0 AcceleratorCudaInit[0]: maxGridSize (2147483647,65535,65535) AcceleratorCudaInit: using default device -AcceleratorCudaInit: assume user either uses a) IBM jsrun, or +AcceleratorCudaInit: assume user either uses +AcceleratorCudaInit: a) IBM jsrun, or AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding -AcceleratorCudaInit: Configure options --enable-summit, --enable-select-gpu=no +AcceleratorCudaInit: Configure options --enable-setdevice=no +local rank 1 device 0 bus id: 0000:44:00.0 AcceleratorCudaInit: ================================================ -OPENMPI detected -AcceleratorCudaInit: using default device -AcceleratorCudaInit: assume user either uses a) IBM jsrun, or -AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding -AcceleratorCudaInit: Configure options --enable-summit, --enable-select-gpu=no +local rank 0 device 0 bus id: 0000:03:00.0 AcceleratorCudaInit: ================================================ -OPENMPI detected -AcceleratorCudaInit: using default device -AcceleratorCudaInit: assume user either uses a) IBM jsrun, or -AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding -AcceleratorCudaInit: Configure options --enable-summit, --enable-select-gpu=no AcceleratorCudaInit: ================================================ -OPENMPI detected -AcceleratorCudaInit: using default device 
-AcceleratorCudaInit: assume user either uses a) IBM jsrun, or -AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding -AcceleratorCudaInit: Configure options --enable-summit, --enable-select-gpu=no AcceleratorCudaInit: ================================================ -OPENMPI detected -AcceleratorCudaInit: using default device -AcceleratorCudaInit: assume user either uses a) IBM jsrun, or -AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding -AcceleratorCudaInit: Configure options --enable-summit, --enable-select-gpu=no AcceleratorCudaInit: ================================================ -OPENMPI detected -AcceleratorCudaInit: using default device -AcceleratorCudaInit: assume user either uses a) IBM jsrun, or -AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding -AcceleratorCudaInit: Configure options --enable-summit, --enable-select-gpu=no AcceleratorCudaInit: ================================================ +local rank 0 device 0 bus id: 0000:03:00.0 +AcceleratorCudaInit: ================================================ +AcceleratorCudaInit: ================================================ +local rank 2 device 0 bus id: 0000:84:00.0 SharedMemoryMpi: World communicator of size 16 SharedMemoryMpi: Node communicator of size 4 -0SharedMemoryMpi: SharedMemoryMPI.cc acceleratorAllocDevice 2147483648bytes at 0x7fcd80000000 for comms buffers +0SharedMemoryMpi: SharedMemoryMPI.cc acceleratorAllocDevice 2147483648bytes at 0x153960000000 for comms buffers Setting up IPC __|__|__|__|__|__|__|__|__|__|__|__|__|__|__ @@ -116,7 +128,7 @@ This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
-Current Grid git commit hash=9d2238148c56e3fbadfa95dcabf2b83d4bde14cd: (HEAD -> develop) uncommited changes +Current Grid git commit hash=da06d15f73184ceb15d66d4e7e702b02fed7b940: (HEAD -> feature/dirichlet, develop) uncommited changes Grid : Message : ================================================ Grid : Message : MPI is initialised and logging filters activated @@ -124,122 +136,102 @@ Grid : Message : ================================================ Grid : Message : Requested 2147483648 byte stencil comms buffers Grid : Message : MemoryManager Cache 34004218675 bytes Grid : Message : MemoryManager::Init() setting up -Grid : Message : MemoryManager::Init() cache pool for recent allocations: SMALL 32 LARGE 8 +Grid : Message : MemoryManager::Init() cache pool for recent allocations: SMALL 8 LARGE 2 Grid : Message : MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory Grid : Message : MemoryManager::Init() Using cudaMalloc -Grid : Message : 1.198523 s : Grid Layout -Grid : Message : 1.198530 s : Global lattice size : 64 64 64 64 -Grid : Message : 1.198534 s : OpenMP threads : 4 -Grid : Message : 1.198535 s : MPI tasks : 2 2 2 2 -Grid : Message : 1.397615 s : Making s innermost grids -Grid : Message : 1.441828 s : Initialising 4d RNG -Grid : Message : 1.547973 s : Intialising parallel RNG with unique string 'The 4D RNG' -Grid : Message : 1.547998 s : Seed SHA256: 49db4542db694e3b1a74bf2592a8c1b83bfebbe18401693c2609a4c3af1 -Grid : Message : 1.954777 s : Initialising 5d RNG -Grid : Message : 3.633825 s : Intialising parallel RNG with unique string 'The 5D RNG' -Grid : Message : 3.633869 s : Seed SHA256: b6316f2fac44ce14111f93e0296389330b077bfd0a7b359f781c58589f8a -Grid : Message : 12.162710 s : Initialised RNGs -Grid : Message : 15.882520 s : Drawing gauge field -Grid : Message : 15.816362 s : Random gauge initialised -Grid : Message : 17.279671 s : Setting up Cshift based reference -Grid : Message : 26.331426 s : ***************************************************************** -Grid : Message : 26.331452 s : * Kernel options --dslash-generic, --dslash-unroll, --dslash-asm -Grid : Message : 26.331454 s : ***************************************************************** -Grid : Message : 26.331456 s : ***************************************************************** -Grid : Message : 26.331458 s : * Benchmarking DomainWallFermionR::Dhop -Grid : Message : 26.331459 s : * Vectorising space-time by 8 -Grid : Message : 26.331463 s : * VComplexF size is 64 B -Grid : Message : 26.331465 s : * SINGLE precision -Grid : Message : 26.331467 s : * Using Overlapped Comms/Compute -Grid : Message : 26.331468 s : * Using GENERIC Nc WilsonKernels -Grid : Message : 26.331469 s : ***************************************************************** -Grid : Message : 28.413717 s : Called warmup -Grid : Message : 56.418423 s : Called Dw 3000 times in 2.80047e+07 us -Grid : Message : 56.418476 s : mflop/s = 3.79581e+07 -Grid : Message : 56.418479 s : mflop/s per rank = 2.37238e+06 -Grid : Message : 56.418481 s : mflop/s per node = 9.48953e+06 -Grid : Message : 56.418483 s : RF GiB/s (base 2) = 77130 -Grid : Message : 56.418485 s : mem GiB/s (base 2) = 48206.3 -Grid : Message : 56.422076 s : norm diff 1.03481e-13 -Grid : Message : 56.456894 s : #### Dhop calls report -Grid : Message : 56.456899 s : WilsonFermion5D Number of DhopEO Calls : 6002 -Grid : Message : 56.456903 s : WilsonFermion5D TotalTime /Calls : 4710.93 us -Grid : Message : 56.456905 s : WilsonFermion5D CommTime /Calls : 
3196.15 us -Grid : Message : 56.456908 s : WilsonFermion5D FaceTime /Calls : 494.392 us -Grid : Message : 56.456910 s : WilsonFermion5D ComputeTime1/Calls : 44.4107 us -Grid : Message : 56.456912 s : WilsonFermion5D ComputeTime2/Calls : 1037.75 us -Grid : Message : 56.456921 s : Average mflops/s per call : 3.55691e+09 -Grid : Message : 56.456925 s : Average mflops/s per call per rank : 2.22307e+08 -Grid : Message : 56.456928 s : Average mflops/s per call per node : 8.89228e+08 -Grid : Message : 56.456930 s : Average mflops/s per call (full) : 3.82915e+07 -Grid : Message : 56.456933 s : Average mflops/s per call per rank (full): 2.39322e+06 -Grid : Message : 56.456952 s : Average mflops/s per call per node (full): 9.57287e+06 -Grid : Message : 56.456954 s : WilsonFermion5D Stencil -Grid : Message : 56.457016 s : Stencil calls 3001 -Grid : Message : 56.457022 s : Stencil halogtime 0 -Grid : Message : 56.457024 s : Stencil gathertime 55.9154 -Grid : Message : 56.457026 s : Stencil gathermtime 20.1073 -Grid : Message : 56.457028 s : Stencil mergetime 18.5585 -Grid : Message : 56.457030 s : Stencil decompresstime 0.0639787 -Grid : Message : 56.457032 s : Stencil comms_bytes 4.02653e+08 -Grid : Message : 56.457034 s : Stencil commtime 6379.93 -Grid : Message : 56.457036 s : Stencil 63.1124 GB/s per rank -Grid : Message : 56.457038 s : Stencil 252.45 GB/s per node -Grid : Message : 56.457040 s : WilsonFermion5D StencilEven -Grid : Message : 56.457048 s : WilsonFermion5D StencilOdd -Grid : Message : 56.457062 s : WilsonFermion5D Stencil Reporti() -Grid : Message : 56.457065 s : WilsonFermion5D StencilEven Reporti() -Grid : Message : 56.457066 s : WilsonFermion5D StencilOdd Reporti() -Grid : Message : 79.259261 s : Compare to naive wilson implementation Dag to verify correctness -Grid : Message : 79.259287 s : Called DwDag -Grid : Message : 79.259288 s : norm dag result 12.0421 -Grid : Message : 79.271740 s : norm dag ref 12.0421 -Grid : Message : 79.287759 s : norm dag diff 7.63236e-14 -Grid : Message : 79.328100 s : Calling Deo and Doe and //assert Deo+Doe == Dunprec -Grid : Message : 79.955951 s : src_e0.499997 -Grid : Message : 80.633620 s : src_o0.500003 -Grid : Message : 80.164163 s : ********************************************************* -Grid : Message : 80.164168 s : * Benchmarking DomainWallFermionF::DhopEO -Grid : Message : 80.164170 s : * Vectorising space-time by 8 -Grid : Message : 80.164172 s : * SINGLE precision -Grid : Message : 80.164174 s : * Using Overlapped Comms/Compute -Grid : Message : 80.164177 s : * Using GENERIC Nc WilsonKernels -Grid : Message : 80.164178 s : ********************************************************* -Grid : Message : 93.797635 s : Deo mflop/s = 3.93231e+07 -Grid : Message : 93.797670 s : Deo mflop/s per rank 2.45769e+06 -Grid : Message : 93.797672 s : Deo mflop/s per node 9.83077e+06 -Grid : Message : 93.797674 s : #### Dhop calls report -Grid : Message : 93.797675 s : WilsonFermion5D Number of DhopEO Calls : 3001 -Grid : Message : 93.797677 s : WilsonFermion5D TotalTime /Calls : 4542.83 us -Grid : Message : 93.797679 s : WilsonFermion5D CommTime /Calls : 2978.97 us -Grid : Message : 93.797681 s : WilsonFermion5D FaceTime /Calls : 602.287 us -Grid : Message : 93.797683 s : WilsonFermion5D ComputeTime1/Calls : 67.1416 us -Grid : Message : 93.797685 s : WilsonFermion5D ComputeTime2/Calls : 1004.07 us -Grid : Message : 93.797713 s : Average mflops/s per call : 3.30731e+09 -Grid : Message : 93.797717 s : Average mflops/s per call per rank : 2.06707e+08 
-Grid : Message : 93.797719 s : Average mflops/s per call per node : 8.26827e+08 -Grid : Message : 93.797721 s : Average mflops/s per call (full) : 3.97084e+07 -Grid : Message : 93.797727 s : Average mflops/s per call per rank (full): 2.48178e+06 -Grid : Message : 93.797732 s : Average mflops/s per call per node (full): 9.92711e+06 -Grid : Message : 93.797735 s : WilsonFermion5D Stencil -Grid : Message : 93.797746 s : WilsonFermion5D StencilEven -Grid : Message : 93.797758 s : WilsonFermion5D StencilOdd -Grid : Message : 93.797769 s : Stencil calls 3001 -Grid : Message : 93.797773 s : Stencil halogtime 0 -Grid : Message : 93.797776 s : Stencil gathertime 56.7458 -Grid : Message : 93.797780 s : Stencil gathermtime 22.6504 -Grid : Message : 93.797782 s : Stencil mergetime 21.1913 -Grid : Message : 93.797786 s : Stencil decompresstime 0.0556481 -Grid : Message : 93.797788 s : Stencil comms_bytes 2.01327e+08 -Grid : Message : 93.797791 s : Stencil commtime 2989.33 -Grid : Message : 93.797795 s : Stencil 67.3484 GB/s per rank -Grid : Message : 93.797798 s : Stencil 269.394 GB/s per node -Grid : Message : 93.797801 s : WilsonFermion5D Stencil Reporti() -Grid : Message : 93.797803 s : WilsonFermion5D StencilEven Reporti() -Grid : Message : 93.797805 s : WilsonFermion5D StencilOdd Reporti() -Grid : Message : 93.873429 s : r_e6.02111 -Grid : Message : 93.879931 s : r_o6.02102 -Grid : Message : 93.885912 s : res12.0421 -Grid : Message : 94.876555 s : norm diff 0 -Grid : Message : 95.485643 s : norm diff even 0 -Grid : Message : 95.581236 s : norm diff odd 0 +Grid : Message : 1.875883 s : Grid Layout +Grid : Message : 1.875893 s : Global lattice size : 64 64 64 64 +Grid : Message : 1.875897 s : OpenMP threads : 4 +Grid : Message : 1.875898 s : MPI tasks : 2 2 2 2 +Grid : Message : 1.993571 s : Initialising 4d RNG +Grid : Message : 2.881990 s : Intialising parallel RNG with unique string 'The 4D RNG' +Grid : Message : 2.882370 s : Seed SHA256: 49db4542db694e3b1a74bf2592a8c1b83bfebbe18401693c2609a4c3af1 +Grid : Message : 2.495044 s : Initialising 5d RNG +Grid : Message : 4.120900 s : Intialising parallel RNG with unique string 'The 5D RNG' +Grid : Message : 4.121350 s : Seed SHA256: b6316f2fac44ce14111f93e0296389330b077bfd0a7b359f781c58589f8a +Grid : Message : 15.268010 s : Drawing gauge field +Grid : Message : 16.234025 s : Random gauge initialised +Grid : Message : 16.234057 s : Applying BCs +Grid : Message : 16.365565 s : Setting up Cshift based reference +Grid : Message : 44.512418 s : ***************************************************************** +Grid : Message : 44.512448 s : * Kernel options --dslash-generic, --dslash-unroll, --dslash-asm +Grid : Message : 44.512450 s : ***************************************************************** +Grid : Message : 44.512451 s : ***************************************************************** +Grid : Message : 44.512452 s : * Benchmarking DomainWallFermionR::Dhop +Grid : Message : 44.512453 s : * Vectorising space-time by 8 +Grid : Message : 44.512454 s : * VComplexF size is 64 B +Grid : Message : 44.512456 s : * SINGLE precision +Grid : Message : 44.512459 s : * Using Overlapped Comms/Compute +Grid : Message : 44.512460 s : * Using GENERIC Nc WilsonKernels +Grid : Message : 44.512461 s : ***************************************************************** +Grid : Message : 46.389070 s : Called warmup +Grid : Message : 49.211265 s : Called Dw 300 times in 2.82203e+06 us +Grid : Message : 49.211295 s : mflop/s = 3.76681e+07 +Grid : Message : 49.211297 s : 
mflop/s per rank = 2.35425e+06 +Grid : Message : 49.211299 s : mflop/s per node = 9.41702e+06 +Grid : Message : 49.211301 s : RF GiB/s (base 2) = 76540.6 +Grid : Message : 49.211308 s : mem GiB/s (base 2) = 47837.9 +Grid : Message : 49.214868 s : norm diff 1.06409e-13 +Grid : Message : 92.647781 s : Compare to naive wilson implementation Dag to verify correctness +Grid : Message : 92.647816 s : Called DwDag +Grid : Message : 92.647817 s : norm dag result 12.0421 +Grid : Message : 92.801806 s : norm dag ref 12.0421 +Grid : Message : 92.817724 s : norm dag diff 7.21921e-14 +Grid : Message : 92.858973 s : Calling Deo and Doe and //assert Deo+Doe == Dunprec +Grid : Message : 93.210378 s : src_e0.499997 +Grid : Message : 93.583286 s : src_o0.500003 +Grid : Message : 93.682468 s : ********************************************************* +Grid : Message : 93.682471 s : * Benchmarking DomainWallFermionF::DhopEO +Grid : Message : 93.682472 s : * Vectorising space-time by 8 +Grid : Message : 93.682473 s : * SINGLE precision +Grid : Message : 93.682475 s : * Using Overlapped Comms/Compute +Grid : Message : 93.682476 s : * Using GENERIC Nc WilsonKernels +Grid : Message : 93.682477 s : ********************************************************* +Grid : Message : 95.162342 s : Deo mflop/s = 3.92487e+07 +Grid : Message : 95.162387 s : Deo mflop/s per rank 2.45305e+06 +Grid : Message : 95.162389 s : Deo mflop/s per node 9.81219e+06 +Grid : Message : 95.232801 s : r_e6.02111 +Grid : Message : 95.240061 s : r_o6.02102 +Grid : Message : 95.245975 s : res12.0421 +Grid : Message : 95.833402 s : norm diff 0 +Grid : Message : 96.573829 s : norm diff even 0 +Grid : Message : 96.868272 s : norm diff odd 0 + Dirichlet block [0 64 64 32 32] +Grid : Message : 97.756909 s : Grid Layout +Grid : Message : 97.756911 s : Global lattice size : 64 64 64 64 +Grid : Message : 97.756921 s : OpenMP threads : 4 +Grid : Message : 97.756922 s : MPI tasks : 2 2 2 2 +Grid : Message : 97.897085 s : Initialising 4d RNG +Grid : Message : 97.965061 s : Intialising parallel RNG with unique string 'The 4D RNG' +Grid : Message : 97.965097 s : Seed SHA256: 49db4542db694e3b1a74bf2592a8c1b83bfebbe18401693c2609a4c3af1 +Grid : Message : 98.367431 s : Initialising 5d RNG +Grid : Message : 99.752745 s : Intialising parallel RNG with unique string 'The 5D RNG' +Grid : Message : 99.752790 s : Seed SHA256: b6316f2fac44ce14111f93e0296389330b077bfd0a7b359f781c58589f8a +Grid : Message : 111.290148 s : Drawing gauge field +Grid : Message : 112.349289 s : Random gauge initialised +Grid : Message : 112.349320 s : Applying BCs +Grid : Message : 113.948740 s : Setting up Cshift based reference +Grid : Message : 140.320415 s : ***************************************************************** +Grid : Message : 140.320443 s : * Kernel options --dslash-generic, --dslash-unroll, --dslash-asm +Grid : Message : 140.320444 s : ***************************************************************** +Grid : Message : 140.320445 s : ***************************************************************** +Grid : Message : 140.320446 s : * Benchmarking DomainWallFermionR::Dhop +Grid : Message : 140.320447 s : * Vectorising space-time by 8 +Grid : Message : 140.320448 s : * VComplexF size is 64 B +Grid : Message : 140.320450 s : * SINGLE precision +Grid : Message : 140.320451 s : * Using Overlapped Comms/Compute +Grid : Message : 140.320452 s : * Using GENERIC Nc WilsonKernels +Grid : Message : 140.320453 s : ***************************************************************** +Grid : 
Message : 142.296150 s : Called warmup +Grid : Message : 144.397678 s : Called Dw 300 times in 2.36719e+06 us +Grid : Message : 144.397700 s : mflop/s = 4.49058e+07 +Grid : Message : 144.397702 s : mflop/s per rank = 2.80661e+06 +Grid : Message : 144.397704 s : mflop/s per node = 1.12265e+07 +Grid : Message : 144.397706 s : RF GiB/s (base 2) = 91247.6 +Grid : Message : 144.397708 s : mem GiB/s (base 2) = 57029.7 +Grid : Message : 144.401269 s : norm diff 9.78944e-14 +Grid : Message : 186.885460 s : Compare to naive wilson implementation Dag to verify correctness +Grid : Message : 186.885492 s : Called DwDag +Grid : Message : 186.885493 s : norm dag result 10.4157 +Grid : Message : 186.897154 s : norm dag ref 11.2266 +Grid : Message : 186.912538 s : norm dag diff 0.484633 diff --git a/systems/Tursa/dwf4.slurm b/systems/Tursa/dwf4.slurm index 65191398..5940ac05 100644 --- a/systems/Tursa/dwf4.slurm +++ b/systems/Tursa/dwf4.slurm @@ -1,14 +1,13 @@ #!/bin/bash #SBATCH -J dslash -#SBATCH -A tc002 -#SBATCH -t 2:20:00 -#SBATCH --nodelist=tu-c0r0n[00,03,06,09] +#SBATCH -A dp207 #SBATCH --exclusive #SBATCH --nodes=4 #SBATCH --ntasks=16 +#SBATCH --qos=standard #SBATCH --ntasks-per-node=4 #SBATCH --cpus-per-task=8 -#SBATCH --time=12:00:00 +#SBATCH --time=0:05:00 #SBATCH --partition=gpu #SBATCH --gres=gpu:4 #SBATCH --output=%x.%j.out diff --git a/tests/IO/Test_field_array_io.cc b/tests/IO/Test_field_array_io.cc new file mode 100644 index 00000000..51ea7893 --- /dev/null +++ b/tests/IO/Test_field_array_io.cc @@ -0,0 +1,184 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/IO/Test_field_array_io.cc + + Copyright (C) 2015 + +Author: Christopher Kelly +Author: Peter Boyle + + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include <Grid/Grid.h> + +using namespace std; +using namespace Grid; + +//This test demonstrates and checks a single-file write of an arbitrary array of fields + +uint64_t writeHeader(const uint32_t size, const uint32_t checksum, const std::string &format, const std::string &file){ + std::ofstream fout(file,std::ios::out|std::ios::in); + fout.seekp(0,std::ios::beg); + fout << std::setw(10) << size << std::endl; + fout << std::hex << std::setw(10) << checksum << std::endl; + fout << format << std::endl; + return fout.tellp(); +} + +uint64_t readHeader(uint32_t &size, uint32_t &checksum, std::string &format, const std::string &file){ + std::ifstream fin(file); + std::string line; + getline(fin,line); + { + std::stringstream ss; ss << line; ss >> size; + } + getline(fin,line); + { + std::stringstream ss; ss << line; ss >> std::hex >> checksum; + } + getline(fin,format); + removeWhitespace(format); + + return fin.tellg(); +} + +template<typename FieldType> +void writeFieldArray(const std::string &file, const std::vector<FieldType> &data){ + typedef typename FieldType::vector_object vobj; + typedef typename FieldType::scalar_object sobj; + GridBase* grid = data[0].Grid(); //assume all fields have the same Grid + BinarySimpleMunger<sobj,sobj> munge; //straight copy + + //We need a 2-pass header write: the first pass establishes the size, the second pass writes the checksum + std::string format = getFormatString<vobj>(); + + uint64_t offset; //leave 64 bits for header + if ( grid->IsBoss() ) { + NerscIO::truncate(file); + offset = writeHeader(data.size(), 0, format, file); + } + grid->Broadcast(0,(void *)&offset,sizeof(offset)); //use as a barrier + + std::cout << "Data offset write " << offset << std::endl; + std::cout << "Data size write " << data.size() << std::endl; + uint64_t field_size = uint64_t(grid->gSites()) * sizeof(sobj); + std::cout << "Field size = " << field_size << " B" << std::endl; + + uint32_t checksum = 0; + for(int i=0;i<data.size();i++){ + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + BinaryIO::writeLatticeObject<vobj,sobj>(const_cast<FieldType &>(data[i]),file,munge,offset,format, + nersc_csum,scidac_csuma,scidac_csumb); + offset += field_size; + checksum ^= nersc_csum + 0x9e3779b9 + (checksum<<6) + (checksum>>2); + } + std::cout << "Write checksum " << checksum << std::endl; + + if ( grid->IsBoss() ) { + writeHeader(data.size(), checksum, format, file); + } +} + + +template<typename FieldType> +void readFieldArray(std::vector<FieldType> &data, const std::string &file){ + typedef typename FieldType::vector_object vobj; + typedef typename FieldType::scalar_object sobj; + assert(data.size() > 0); + GridBase* grid = data[0].Grid(); //assume all fields have the same Grid + BinarySimpleUnmunger<sobj,sobj> munge; //straight copy + + uint32_t hdr_checksum, hdr_size; + std::string format; + uint64_t offset = readHeader(hdr_size, hdr_checksum, format, file); + + std::cout << "Data offset read " << offset << std::endl; + std::cout << "Data size read " << hdr_size << std::endl; + assert(data.size() == hdr_size); + + uint64_t field_size = uint64_t(grid->gSites()) * sizeof(sobj); + + uint32_t checksum = 0; + + for(int i=0;i<data.size();i++){ + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + BinaryIO::readLatticeObject<vobj,sobj>(data[i],file,munge,offset,format, + nersc_csum,scidac_csuma,scidac_csumb); + offset += field_size; + checksum ^= nersc_csum + 0x9e3779b9 + (checksum<<6) + (checksum>>2); + } + + std::cout << "Header checksum " << hdr_checksum << std::endl; + std::cout << "Read checksum " << checksum << std::endl; + + + assert( hdr_checksum == checksum ); +} + + + + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + 
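//The header checksum built by writeFieldArray/readFieldArray above folds each
+ //field's NERSC checksum into a running seed with a Boost-style hash_combine.
+ //A minimal standalone sketch of the mixing step (the helper name is
+ //illustrative, not part of the test):
+ // uint32_t combine(uint32_t seed, uint32_t csum){
+ //   return seed ^ (csum + 0x9e3779b9 + (seed<<6) + (seed>>2));
+ // }
+ //The result depends on field order as well as content, so the read-back
+ //below must traverse the array in the order it was written.
+ 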
Coordinate latt = GridDefaultLatt(); + Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + Coordinate mpi_layout = GridDefaultMpi(); + + const int Ls=8; + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt, simd_layout, mpi_layout); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::vector<int> seeds4({1,2,3,4}); + std::vector<int> seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + typedef DomainWallFermionD::FermionField FermionField; + + int nfield = 20; + std::vector<FermionField> data(nfield, FGrid); + + for(int i=0;i data_r(nfield, FGrid); + readFieldArray(data_r, file); + + for(int i=0;i using namespace std; using namespace Grid; - ; - -template<class d> -struct scal { - d internal; -}; - - Gamma::Algebra Gmu [] = { - Gamma::Algebra::GammaX, - Gamma::Algebra::GammaY, - Gamma::Algebra::GammaZ, - Gamma::Algebra::GammaT - }; template<class What> void TestWhat(What & Ddwf, @@ -86,10 +73,15 @@ int main (int argc, char ** argv) RealD M5 =1.8; std::cout<(Ddwf,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); + std::vector boundary = {1,1,1,-1}; + DomainWallFermionD::ImplParams Params(boundary); + // Coordinate Dirichlet({0,8,8,16,32}); + // Params.dirichlet=Dirichlet; + + DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,Params); + TestWhat(Ddwf,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); RealD b=1.5;// Scale factor b+c=2, b-c=1 RealD c=0.5; @@ -97,54 +89,54 @@ int main (int argc, char ** argv) std::cout<(Dmob,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); + MobiusFermionD Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c); + TestWhat(Dmob,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); std::cout< gamma(Ls,std::complex(1.0,0.0)); - ZMobiusFermionR zDmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,gamma,b,c); - TestWhat(zDmob,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); + ZMobiusFermionD zDmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,gamma,b,c); + TestWhat(zDmob,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dzolo,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); + TestWhat(Dzolo,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dsham,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); + TestWhat(Dsham,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dshamz,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); + TestWhat(Dshamz,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dov,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); + TestWhat(Dov,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dovz,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); + TestWhat(Dovz,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); std::cout< HermOp(Ddwf); + ZMobiusFermionD Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.); + SchurDiagTwoOperator HermOp(Ddwf); // Eigenvector storage LanczosParams fine =Params.FineParams; diff --git a/tests/Test_dwf_mixedcg_prec.cc b/tests/Test_dwf_mixedcg_prec.cc index da0b54cd..cbc573d1 100644 --- a/tests/Test_dwf_mixedcg_prec.cc +++ b/tests/Test_dwf_mixedcg_prec.cc @@ -46,7 +46,7 @@ int main (int argc, char ** argv) { Grid_init(&argc,&argv); - const int Ls=8; + const int Ls=12; std::cout << GridLogMessage << "::::: NB: to enable a quick bit reproducibility check use the --checksums flag. 
" << std::endl; @@ -94,13 +94,40 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl; MixedPrecisionConjugateGradient mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO); - mCG(src_o,result_o); - + double t1,t2,flops; + double MdagMsiteflops = 1452; // Mobius (real coeffs) + // CG overhead: 8 inner product, 4+8 axpy_norm, 4+4 linear comb (2 of) + double CGsiteflops = (8+4+8+4+4)*Nc*Ns ; // = 28*Nc*Ns = 336 for Nc=3, Ns=4 + std::cout << " MdagM site flops = "<< 4*MdagMsiteflops< CG(1.0e-8,10000); - CG(HermOpEO,src_o,result_o_2); - - MemoryManager::Print(); + for(int i=0;i<1;i++){ + result_o_2 = Zero(); + t1=usecond(); + CG(HermOpEO,src_o,result_o_2); + t2=usecond(); + iters = CG.IterationsToComplete; + flops = MdagMsiteflops*4*FrbGrid->gSites()*iters; + flops+= CGsiteflops*FrbGrid->gSites()*iters; + + std::cout << " DoublePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.< +Author: Azusa Yamaguchi +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ + +//Test the shifting of the gauge field that respects the boundary conditions +#include <Grid/Grid.h> + +using namespace Grid; + +typedef ConjugateGimplR Gimpl; //can choose periodic / charge conjugate directions at will +typedef Gimpl::GaugeField GaugeField; +typedef Gimpl::GaugeLinkField GaugeLinkField; +typedef Gimpl::SiteGaugeField SiteGaugeField; +typedef Gimpl::SiteGaugeLink SiteGaugeLink; + +GaugeField CshiftGaugeField(const GaugeField &U, const int dir, const int shift){ + GridBase *Grid = U.Grid(); + + GaugeField out(Grid); + GaugeLinkField Umu(Grid); + for(int mu=0;mu<Grid->Nd();mu++){ + Umu = PeekIndex<LorentzIndex>(U, mu); + Umu = Gimpl::CshiftLink(Umu,dir,shift); + PokeIndex<LorentzIndex>(out,Umu,mu); + } + return out; +} + + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + auto latt_size = GridDefaultLatt(); + auto simd_layout = GridDefaultSimd(4,vComplex::Nsimd()); + auto mpi_layout = GridDefaultMpi(); + + std::vector<int> conj_dirs = {1,1,0,0}; + Gimpl::setDirections(conj_dirs); + + GridCartesian Fine(latt_size,simd_layout,mpi_layout); + + GridParallelRNG FineRNG(&Fine); FineRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); + + + GaugeField U(&Fine); + GaugeField ShiftU(&Fine); + + GaugeLinkField link_field(&Fine), link_field_2(&Fine); + + //Like Test_cshift we put the lex coordinate index on each site but make it imaginary + //so we can tell when it was complex conjugated + LatticeComplex lex(&Fine); + lex=Zero(); + U = Zero(); + { + LatticeComplex coor(&Fine); + Integer stride =1; + for(int d=0;d<4;d++){ + LatticeCoordinate(coor,d); + lex = lex + coor*stride; + stride=stride*latt_size[d]; + } + PokeIndex<ColourIndex>(link_field, lex, 0,0); //place on 0,0 element of link + + for(int 
mu=0;mu(U, link_field_2, mu); + } + } + + std::stringstream ss; + ss<<"error"; + for(int d=0;d 0 && coor[dir] >= latt_size[dir]-shift && conj_dirs[dir] ) + || + ( shift < 0 && coor[dir] <= -shift-1 && conj_dirs[dir] ) + ) + scm = conjugate(scm); //CC if pulled over boundary + + cm = um(mu)()(0,0); + + RealD nrm = abs(scm-cm()()()); + //std::cout << cm << " " << scm << std::endl; + + Coordinate peer(4); + Complex tmp =cm; + Integer index=real(tmp); + + Integer cm_mu = index / vol4d; + index = index % vol4d; + Lexicographic::CoorFromIndex(peer,index,latt_size); + + if (nrm > 0){ + ferr<<"FAIL mu " << mu << " shift "<< shift<<" in dir "<< dir<<" ["< using namespace std; using namespace Grid; - ; int main(int argc, char ** argv) { Grid_init(&argc, &argv); @@ -80,7 +79,8 @@ int main(int argc, char ** argv) { Foo=lex; } - typedef CartesianStencil Stencil; + typedef CartesianStencil Stencil; + SimpleStencilParams p; for(int dir=0;dir<4;dir++){ for(int disp=0;disp directions(npoint,dir); std::vector displacements(npoint,disp); - Stencil myStencil(&Fine,npoint,0,directions,displacements,0); + Stencil myStencil(&Fine,npoint,0,directions,displacements,p); Coordinate ocoor(4); for(int o=0;o directions(npoint,dir); std::vector displacements(npoint,disp); - Stencil EStencil(&rbFine,npoint,Even,directions,displacements,0); - Stencil OStencil(&rbFine,npoint,Odd,directions,displacements,0); + Stencil EStencil(&rbFine,npoint,Even,directions,displacements,p); + Stencil OStencil(&rbFine,npoint,Odd,directions,displacements,p); Coordinate ocoor(4); for(int o=0;o HermIndefOp(Dcf); + OverlapWilsonContFracTanhFermionD Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0); + HermitianLinearOperator HermIndefOp(Dcf); HermIndefOp.Op(src,ref); HermIndefOp.OpDiag(src,result); @@ -92,8 +92,8 @@ int main (int argc, char ** argv) } { - OverlapWilsonPartialFractionTanhFermionR Dpf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0); - HermitianLinearOperator HermIndefOp(Dpf); + OverlapWilsonPartialFractionTanhFermionD Dpf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0); + HermitianLinearOperator HermIndefOp(Dpf); HermIndefOp.Op(src,ref); HermIndefOp.OpDiag(src,result); diff --git a/tests/core/Test_checker.cc b/tests/core/Test_checker.cc index f87133e5..c2382e91 100644 --- a/tests/core/Test_checker.cc +++ b/tests/core/Test_checker.cc @@ -140,14 +140,14 @@ int main (int argc, char ** argv) // RealD mass=0.1; // RealD M5=1.8; - // DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + // DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); // LatticeFermion src_o(FrbGrid); // LatticeFermion result_o(FrbGrid); // pickCheckerboard(Odd,src_o,src); // result_o=Zero(); - // SchurDiagMooeeOperator HermOpEO(Ddwf); + // SchurDiagMooeeOperator HermOpEO(Ddwf); // ConjugateGradient CG(1.0e-8,10000); // CG(HermOpEO,src_o,result_o); diff --git a/tests/core/Test_compact_wilson_clover_speedup.cc b/tests/core/Test_compact_wilson_clover_speedup.cc index 7a74ab19..695c8061 100644 --- a/tests/core/Test_compact_wilson_clover_speedup.cc +++ b/tests/core/Test_compact_wilson_clover_speedup.cc @@ -53,7 +53,7 @@ static int readInt(int* argc, char*** argv, std::string&& option, int defaultVal static float readFloat(int* argc, char*** argv, std::string&& option, float defaultValue) { std::string arg; - float ret = defaultValue; + double ret = defaultValue; if(checkPresent(argc, argv, option)) { arg = getContent(argc, argv, option); GridCmdOptionFloat(arg, ret); diff --git a/tests/core/Test_contfrac_even_odd.cc 
b/tests/core/Test_contfrac_even_odd.cc index 42bfe361..5731719a 100644 --- a/tests/core/Test_contfrac_even_odd.cc +++ b/tests/core/Test_contfrac_even_odd.cc @@ -76,20 +76,20 @@ int main (int argc, char ** argv) RealD M5 =1.8; std::cout<(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + OverlapWilsonContFracTanhFermionD Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0); + TestWhat(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dcfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + OverlapWilsonContFracZolotarevFermionD Dcfz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,6.0); + TestWhat(Dcfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dpf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + OverlapWilsonPartialFractionTanhFermionD Dpf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0); + TestWhat(Dpf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dpfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + OverlapWilsonPartialFractionZolotarevFermionD Dpfz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,6.0); + TestWhat(Dpfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); Grid_finalize(); } diff --git a/tests/core/Test_dwf_eofa_even_odd.cc b/tests/core/Test_dwf_eofa_even_odd.cc index 7812ebb8..532c740a 100644 --- a/tests/core/Test_dwf_eofa_even_odd.cc +++ b/tests/core/Test_dwf_eofa_even_odd.cc @@ -90,7 +90,7 @@ int main (int argc, char ** argv) RealD shift = 0.1234; RealD M5 = 1.8; int pm = 1; - DomainWallEOFAFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mq1, mq2, mq3, shift, pm, M5); + DomainWallEOFAFermionD Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mq1, mq2, mq3, shift, pm, M5); LatticeFermion src_e (FrbGrid); LatticeFermion src_o (FrbGrid); @@ -216,7 +216,7 @@ int main (int argc, char ** argv) pickCheckerboard(Even, phi_e, phi); pickCheckerboard(Odd , phi_o, phi); - SchurDiagMooeeOperator HermOpEO(Ddwf); + SchurDiagMooeeOperator HermOpEO(Ddwf); HermOpEO.MpcDagMpc(chi_e, dchi_e); HermOpEO.MpcDagMpc(chi_o, dchi_o); diff --git a/tests/core/Test_dwf_even_odd.cc b/tests/core/Test_dwf_even_odd.cc index 924eb3b7..f915b439 100644 --- a/tests/core/Test_dwf_even_odd.cc +++ b/tests/core/Test_dwf_even_odd.cc @@ -86,7 +86,7 @@ int main (int argc, char ** argv) RealD mass=0.1; RealD M5 =1.8; - DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); LatticeFermion src_e (FrbGrid); LatticeFermion src_o (FrbGrid); @@ -213,7 +213,7 @@ int main (int argc, char ** argv) pickCheckerboard(Even,phi_e,phi); pickCheckerboard(Odd ,phi_o,phi); - SchurDiagMooeeOperator HermOpEO(Ddwf); + SchurDiagMooeeOperator HermOpEO(Ddwf); HermOpEO.MpcDagMpc(chi_e,dchi_e); HermOpEO.MpcDagMpc(chi_o,dchi_o); diff --git a/tests/core/Test_fft_gfix.cc b/tests/core/Test_fft_gfix.cc index 87dbc242..6d617e25 100644 --- a/tests/core/Test_fft_gfix.cc +++ b/tests/core/Test_fft_gfix.cc @@ -29,14 +29,10 @@ Author: Peter Boyle #include using namespace Grid; - ; -int main (int argc, char ** argv) -{ +template +void run(double alpha, bool do_fft_gfix){ std::vector seeds({1,2,3,4}); - - Grid_init(&argc,&argv); - int threads = GridThread::GetThreads(); Coordinate latt_size = GridDefaultLatt(); @@ -55,10 +51,7 @@ int main (int argc, char ** argv) FFT theFFT(&GRID); std::cout<::ColdConfiguration(pRNG,Umu); // Unit gauge Uorg=Umu; + + Real init_plaq=WilsonLoops::avgPlaquette(Umu); + std::cout << " Initial plaquette "<< init_plaq << std::endl; + + //Apply a random gauge transformation to the unit 
gauge config Urnd=Umu; + SU::RandomGaugeTransform(pRNG,Urnd,g); - SU::RandomGaugeTransform(pRNG,Urnd,g); // Unit gauge - - Real plaq=WilsonLoops::avgPlaquette(Umu); - std::cout << " Initial plaquette "<::SteepestDescentGaugeFix(Umu,xform1,alpha,10000,1.0e-12, 1.0e-12,false); + FourierAcceleratedGaugeFixer::SteepestDescentGaugeFix(Umu,xform1,alpha,10000,1.0e-12, 1.0e-12,false); // Check the gauge xform matrices Utmp=Urnd; - SU::GaugeTransform(Utmp,xform1); + SU::GaugeTransform(Utmp,xform1); Utmp = Utmp - Umu; - std::cout << " Norm Difference of xformed gauge "<< norm2(Utmp) << std::endl; + std::cout << " Check the output gauge transformation matrices applied to the original field produce the xformed field "<< norm2(Utmp) << " (expect 0)" << std::endl; - plaq=WilsonLoops::avgPlaquette(Umu); - std::cout << " Final plaquette "<::avgPlaquette(Umu); + std::cout << " Final plaquette "<::SteepestDescentGaugeFix(Umu,xform2,alpha,10000,1.0e-12, 1.0e-12,true); + + Utmp=Urnd; + SU::GaugeTransform(Utmp,xform2); + Utmp = Utmp - Umu; + std::cout << " Check the output gauge transformation matrices applied to the original field produce the xformed field "<< norm2(Utmp) << " (expect 0)" << std::endl; - std::cout<< "*****************************************************************" <::SteepestDescentGaugeFix(Umu,xform2,alpha,10000,1.0e-12, 1.0e-12,true); + plaq=WilsonLoops::avgPlaquette(Umu); + std::cout << " Final plaquette "<::GaugeTransform(Utmp,xform2); - Utmp = Utmp - Umu; - std::cout << " Norm Difference of xformed gauge "<< norm2(Utmp) << std::endl; + std::cout<< "******************************************************************************************" <::HotConfiguration(pRNG,Umu); - plaq=WilsonLoops::avgPlaquette(Umu); - std::cout << " Final plaquette "<::avgPlaquette(Umu); + std::cout << " Initial plaquette "<< init_plaq << std::endl; - std::cout<< "*****************************************************************" <::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-12, 1.0e-12,false); - SU::HotConfiguration(pRNG,Umu); // Unit gauge + plaq=WilsonLoops::avgPlaquette(Umu); + std::cout << " Final plaquette "<::avgPlaquette(Umu); - std::cout << " Initial plaquette "<::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-12, 1.0e-12,true); + SU::HotConfiguration(pRNG,Umu); - plaq=WilsonLoops::avgPlaquette(Umu); - std::cout << " Final plaquette "<::avgPlaquette(Umu); + std::cout << " Initial plaquette "<< init_plaq << std::endl; - std::cout<< "*****************************************************************" <::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-12, 1.0e-12,true); + + plaq=WilsonLoops::avgPlaquette(Umu); + std::cout << " Final plaquette "<::HotConfiguration(pRNG,Umu); // Unit gauge + SU::HotConfiguration(pRNG,Umu); - plaq=WilsonLoops::avgPlaquette(Umu); - std::cout << " Initial plaquette "<::avgPlaquette(Umu); + std::cout << " Initial plaquette "<< init_plaq << std::endl; - FourierAcceleratedGaugeFixer::SteepestDescentGaugeFix(Umu,xform3,alpha,10000,1.0e-12, 1.0e-12,true,coulomb_dir); + FourierAcceleratedGaugeFixer::SteepestDescentGaugeFix(Umu,xform3,alpha,10000,1.0e-12, 1.0e-12,false,coulomb_dir); - std::cout << Umu<::avgPlaquette(Umu); + std::cout << " Final plaquette "<::avgPlaquette(Umu); - std::cout << " Final plaquette "<::HotConfiguration(pRNG,Umu); + + init_plaq=WilsonLoops::avgPlaquette(Umu); + std::cout << " Initial plaquette "<< init_plaq << std::endl; + + FourierAcceleratedGaugeFixer::SteepestDescentGaugeFix(Umu,xform3,alpha,10000,1.0e-12, 1.0e-12,true,coulomb_dir); + + 
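//For reference (standard gauge-fixing definitions, not code from this test):
+ //the Landau condition drives sum_mu d_mu A_mu = 0 over all four directions,
+ //while the Coulomb-gauge calls restrict the sum to the three directions
+ //transverse to coulomb_dir, i.e. sum_{i != coulomb_dir} d_i A_i = 0.
+ 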
plaq=WilsonLoops::avgPlaquette(Umu); + std::cout << " Final plaquette "<> alpha; + } + } + + + if(gimpl == "periodic"){ + std::cout << GridLogMessage << "Using periodic boundary condition" << std::endl; + run(alpha, do_fft_gfix); + }else{ + std::vector conjdirs = {1,1,0,0}; //test with 2 conjugate dirs and 2 not + std::cout << GridLogMessage << "Using complex conjugate boundary conditions in dimensions "; + for(int i=0;i(alpha, do_fft_gfix); + } + Grid_finalize(); } diff --git a/tests/core/Test_fft_matt.cc b/tests/core/Test_fft_matt.cc deleted file mode 100644 index 55234601..00000000 --- a/tests/core/Test_fft_matt.cc +++ /dev/null @@ -1,270 +0,0 @@ - /************************************************************************************* - grid` physics library, www.github.com/paboyle/Grid - - Source file: ./tests/Test_cshift.cc - - Copyright (C) 2015 - -Author: Azusa Yamaguchi -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#include - -using namespace Grid; - -Gamma::Algebra Gmu [] = { - Gamma::Algebra::GammaX, - Gamma::Algebra::GammaY, - Gamma::Algebra::GammaZ, - Gamma::Algebra::GammaT, - Gamma::Algebra::Gamma5 -}; - -int main (int argc, char ** argv) -{ - Grid_init(&argc,&argv); - - int threads = GridThread::GetThreads(); - std::cout< seeds({1,2,3,4}); - GridSerialRNG sRNG; sRNG.SeedFixedIntegers(seeds); // naughty seeding - GridParallelRNG pRNG(&GRID); - pRNG.SeedFixedIntegers(seeds); - - LatticeGaugeFieldD Umu(&GRID); - SU::ColdConfiguration(pRNG,Umu); // Unit gauge - - //////////////////////////////////////////////////// - // Wilson test - //////////////////////////////////////////////////// - { - LatticeFermionD src(&GRID); gaussian(pRNG,src); - LatticeFermionD src_p(&GRID); - LatticeFermionD tmp(&GRID); - LatticeFermionD ref(&GRID); - LatticeFermionD result(&GRID); - - RealD mass=0.1; - WilsonFermionD Dw(Umu,GRID,RBGRID,mass); - - Dw.M(src,ref); - std::cout << "Norm src "< 1/2 gmu (eip - emip) = i sinp gmu - Kinetic = Kinetic + sin(kmu)*ci*(Gamma(Gmu[mu])*src_p); - - } - - W = mass + sk2; - Kinetic = Kinetic + W * src_p; - - std::cout<<"Momentum space src "<< norm2(src_p)< HermOp(Dw); - ConjugateGradient CG(1.0e-10,10000); - CG(HermOp,src,result); - - //////////////////////////////////////////////////////////////////////// - std::cout << " Taking difference" <::RandomGaugeTransform(pRNG,U_GT,g); // Unit gauge - - LatticeFermionD src(&GRID); - LatticeFermionD tmp(&GRID); - LatticeFermionD ref(&GRID); - LatticeFermionD diff(&GRID); - - // could loop over colors - src=Zero(); - Coordinate point(4,0); // 0,0,0,0 - SpinColourVectorD ferm; - ferm=Zero(); - ferm()(0)(0) = ComplexD(1.0); - pokeSite(ferm,src,point); - - RealD mass=0.1; 
- WilsonFermionD Dw(U_GT,GRID,RBGRID,mass); - - // Momentum space prop - std::cout << " Solving by FFT and Feynman rules" < HermOp(Dw); - ConjugateGradient CG(1.0e-10,10000); - CG(HermOp,src,result); - - //////////////////////////////////////////////////////////////////////// - std::cout << " Taking difference" <> nu; std::cout << GridLogMessage << "Set Gparity direction to " << nu << std::endl; + }else if(std::string(argv[i]) == "--Tbc-APRD"){ + tbc_aprd = 1; + std::cout << GridLogMessage << "Using antiperiodic BCs in the time direction" << std::endl; } } @@ -155,13 +159,18 @@ int main (int argc, char ** argv) //Coordinate grid for reference LatticeInteger xcoor_1f5(FGrid_1f); - LatticeCoordinate(xcoor_1f5,1+nu); + LatticeCoordinate(xcoor_1f5,1+nu); //note '1+nu'! This is because for 5D fields the s-direction is direction 0 Replicate(src,src_1f); src_1f = where( xcoor_1f5 >= Integer(L), 2.0*src_1f,src_1f ); RealD mass=0.0; RealD M5=1.8; - StandardDiracOp Ddwf(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f,mass,M5 DOP_PARAMS); + + //Standard Dirac op + AcceleratorVector bc_std(Nd, 1.0); + if(tbc_aprd) bc_std[Nd-1] = -1.; //antiperiodic time BC + StandardDiracOp::ImplParams std_params(bc_std); + StandardDiracOp Ddwf(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f,mass,M5 DOP_PARAMS, std_params); StandardFermionField src_o_1f(FrbGrid_1f); StandardFermionField result_o_1f(FrbGrid_1f); @@ -172,9 +181,11 @@ int main (int argc, char ** argv) ConjugateGradient CG(1.0e-8,10000); CG(HermOpEO,src_o_1f,result_o_1f); - // const int nu = 3; + //Gparity Dirac op std::vector twists(Nd,0); twists[nu] = 1; + if(tbc_aprd) twists[Nd-1] = 1; + GparityDiracOp::ImplParams params; params.twists = twists; GparityDiracOp GPDdwf(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f,*UrbGrid_2f,mass,M5 DOP_PARAMS,params); @@ -271,8 +282,11 @@ int main (int argc, char ** argv) std::cout << "2f cb "<(result_o_2f,0); - res1o = PeekIndex<0>(result_o_2f,1); + res0o = PeekIndex<0>(result_o_2f,0); //flavor 0, odd cb + res1o = PeekIndex<0>(result_o_2f,1); //flavor 1, odd cb std::cout << "res cb "<= Integer(L), replica1,replica0 ); replica0 = Zero(); setCheckerboard(replica0,result_o_1f); - std::cout << "Norm2 solutions is " < +Author: Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ +#include + +using namespace Grid; + +static constexpr double tolerance = 1.0e-6; +static std::array testAlgebra; + +void print(const GparityFlavourMatrix &g) +{ + for(int i = 0; i < Ngp; i++) + { + std::cout << GridLogMessage << "("; + for(int j=0;j testg; + const Complex I(0., 1.), mI(0., -1.); + + // 0 1 + // 1 0 + testg[0] = Zero(); + testg[0](0, 1)()() = 1.; + testg[0](1, 0)()() = 1.; + std::cout << GridLogMessage << "test SigmaX= " << std::endl; + print(testg[0]); + + // 0 -i + // i 0 + testg[1] = Zero(); + testg[1](0, 1)()() = mI; + testg[1](1, 0)()() = I; + std::cout << GridLogMessage << "test SigmaY= " << std::endl; + print(testg[1]); + + // 1 0 + // 0 -1 + testg[2] = Zero(); + testg[2](0, 0)()() = 1.0; + testg[2](1, 1)()() = -1.0; + std::cout << GridLogMessage << "test SigmaZ= " << std::endl; + print(testg[2]); + + +#define DEFINE_TEST_G(g, exp)\ +testAlgebra[GparityFlavour::Algebra::g] = exp; \ +testAlgebra[GparityFlavour::Algebra::Minus##g] = -exp; + + DEFINE_TEST_G(SigmaX , testg[0]); + DEFINE_TEST_G(SigmaY , testg[1]); + DEFINE_TEST_G(SigmaZ , testg[2]); + DEFINE_TEST_G(Identity , 1.); + + GparityFlavourMatrix pplus; + pplus = 1.0; + pplus = pplus + testg[1]; + pplus = pplus * 0.5; + + DEFINE_TEST_G(ProjPlus , pplus); + + GparityFlavourMatrix pminus; + pminus = 1.0; + pminus = pminus - testg[1]; + pminus = pminus * 0.5; + + DEFINE_TEST_G(ProjMinus , pminus); + +#undef DEFINE_TEST_G +} + +template +void test(const Expr &a, const Expr &b) +{ + if (norm2(a - b) < tolerance) + { + std::cout << "[OK] "; + } + else + { + std::cout << "[fail]" << std::endl; + std::cout << GridLogError << "a= " << a << std::endl; + std::cout << GridLogError << "is different (tolerance= " << tolerance << ") from " << std::endl; + std::cout << GridLogError << "b= " << b << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkSigma(const GparityFlavour::Algebra a, GridSerialRNG &rng) +{ + GparityFlavourVector v; + GparityFlavourMatrix m, &testg = testAlgebra[a]; + GparityFlavour g(a); + + random(rng, v); + random(rng, m); + + std::cout << GridLogMessage << "Checking " << GparityFlavour::name[a] << ": "; + std::cout << "vecmul "; + test(g*v, testg*v); + std::cout << "matlmul "; + test(g*m, testg*m); + std::cout << "matrmul "; + test(m*g, m*testg); + std::cout << std::endl; +} + +int main(int argc, char *argv[]) +{ + Grid_init(&argc,&argv); + + Coordinate latt_size = GridDefaultLatt(); + Coordinate simd_layout = GridDefaultSimd(4,vComplex::Nsimd()); + Coordinate mpi_layout = GridDefaultMpi(); + + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridSerialRNG sRNG; + + sRNG.SeedFixedIntegers(std::vector({45,12,81,9})); + + std::cout << GridLogMessage << "======== Test algebra" << std::endl; + createTestAlgebra(); + std::cout << GridLogMessage << "======== Multiplication operators check" << std::endl; + for (int i = 0; i < GparityFlavour::nSigma; ++i) + { + checkSigma(i, sRNG); + } + std::cout << GridLogMessage << std::endl; + + Grid_finalize(); + + return EXIT_SUCCESS; +} diff --git a/tests/core/Test_gpwilson_even_odd.cc b/tests/core/Test_gpwilson_even_odd.cc index d510657e..c8587435 100644 --- a/tests/core/Test_gpwilson_even_odd.cc +++ b/tests/core/Test_gpwilson_even_odd.cc @@ -52,7 +52,7 @@ int main (int argc, char ** argv) // pRNG.SeedFixedIntegers(seeds); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - 
typedef typename GparityWilsonFermionR::FermionField FermionField; + typedef typename GparityWilsonFermionD::FermionField FermionField; FermionField src (&Grid); random(pRNG,src); FermionField phi (&Grid); random(pRNG,phi); @@ -80,10 +80,10 @@ int main (int argc, char ** argv) RealD mass=0.1; - GparityWilsonFermionR::ImplParams params; + GparityWilsonFermionD::ImplParams params; std::vector twists(Nd,0); twists[1] = 1; params.twists = twists; - GparityWilsonFermionR Dw(Umu,Grid,RBGrid,mass,params); + GparityWilsonFermionD Dw(Umu,Grid,RBGrid,mass,params); FermionField src_e (&RBGrid); FermionField src_o (&RBGrid); @@ -199,7 +199,7 @@ int main (int argc, char ** argv) pickCheckerboard(Even,phi_e,phi); pickCheckerboard(Odd ,phi_o,phi); - SchurDiagMooeeOperator HermOpEO(Dw); + SchurDiagMooeeOperator HermOpEO(Dw); HermOpEO.MpcDagMpc(chi_e,dchi_e); HermOpEO.MpcDagMpc(chi_o,dchi_o); diff --git a/tests/core/Test_mobius_eofa_even_odd.cc b/tests/core/Test_mobius_eofa_even_odd.cc index 68ffe624..65d55896 100644 --- a/tests/core/Test_mobius_eofa_even_odd.cc +++ b/tests/core/Test_mobius_eofa_even_odd.cc @@ -92,7 +92,7 @@ int main (int argc, char ** argv) RealD shift = 0.1234; RealD M5 = 1.8; int pm = 1; - MobiusEOFAFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mq1, mq2, mq3, shift, pm, M5, b, c); + MobiusEOFAFermionD Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mq1, mq2, mq3, shift, pm, M5, b, c); LatticeFermion src_e (FrbGrid); LatticeFermion src_o (FrbGrid); @@ -218,7 +218,7 @@ int main (int argc, char ** argv) pickCheckerboard(Even, phi_e, phi); pickCheckerboard(Odd , phi_o, phi); - SchurDiagMooeeOperator HermOpEO(Ddwf); + SchurDiagMooeeOperator HermOpEO(Ddwf); HermOpEO.MpcDagMpc(chi_e, dchi_e); HermOpEO.MpcDagMpc(chi_o, dchi_o); diff --git a/tests/core/Test_mobius_even_odd.cc b/tests/core/Test_mobius_even_odd.cc index e210f236..91125ac6 100644 --- a/tests/core/Test_mobius_even_odd.cc +++ b/tests/core/Test_mobius_even_odd.cc @@ -108,8 +108,8 @@ int main (int argc, char ** argv) omegas.push_back( std::complex(0.0686324988446592,-0.0550658530827402) ); #endif - MobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, 0.5,0.5); - // DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + MobiusFermionD Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, 0.5,0.5); + // DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); LatticeFermion src_e (FrbGrid); LatticeFermion src_o (FrbGrid); @@ -264,7 +264,7 @@ int main (int argc, char ** argv) pickCheckerboard(Odd ,phi_o,phi); - SchurDiagMooeeOperator HermOpEO(Ddwf); + SchurDiagMooeeOperator HermOpEO(Ddwf); HermOpEO.MpcDagMpc(chi_e,dchi_e); HermOpEO.MpcDagMpc(chi_o,dchi_o); diff --git a/tests/core/Test_prec_change.cc b/tests/core/Test_prec_change.cc new file mode 100644 index 00000000..06b9ae5c --- /dev/null +++ b/tests/core/Test_prec_change.cc @@ -0,0 +1,124 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/core/Test_prec_change.cc + + Copyright (C) 2015 + +Author: Christopher Kelly +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + int Ls = 12; + Coordinate latt4 = GridDefaultLatt(); + + GridCartesian * UGridD = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGridD = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridD); + GridCartesian * FGridD = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridD); + GridRedBlackCartesian * FrbGridD = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridD); + + GridCartesian * UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF); + GridCartesian * FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF); + GridRedBlackCartesian * FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF); + + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + + std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl; + GridParallelRNG RNG5(FGridD); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG5F(FGridF); RNG5F.SeedFixedIntegers(seeds5); + std::cout << GridLogMessage << "Initialised RNGs" << std::endl; + + LatticeFermionD field_d(FGridD), tmp_d(FGridD); + random(RNG5,field_d); + RealD norm2_field_d = norm2(field_d); + + LatticeFermionD2 field_d2(FGridF), tmp_d2(FGridF); + random(RNG5F,field_d2); + RealD norm2_field_d2 = norm2(field_d2); + + LatticeFermionF field_f(FGridF); + + //Test original implementation + { + std::cout << GridLogMessage << "Testing original implementation" << std::endl; + field_f = Zero(); + precisionChangeOrig(field_f,field_d); + RealD Ndiff = (norm2_field_d - norm2(field_f))/norm2_field_d; + std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of single and double prec fields differs by " << Ndiff << std::endl; + tmp_d = Zero(); + precisionChangeOrig(tmp_d, field_f); + Ndiff = norm2( LatticeFermionD(tmp_d-field_d) ) / norm2_field_d; + std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of back-converted and original double prec fields differs by " << Ndiff << std::endl; + } + //Test new implementation with pregenerated workspace + { + std::cout << GridLogMessage << "Testing new implementation with pregenerated workspace" << std::endl; + precisionChangeWorkspace wk_sp_to_dp(field_d.Grid(),field_f.Grid()); + precisionChangeWorkspace wk_dp_to_sp(field_f.Grid(),field_d.Grid()); + + field_f = Zero(); + precisionChange(field_f,field_d,wk_dp_to_sp); + RealD Ndiff = (norm2_field_d - norm2(field_f))/norm2_field_d; + std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? 
"!!FAIL" : "Pass") << ": relative norm2 of single and double prec fields differs by " << Ndiff << std::endl; + tmp_d = Zero(); + precisionChange(tmp_d, field_f,wk_sp_to_dp); + Ndiff = norm2( LatticeFermionD(tmp_d-field_d) ) / norm2_field_d; + std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of back-converted and original double prec fields differs by " << Ndiff << std::endl; + } + //Test new implementation without pregenerated workspace + { + std::cout << GridLogMessage << "Testing new implementation without pregenerated workspace" << std::endl; + field_f = Zero(); + precisionChange(field_f,field_d); + RealD Ndiff = (norm2_field_d - norm2(field_f))/norm2_field_d; + std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of single and double prec fields differs by " << Ndiff << std::endl; + tmp_d = Zero(); + precisionChange(tmp_d, field_f); + Ndiff = norm2( LatticeFermionD(tmp_d-field_d) ) / norm2_field_d; + std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of back-converted and original double prec fields differs by " << Ndiff << std::endl; + } + //Test fast implementation + { + std::cout << GridLogMessage << "Testing fast (double2) implementation" << std::endl; + field_f = Zero(); + precisionChangeFast(field_f,field_d2); + RealD Ndiff = (norm2_field_d2 - norm2(field_f))/norm2_field_d2; + std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of single and double prec fields differs by " << Ndiff << std::endl; + tmp_d2 = Zero(); + precisionChangeFast(tmp_d2, field_f); + Ndiff = norm2( LatticeFermionD2(tmp_d2-field_d2) ) / norm2_field_d2; + std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? 
"!!FAIL" : "Pass") << ": relative norm2 of back-converted and original double prec fields differs by " << Ndiff << std::endl; + } + std::cout << "Done" << std::endl; + + Grid_finalize(); +} diff --git a/tests/core/Test_staggered.cc b/tests/core/Test_staggered.cc index ba615ad2..f38815ae 100644 --- a/tests/core/Test_staggered.cc +++ b/tests/core/Test_staggered.cc @@ -53,9 +53,9 @@ int main (int argc, char ** argv) pRNG.SeedFixedIntegers(seeds); // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); - typedef typename ImprovedStaggeredFermionR::FermionField FermionField; - typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField; - typename ImprovedStaggeredFermionR::ImplParams params; + typedef typename ImprovedStaggeredFermionD::FermionField FermionField; + typedef typename ImprovedStaggeredFermionD::ComplexField ComplexField; + typename ImprovedStaggeredFermionD::ImplParams params; FermionField src (&Grid); random(pRNG,src); FermionField result(&Grid); result=Zero(); @@ -130,7 +130,7 @@ int main (int argc, char ** argv) // ref = ref + mass * src; } - ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0,params); + ImprovedStaggeredFermionD Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0,params); std::cout< HermOpEO(Ds); + SchurDiagMooeeOperator HermOpEO(Ds); HermOpEO.MpcDagMpc(chi_e,dchi_e); HermOpEO.MpcDagMpc(chi_o,dchi_o); diff --git a/tests/core/Test_staggered5D.cc b/tests/core/Test_staggered5D.cc index b1b3be1d..32ad0d17 100644 --- a/tests/core/Test_staggered5D.cc +++ b/tests/core/Test_staggered5D.cc @@ -60,9 +60,9 @@ int main (int argc, char ** argv) pRNG4.SeedFixedIntegers(seeds); pRNG5.SeedFixedIntegers(seeds); - typedef typename ImprovedStaggeredFermion5DR::FermionField FermionField; - typedef typename ImprovedStaggeredFermion5DR::ComplexField ComplexField; - typename ImprovedStaggeredFermion5DR::ImplParams params; + typedef typename ImprovedStaggeredFermion5DD::FermionField FermionField; + typedef typename ImprovedStaggeredFermion5DD::ComplexField ComplexField; + typename ImprovedStaggeredFermion5DD::ImplParams params; FermionField src (FGrid); @@ -148,7 +148,7 @@ int main (int argc, char ** argv) } } - ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,c1,c2,u0,params); + ImprovedStaggeredFermion5DD Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,c1,c2,u0,params); std::cout< HermOpEO(Ds); + SchurDiagMooeeOperator HermOpEO(Ds); HermOpEO.MpcDagMpc(chi_e,dchi_e); HermOpEO.MpcDagMpc(chi_o,dchi_o); diff --git a/tests/core/Test_staggered_naive.cc b/tests/core/Test_staggered_naive.cc index d8ca9d5f..9d32ad46 100644 --- a/tests/core/Test_staggered_naive.cc +++ b/tests/core/Test_staggered_naive.cc @@ -52,9 +52,9 @@ int main (int argc, char ** argv) pRNG.SeedFixedIntegers(seeds); // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); - typedef typename NaiveStaggeredFermionR::FermionField FermionField; - typedef typename NaiveStaggeredFermionR::ComplexField ComplexField; - typename NaiveStaggeredFermionR::ImplParams params; + typedef typename NaiveStaggeredFermionD::FermionField FermionField; + typedef typename NaiveStaggeredFermionD::ComplexField ComplexField; + typename NaiveStaggeredFermionD::ImplParams params; FermionField src (&Grid); random(pRNG,src); FermionField result(&Grid); result=Zero(); @@ -120,7 +120,7 @@ int main (int argc, char ** argv) // ref = ref + mass * src; } - NaiveStaggeredFermionR Ds(Umu,Grid,RBGrid,mass,c1,u0,params); + NaiveStaggeredFermionD Ds(Umu,Grid,RBGrid,mass,c1,u0,params); std::cout< HermOpEO(Ds); + SchurDiagMooeeOperator 
HermOpEO(Ds); HermOpEO.MpcDagMpc(chi_e,dchi_e); HermOpEO.MpcDagMpc(chi_o,dchi_o); diff --git a/tests/core/Test_wilson_clover.cc b/tests/core/Test_wilson_clover.cc index 8f143070..0ce0513f 100644 --- a/tests/core/Test_wilson_clover.cc +++ b/tests/core/Test_wilson_clover.cc @@ -52,8 +52,8 @@ int main(int argc, char **argv) pRNG.SeedFixedIntegers(seeds); // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); - typedef typename WilsonCloverFermionR::FermionField FermionField; - typename WilsonCloverFermionR::ImplParams params; + typedef typename WilsonCloverFermionD::FermionField FermionField; + typename WilsonCloverFermionD::ImplParams params; WilsonAnisotropyCoefficients anis; FermionField src(&Grid); @@ -88,8 +88,8 @@ int main(int argc, char **argv) RealD csw_r = 1.0; RealD csw_t = 1.0; - WilsonCloverFermionR Dwc(Umu, Grid, RBGrid, mass, csw_r, csw_t, anis, params); - CompactWilsonCloverFermionR Dwc_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); + WilsonCloverFermionD Dwc(Umu, Grid, RBGrid, mass, csw_r, csw_t, anis, params); + CompactWilsonCloverFermionD Dwc_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); std::cout << GridLogMessage << "==========================================================" << std::endl; std::cout << GridLogMessage << "= Testing that Deo + Doe = Dunprec " << std::endl; @@ -324,8 +324,8 @@ int main(int argc, char **argv) } ///////////////// - WilsonCloverFermionR Dwc_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, anis, params); - CompactWilsonCloverFermionR Dwc_compact_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); + WilsonCloverFermionD Dwc_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, anis, params); + CompactWilsonCloverFermionD Dwc_compact_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); tmp = Omega * src; pickCheckerboard(Even, src_e, tmp); @@ -377,14 +377,14 @@ int main(int argc, char **argv) chi = Zero(); phi = Zero(); - WilsonFermionR Dw(Umu, Grid, RBGrid, mass, params); + WilsonFermionD Dw(Umu, Grid, RBGrid, mass, params); Dw.M(src, result); Dwc.M(src, chi); Dwc_prime.M(Omega * src, phi); - WilsonFermionR Dw_prime(U_prime, Grid, RBGrid, mass, params); + WilsonFermionD Dw_prime(U_prime, Grid, RBGrid, mass, params); Dw_prime.M(Omega * src, result2); err = result - adj(Omega) * result2; @@ -411,7 +411,7 @@ int main(int argc, char **argv) chi = Zero(); phi = Zero(); err = Zero(); - WilsonCloverFermionR Dwc_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, anis, params); // <-- Notice: csw=0 + WilsonCloverFermionD Dwc_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, anis, params); // <-- Notice: csw=0 pickCheckerboard(Even, phi_e, phi); pickCheckerboard(Odd, phi_o, phi); @@ -437,7 +437,7 @@ int main(int argc, char **argv) chi = Zero(); phi = Zero(); err = Zero(); - CompactWilsonCloverFermionR Dwc_compact_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, 1.0, anis, params); // <-- Notice: csw=0 + CompactWilsonCloverFermionD Dwc_compact_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, 1.0, anis, params); // <-- Notice: csw=0 pickCheckerboard(Even, phi_e, phi); pickCheckerboard(Odd, phi_o, phi); diff --git a/tests/core/Test_wilson_conserved_current.cc b/tests/core/Test_wilson_conserved_current.cc index 3ee1a271..c66bf940 100644 --- a/tests/core/Test_wilson_conserved_current.cc +++ b/tests/core/Test_wilson_conserved_current.cc @@ -74,7 +74,7 @@ int main (int argc, char ** argv) SU::HotConfiguration(RNG4,Umu); } - typename WilsonCloverFermionR::ImplParams params; + typename WilsonCloverFermionD::ImplParams params; 
WilsonAnisotropyCoefficients anis; RealD mass = 0.1; RealD csw_r = 1.0; @@ -83,32 +83,32 @@ int main (int argc, char ** argv) std::cout<(Dw,Umu,UGrid,UrbGrid,&RNG4); + WilsonFermionD Dw(Umu,*UGrid,*UrbGrid,mass,params); + TestConserved(Dw,Umu,UGrid,UrbGrid,&RNG4); std::cout<(Dwc,Umu,UGrid,UrbGrid,&RNG4); + WilsonCloverFermionD Dwc(Umu, *UGrid, *UrbGrid, mass, csw_r, csw_t, anis, params); + TestConserved(Dwc,Umu,UGrid,UrbGrid,&RNG4); std::cout<(Dwcc,Umu,UGrid,UrbGrid,&RNG4); + CompactWilsonCloverFermionD Dwcc(Umu, *UGrid, *UrbGrid, mass, csw_r, csw_t, 1.0, anis, params); + TestConserved(Dwcc,Umu,UGrid,UrbGrid,&RNG4); std::cout<(Dewc,Umu,UGrid,UrbGrid,&RNG4); + WilsonExpCloverFermionD Dewc(Umu, *UGrid, *UrbGrid, mass, csw_r, csw_t, anis, params); + TestConserved(Dewc,Umu,UGrid,UrbGrid,&RNG4); std::cout<(Dewcc,Umu,UGrid,UrbGrid,&RNG4); + CompactWilsonExpCloverFermionD Dewcc(Umu, *UGrid, *UrbGrid, mass, csw_r, csw_t, 1.0, anis, params); + TestConserved(Dewcc,Umu,UGrid,UrbGrid,&RNG4); Grid_finalize(); } diff --git a/tests/core/Test_wilson_even_odd.cc b/tests/core/Test_wilson_even_odd.cc index 4d240b80..81081bd0 100644 --- a/tests/core/Test_wilson_even_odd.cc +++ b/tests/core/Test_wilson_even_odd.cc @@ -89,7 +89,7 @@ int main (int argc, char ** argv) RealD mass=0.1; - WilsonFermionR Dw(Umu,Grid,RBGrid,mass); + WilsonFermionD Dw(Umu,Grid,RBGrid,mass); LatticeFermion src_e (&RBGrid); LatticeFermion src_o (&RBGrid); @@ -205,7 +205,7 @@ int main (int argc, char ** argv) pickCheckerboard(Even,phi_e,phi); pickCheckerboard(Odd ,phi_o,phi); - SchurDiagMooeeOperator HermOpEO(Dw); + SchurDiagMooeeOperator HermOpEO(Dw); HermOpEO.MpcDagMpc(chi_e,dchi_e); HermOpEO.MpcDagMpc(chi_o,dchi_o); diff --git a/tests/core/Test_wilson_exp_clover.cc b/tests/core/Test_wilson_exp_clover.cc index 8516d0dc..017d8823 100644 --- a/tests/core/Test_wilson_exp_clover.cc +++ b/tests/core/Test_wilson_exp_clover.cc @@ -52,8 +52,8 @@ int main(int argc, char **argv) pRNG.SeedFixedIntegers(seeds); // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); - typedef typename WilsonExpCloverFermionR::FermionField FermionField; - typename WilsonExpCloverFermionR::ImplParams params; + typedef typename WilsonExpCloverFermionD::FermionField FermionField; + typename WilsonExpCloverFermionD::ImplParams params; WilsonAnisotropyCoefficients anis; FermionField src(&Grid); @@ -88,8 +88,8 @@ int main(int argc, char **argv) RealD csw_r = 1.0; RealD csw_t = 1.0; - WilsonExpCloverFermionR Dwc(Umu, Grid, RBGrid, mass, csw_r, csw_t, anis, params); - CompactWilsonExpCloverFermionR Dwc_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); + WilsonExpCloverFermionD Dwc(Umu, Grid, RBGrid, mass, csw_r, csw_t, anis, params); + CompactWilsonExpCloverFermionD Dwc_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); std::cout << GridLogMessage << "==========================================================" << std::endl; std::cout << GridLogMessage << "= Testing that Deo + Doe = Dunprec " << std::endl; @@ -324,8 +324,8 @@ int main(int argc, char **argv) } ///////////////// - WilsonExpCloverFermionR Dwc_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, anis, params); - CompactWilsonExpCloverFermionR Dwc_compact_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); + WilsonExpCloverFermionD Dwc_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, anis, params); + CompactWilsonExpCloverFermionD Dwc_compact_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); tmp = Omega * src; pickCheckerboard(Even, src_e, tmp); @@ 
-377,14 +377,14 @@ int main(int argc, char **argv) chi = Zero(); phi = Zero(); - WilsonFermionR Dw(Umu, Grid, RBGrid, mass, params); + WilsonFermionD Dw(Umu, Grid, RBGrid, mass, params); Dw.M(src, result); Dwc.M(src, chi); Dwc_prime.M(Omega * src, phi); - WilsonFermionR Dw_prime(U_prime, Grid, RBGrid, mass, params); + WilsonFermionD Dw_prime(U_prime, Grid, RBGrid, mass, params); Dw_prime.M(Omega * src, result2); err = result - adj(Omega) * result2; @@ -411,7 +411,7 @@ int main(int argc, char **argv) chi = Zero(); phi = Zero(); err = Zero(); - WilsonExpCloverFermionR Dwc_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, anis, params); // <-- Notice: csw=0 + WilsonExpCloverFermionD Dwc_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, anis, params); // <-- Notice: csw=0 pickCheckerboard(Even, phi_e, phi); pickCheckerboard(Odd, phi_o, phi); @@ -437,7 +437,7 @@ int main(int argc, char **argv) chi = Zero(); phi = Zero(); err = Zero(); - CompactWilsonExpCloverFermionR Dwc_compact_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, 1.0, anis, params); // <-- Notice: csw=0 + CompactWilsonExpCloverFermionD Dwc_compact_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, 1.0, anis, params); // <-- Notice: csw=0 pickCheckerboard(Even, phi_e, phi); pickCheckerboard(Odd, phi_o, phi); diff --git a/tests/core/Test_wilson_twisted_mass_even_odd.cc b/tests/core/Test_wilson_twisted_mass_even_odd.cc index d9e798c3..0351f7cd 100644 --- a/tests/core/Test_wilson_twisted_mass_even_odd.cc +++ b/tests/core/Test_wilson_twisted_mass_even_odd.cc @@ -90,7 +90,7 @@ int main (int argc, char ** argv) RealD mass=0.1; RealD mu = 0.1; - WilsonTMFermionR Dw(Umu,Grid,RBGrid,mass,mu); + WilsonTMFermionD Dw(Umu,Grid,RBGrid,mass,mu); LatticeFermion src_e (&RBGrid); LatticeFermion src_o (&RBGrid); @@ -206,7 +206,7 @@ int main (int argc, char ** argv) pickCheckerboard(Even,phi_e,phi); pickCheckerboard(Odd ,phi_o,phi); - SchurDiagMooeeOperator HermOpEO(Dw); + SchurDiagMooeeOperator HermOpEO(Dw); HermOpEO.MpcDagMpc(chi_e,dchi_e); HermOpEO.MpcDagMpc(chi_o,dchi_o); diff --git a/tests/core/Test_zmobius_even_odd.cc b/tests/core/Test_zmobius_even_odd.cc index f6e18934..ee6fe860 100644 --- a/tests/core/Test_zmobius_even_odd.cc +++ b/tests/core/Test_zmobius_even_odd.cc @@ -123,7 +123,7 @@ int main (int argc, char ** argv) RealD _mass,RealD _M5, std::vector &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) : */ - ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, omegas,RealD(1.),RealD(0.)); + ZMobiusFermionD Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, omegas,RealD(1.),RealD(0.)); LatticeFermion src_e (FrbGrid); LatticeFermion src_o (FrbGrid); @@ -278,7 +278,7 @@ int main (int argc, char ** argv) pickCheckerboard(Odd ,phi_o,phi); - SchurDiagMooeeOperator HermOpEO(Ddwf); + SchurDiagMooeeOperator HermOpEO(Ddwf); HermOpEO.MpcDagMpc(chi_e,dchi_e); HermOpEO.MpcDagMpc(chi_o,dchi_o); diff --git a/tests/debug/Test_cayley_cg.cc b/tests/debug/Test_cayley_cg.cc index 5418a8af..74492fd9 100644 --- a/tests/debug/Test_cayley_cg.cc +++ b/tests/debug/Test_cayley_cg.cc @@ -125,10 +125,10 @@ int main (int argc, char ** argv) std::cout<(Ddwf,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); - TestReconstruct5DFA(Ddwf,DdwfF,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + TestCGinversions(Ddwf,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + TestReconstruct5DFA(Ddwf,DdwfF,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); RealD b=1.5;// Scale factor b+c=2, b-c=1 RealD c=0.5; @@ -137,54 +137,54 @@ int main (int argc, char ** argv) 
std::cout<(Dmob,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); - TestReconstruct5DFA(Dmob,DmobF,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + TestCGinversions(Dmob,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + TestReconstruct5DFA(Dmob,DmobF,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(ZDmob,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); - TestReconstruct5D(ZDmob,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + ZMobiusFermionD ZDmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,gamma,b,c); + TestCGinversions(ZDmob,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + TestReconstruct5D(ZDmob,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dzolo,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); - TestReconstruct5D(Dzolo,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + MobiusZolotarevFermionD Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,2.0); + TestCGinversions(Dzolo,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + TestReconstruct5D(Dzolo,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dsham,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); - TestReconstruct5DFA(Dsham,DshamF,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + TestCGinversions(Dsham,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + TestReconstruct5DFA(Dsham,DshamF,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dshamz,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); - TestReconstruct5D(Dshamz,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + ShamirZolotarevFermionD Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0); + TestCGinversions(Dshamz,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + TestReconstruct5D(Dshamz,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dov,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); - TestReconstruct5DFA(Dov,DovF,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + TestCGinversions(Dov,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + TestReconstruct5DFA(Dov,DovF,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dovz,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); - TestReconstruct5D(Dovz,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + OverlapWilsonCayleyZolotarevFermionD Dovz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0); + TestCGinversions(Dovz,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + TestReconstruct5D(Dovz,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); Grid_finalize(); } diff --git a/tests/debug/Test_cayley_coarsen_support.cc b/tests/debug/Test_cayley_coarsen_support.cc index b2f691d7..2190a9b0 100644 --- a/tests/debug/Test_cayley_coarsen_support.cc +++ b/tests/debug/Test_cayley_coarsen_support.cc @@ -95,8 +95,8 @@ int main (int argc, char ** argv) RealD mass=0.5; RealD M5=1.8; - DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); - Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); + DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); HermIndefOp.Op(src,ref); HermIndefOp.OpDiag(src,result); @@ -118,7 +118,7 @@ int main (int argc, char ** argv) std::cout< HermDefOp(Ddwf); + MdagMLinearOperator HermDefOp(Ddwf); typedef Aggregation Subspace; Subspace Aggregates(Coarse5d,FGrid,cb); diff --git a/tests/debug/Test_cayley_even_odd.cc b/tests/debug/Test_cayley_even_odd.cc index 5e800b26..b6eecc0f 100644 --- a/tests/debug/Test_cayley_even_odd.cc 
+++ b/tests/debug/Test_cayley_even_odd.cc @@ -76,41 +76,41 @@ int main (int argc, char ** argv) RealD mass=0.1; RealD M5 =1.8; std::cout<(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + TestWhat(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); RealD b=1.5;// Scale factor b+c=2, b-c=1 RealD c=0.5; std::vector gamma(Ls,ComplexD(1.0,0.1)); std::cout<(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + MobiusFermionD Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c); + TestWhat(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(ZDmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + ZMobiusFermionD ZDmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,gamma,b,c); + TestWhat(ZDmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + MobiusZolotarevFermionD Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,2.0); + TestWhat(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + ScaledShamirFermionD Dsham(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0); + TestWhat(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + ShamirZolotarevFermionD Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0); + TestWhat(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dov,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + OverlapWilsonCayleyTanhFermionD Dov(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0); + TestWhat(Dov,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dovz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + OverlapWilsonCayleyZolotarevFermionD Dovz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0); + TestWhat(Dovz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); Grid_finalize(); } diff --git a/tests/debug/Test_cayley_ldop_cr.cc b/tests/debug/Test_cayley_ldop_cr.cc index 416017e5..997b8df5 100644 --- a/tests/debug/Test_cayley_ldop_cr.cc +++ b/tests/debug/Test_cayley_ldop_cr.cc @@ -83,8 +83,8 @@ int main (int argc, char ** argv) std::cout< HermIndefOp(Ddwf); + DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); const int nbasis = 8; @@ -95,7 +95,7 @@ int main (int argc, char ** argv) std::cout< HermDefOp(Ddwf); + MdagMLinearOperator HermDefOp(Ddwf); Subspace Aggregates(Coarse5d,FGrid,cb); Aggregates.CreateSubspace(RNG5,HermDefOp); diff --git a/tests/debug/Test_cayley_mres.cc b/tests/debug/Test_cayley_mres.cc index bfbc3cf7..26d3dc60 100644 --- a/tests/debug/Test_cayley_mres.cc +++ b/tests/debug/Test_cayley_mres.cc @@ -128,8 +128,8 @@ int main (int argc, char ** argv) std::cout<(Ddwf,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + TestConserved(Ddwf,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); RealD b=1.5;// Scale factor b+c=2, b-c=1 RealD c=0.5; @@ -138,23 +138,23 @@ int main (int argc, char ** argv) std::cout<(Dmob,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + MobiusFermionD Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c); + TestConserved(Dmob,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dsham,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + ScaledShamirFermionD Dsham(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0); + 
TestConserved(Dsham,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(ZDmob,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5,&ZDmobrev); + ZMobiusFermionD ZDmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,omegas,b,c); + ZMobiusFermionD ZDmobrev(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,omegasrev,b,c); + TestConserved(ZDmob,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5,&ZDmobrev); Grid_finalize(); } @@ -290,7 +290,7 @@ void TestConserved(Action & Ddwf, const RealD DmuPAmu{real(TensorRemove(sumPA[t]-sumPA[(t-1+Nt)%Nt]))}; std::cout< sumPAref; @@ -565,8 +565,8 @@ void TestConserved1(Action & Ddwf, Action & Ddwfrev, std::cout <<" PAc action "<oSites(),{ diff --git a/tests/debug/Test_heatbath_dwf_eofa.cc b/tests/debug/Test_heatbath_dwf_eofa.cc index e1c18021..5920054d 100644 --- a/tests/debug/Test_heatbath_dwf_eofa.cc +++ b/tests/debug/Test_heatbath_dwf_eofa.cc @@ -77,8 +77,8 @@ int main(int argc, char** argv) LatticeGaugeField Umu(UGrid); SU::HotConfiguration(RNG4, Umu); - DomainWallEOFAFermionR Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5); - DomainWallEOFAFermionR Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5); + DomainWallEOFAFermionD Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5); + DomainWallEOFAFermionD Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5); // Construct the action and test the heatbath (zero initial guess) { diff --git a/tests/debug/Test_heatbath_dwf_eofa_gparity.cc b/tests/debug/Test_heatbath_dwf_eofa_gparity.cc index 7eabfc65..982f35db 100644 --- a/tests/debug/Test_heatbath_dwf_eofa_gparity.cc +++ b/tests/debug/Test_heatbath_dwf_eofa_gparity.cc @@ -41,7 +41,7 @@ using namespace Grid; ; typedef GparityWilsonImplR FermionImplPolicy; -typedef GparityDomainWallEOFAFermionR FermionAction; +typedef GparityDomainWallEOFAFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; // Parameters for test @@ -82,7 +82,7 @@ int main(int argc, char** argv) LatticeGaugeField Umu(UGrid); SU::HotConfiguration(RNG4, Umu); - // GparityDomainWallFermionR::ImplParams params; + // GparityDomainWallFermionD::ImplParams params; FermionAction::ImplParams params; FermionAction Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5, params); FermionAction Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5, params); diff --git a/tests/debug/Test_heatbath_mobius_eofa.cc b/tests/debug/Test_heatbath_mobius_eofa.cc index 48806642..3824daab 100644 --- a/tests/debug/Test_heatbath_mobius_eofa.cc +++ b/tests/debug/Test_heatbath_mobius_eofa.cc @@ -79,8 +79,8 @@ int main(int argc, char** argv) LatticeGaugeField Umu(UGrid); SU::HotConfiguration(RNG4, Umu); - MobiusEOFAFermionR Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5, b, c); - MobiusEOFAFermionR Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5, b, c); + MobiusEOFAFermionD Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5, b, c); + MobiusEOFAFermionD Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5, b, c); // Construct the action and test the heatbath (zero initial guess) { diff --git a/tests/debug/Test_heatbath_mobius_eofa_gparity.cc b/tests/debug/Test_heatbath_mobius_eofa_gparity.cc index 52447e5e..fd3d96f8 100644 --- a/tests/debug/Test_heatbath_mobius_eofa_gparity.cc +++ b/tests/debug/Test_heatbath_mobius_eofa_gparity.cc @@ -41,7 +41,7 @@ using namespace Grid; ; typedef GparityWilsonImplR 
FermionImplPolicy; -typedef GparityMobiusEOFAFermionR FermionAction; +typedef GparityMobiusEOFAFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; // Parameters for test diff --git a/tests/debug/Test_reweight_dwf_eofa.cc b/tests/debug/Test_reweight_dwf_eofa.cc index a150b18f..6a5452c7 100644 --- a/tests/debug/Test_reweight_dwf_eofa.cc +++ b/tests/debug/Test_reweight_dwf_eofa.cc @@ -105,10 +105,10 @@ int main(int argc, char **argv) SU::HotConfiguration(RNG4, Umu); // Initialize RHMC fermion operators - DomainWallFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5); - DomainWallFermionR Ddwf_b(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, M5); - SchurDiagMooeeOperator MdagM(Ddwf_f); - SchurDiagMooeeOperator VdagV(Ddwf_b); + DomainWallFermionD Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5); + DomainWallFermionD Ddwf_b(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, M5); + SchurDiagMooeeOperator MdagM(Ddwf_f); + SchurDiagMooeeOperator VdagV(Ddwf_b); // Degree 12 rational approximations to x^(1/4) and x^(-1/4) double lo = 0.0001; @@ -153,10 +153,10 @@ int main(int argc, char **argv) RealD shift_L = 0.0; RealD shift_R = -1.0; int pm = 1; - DomainWallEOFAFermionR Deofa_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5); - DomainWallEOFAFermionR Deofa_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5); - MdagMLinearOperator LdagL(Deofa_L); - MdagMLinearOperator RdagR(Deofa_R); + DomainWallEOFAFermionD Deofa_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5); + DomainWallEOFAFermionD Deofa_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5); + MdagMLinearOperator LdagL(Deofa_L); + MdagMLinearOperator RdagR(Deofa_R); // Stochastically estimate reweighting factor via EOFA RealD k = Deofa_L.k; diff --git a/tests/debug/Test_reweight_dwf_eofa_gparity.cc b/tests/debug/Test_reweight_dwf_eofa_gparity.cc index df2d95a0..70ae94aa 100644 --- a/tests/debug/Test_reweight_dwf_eofa_gparity.cc +++ b/tests/debug/Test_reweight_dwf_eofa_gparity.cc @@ -33,7 +33,7 @@ using namespace std; using namespace Grid; ; -typedef typename GparityDomainWallFermionR::FermionField FermionField; +typedef typename GparityDomainWallFermionD::FermionField FermionField; // parameters for test const std::vector grid_dim = { 8, 8, 8, 8 }; @@ -107,11 +107,11 @@ int main(int argc, char **argv) SU::HotConfiguration(RNG4, Umu); // Initialize RHMC fermion operators - GparityDomainWallFermionR::ImplParams params; - GparityDomainWallFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5, params); - GparityDomainWallFermionR Ddwf_b(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, M5, params); - SchurDiagMooeeOperator MdagM(Ddwf_f); - SchurDiagMooeeOperator VdagV(Ddwf_b); + GparityDomainWallFermionD::ImplParams params; + GparityDomainWallFermionD Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5, params); + GparityDomainWallFermionD Ddwf_b(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, M5, params); + SchurDiagMooeeOperator MdagM(Ddwf_f); + SchurDiagMooeeOperator VdagV(Ddwf_b); // Degree 12 rational approximations to x^(1/4) and x^(-1/4) double lo = 0.0001; @@ -156,10 +156,10 @@ int main(int argc, char **argv) RealD shift_L = 0.0; RealD shift_R = -1.0; int pm = 1; - GparityDomainWallEOFAFermionR Deofa_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5, params); - GparityDomainWallEOFAFermionR Deofa_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5, params); - 
MdagMLinearOperator LdagL(Deofa_L); - MdagMLinearOperator RdagR(Deofa_R); + GparityDomainWallEOFAFermionD Deofa_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5, params); + GparityDomainWallEOFAFermionD Deofa_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5, params); + MdagMLinearOperator LdagL(Deofa_L); + MdagMLinearOperator RdagR(Deofa_R); // Stochastically estimate reweighting factor via EOFA RealD k = Deofa_L.k; diff --git a/tests/debug/Test_reweight_mobius_eofa.cc b/tests/debug/Test_reweight_mobius_eofa.cc index 88ecab7d..744dd302 100644 --- a/tests/debug/Test_reweight_mobius_eofa.cc +++ b/tests/debug/Test_reweight_mobius_eofa.cc @@ -107,10 +107,10 @@ int main(int argc, char **argv) SU::HotConfiguration(RNG4, Umu); // Initialize RHMC fermion operators - MobiusFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5, b, c); - MobiusFermionR Ddwf_b(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, M5, b, c); - SchurDiagMooeeOperator MdagM(Ddwf_f); - SchurDiagMooeeOperator VdagV(Ddwf_b); + MobiusFermionD Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5, b, c); + MobiusFermionD Ddwf_b(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, M5, b, c); + SchurDiagMooeeOperator MdagM(Ddwf_f); + SchurDiagMooeeOperator VdagV(Ddwf_b); // Degree 12 rational approximations to x^(1/4) and x^(-1/4) double lo = 0.0001; @@ -155,10 +155,10 @@ int main(int argc, char **argv) RealD shift_L = 0.0; RealD shift_R = -1.0; int pm = 1; - MobiusEOFAFermionR Deofa_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5, b, c); - MobiusEOFAFermionR Deofa_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5, b, c); - MdagMLinearOperator LdagL(Deofa_L); - MdagMLinearOperator RdagR(Deofa_R); + MobiusEOFAFermionD Deofa_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5, b, c); + MobiusEOFAFermionD Deofa_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5, b, c); + MdagMLinearOperator LdagL(Deofa_L); + MdagMLinearOperator RdagR(Deofa_R); // Stochastically estimate reweighting factor via EOFA RealD k = Deofa_L.k; diff --git a/tests/debug/Test_reweight_mobius_eofa_gparity.cc b/tests/debug/Test_reweight_mobius_eofa_gparity.cc index 31708265..e2a4fb47 100644 --- a/tests/debug/Test_reweight_mobius_eofa_gparity.cc +++ b/tests/debug/Test_reweight_mobius_eofa_gparity.cc @@ -33,7 +33,7 @@ using namespace std; using namespace Grid; ; -typedef typename GparityDomainWallFermionR::FermionField FermionField; +typedef typename GparityDomainWallFermionD::FermionField FermionField; // parameters for test const std::vector grid_dim = { 8, 8, 8, 8 }; @@ -109,11 +109,11 @@ int main(int argc, char **argv) SU::HotConfiguration(RNG4, Umu); // Initialize RHMC fermion operators - GparityDomainWallFermionR::ImplParams params; - GparityMobiusFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5, b, c, params); - GparityMobiusFermionR Ddwf_b(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, M5, b, c, params); - SchurDiagMooeeOperator MdagM(Ddwf_f); - SchurDiagMooeeOperator VdagV(Ddwf_b); + GparityDomainWallFermionD::ImplParams params; + GparityMobiusFermionD Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5, b, c, params); + GparityMobiusFermionD Ddwf_b(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, M5, b, c, params); + SchurDiagMooeeOperator MdagM(Ddwf_f); + SchurDiagMooeeOperator VdagV(Ddwf_b); // Degree 12 rational approximations to x^(1/4) and x^(-1/4) double lo = 0.0001; @@ -158,10 +158,10 @@ int main(int argc, char 
**argv)
   RealD shift_L = 0.0;
   RealD shift_R = -1.0;
   int pm = 1;
-  GparityMobiusEOFAFermionR Deofa_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5, b, c, params);
-  GparityMobiusEOFAFermionR Deofa_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5, b, c, params);
-  MdagMLinearOperator LdagL(Deofa_L);
-  MdagMLinearOperator RdagR(Deofa_R);
+  GparityMobiusEOFAFermionD Deofa_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5, b, c, params);
+  GparityMobiusEOFAFermionD Deofa_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5, b, c, params);
+  MdagMLinearOperator LdagL(Deofa_L);
+  MdagMLinearOperator RdagR(Deofa_R);
   // Stochastically estimate reweighting factor via EOFA
   RealD k = Deofa_L.k;
diff --git a/tests/forces/Test_bdy.cc b/tests/forces/Test_bdy.cc
new file mode 100644
index 00000000..c2c97d0d
--- /dev/null
+++ b/tests/forces/Test_bdy.cc
@@ -0,0 +1,305 @@
+/*
+
+ 2f Full det MdagM 10^6 force ~ 1.3e7
+Grid : Message : 1767.283471 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+Grid : Message : 1767.283476 s : S1 : 1.52885e+09
+Grid : Message : 1767.283480 s : S2 : 1.52886e+09
+Grid : Message : 1767.283482 s : dS : 8877.34
+Grid : Message : 1767.283483 s : dSpred : 8877.7
+Grid : Message : 1767.283484 s : diff : -0.360484
+Grid : Message : 1767.283485 s : *********************************************************
+
+ 2f Full det MpcdagMpc 10^6 force ~ 1.8e6
+Grid : Message : 2399.576962 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+Grid : Message : 2399.576968 s : S1 : 1.52885e+09
+Grid : Message : 2399.576972 s : S2 : 1.52886e+09
+Grid : Message : 2399.576974 s : dS : 9728.49
+Grid : Message : 2399.576975 s : dSpred : 9726.58
+Grid : Message : 2399.576976 s : diff : 1.90683
+Grid : Message : 2399.576977 s : *********************************************************
+
+ 2f bdy MdagM 1500 force Force ~ 2800
+Grid : Message : 4622.385061 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+Grid : Message : 4622.385067 s : S1 : 1.52885e+09
+Grid : Message : 4622.385071 s : S2 : 1.52885e+09
+Grid : Message : 4622.385072 s : dS : 25.4944
+Grid : Message : 4622.385073 s : dSpred : 25.4672
+Grid : Message : 4622.385074 s : diff : 0.0271414
+Grid : Message : 4622.385075 s : *********************************************************
+
+ 2f bdy MpcdagMpc 10^6 force Force ~ 2200
+Grid : Message : 4622.385061 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+Grid : Message : 4622.385067 s : S1 : 1.52885e+09
+Grid : Message : 4622.385071 s : S2 : 1.52885e+09
+Grid : Message : 4622.385072 s : dS : 25.4944
+Grid : Message : 4622.385073 s : dSpred : 25.4672
+Grid : Message : 4622.385074 s : diff : 0.0271414
+Grid : Message : 4622.385075 s : *********************************************************
+
+ 1f Bdy Det
+ Optimisation log: looser rational AND MD tolerances sloppy
+MobiusForce.221179 -- same as HMC. dS is mispredicted Force ~2.8
+Grid : Message : 6582.258991 s : dS : 0.024478
+Grid : Message : 6582.258992 s : dSpred : 0.00791876
+Grid : Message : 6582.258994 s : diff : 0.0165592
+
+MobiusForce.221193 -- tight rational AND MD tolerances to 1e-8 ~ 2.8 same
+Grid : Message : 1964.939209 s : S1 : 7.64404e+08
+Grid : Message : 1964.939213 s : S2 : 7.64404e+08
+Grid : Message : 1964.939215 s : dS : -0.00775838 <--- too loose even on action
+Grid : Message : 1964.939216 s : dSpred : -0.00416793
+Grid : Message : 1964.939217 s : diff : -0.00359045
+
+MobiusForce.221394 -- looser rational, MD tol 1e-8 ~ 2.8 same
+Grid : Message : 1198.346720 s : S1 : 764404649.48886
+Grid : Message : 1198.346760 s : S2 : 764404649.5133
+Grid : Message : 1198.346780 s : dS : 0.024440884590149
+Grid : Message : 1198.346800 s : dSpred : 0.0079145154465184
+Grid : Message : 1198.346810 s : diff : 0.016526369143631
+
+MobiusForce.221394 -- tight rational, MD tol sloppy Force ~ 2.8
+Grid : Message : 2376.921950 s : S1 : 764404436.44069
+Grid : Message : 2376.921954 s : S2 : 764404436.43299
+Grid : Message : 2376.921956 s : dS : -0.0076971054077148
+Grid : Message : 2376.921958 s : dSpred : -0.0041610472282526
+Grid : Message : 2376.921959 s : diff : -0.0035360581794623
+
+*/
+
+//
+/*************************************************************************************
+
+ Grid physics library, www.github.com/paboyle/Grid
+
+ Source file: ./tests/Test_double_ratio.cc
+
+ Copyright (C) 2022
+
+Author: Peter Boyle
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+ See the full license in the file "LICENSE" in the top level distribution directory
+ *************************************************************************************/
+ /* END LEGAL */
+#include
+
+using namespace std;
+using namespace Grid;
+
+typedef MobiusFermionD FermionAction;
+typedef WilsonImplD FimplD;
+typedef WilsonImplD FermionImplPolicy;
+
+template
+void ForceTest(Action &action,LatticeGaugeField & U,MomentumFilterBase &Filter)
+{
+  GridBase *UGrid = U.Grid();
+
+  std::vector seeds({1,2,3,5});
+  GridSerialRNG sRNG; sRNG.SeedFixedIntegers(seeds);
+  GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds);
+
+  LatticeColourMatrix Pmu(UGrid);
+  LatticeGaugeField P(UGrid);
+  LatticeGaugeField UdSdU(UGrid);
+
+  std::cout << GridLogMessage << "*********************************************************"<(UdSdU,mu);
+    Pmu= PeekIndex(P,mu);
+    dS = dS - trace(Pmu*UdSdUmu)*eps*2.0*2.0;
+  }
+  ComplexD dSpred = sum(dS);
+  RealD diff = S2-S1-dSpred.real();
+
+  std::cout<< GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<1 ?
1 : 0;
+
+  Coordinate NonDirichlet(Nd+1,0);
+  Coordinate Dirichlet(Nd+1,0);
+  Dirichlet[1] = CommDim[0]*latt_size[0]/mpi_layout[0] * shm[0];
+  Dirichlet[2] = CommDim[1]*latt_size[1]/mpi_layout[1] * shm[1];
+  Dirichlet[3] = CommDim[2]*latt_size[2]/mpi_layout[2] * shm[2];
+  Dirichlet[4] = CommDim[3]*latt_size[3]/mpi_layout[3] * shm[3];
+
+  Coordinate Block4(Nd);
+  Block4[0] = Dirichlet[1];
+  Block4[1] = Dirichlet[2];
+  Block4[2] = Dirichlet[3];
+  Block4[3] = Dirichlet[4];
+
+  std::vector boundary = {1,1,1,-1};
+  FermionAction::ImplParams Params(boundary);
+  FermionAction::ImplParams ParamsDir(boundary);
+  Params.dirichlet=NonDirichlet;
+  ParamsDir.dirichlet=Dirichlet;
+  ParamsDir.partialDirichlet=1;
+
+  ///////////////////// Gauge Field and Gauge Forces ////////////////////////////
+  LatticeGaugeField U(UGrid);
+
+  RealD beta=6.0;
+  WilsonGaugeActionR PlaqAction(beta);
+  IwasakiGaugeActionR RectAction(beta);
+
+  MomentumFilterNone FilterNone;
+  ForceTest(PlaqAction,U,FilterNone);
+  ForceTest(RectAction,U,FilterNone);
+
+  ////////////////////////////////////
+  // Action
+  ////////////////////////////////////
+  RealD mass=0.00078;
+  RealD pvmass=1.0;
+  RealD M5=1.8;
+  RealD b=1.5;
+  RealD c=0.5;
+
+  // Double versions
+  FermionAction DdwfPeriodic(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,Params);
+  FermionAction PVPeriodic (U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,pvmass,M5,b,c,Params);
+  FermionAction DdwfDirichlet(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,ParamsDir);
+
+  double StoppingCondition = 1.0e-8;
+  double MaxCGIterations = 50000;
+  ConjugateGradient CG(StoppingCondition,MaxCGIterations);
+
+  //////////////////// Two Flavour Determinant Ratio ///////////////////////////////
+  TwoFlavourRatioPseudoFermionAction Nf2(PVPeriodic, DdwfPeriodic,CG,CG);
+  //  ForceTest(Nf2,U,FilterNone);
+
+  //////////////////// Two Flavour Determinant force test Even Odd ///////////////////////////////
+  TwoFlavourEvenOddRatioPseudoFermionAction Nf2eo(PVPeriodic, DdwfPeriodic,CG,CG);
+  //  ForceTest(Nf2eo,U,FilterNone);
+
+  //////////////////// Domain forces ////////////////////
+  int Width=4;
+  DDHMCFilter DDHMCFilter(Block4,Width);
+
+  //////////////////// Two flavour boundary det ////////////////////
+  TwoFlavourRatioPseudoFermionAction BdyNf2(DdwfDirichlet, DdwfPeriodic,CG,CG);
+  //  ForceTest(BdyNf2,U,DDHMCFilter);
+
+  //////////////////// Two flavour eo boundary det ////////////////////
+  TwoFlavourEvenOddRatioPseudoFermionAction BdyNf2eo(DdwfDirichlet, DdwfPeriodic,CG,CG);
+  //  ForceTest(BdyNf2eo,U,DDHMCFilter);
+
+  //////////////////// One flavour boundary det ////////////////////
+  OneFlavourRationalParams OFRp; // Up/down
+  OFRp.lo       = 4.0e-5;
+  OFRp.hi       = 90.0;
+  OFRp.MaxIter  = 60000;
+  OFRp.tolerance= 1.0e-8;
+  OFRp.mdtolerance= 1.0e-6;
+  OFRp.degree   = 18;
+  OFRp.precision= 80;
+  OFRp.BoundsCheckFreq=0;
+  std::vector ActionTolByPole({
+      1.0e-8,1.0e-8,1.0e-8,1.0e-8,
+      1.0e-8,1.0e-8,1.0e-8,1.0e-8,
+      1.0e-8,1.0e-8,1.0e-8,1.0e-8
+    });
+  std::vector MDTolByPole({
+      1.0e-6,3.0e-7,1.0e-7,1.0e-7, // Orig sloppy
+      // 1.0e-8,1.0e-8,1.0e-8,1.0e-8,
+      1.0e-8,1.0e-8,1.0e-8,1.0e-8,
+      1.0e-8,1.0e-8,1.0e-8,1.0e-8
+    });
+  OneFlavourEvenOddRatioRationalPseudoFermionAction BdySqrt(DdwfDirichlet,DdwfPeriodic,OFRp);
+  ForceTest(BdySqrt,U,DDHMCFilter);
+
+  Grid_finalize();
+}
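A minimal, self-contained illustration of the dS-versus-dSpred bookkeeping that ForceTest in Test_bdy.cc above performs. This is plain C++ on a toy one-link U(1) action S(theta) = beta*(1 - cos(theta)), not Grid code; the names beta, theta and P are assumptions for illustration only. The point the "diff :" lines in the logs probe is that an accurate derivative makes diff = S2 - S1 - dSpred vanish as O(eps^2), so a diff that stays large signals an under-converged force solve rather than a step-size effect.

#include <cmath>
#include <cstdio>

// Toy check: S(theta) = beta*(1 - cos(theta)), so dS/dtheta = beta*sin(theta).
// Step theta -> theta + eps*P and compare the measured dS against the
// force-predicted dSpred, in the same spirit as ForceTest does per gauge link.
int main(void) {
  const double beta = 6.0, theta = 0.7, P = 0.3; // P plays the role of the conjugate momentum
  for (double eps : {1e-1, 1e-2, 1e-3}) {
    double S1     = beta * (1.0 - std::cos(theta));
    double S2     = beta * (1.0 - std::cos(theta + eps * P));
    double force  = beta * std::sin(theta);   // exact derivative at theta
    double dSpred = eps * P * force;          // first-order prediction
    std::printf("eps=%g dS=%g dSpred=%g diff=%g\n",
                eps, S2 - S1, dSpred, S2 - S1 - dSpred);
  }
  return 0;
}

Reducing eps by 10 shrinks diff by roughly 100, which is the signature of a correct force; the logs above show diff failing to shrink when the multishift MD tolerances are too loose.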
diff --git a/tests/forces/Test_contfrac_force.cc b/tests/forces/Test_contfrac_force.cc
index dc9eedce..526cde12 100644
--- a/tests/forces/Test_contfrac_force.cc
+++ b/tests/forces/Test_contfrac_force.cc
@@ -66,7 +66,7 @@ int main (int argc, char ** argv)
   ////////////////////////////////////
   RealD mass=0.01;
   RealD M5=1.8;
-  OverlapWilsonContFracTanhFermionR Dcf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
+  OverlapWilsonContFracTanhFermionD Dcf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
   Dcf.M (phi,Mphi);
   ComplexD S = innerProduct(Mphi,Mphi); // pdag MdagM p
diff --git a/tests/forces/Test_double_ratio.cc b/tests/forces/Test_double_ratio.cc
new file mode 100644
index 00000000..a2b16719
--- /dev/null
+++ b/tests/forces/Test_double_ratio.cc
@@ -0,0 +1,542 @@
+/*
+ 2f Full det MdagM 10^6 force ~ 1.3e7
+Grid : Message : 1767.283471 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+Grid : Message : 1767.283476 s : S1 : 1.52885e+09
+Grid : Message : 1767.283480 s : S2 : 1.52886e+09
+Grid : Message : 1767.283482 s : dS : 8877.34
+Grid : Message : 1767.283483 s : dSpred : 8877.7
+Grid : Message : 1767.283484 s : diff : -0.360484
+Grid : Message : 1767.283485 s : *********************************************************
+
+ 2f Full det MpcdagMpc 10^6 force ~ 1.8e6
+Grid : Message : 2399.576962 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+Grid : Message : 2399.576968 s : S1 : 1.52885e+09
+Grid : Message : 2399.576972 s : S2 : 1.52886e+09
+Grid : Message : 2399.576974 s : dS : 9728.49
+Grid : Message : 2399.576975 s : dSpred : 9726.58
+Grid : Message : 2399.576976 s : diff : 1.90683
+Grid : Message : 2399.576977 s : *********************************************************
+
+ 2f bdy MdagM 1500 force Force ~ 2800
+Grid : Message : 4622.385061 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+Grid : Message : 4622.385067 s : S1 : 1.52885e+09
+Grid : Message : 4622.385071 s : S2 : 1.52885e+09
+Grid : Message : 4622.385072 s : dS : 25.4944
+Grid : Message : 4622.385073 s : dSpred : 25.4672
+Grid : Message : 4622.385074 s : diff : 0.0271414
+Grid : Message : 4622.385075 s : *********************************************************
+
+ 2f bdy MpcdagMpc 10^6 force Force ~ 2200
+Grid : Message : 4622.385061 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+Grid : Message : 4622.385067 s : S1 : 1.52885e+09
+Grid : Message : 4622.385071 s : S2 : 1.52885e+09
+Grid : Message : 4622.385072 s : dS : 25.4944
+Grid : Message : 4622.385073 s : dSpred : 25.4672
+Grid : Message : 4622.385074 s : diff : 0.0271414
+Grid : Message : 4622.385075 s : *********************************************************
+
+ 1f Bdy Det
+//
+// These all had tol set by OFRp, not through MDpoles
+// So assumptions it was Remez might be wrong.
+//
+Optimisation log: looser rational AND MD tolerances sloppy
+MobiusForce.221179 -- same as HMC. dS is mispredicted Force ~2.8
+Grid : Message : 6582.258991 s : dS : 0.024478
+Grid : Message : 6582.258992 s : dSpred : 0.00791876
+Grid : Message : 6582.258994 s : diff : 0.0165592
+
+MobiusForce.221193 -- tight rational AND MD tolerances to 1e-8 ~ 2.8 same
+Grid : Message : 1964.939209 s : S1 : 7.64404e+08
+Grid : Message : 1964.939213 s : S2 : 7.64404e+08
+Grid : Message : 1964.939215 s : dS : -0.00775838 <--- too loose even on action
+Grid : Message : 1964.939216 s : dSpred : -0.00416793
+Grid : Message : 1964.939217 s : diff : -0.00359045
+
+MobiusForce.221394 -- tight rational, MD tol sloppy Force ~ 2.8
+Grid : Message : 2376.921950 s : S1 : 764404436.44069
+Grid : Message : 2376.921954 s : S2 : 764404436.43299
+Grid : Message : 2376.921956 s : dS : -0.0076971054077148
+Grid : Message : 2376.921958 s : dSpred : -0.0041610472282526
+Grid : Message : 2376.921959 s : diff : -0.0035360581794623
+
+
+MobiusForce.221587 -- slightly sloppier action, coming from tol array
+                   -- much sloppier force
+                   -- degree 18
+      1.0e-8,1.0e-8,1.0e-8,1.0e-8,
+      1.0e-6,3.0e-7,1.0e-7,1.0e-7, // Orig sloppy
+Grid : Message : 2438.875507 s : S1 : 764404436.42251
+Grid : Message : 2438.875512 s : S2 : 764404436.4148
+Grid : Message : 2438.875514 s : dS : -0.0077102184295654
+Grid : Message : 2438.875516 s : dSpred : -0.0075684496959103
+Grid : Message : 2438.875517 s : diff : -0.00014176873365508
+
+MobiusForce.221639 3.0e-6,1.0e-6,1.0e-7,1.0e-7, // soften convergence more
+
+Grid : Message : 2373.927550 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+Grid : Message : 2373.927600 s : S1 : 764404436.42251
+Grid : Message : 2373.927640 s : S2 : 764404436.4148
+Grid : Message : 2373.927660 s : dS : -0.0077102184295654
+Grid : Message : 2373.927680 s : dSpred : -0.0075993463919849
+Grid : Message : 2373.927690 s : diff : -0.00011087203758051
+Grid : Message : 2373.927700 s : *********************************************************
+
+
+Grid : Message : 69.269319 s : ApproxPowerMD shift[0] pole 9.5166866092503e-06 residue -2.0047722631555e-08 tol 3e-06
+Grid : Message : 69.269321 s : ApproxPowerMD shift[1] pole 4.7123486192778e-05 residue -1.316766030683e-07 tol 1e-06
+Grid : Message : 69.269323 s : ApproxPowerMD shift[2] pole 0.00014860967743736 residue -6.109883117444e-07 tol 1e-07
+Grid : Message : 69.269325 s : ApproxPowerMD shift[3] pole 0.00041055696132763 residue -2.6088717433891e-06 tol 1e-07
+Grid : Message : 69.269327 s : ApproxPowerMD shift[4] pole 0.0010822555692906 residue -1.0853799412802e-05 tol 1e-08
+Grid : Message : 69.269329 s : ApproxPowerMD shift[5] pole 0.0028029613512087 residue -4.4741734470158e-05 tol 1e-08
+Grid : Message : 69.269331 s : ApproxPowerMD shift[6] pole 0.0072103567378527 residue -0.00018380499193253 tol 1e-08
+
+MobiusForce.221887
+      1.0e-5,3.0e-6,3.0e-7,1.0e-7, // soften convergence more more
+// <-- this is the dirichlet solve, why poorer conditioned???
+Grid : Message : 1627.226206 s : ConjugateGradientMultiShift k=3643 Shift 3 has converged
+Grid : Message : 1667.373045 s : ConjugateGradientMultiShift k=5381 Shift 2 has converged
+Grid : Message : 1705.236992 s : ConjugateGradientMultiShift k=7063 Shift 1 has converged
+Grid : Message : 1752.493182 s : ConjugateGradientMultiShift k=9220 Shift 0 has converged
+//
+//Grid : Message : 1414.837250 s : OneFlavourEvenOddRatioRationalPseudoFermionAction deriv: doing (M^dag M)^{-1/2} ( (V^dag V)^{1/4} Phi)
+Grid : Message : 1523.416680 s : ConjugateGradientMultiShift k=3846 Shift 2 has converged
+Grid : Message : 1530.798503 s : ConjugateGradientMultiShift k=4143 Shift 1 has converged
+Grid : Message : 1536.153421 s : ConjugateGradientMultiShift k=4353 Shift 0 has converged <-- this is the non-dirichlet solve
+
+Grid : Message : 2339.927565 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+Grid : Message : 2339.927571 s : S1 : 764404436.42251
+Grid : Message : 2339.927575 s : S2 : 764404436.4148
+Grid : Message : 2339.927577 s : dS : -0.0077102184295654
+Grid : Message : 2339.927579 s : dSpred : -0.0068752425267964
+Grid : Message : 2339.927580 s : diff : -0.00083497590276901
+Grid : Message : 2339.927581 s : *********************************************************
+Grid : Message : 2339.927582 s : Done
+Grid : Message : 2339.927582 s : *********************************************************
+
+Force 76 S {S {S {(9.0175185326468,-3.5764415623768e-36)}}}
+Force 77 S {S {S {(4.1289977678493,-4.3364721285803e-37)}}}
+Force 78 S {S {S {(3.2299269465841,6.0391022273495e-37)}}}
+Force 79 S {S {S {(3.0051199649288,-9.6243599973575e-37)}}}
+Force 80 S {S {S {(2.8924316727872,-1.3371248240604e-37)}}}
+Force 81 S {S {S {(2.8270868791781,1.792628885004e-37)}}}
+Force 82 S {S {S {(2.8676819960087,-1.3518185034456e-36)}}}
+Force 83 S {S {S {(2.7724152154523,1.4950818774521e-37)}}}
+Force 84 S {S {S {(3.0204624534964,-9.6475025423893e-36)}}}
+Force 85 S {S {S {(2.8631304063459,2.2426228161781e-37)}}}
+Force 86 S {S {S {(2.9025673908905,-1.3942465026706e-36)}}}
+Force 87 S {S {S {(2.8553405232646,-2.0938493124022e-38)}}}
+Force 88 S {S {S {(3.2820184381375,-1.422348164495e-36)}}}
+Force 89 S {S {S {(3.8974980085791,1.1682209795266e-35)}}}
+Force 90 S {S {S {(4.660053618223,-1.4399805797573e-37)}}}
+Force 91 S {S {S {(6.7993872372366,1.4524702072348e-36)}}}
+Full
+Grid : Message : 1523.416680 s : ConjugateGradientMultiShift k=3846 Shift 2 has converged
+Grid : Message : 1530.798503 s : ConjugateGradientMultiShift k=4143 Shift 1 has converged
+Grid : Message : 1536.153421 s : ConjugateGradientMultiShift k=4353 Shift 0 has converged
+PV solve depth 3
+Grid : Message : 1667.373045 s : ConjugateGradientMultiShift k=5381 Shift 2 has converged
+Grid : Message : 1705.236992 s : ConjugateGradientMultiShift k=7063 Shift 1 has converged
+Grid : Message : 1752.493182 s : ConjugateGradientMultiShift k=9220 Shift 0 has converged
+
+MobiusForce.222490 depth 1
+Grid : Message : 2155.595070 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+Grid : Message : 2155.595076 s : S1 : 764404436.37475
+Grid : Message : 2155.595080 s : S2 : 764404436.21131
+Grid : Message : 2155.595082 s : dS : -0.16344606876373
+Grid : Message : 2155.595084 s : dSpred : -0.16235663327375
+Grid : Message : 2155.595085 s : diff : -0.0010894354899788
+
+Force 4 S {S {S {(24.512489110423,-7.4203080895657e-36)}}}
+Force 5 S {S {S {(14.442663101577,7.3909207307951e-37)}}}
+Force 6 S {S {S {(12.298567945213,2.1989091200069e-36)}}}
+Force 7 S {S {S {(11.582362859271,-2.2540104177017e-36)}}}
+Force 8 S {S {S {(11.465725500906,-2.9512255045332e-36)}}}
+Force 9 S {S {S {(10.869067954412,-2.8388188572358e-36)}}}
+Force 10 S {S {S {(10.937111429576,-3.3530976357206e-36)}}}
+Force 11 S {S {S {(11.23500117508,-1.4487967873885e-36)}}}
+Force 12 S {S {S {(10.900736551834,5.1427877848475e-36)}}} Force is bigger
+Force 13 S {S {S {(10.951921323651,-1.2098775605838e-35)}}}
+Force 14 S {S {S {(10.676529230575,-2.50527233519e-36)}}}
+Force 15 S {S {S {(10.98568474467,3.2193851533145e-36)}}}
+Force 16 S {S {S {(11.931707726568,-8.5223340434616e-37)}}}
+Force 17 S {S {S {(13.751904678482,7.6337337826369e-36)}}}
+Force 18 S {S {S {(17.518955473833,1.8073225643893e-36)}}}
+Force 19 S {S {S {(20.36519304598,-2.5184966466368e-36)}}}
+Full solve
+Grid : Message : 1441.297575 s : ConjugateGradientMultiShift k=3846 Shift 2 has converged
+Grid : Message : 1449.206520 s : ConjugateGradientMultiShift k=4143 Shift 1 has converged
+Grid : Message : 1454.352909 s : ConjugateGradientMultiShift k=4353 Shift 0 has converged
+
+Dirichlet solve -- why so expensive??
+Spectral radius worse?
+Grid : Message : 1571.887003 s : ConjugateGradientMultiShift k=5195 Shift 2 has converged
+Grid : Message : 1599.543760 s : ConjugateGradientMultiShift k=6508 Shift 1 has converged
+Grid : Message : 1625.368198 s : ConjugateGradientMultiShift k=7819 Shift 0 has converged
+
+
+dS is much bigger.
+
+
+MobiusForce.223606
+Grid : Message : 1123.276405 s : ConjugateGradientMultiShift k=3273 Shift 0 has converged
+Grid : Message : 1125.945359 s : ConjugateGradientMultiShift k=3407 Shift 1 has converged
+Grid : Message : 1127.896580 s : ConjugateGradientMultiShift k=3508 Shift 2 has converged <-- 2 takes longer
+first (bdy) hasenbusch mass raised to 0.005 -- reduces Dirichlet solve cost
+Force looks ok still
+Grid : Message : 1510.884960 s : OneFlavourEvenOddRatioRationalPseudoFermionAction compute action: complete
+Grid : Message : 1510.969380 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+Grid : Message : 1510.969440 s : S1 : 764404436.37475
+Grid : Message : 1510.969480 s : S2 : 764404436.17379
+Grid : Message : 1510.969500 s : dS : -0.20095825195312
+Grid : Message : 1510.969520 s : dSpred : -0.20025674631954
+Grid : Message : 1510.969530 s : diff : -0.00070150563358654
+Force 76 S {S {S {(24.161229317675,2.0147973173094e-35)}}}
+Force 77 S {S {S {(15.841085162729,3.983456481349e-36)}}}
+Force 78 S {S {S {(11.031761776856,9.0394046210295e-35)}}}
+Force 79 S {S {S {(12.177830066719,1.583978637733e-36)}}}
+Force 80 S {S {S {(9.8372072482222,6.4284847310594e-37)}}}
+Force 81 S {S {S {(9.6588863493149,1.0501572656659e-35)}}}
+Force 82 S {S {S {(10.623076227724,-4.4161853392455e-35)}}}
+Force 83 S {S {S {(8.9477003784221,-7.067659784319e-37)}}}
+Force 84 S {S {S {(9.7663166497594,-2.1014900256825e-35)}}}
+Force 85 S {S {S {(8.9992648919057,-4.7107936109203e-36)}}}
+Force 86 S {S {S {(9.0399987268337,6.4652189295226e-37)}}}
+Force 87 S {S {S {(9.1319052497073,7.9566273871284e-37)}}}
+Force 88 S {S {S {(10.094569606113,-1.263656427134e-37)}}}
+Force 89 S {S {S {(11.563679905523,-1.2777623593438e-35)}}}
+Force 90 S {S {S {(13.653150474463,2.9093485182852e-37)}}}
+Force 91 S {S {S {(16.303719912019,2.9857556510886e-36)}}}
+
+MobiusForce.223749
+first (bdy) hasenbusch mass raised to 0.01 -- reduces Dirichlet solve cost
+Grid : Message : 1374.472462 s : OneFlavourEvenOddRatioRationalPseudoFermionAction compute action: complete
+Grid : Message : 1374.479206 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+Grid : Message : 1374.479211 s : S1 : 764404436.37428
+Grid : Message : 1374.479215 s : S2 : 764404436.20009
+Grid : Message : 1374.479217 s : dS : -0.17418932914734
+Grid : Message : 1374.479219 s : dSpred : -0.17358090105485
+Grid : Message : 1374.479220 s : diff : -0.00060842809248995
+Force 76 S {S {S {(27.006858541753,4.2141472476979e-36)}}}
+Force 77 S {S {S {(19.388701462694,-5.1620365048422e-35)}}}
+Force 78 S {S {S {(13.502424539662,-2.4038859474316e-35)}}}
+Force 79 S {S {S {(15.555776987064,6.0567346426118e-36)}}}
+Force 80 S {S {S {(12.752116522904,-2.3720006631655e-35)}}}
+Force 81 S {S {S {(12.656857824233,1.6912424972456e-35)}}}
+Force 82 S {S {S {(15.159284452724,5.0898905390605e-36)}}}
+Force 83 S {S {S {(12.222695136014,-2.2061824913027e-35)}}}
+Force 84 S {S {S {(12.92077598466,9.6287681011731e-36)}}}
+Force 85 S {S {S {(11.884630495484,2.822655809912e-36)}}}
+Force 86 S {S {S {(11.896353116174,1.0926219990893e-35)}}}
+Force 87 S {S {S {(11.557019282287,2.1532117771187e-35)}}}
+Force 88 S {S {S {(11.945108384613,-3.0210204816133e-36)}}}
+Force 89 S {S {S {(13.295373801078,7.3115748621146e-36)}}}
+Force 90 S {S {S {(15.373728471417,-7.4923071185536e-36)}}}
+Force 91 S {S {S {(17.348173714234,1.0344350287236e-36)}}}
+
+MobiusForce.223829
+      1.0e-5,5.0e-6,1.0e-6,1.0e-7, // soften convergence more more
+Grid : Message : 1000.951387 s : ConjugateGradientMultiShift k=1881 Shift 0 has converged
+Grid : Message : 1002.619542 s : ConjugateGradientMultiShift k=1960 Shift 1 has converged
+Grid : Message : 1003.726982 s : ConjugateGradientMultiShift k=2014 Shift 4 has converged
+Grid : Message : 1005.698741 s : ConjugateGradientMultiShift k=2113 Shift 2 has converged
+Grid : Message : 1007.320875 s : ConjugateGradientMultiShift k=2197 Shift 3 has converged
+Grid : Message : 1351.171259 s : S1 : 764404436.37428
+Grid : Message : 1351.171263 s : S2 : 764404436.20009
+Grid : Message : 1351.171265 s : dS : -0.17418932914734
+Grid : Message : 1351.171266 s : dSpred : -0.1743248065338
+Grid : Message : 1351.171267 s : diff : 0.00013547738646566
+Force 76 S {S {S {(27.004288088317,6.035575744297e-35)}}}
+Force 77 S {S {S {(19.388023720604,-6.9736202362532e-36)}}}
+Force 78 S {S {S {(13.502663916173,6.4067380855692e-35)}}}
+Force 79 S {S {S {(15.55135748152,1.7219522871608e-35)}}}
+Force 80 S {S {S {(12.75135802213,-1.1303847551095e-35)}}}
+Force 81 S {S {S {(12.655732786276,1.689773129307e-36)}}}
+Force 82 S {S {S {(15.158469055699,-6.7205950772387e-35)}}}
+Force 83 S {S {S {(12.222907191126,-1.6775773754173e-35)}}}
+Force 84 S {S {S {(12.916025368247,-1.9641041234302e-35)}}}
+Force 85 S {S {S {(11.881879452577,-2.3054382955502e-36)}}}
+Force 86 S {S {S {(11.897253557199,-3.3617669065579e-35)}}}
+Force 87 S {S {S {(11.55717723524,-1.8690360178074e-36)}}}
+Force 88 S {S {S {(11.945590605851,-6.7208889508264e-36)}}}
+Force 89 S {S {S {(13.298173932749,-1.0322309768158e-35)}}}
+Force 90 S {S {S {(15.373845416836,7.4158999857501e-36)}}}
+Force 91 S {S {S {(17.348058307158,-1.8514036025451e-36)}}}
+-- could make the stopping condition mandatory if shift 0 is converged.
+-- Save 20% of iterations and single tunable
+*/
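The per-pole tolerance arrays exercised in the log above rest on the partial-fraction form of the rational approximation, f(x) ~ a0 + sum_k a_k/(x + b_k): the error that a loosely converged solve of the k-th shifted system injects into the summed result is weighted by the residue a_k, so poles with tiny residues tolerate much looser CG stopping. A minimal sketch of that error budget, in plain standalone C++, reusing only the shift[0..2] residue and tol values quoted in the ApproxPowerMD lines above; the bound itself is a heuristic illustration, not Grid's bookkeeping.

#include <cmath>
#include <cstdio>

int main(void) {
  // residue and tol for shift[0..2], copied from the ApproxPowerMD log above
  const double residue[3] = {-2.0047722631555e-08, -1.316766030683e-07, -6.109883117444e-07};
  const double tol[3]     = {3e-06, 1e-06, 1e-07};
  for (int k = 0; k < 3; k++) {
    // if the k-th shifted solve stops at relative residual tol[k], its
    // contribution to the sum is wrong by roughly |residue[k]|*tol[k]
    std::printf("shift[%d]: |residue|*tol = %.3g\n",
                k, std::fabs(residue[k]) * tol[k]);
  }
  return 0;
}

The three products come out within a factor of about two of each other (6e-14, 1.3e-13, 6e-14), which is the sense in which scaling the tolerances by pole "balances" the error budget instead of over-solving the small-residue shifts.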
+
+//
+/*************************************************************************************
+
+ Grid physics library, www.github.com/paboyle/Grid
+
+ Source file: ./tests/Test_double_ratio.cc
+
+ Copyright (C) 2022
+
+Author: Peter Boyle
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+ See the full license in the file "LICENSE" in the top level distribution directory
+ *************************************************************************************/
+ /* END LEGAL */
+#include
+
+using namespace std;
+using namespace Grid;
+
+typedef MobiusFermionD FermionAction;
+typedef WilsonImplD FimplD;
+typedef WilsonImplD FermionImplPolicy;
+
+template
+void ForceTest(Action &action,LatticeGaugeField & U,MomentumFilterBase &Filter)
+{
+  GridBase *UGrid = U.Grid();
+
+  std::vector seeds({1,2,3,5});
+  GridSerialRNG sRNG; sRNG.SeedFixedIntegers(seeds);
+  GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds);
+
+  LatticeColourMatrix Pmu(UGrid);
+  LatticeGaugeField P(UGrid);
+  LatticeGaugeField UdSdU(UGrid);
+
+  std::cout << GridLogMessage << "*********************************************************"<(UdSdU,mu);
+    Pmu= PeekIndex(P,mu);
+    dS = dS - trace(Pmu*UdSdUmu)*eps*2.0*2.0;
+  }
+  ComplexD dSpred = sum(dS);
+  RealD diff = S2-S1-dSpred.real();
+
+  std::cout<< GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<1 ?
1 : 0; + + Coordinate NonDirichlet(Nd+1,0); + Coordinate Dirichlet(Nd+1,0); + Dirichlet[1] = CommDim[0]*latt_size[0]/mpi_layout[0] * shm[0]; + Dirichlet[2] = CommDim[1]*latt_size[1]/mpi_layout[1] * shm[1]; + Dirichlet[3] = CommDim[2]*latt_size[2]/mpi_layout[2] * shm[2]; + Dirichlet[4] = CommDim[3]*latt_size[3]/mpi_layout[3] * shm[3]; + + Coordinate Block4(Nd); + Block4[0] = Dirichlet[1]; + Block4[1] = Dirichlet[2]; + Block4[2] = Dirichlet[3]; + Block4[3] = Dirichlet[4]; + + std::vector boundary = {1,1,1,-1}; + FermionAction::ImplParams Params(boundary); + FermionAction::ImplParams ParamsDir(boundary); + Params.dirichlet=NonDirichlet; + ParamsDir.dirichlet=Dirichlet; + ParamsDir.partialDirichlet=1; + + ///////////////////// Gauge Field and Gauge Forces //////////////////////////// + LatticeGaugeField U(UGrid); + + RealD beta=6.0; + WilsonGaugeActionR PlaqAction(beta); + IwasakiGaugeActionR RectAction(beta); + + MomentumFilterNone FilterNone; + ForceTest(PlaqAction,U,FilterNone); + ForceTest(RectAction,U,FilterNone); + + //////////////////////////////////// + // Action + //////////////////////////////////// + RealD mass=0.00078; + RealD dmass=0.01; + RealD pvmass=1.0; + RealD M5=1.8; + RealD b=1.5; + RealD c=0.5; + + // Double versions + FermionAction DdwfPeriodic(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,Params); + FermionAction PVPeriodic (U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,pvmass,M5,b,c,Params); + FermionAction DdwfDirichlet(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,dmass,M5,b,c,ParamsDir); + + double StoppingCondition = 1.0e-8; + double MaxCGIterations = 50000; + ConjugateGradient CG(StoppingCondition,MaxCGIterations); + + //////////////////// Two Flavour Determinant Ratio /////////////////////////////// + TwoFlavourRatioPseudoFermionAction Nf2(PVPeriodic, DdwfPeriodic,CG,CG); + // ForceTest(Nf2,U,FilterNone); + + //////////////////// Two Flavour Determinant force test Even Odd /////////////////////////////// + TwoFlavourEvenOddRatioPseudoFermionAction Nf2eo(PVPeriodic, DdwfPeriodic,CG,CG); + // ForceTest(Nf2eo,U,FilterNone); + + //////////////////// Domain forces //////////////////// + int Width=4; + DDHMCFilter DDHMCFilter(Block4,Width); + + //////////////////// Two flavour boundary det //////////////////// + TwoFlavourRatioPseudoFermionAction BdyNf2(DdwfDirichlet, DdwfPeriodic,CG,CG); + // ForceTest(BdyNf2,U,DDHMCFilter); + + //////////////////// Two flavour eo boundary det //////////////////// + TwoFlavourEvenOddRatioPseudoFermionAction BdyNf2eo(DdwfDirichlet, DdwfPeriodic,CG,CG); + // ForceTest(BdyNf2eo,U,DDHMCFilter); + + //////////////////// One flavour boundary det //////////////////// + /* + RationalActionParams OFRp; // Up/down + int SP_iters = 3000; + OFRp.lo = 6.0e-5; + OFRp.hi = 90.0; + OFRp.inv_pow = 2; + OFRp.MaxIter = SP_iters; // get most shifts by 2000, stop sharing space + OFRp.action_tolerance= 1.0e-8; + OFRp.action_degree = 18; + OFRp.md_tolerance= 1.0e-5; + OFRp.md_degree = 14; + // OFRp.degree = 20; converges + // OFRp.degree = 16; + OFRp.precision= 80; + OFRp.BoundsCheckFreq=0; + */ + OneFlavourRationalParams OFRp; // Up/down + OFRp.lo = 4.0e-5; + OFRp.hi = 90.0; + OFRp.MaxIter = 60000; + OFRp.tolerance= 1.0e-9; + OFRp.mdtolerance= 1.0e-8; + OFRp.degree = 18; + OFRp.precision= 80; + OFRp.BoundsCheckFreq=0; + std::vector ActionTolByPole({ + 1.0e-7,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8 + }); + std::vector MDTolByPole({ + 1.6e-5,5.0e-6,1.0e-6,3.0e-7, // soften convergence more 
more + // 1.0e-6,3.0e-7,1.0e-7,1.0e-7, + // 3.0e-6,1.0e-6,1.0e-7,1.0e-7, // soften convergence + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8 + }); + /* + std::vector ActionTolByPole({ + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8 + }); + std::vector MDTolByPole({ + 1.0e-5,5.0e-6,1.0e-6,1.0e-7, // soften convergence more more + // 3.0e-6,1.0e-6,1.0e-7,1.0e-7, // soften convergence more + // 1.0e-6,3.0e-7,1.0e-7,1.0e-7, // Orig sloppy + // 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8 + }); + */ + OneFlavourEvenOddRatioRationalPseudoFermionAction BdySqrt(DdwfDirichlet,DdwfPeriodic,OFRp); + BdySqrt.SetTolerances(ActionTolByPole,MDTolByPole); + ForceTest(BdySqrt,U,DDHMCFilter); + + Grid_finalize(); +} diff --git a/tests/forces/Test_dwf_force.cc b/tests/forces/Test_dwf_force.cc index e7d17347..1ae28bb2 100644 --- a/tests/forces/Test_dwf_force.cc +++ b/tests/forces/Test_dwf_force.cc @@ -67,7 +67,7 @@ int main (int argc, char ** argv) //////////////////////////////////// RealD mass=0.01; RealD M5=1.8; - DomainWallFermionR Ddwf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + DomainWallFermionD Ddwf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); Ddwf.M (phi,Mphi); ComplexD S = innerProduct(Mphi,Mphi); // pdag MdagM p diff --git a/tests/forces/Test_dwf_force_eofa.cc b/tests/forces/Test_dwf_force_eofa.cc index 525178d0..d820573b 100644 --- a/tests/forces/Test_dwf_force_eofa.cc +++ b/tests/forces/Test_dwf_force_eofa.cc @@ -80,8 +80,8 @@ int main (int argc, char** argv) RealD mf = 0.01; RealD mb = 1.0; RealD M5 = 1.8; - DomainWallEOFAFermionR Lop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, 0.0, -1, M5); - DomainWallEOFAFermionR Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5); + DomainWallEOFAFermionD Lop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, 0.0, -1, M5); + DomainWallEOFAFermionD Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5); OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, 12); ConjugateGradient CG(1.0e-12, 5000); ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, CG, CG, CG, CG, Params, true); diff --git a/tests/forces/Test_dwf_gpforce.cc b/tests/forces/Test_dwf_gpforce.cc index 1fa1c6e4..72d30369 100644 --- a/tests/forces/Test_dwf_gpforce.cc +++ b/tests/forces/Test_dwf_gpforce.cc @@ -47,7 +47,7 @@ int main (int argc, char ** argv) GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); - typedef typename GparityDomainWallFermionR::FermionField FermionField; + typedef typename GparityDomainWallFermionD::FermionField FermionField; int threads = GridThread::GetThreads(); std::cout< twists(Nd,0); // twists[nu] = 1; - // GparityDomainWallFermionR::ImplParams params; params.twists = twists; - // GparityDomainWallFermionR Ddwf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params); - // DomainWallFermionR Dw (U, Grid,RBGrid,mass,M5); - - const int nu = 3; + const int nu = 0; //gparity direction std::vector twists(Nd,0); twists[nu] = 1; - GparityDomainWallFermionR::ImplParams params; + twists[Nd-1] = 1; //antiperiodic in time + GparityDomainWallFermionD::ImplParams params; params.twists = twists; - - /* - params.boundary_phases[0] = 1.0; - params.boundary_phases[1] = 1.0; - params.boundary_phases[2] = 1.0; - params.boundary_phases[3] =- 1.0; - */ - - 
GparityDomainWallFermionR Dw(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params); + + GparityDomainWallFermionD Dw(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params); Dw.M (phi,Mphi); diff --git a/tests/forces/Test_dwf_gpforce_eofa.cc b/tests/forces/Test_dwf_gpforce_eofa.cc index fd47d33c..08923faa 100644 --- a/tests/forces/Test_dwf_gpforce_eofa.cc +++ b/tests/forces/Test_dwf_gpforce_eofa.cc @@ -33,7 +33,7 @@ using namespace std; using namespace Grid; typedef GparityWilsonImplR FermionImplPolicy; -typedef GparityDomainWallEOFAFermionR FermionAction; +typedef GparityDomainWallEOFAFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; int main (int argc, char** argv) diff --git a/tests/forces/Test_gpdwf_force.cc b/tests/forces/Test_gpdwf_force.cc index d6744080..58dbfc47 100644 --- a/tests/forces/Test_gpdwf_force.cc +++ b/tests/forces/Test_gpdwf_force.cc @@ -56,7 +56,7 @@ int main (int argc, char ** argv) int threads = GridThread::GetThreads(); std::cout< twists(Nd,0); twists[nu] = 1; - GparityDomainWallFermionR::ImplParams params; params.twists = twists; - GparityDomainWallFermionR Ddwf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params); + const int nu = 1; + std::vector twists(Nd,0); + twists[nu] = 1; + twists[3] = 1; + GparityDomainWallFermionD::ImplParams params; params.twists = twists; + GparityDomainWallFermionD Ddwf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params); Ddwf.M (phi,Mphi); ComplexD S = innerProduct(Mphi,Mphi); // pdag MdagM p diff --git a/tests/forces/Test_gpdwf_force_1f_2f.cc b/tests/forces/Test_gpdwf_force_1f_2f.cc new file mode 100644 index 00000000..c343b7ac --- /dev/null +++ b/tests/forces/Test_gpdwf_force_1f_2f.cc @@ -0,0 +1,446 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./forces/Test_gpdwf_force_1f_2f.cc + + Copyright (C) 2015 + +Author: Christopher Kelly +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+
+ See the full license in the file "LICENSE" in the top level distribution directory
+ *************************************************************************************/
+ /* END LEGAL */
+#include
+
+using namespace std;
+using namespace Grid;
+
+//Here we test the G-parity action and force between the 1f (doubled-lattice) and 2f approaches
+
+
+void copyConjGauge(LatticeGaugeFieldD &Umu_1f, const LatticeGaugeFieldD &Umu_2f, const int nu){
+  GridBase* UGrid_2f = Umu_2f.Grid();
+  GridBase* UGrid_1f = Umu_1f.Grid();
+
+  Replicate(Umu_2f,Umu_1f);
+
+  int L_2f = UGrid_2f->FullDimensions()[nu];
+  int L_1f = UGrid_1f->FullDimensions()[nu];
+  assert(L_1f == 2 * L_2f);
+
+  //Coordinate grid for reference
+  LatticeInteger xcoor_1f(UGrid_1f);
+  LatticeCoordinate(xcoor_1f,nu);
+
+  //Copy-conjugate the gauge field
+  //First C-shift the lattice by Lx/2
+  {
+    LatticeGaugeField Umu_shift = conjugate( Cshift(Umu_1f,nu,L_2f) );
+    Umu_1f = where( xcoor_1f >= Integer(L_2f), Umu_shift, Umu_1f );
+
+    //We use the built-in APBC
+    //Make the gauge field antiperiodic in nu-direction
+    //decltype(PeekIndex(Umu_1f,nu)) Unu(UGrid_1f);
+    //Unu = PeekIndex(Umu_1f,nu);
+    //Unu = where(xcoor_1f == Integer(2*L_2f-1), -Unu, Unu);
+    //PokeIndex(Umu_1f,Unu,nu);
+  }
+}
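A toy model of the doubling convention that copyConjGauge above and convertFermion1f_from_2f below rely on: the two G-parity flavours living on a lattice of extent L in the nu direction map onto a single field of extent 2L, flavour 0 on the first half and the Replicate'd flavour 1 on the second. Plain standalone C++ for illustration only; the names f0, f1 and onef are assumptions, not part of this patch.

#include <cstdio>
#include <vector>

int main(void) {
  const int L = 4;  // 2f lattice extent in the G-parity direction
  std::vector<double> f0(L), f1(L), onef(2 * L);
  for (int x = 0; x < L; x++) { f0[x] = 10 + x; f1[x] = 20 + x; }
  // mirrors: out_1f = where(xcoor_1f < L_2f, in_f0_1fgrid, in_f1_1fgrid),
  // where Replicate has laid f1 down periodically on the doubled lattice
  for (int x = 0; x < 2 * L; x++)
    onef[x] = (x < L) ? f0[x] : f1[x - L];
  for (int x = 0; x < 2 * L; x++)
    std::printf("x=%d value=%g (flavour %d)\n", x, onef[x], x < L ? 0 : 1);
  return 0;
}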
+
+template
+void convertFermion1f_from_2f(FermionField1f &out_1f, const FermionField2f &in_2f, const int nu, bool is_4d){
+  GridBase* FGrid_1f = out_1f.Grid();
+  GridBase* FGrid_2f = in_2f.Grid();
+
+  int nuoff = is_4d ? 0 : 1; //s in 0 direction
+
+  Integer L_2f = FGrid_2f->FullDimensions()[nu+nuoff];
+  Integer L_1f = FGrid_1f->FullDimensions()[nu+nuoff];
+  assert(L_1f == 2 * L_2f);
+
+  auto in_f0_2fgrid = PeekIndex(in_2f,0); //flavor 0 on 2f Grid
+  FermionField1f in_f0_1fgrid(FGrid_1f);
+  Replicate(in_f0_2fgrid, in_f0_1fgrid); //has flavor 0 on both halves
+
+  auto in_f1_2fgrid = PeekIndex(in_2f,1); //flavor 1 on 2f Grid
+  FermionField1f in_f1_1fgrid(FGrid_1f);
+  Replicate(in_f1_2fgrid, in_f1_1fgrid); //has flavor 1 on both halves
+
+  LatticeInteger xcoor_1f(FGrid_1f);
+  LatticeCoordinate(xcoor_1f,nu+nuoff);
+
+  out_1f = where(xcoor_1f < L_2f, in_f0_1fgrid, in_f1_1fgrid);
+}
+
+template
+class RatioActionSetupBase{
+protected:
+  TwoFlavourEvenOddRatioPseudoFermionAction *pf_1f;
+  TwoFlavourEvenOddRatioPseudoFermionAction *pf_2f;
+
+  GparityAction* action_2f;
+  GparityAction* action_PV_2f;
+  StandardAction* action_1f;
+  StandardAction* action_PV_1f;
+
+  ConjugateGradient CG_1f;
+  ConjugateGradient CG_2f;
+
+  RatioActionSetupBase(): CG_1f(1.0e-8,10000), CG_2f(1.0e-8,10000){}
+
+  void setupPseudofermion(){
+    pf_1f = new TwoFlavourEvenOddRatioPseudoFermionAction(*action_PV_1f, *action_1f, CG_1f, CG_1f);
+    pf_2f = new TwoFlavourEvenOddRatioPseudoFermionAction(*action_PV_2f, *action_2f, CG_2f, CG_2f);
+  }
+
+public:
+  GparityAction & action2f(){ return *action_2f; }
+  StandardAction & action1f(){ return *action_1f; }
+
+  void refreshAction(LatticeGaugeField &Umu_2f, typename GparityAction::FermionField &eta_2f,
+                     LatticeGaugeField &Umu_1f, typename StandardAction::FermionField &eta_1f){
+    pf_1f->refresh(Umu_1f, eta_1f);
+    pf_2f->refresh(Umu_2f, eta_2f);
+
+    //Compare PhiOdd
+    RealD norm_1f = norm2(pf_1f->getPhiOdd());
+    RealD norm_2f = norm2(pf_2f->getPhiOdd());
+
+    std::cout << "Test PhiOdd 2f: " << norm_2f << " 1f: " << norm_1f << std::endl;
+  }
+
+  void computeAction(RealD &S_2f, RealD &S_1f, LatticeGaugeField &Umu_2f, LatticeGaugeField &Umu_1f){
+    S_1f = pf_1f->S(Umu_1f);
+    S_2f = pf_2f->S(Umu_2f);
+  }
+
+  void computeDeriv(LatticeGaugeField &deriv_2f, LatticeGaugeField &deriv_1f, LatticeGaugeField &Umu_2f, LatticeGaugeField &Umu_1f){
+    pf_1f->deriv(Umu_1f, deriv_1f);
+    pf_2f->deriv(Umu_2f, deriv_2f);
+  }
+
+};
+
+
+
+
+template
+struct setupAction{};
+
+template<>
+struct setupAction: public RatioActionSetupBase{
+  typedef GparityWilsonTMFermionD GparityAction;
+  typedef WilsonTMFermionD StandardAction;
+
+  setupAction(GridCartesian* UGrid_2f, GridRedBlackCartesian* UrbGrid_2f, GridCartesian* FGrid_2f, GridRedBlackCartesian* FrbGrid_2f,
+              GridCartesian* UGrid_1f, GridRedBlackCartesian* UrbGrid_1f, GridCartesian* FGrid_1f, GridRedBlackCartesian* FrbGrid_1f,
+              LatticeGaugeField &Umu_2f, LatticeGaugeField &Umu_1f, int nu): RatioActionSetupBase(){
+    RealD mass=-1.8;
+    //Use same DSDR twists as https://arxiv.org/pdf/1208.4412.pdf
+    RealD epsilon_f = 0.02; //numerator (in determinant)
+    RealD epsilon_b = 0.5;
+
+    std::vector twists(Nd,0);
+    twists[nu] = 1; //GPBC in y
+    twists[3] = 1; //APBC
+    GparityAction::ImplParams params_2f; params_2f.twists = twists;
+    action_2f = new GparityWilsonTMFermionD(Umu_2f,*UGrid_2f,*UrbGrid_2f, mass, epsilon_f, params_2f);
+    action_PV_2f = new GparityWilsonTMFermionD(Umu_2f,*UGrid_2f,*UrbGrid_2f, mass, epsilon_b, params_2f);
+
+    DomainWallFermionD::ImplParams params_1f;
+    params_1f.boundary_phases[nu] = -1;
+    params_1f.boundary_phases[3] = -1;
+
+    action_1f = new WilsonTMFermionD(Umu_1f,*UGrid_1f,*UrbGrid_1f, mass, epsilon_f, params_1f);
+    action_PV_1f = new WilsonTMFermionD(Umu_1f,*UGrid_1f,*UrbGrid_1f, mass, epsilon_b, params_1f);
+
+    setupPseudofermion();
+  }
+
+  static bool is4d(){ return true; }
+};
+
+
+template<>
+struct setupAction: public RatioActionSetupBase{
+  typedef GparityDomainWallFermionD GparityAction;
+  typedef DomainWallFermionD StandardAction;
+
+  setupAction(GridCartesian* UGrid_2f, GridRedBlackCartesian* UrbGrid_2f, GridCartesian* FGrid_2f, GridRedBlackCartesian* FrbGrid_2f,
+              GridCartesian* UGrid_1f, GridRedBlackCartesian* UrbGrid_1f, GridCartesian* FGrid_1f, GridRedBlackCartesian* FrbGrid_1f,
+              LatticeGaugeField &Umu_2f, LatticeGaugeField &Umu_1f, int nu): RatioActionSetupBase(){
+    RealD mass=0.01;
+    RealD M5=1.8;
+
+    std::vector twists(Nd,0);
+    twists[nu] = 1; //GPBC in y
+    twists[3] = 1; //APBC
+    GparityDomainWallFermionD::ImplParams params_2f; params_2f.twists = twists;
+    action_2f = new GparityDomainWallFermionD(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f,*UrbGrid_2f,mass,M5,params_2f);
+    action_PV_2f = new GparityDomainWallFermionD(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f,*UrbGrid_2f,1.0,M5,params_2f);
+
+    DomainWallFermionD::ImplParams params_1f;
+    params_1f.boundary_phases[nu] = -1;
+    params_1f.boundary_phases[3] = -1;
+
+    action_1f = new DomainWallFermionD(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f,mass,M5,params_1f);
+    action_PV_1f = new DomainWallFermionD(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f,1.0,M5,params_1f);
+
+    setupPseudofermion();
+  }
+
+  static bool is4d(){ return false; }
+};
+
+
+
+
+
+//For EOFA we need a different pseudofermion type
+template<>
+struct setupAction{
+  typedef GparityDomainWallEOFAFermionD GparityAction;
+  typedef DomainWallEOFAFermionD StandardAction;
+
+  ExactOneFlavourRatioPseudoFermionAction *pf_1f;
+  ExactOneFlavourRatioPseudoFermionAction *pf_2f;
+
+  GparityAction* action_2f;
+  GparityAction* action_PV_2f;
+  StandardAction* action_1f;
+  StandardAction* action_PV_1f;
+
+  ConjugateGradient CG_1f;
+  ConjugateGradient CG_2f;
+
+public:
+  GparityAction & action2f(){ return *action_2f; }
+  StandardAction & action1f(){ return *action_1f;
} + + void refreshAction(LatticeGaugeField &Umu_2f, typename GparityAction::FermionField &eta_2f, + LatticeGaugeField &Umu_1f, typename StandardAction::FermionField &eta_1f){ + pf_1f->refresh(Umu_1f, eta_1f); + pf_2f->refresh(Umu_2f, eta_2f); + + //Compare PhiOdd + RealD norm_1f = norm2(pf_1f->getPhi()); + RealD norm_2f = norm2(pf_2f->getPhi()); + + std::cout << "Test Phi 2f: " << norm_2f << " 1f: " << norm_1f << std::endl; + } + + void computeAction(RealD &S_2f, RealD &S_1f, LatticeGaugeField &Umu_2f, LatticeGaugeField &Umu_1f){ + S_1f = pf_1f->S(Umu_1f); + S_2f = pf_2f->S(Umu_2f); + } + + void computeDeriv(LatticeGaugeField &deriv_2f, LatticeGaugeField &deriv_1f, LatticeGaugeField &Umu_2f, LatticeGaugeField &Umu_1f){ + pf_1f->deriv(Umu_1f, deriv_1f); + pf_2f->deriv(Umu_2f, deriv_2f); + } + + + setupAction(GridCartesian* UGrid_2f, GridRedBlackCartesian* UrbGrid_2f, GridCartesian* FGrid_2f, GridRedBlackCartesian* FrbGrid_2f, + GridCartesian* UGrid_1f, GridRedBlackCartesian* UrbGrid_1f, GridCartesian* FGrid_1f, GridRedBlackCartesian* FrbGrid_1f, + LatticeGaugeField &Umu_2f, LatticeGaugeField &Umu_1f, int nu): CG_1f(1.0e-8,10000), CG_2f(1.0e-8,10000){ + RealD mass=0.01; + RealD M5=1.8; + + std::vector twists(Nd,0); + twists[nu] = 1; //GPBC in y + twists[3] = 1; //APBC + GparityAction::ImplParams params_2f; params_2f.twists = twists; + action_2f = new GparityAction(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f,*UrbGrid_2f, mass, mass, 1.0, 0.0, -1, M5, params_2f); + action_PV_2f = new GparityAction(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f,*UrbGrid_2f, 1.0, mass, 1.0, -1.0, 1, M5, params_2f); //cf Test_dwf_gpforce_eofa.cc + + StandardAction::ImplParams params_1f; + params_1f.boundary_phases[nu] = -1; + params_1f.boundary_phases[3] = -1; + + action_1f = new StandardAction(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f, mass, mass, 1.0, 0.0, -1, M5, params_1f); + action_PV_1f = new StandardAction(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f, 1.0, mass, 1.0, -1.0, 1, M5, params_1f); + + OneFlavourRationalParams RationalParams(0.95, 100.0, 5000, 1.0e-12, 12); + + pf_1f = new ExactOneFlavourRatioPseudoFermionAction(*action_1f, *action_PV_1f, CG_1f, CG_1f, CG_1f, CG_1f, CG_1f, RationalParams, true); + pf_2f = new ExactOneFlavourRatioPseudoFermionAction(*action_2f, *action_PV_2f, CG_2f, CG_2f, CG_2f, CG_2f, CG_2f, RationalParams, true); + } + + static bool is4d(){ return false; } +}; + + +template +void runTest(int argc, char** argv){ + Grid_init(&argc,&argv); + + const int nu = 1; + Coordinate latt_2f = GridDefaultLatt(); + Coordinate latt_1f = latt_2f; + latt_1f[nu] *= 2; + + Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + Coordinate mpi_layout = GridDefaultMpi(); + + const int Ls=8; + + GridCartesian * UGrid_1f = SpaceTimeGrid::makeFourDimGrid(latt_1f, simd_layout, mpi_layout); + GridRedBlackCartesian * UrbGrid_1f = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_1f); + GridCartesian * FGrid_1f = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_1f); + GridRedBlackCartesian * FrbGrid_1f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_1f); + + + GridCartesian * UGrid_2f = SpaceTimeGrid::makeFourDimGrid(latt_2f, simd_layout, mpi_layout); + GridRedBlackCartesian * UrbGrid_2f = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_2f); + GridCartesian * FGrid_2f = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_2f); + GridRedBlackCartesian * FrbGrid_2f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_2f); + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5_2f(FGrid_2f); 
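+  //A sketch of the correspondence being tested: the two-flavour G-parity
+  //theory on extent L in direction nu is equivalent to a one-flavour theory
+  //on a doubled lattice of extent 2L with links
+  //  U_1f(x) = U_2f(x)        for x_nu <  L
+  //  U_1f(x) = U_2f(x - L)^*  for x_nu >= L
+  //and antiperiodic fermion boundary conditions in nu; copyConjGauge applies
+  //this construction to the gauge field.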
RNG5_2f.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4_2f(UGrid_2f); RNG4_2f.SeedFixedIntegers(seeds4); + + LatticeGaugeField Umu_2f(UGrid_2f); + SU::HotConfiguration(RNG4_2f,Umu_2f); + + LatticeGaugeField Umu_1f(UGrid_1f); + copyConjGauge(Umu_1f, Umu_2f, nu); + + typedef typename GparityAction::FermionField GparityFermionField; + typedef typename StandardAction::FermionField StandardFermionField; + + setupAction setup(UGrid_2f, UrbGrid_2f, FGrid_2f, FrbGrid_2f, + UGrid_1f, UrbGrid_1f, FGrid_1f, FrbGrid_1f, + Umu_2f, Umu_1f, nu); + GridBase* FGrid_2f_a = setup.action2f().FermionGrid(); + GridBase* FGrid_1f_a = setup.action1f().FermionGrid(); + GridBase* FrbGrid_2f_a = setup.action2f().FermionRedBlackGrid(); + GridBase* FrbGrid_1f_a = setup.action1f().FermionRedBlackGrid(); + bool is_4d = setup.is4d(); + + //Check components by doing an inversion + { + setup.action2f().ImportGauge(Umu_2f); + setup.action1f().ImportGauge(Umu_1f); + + GparityFermionField src_2f(FGrid_2f_a); + gaussian(is_4d ? RNG4_2f : RNG5_2f, src_2f); + + StandardFermionField src_1f(FGrid_1f_a); + convertFermion1f_from_2f(src_1f, src_2f, nu, is_4d); + + StandardFermionField src_o_1f(FrbGrid_1f_a); + StandardFermionField result_o_1f(FrbGrid_1f_a); + pickCheckerboard(Odd,src_o_1f,src_1f); + result_o_1f=Zero(); + + SchurDiagMooeeOperator HermOpEO_1f(setup.action1f()); + ConjugateGradient CG_1f(1.0e-8,10000); + CG_1f(HermOpEO_1f,src_o_1f,result_o_1f); + + + GparityFermionField src_o_2f(FrbGrid_2f_a); + GparityFermionField result_o_2f(FrbGrid_2f_a); + pickCheckerboard(Odd,src_o_2f,src_2f); + result_o_2f=Zero(); + + SchurDiagMooeeOperator HermOpEO_2f(setup.action2f()); + ConjugateGradient CG_2f(1.0e-8,10000); + CG_2f(HermOpEO_2f,src_o_2f,result_o_2f); + + RealD norm_1f = norm2(result_o_1f); + RealD norm_2f = norm2(result_o_2f); + + std::cout << "Test fermion inversion 2f: " << norm_2f << " 1f: " << norm_1f << std::endl; + } + + //Generate eta + RealD scale = std::sqrt(0.5); + + GparityFermionField eta_2f(FGrid_2f_a); + gaussian(is_4d ? 
RNG4_2f : RNG5_2f,eta_2f); eta_2f = eta_2f * scale;
+
+  StandardFermionField eta_1f(FGrid_1f_a);
+  convertFermion1f_from_2f(eta_1f, eta_2f, nu, is_4d);
+
+  setup.refreshAction(Umu_2f, eta_2f, Umu_1f, eta_1f);
+
+  //Initial action is just |eta|^2
+  RealD S_1f, S_2f;
+
+  setup.computeAction(S_2f, S_1f, Umu_2f, Umu_1f);
+
+  std::cout << "Test Initial action 2f: " << S_2f << " 1f: " << S_1f << " diff: " << S_2f - S_1f << std::endl;
+
+  //Do a random gauge field refresh
+  SU<Nc>::HotConfiguration(RNG4_2f,Umu_2f);
+  copyConjGauge(Umu_1f, Umu_2f, nu);
+
+  //Compute the action again
+  setup.computeAction(S_2f, S_1f, Umu_2f, Umu_1f);
+
+  std::cout << "Test Action after gauge field randomize 2f: " << S_2f << " 1f: " << S_1f << " diff: " << S_2f - S_1f << std::endl;
+
+  //Compute the derivative and test the conjugate relation
+  LatticeGaugeField deriv_2f(UGrid_2f);
+  LatticeGaugeField deriv_1f(UGrid_1f);
+  setup.computeDeriv(deriv_2f, deriv_1f, Umu_2f, Umu_1f);
+
+  //Have to combine the two forces on the 1f by symmetrizing under the complex conjugate
+  {
+    RealD norm2_pre = norm2(deriv_1f);
+    LatticeGaugeField deriv_1f_shift = conjugate( Cshift(deriv_1f, nu, latt_2f[nu]) );
+    deriv_1f = deriv_1f + deriv_1f_shift;
+    std::cout << "Test combine/symmetrize forces on 1f lattice, dS/dU : " << norm2_pre << " -> " << norm2(deriv_1f) << std::endl;
+  }
+
+  LatticeGaugeField deriv_1f_from_2f(UGrid_1f);
+  copyConjGauge(deriv_1f_from_2f, deriv_2f, nu);
+  std::cout << "Test copy-conj 2f dS/dU to obtain equivalent 1f force : " << norm2(deriv_2f) << " -> " << norm2(deriv_1f_from_2f) << std::endl;
+
+  LatticeGaugeField diff_deriv_1f = deriv_1f - deriv_1f_from_2f;
+
+  std::cout << "Test dS/dU 1f constructed from 2f derivative: " << norm2(deriv_1f_from_2f) << " dS/dU 1f actual: " << norm2(deriv_1f) << " Norm of difference: " << norm2(diff_deriv_1f) << std::endl;
+
+  std::cout<< GridLogMessage << "Done" << std::endl;
+  Grid_finalize();
+}
+
+int main(int argc, char** argv){
+  std::string action = argv[1]; //"DWF", "EOFA" or "DSDR"
+  if(action == "DWF"){
+    runTest<GparityDomainWallFermionD, DomainWallFermionD>(argc, argv);
+  }else if(action == "EOFA"){
+    runTest<GparityDomainWallEOFAFermionD, DomainWallEOFAFermionD>(argc, argv);
+  }else if(action == "DSDR"){
+    runTest<GparityWilsonTMFermionD, WilsonTMFermionD>(argc,argv);
+  }else{
+    assert(0);
+  }
+}
diff --git a/tests/forces/Test_gpwilson_force.cc b/tests/forces/Test_gpwilson_force.cc
index d731f27a..4c3380fe 100644
--- a/tests/forces/Test_gpwilson_force.cc
+++ b/tests/forces/Test_gpwilson_force.cc
@@ -50,7 +50,7 @@ int main (int argc, char ** argv)
   int threads = GridThread::GetThreads();
   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
-  std::vector<int> twists(Nd,0); twists[nu] = 1;
-  GparityWilsonFermionR::ImplParams params;  params.twists = twists;
-  GparityWilsonFermionR Wil(U,*UGrid,*UrbGrid,mass,params);
+  const int nu = 1;
+  const int Lnu=latt_size[nu];
+
+  std::vector<int> twists(Nd,0);
+  twists[nu] = 1;
+  twists[3]=1;
+  GparityWilsonFermionD::ImplParams params;  params.twists = twists;
+  GparityWilsonFermionD Wil(U,*UGrid,*UrbGrid,mass,params);
   Wil.M (phi,Mphi);
   ComplexD S = innerProduct(Mphi,Mphi); // pdag MdagM p
diff --git a/tests/forces/Test_mobius_force.cc b/tests/forces/Test_mobius_force.cc
index d2326a81..3518007c 100644
--- a/tests/forces/Test_mobius_force.cc
+++ b/tests/forces/Test_mobius_force.cc
@@ -76,7 +76,7 @@ int main (int argc, char ** argv)
   p.boundary_phases[2] = 1.0;
   p.boundary_phases[3] =- 1.0;
-  MobiusFermionR Ddwf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,p);
+  MobiusFermionD Ddwf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,p);
   Ddwf.M (phi,Mphi);
   ComplexD S = innerProduct(Mphi,Mphi); // pdag MdagM p
diff --git a/tests/forces/Test_mobius_force_eofa.cc b/tests/forces/Test_mobius_force_eofa.cc
index eea3e3f4..a8871faa 100644
--- a/tests/forces/Test_mobius_force_eofa.cc
+++
b/tests/forces/Test_mobius_force_eofa.cc @@ -82,14 +82,56 @@ int main (int argc, char** argv) RealD mf = 0.01; RealD mb = 1.0; RealD M5 = 1.8; - MobiusEOFAFermionR Lop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, 0.0, -1, M5, b, c); - MobiusEOFAFermionR Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5, b, c); + MobiusEOFAFermionD Lop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, 0.0, -1, M5, b, c); + MobiusEOFAFermionD Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5, b, c); OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, 12); ConjugateGradient CG(1.0e-12, 5000); ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, CG, CG, CG, CG, Params, false); GridSerialRNG sRNG; sRNG.SeedFixedIntegers(seeds4); + + //Check the rational approximation + { + RealD scale = std::sqrt(0.5); + LatticeFermion eta (Lop.FermionGrid()); + gaussian(RNG5,eta); eta = eta * scale; + + Meofa.refresh(U, eta); + + //Phi = M^{-1/2} eta + //M is Hermitian + //(Phi, M Phi) = eta^\dagger M^{-1/2} M M^{-1/2} eta = eta^\dagger eta + LatticeFermion phi = Meofa.getPhi(); + LatticeFermion Mphi(FGrid); + + Meofa.Meofa(U, phi, Mphi); + std::cout << "Computing inner product" << std::endl; + ComplexD inner = innerProduct(phi, Mphi); + ComplexD test = inner - norm2(eta); + + std::cout << "(phi, Mphi) - (eta,eta): " << test << " expect 0" << std::endl; + + assert(test.real() < 1e-8); + assert(test.imag() < 1e-8); + + //Another test is to use heatbath twice to apply M^{-1/2} to Phi then apply M + // M Phi' + //= M M^{-1/2} Phi + //= M M^{-1/2} M^{-1/2} eta + //= eta + Meofa.refresh(U, phi); + LatticeFermion phi2 = Meofa.getPhi(); + LatticeFermion test2(FGrid); + Meofa.Meofa(U, phi2, test2); + test2 = test2 - eta; + RealD test2_norm = norm2(test2); + std::cout << "|M M^{-1/2} M^{-1/2} eta - eta|^2 = " << test2_norm << " expect 0" << std::endl; + assert( test2_norm < 1e-8 ); + } + + Meofa.refresh(U, sRNG, RNG5 ); + RealD S = Meofa.S(U); // pdag M p // get the deriv of phidag M phi with respect to "U" diff --git a/tests/forces/Test_mobius_gparity_eofa_mixed.cc b/tests/forces/Test_mobius_gparity_eofa_mixed.cc new file mode 100644 index 00000000..d490e838 --- /dev/null +++ b/tests/forces/Test_mobius_gparity_eofa_mixed.cc @@ -0,0 +1,233 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/forces/Test_mobius_gparity_eofa_mixed.cc + +Copyright (C) 2017 + +Author: Christopher Kelly +Author: Peter Boyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+
+See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/* END LEGAL */
+
+#include <Grid/Grid.h>
+
+using namespace std;
+using namespace Grid;
+
+typedef GparityWilsonImplD FermionImplPolicyD;
+typedef GparityMobiusEOFAFermionD FermionActionD;
+typedef typename FermionActionD::FermionField FermionFieldD;
+
+typedef GparityWilsonImplF FermionImplPolicyF;
+typedef GparityMobiusEOFAFermionF FermionActionF;
+typedef typename FermionActionF::FermionField FermionFieldF;
+
+NAMESPACE_BEGIN(Grid);
+
+  template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class SchurOperatorF>
+  class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
+  public:
+    typedef typename FermionOperatorD::FermionField FieldD;
+    typedef typename FermionOperatorF::FermionField FieldF;
+
+    using OperatorFunction<FieldD>::operator();
+
+    RealD   Tolerance;
+    RealD   InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
+    Integer MaxInnerIterations;
+    Integer MaxOuterIterations;
+    GridBase* SinglePrecGrid4; //Grid for single-precision fields
+    GridBase* SinglePrecGrid5; //Grid for single-precision fields
+    RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
+
+    FermionOperatorF &FermOpF;
+    FermionOperatorD &FermOpD;
+    SchurOperatorF &LinOpF;
+    SchurOperatorD &LinOpD;
+
+    Integer TotalInnerIterations; //Number of inner CG iterations
+    Integer TotalOuterIterations; //Number of restarts
+    Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
+
+    MixedPrecisionConjugateGradientOperatorFunction(RealD tol,
+                                                    Integer maxinnerit,
+                                                    Integer maxouterit,
+                                                    GridBase* _sp_grid4,
+                                                    GridBase* _sp_grid5,
+                                                    FermionOperatorF &_FermOpF,
+                                                    FermionOperatorD &_FermOpD,
+                                                    SchurOperatorF &_LinOpF,
+                                                    SchurOperatorD &_LinOpD):
+      LinOpF(_LinOpF),
+      LinOpD(_LinOpD),
+      FermOpF(_FermOpF),
+      FermOpD(_FermOpD),
+      Tolerance(tol),
+      InnerTolerance(tol),
+      MaxInnerIterations(maxinnerit),
+      MaxOuterIterations(maxouterit),
+      SinglePrecGrid4(_sp_grid4),
+      SinglePrecGrid5(_sp_grid5),
+      OuterLoopNormMult(100.)
+ { + }; + + void operator()(LinearOperatorBase &LinOpU, const FieldD &src, FieldD &psi) { + + std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<(&LinOpU); + assert(&(SchurOpU->_Mat)==&(LinOpD._Mat)); + + precisionChange(FermOpF.Umu, FermOpD.Umu); + + pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu); + pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu); + + //////////////////////////////////////////////////////////////////////////////////// + // Make a mixed precision conjugate gradient + //////////////////////////////////////////////////////////////////////////////////// + MixedPrecisionConjugateGradient MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD); + MPCG.InnerTolerance = InnerTolerance; + std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" < seeds4({1,2,3,5}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGridD); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGridD); RNG4.SeedFixedIntegers(seeds4); + + int threads = GridThread::GetThreads(); + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + + LatticeGaugeFieldD Ud(UGridD); + SU::HotConfiguration(RNG4,Ud); + + LatticeGaugeFieldF Uf(UGridF); + precisionChange(Uf, Ud); + + RealD b = 2.5; + RealD c = 1.5; + RealD mf = 0.01; + RealD mb = 1.0; + RealD M5 = 1.8; + FermionActionD::ImplParams params; + params.twists[0] = 1; //GPBC in X + params.twists[Nd-1] = 1; //APRD in T + + std::vector gtwists(4,0); + gtwists[0] = 1; + + ConjugateGimplD::setDirections(gtwists); + + FermionActionD LopD(Ud, *FGridD, *FrbGridD, *UGridD, *UrbGridD, mf, mf, mb, 0.0, -1, M5, b, c, params); + FermionActionD RopD(Ud, *FGridD, *FrbGridD, *UGridD, *UrbGridD, mb, mf, mb, -1.0, 1, M5, b, c, params); + + FermionActionF LopF(Uf, *FGridF, *FrbGridF, *UGridF, *UrbGridF, mf, mf, mb, 0.0, -1, M5, b, c, params); + FermionActionF RopF(Uf, *FGridF, *FrbGridF, *UGridF, *UrbGridF, mb, mf, mb, -1.0, 1, M5, b, c, params); + + + OneFlavourRationalParams OFRp(0.95, 100.0, 5000, 1.0e-12, 12); + ConjugateGradient CG(1.0e-10, 10000); + + + typedef SchurDiagMooeeOperator EOFAschuropD; + typedef SchurDiagMooeeOperator EOFAschuropF; + + EOFAschuropD linopL_D(LopD); + EOFAschuropD linopR_D(RopD); + + EOFAschuropF linopL_F(LopF); + EOFAschuropF linopR_F(RopF); + + typedef MixedPrecisionConjugateGradientOperatorFunction EOFA_mxCG; + + EOFA_mxCG MCG_L(1e-10, 10000, 1000, UGridF, FrbGridF, LopF, LopD, linopL_F, linopL_D); + MCG_L.InnerTolerance = 1e-5; + + EOFA_mxCG MCG_R(1e-10, 10000, 1000, UGridF, FrbGridF, RopF, RopD, linopR_F, linopR_D); + MCG_R.InnerTolerance = 1e-5; + + ExactOneFlavourRatioPseudoFermionAction MeofaD(LopD, RopD, CG, CG, CG, CG, CG, OFRp, true); + ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction MeofaMx(LopF, RopF, LopD, RopD, MCG_L, MCG_R, MCG_L, MCG_R, MCG_L, MCG_R, OFRp, true); + + FermionFieldD eta(FGridD); + gaussian(RNG5, eta); + + MeofaD.refresh(Ud, eta); + MeofaMx.refresh(Ud, eta); + + FermionFieldD diff_phi(FGridD); + diff_phi = MeofaD.getPhi() - MeofaMx.getPhi(); + + RealD n = norm2(diff_phi); + + std::cout << GridLogMessage << "Phi(double)=" << norm2(MeofaD.getPhi()) << " Phi(mixed)=" << norm2(MeofaMx.getPhi()) << " diff=" << n << std::endl; + + assert(n < 1e-8); + + RealD Sd = MeofaD.S(Ud); + RealD Smx = MeofaMx.S(Ud); + + std::cout << GridLogMessage << "Initial action double=" << Sd << " mixed=" << Smx << " diff=" << Sd-Smx << std::endl; + + assert(fabs(Sd-Smx) < 1e-6); + + 
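+  //Change the gauge field between the heatbath step and the next action
+  //evaluation: if U were left untouched the operator and its inverse would
+  //cancel and both implementations would trivially return |eta|^2.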
SU::HotConfiguration(RNG4,Ud); + precisionChange(Uf, Ud); + + Sd = MeofaD.S(Ud); + Smx = MeofaMx.S(Ud); + + std::cout << GridLogMessage << "After randomizing U, action double=" << Sd << " mixed=" << Smx << " diff=" << Sd-Smx << std::endl; + + assert(fabs(Sd-Smx) < 1e-6); + + std::cout << GridLogMessage << "Done" << std::endl; + Grid_finalize(); +} diff --git a/tests/forces/Test_mobius_gpforce_eofa.cc b/tests/forces/Test_mobius_gpforce_eofa.cc index 7f114615..dd71b565 100644 --- a/tests/forces/Test_mobius_gpforce_eofa.cc +++ b/tests/forces/Test_mobius_gpforce_eofa.cc @@ -34,7 +34,7 @@ using namespace Grid; ; typedef GparityWilsonImplR FermionImplPolicy; -typedef GparityMobiusEOFAFermionR FermionAction; +typedef GparityMobiusEOFAFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; int main (int argc, char** argv) diff --git a/tests/forces/Test_partfrac_force.cc b/tests/forces/Test_partfrac_force.cc index 33f7b5fd..173f7626 100644 --- a/tests/forces/Test_partfrac_force.cc +++ b/tests/forces/Test_partfrac_force.cc @@ -69,7 +69,7 @@ int main (int argc, char ** argv) //////////////////////////////////// RealD mass=0.01; RealD M5=1.8; - OverlapWilsonPartialFractionTanhFermionR Dpf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0); + OverlapWilsonPartialFractionTanhFermionD Dpf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0); Dpf.M (phi,Mphi); ComplexD S = innerProduct(Mphi,Mphi); // pdag MdagM p diff --git a/tests/forces/Test_wilson_force.cc b/tests/forces/Test_wilson_force.cc index b7bf1268..f4bf8ed3 100644 --- a/tests/forces/Test_wilson_force.cc +++ b/tests/forces/Test_wilson_force.cc @@ -67,7 +67,7 @@ int main (int argc, char ** argv) // Unmodified matrix element //////////////////////////////////// RealD mass=-4.0; //kills the diagonal term - WilsonFermionR Dw (U, Grid,RBGrid,mass); + WilsonFermionD Dw (U, Grid,RBGrid,mass); Dw.M (phi,Mphi); ComplexD S = innerProduct(Mphi,Mphi); // pdag MdagM p diff --git a/tests/forces/Test_wilsonclover_force.cc b/tests/forces/Test_wilsonclover_force.cc index 6a28e4e2..8aa5eb9d 100644 --- a/tests/forces/Test_wilsonclover_force.cc +++ b/tests/forces/Test_wilsonclover_force.cc @@ -70,7 +70,7 @@ int main(int argc, char **argv) //////////////////////////////////// RealD mass = 0.1; Real csw = 1.0; - WilsonCloverFermionR Dw(U, Grid, RBGrid, mass, csw, csw); + WilsonCloverFermionD Dw(U, Grid, RBGrid, mass, csw, csw); Dw.ImportGauge(U); Dw.M(phi, Mphi); ComplexD S = innerProduct(Mphi, Mphi); // Action : pdag MdagM p diff --git a/tests/forces/Test_zmobius_force.cc b/tests/forces/Test_zmobius_force.cc index 89673bc7..5d3a86f4 100644 --- a/tests/forces/Test_zmobius_force.cc +++ b/tests/forces/Test_zmobius_force.cc @@ -81,7 +81,7 @@ int main (int argc, char ** argv) omegas.push_back( std::complex(0.0686324988446592,0.0550658530827402) ); omegas.push_back( std::complex(0.0686324988446592,-0.0550658530827402) ); - ZMobiusFermionR Ddwf(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, omegas,b,c); + ZMobiusFermionD Ddwf(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, omegas,b,c); Ddwf.M (phi,Mphi); diff --git a/tests/hmc/Test_action_dwf_gparity2fvs1f.cc b/tests/hmc/Test_action_dwf_gparity2fvs1f.cc new file mode 100644 index 00000000..46f87d93 --- /dev/null +++ b/tests/hmc/Test_action_dwf_gparity2fvs1f.cc @@ -0,0 +1,257 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: tests/hmc/Test_action_dwf_gparity2fvs1f.cc + + Copyright (C) 2015 + 
+ Author: Christopher Kelly + Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace Grid; + + + +template +void copy2fTo1fFermionField(FermionField1f &out, const FermionField2f &in, int gpdir){ + auto f0_halfgrid = PeekIndex(in,0); //on 2f Grid + FermionField1f f0_fullgrid_dbl(out.Grid()); + Replicate(f0_halfgrid, f0_fullgrid_dbl); //double it up to live on the 1f Grid + + auto f1_halfgrid = PeekIndex(in,1); + FermionField1f f1_fullgrid_dbl(out.Grid()); + Replicate(f1_halfgrid, f1_fullgrid_dbl); + + const Coordinate &dim_2f = in.Grid()->GlobalDimensions(); + const Coordinate &dim_1f = out.Grid()->GlobalDimensions(); + + //We have to be careful for 5d fields; the s-direction is placed before the x,y,z,t and so we need to shift gpdir by 1 + std::cout << "gpdir " << gpdir << std::endl; + + gpdir+=1; + std::cout << "gpdir for 5D fields " << gpdir << std::endl; + + std::cout << "dim_2f " << dim_2f << std::endl; + std::cout << "dim_1f " << dim_1f << std::endl; + + assert(dim_1f[gpdir] == 2*dim_2f[gpdir]); + + LatticeInteger xcoor_1f(out.Grid()); //5d lattice integer + LatticeCoordinate(xcoor_1f,gpdir); + + Integer L = dim_2f[gpdir]; + + out = where(xcoor_1f < L, f0_fullgrid_dbl, f1_fullgrid_dbl); +} + +//Both have the same field type +void copy2fTo1fGaugeField(LatticeGaugeField &out, const LatticeGaugeField &in, int gpdir){ + LatticeGaugeField U_dbl(out.Grid()); + Replicate(in, U_dbl); + + LatticeGaugeField Uconj_dbl = conjugate( U_dbl ); + + const Coordinate &dim_2f = in.Grid()->GlobalDimensions(); + + LatticeInteger xcoor_1f(out.Grid()); + LatticeCoordinate(xcoor_1f,gpdir); + + Integer L = dim_2f[gpdir]; + + out = where(xcoor_1f < L, U_dbl, Uconj_dbl); +} + + +std::ostream & operator<<(std::ostream &os, const Coordinate &x){ + os << "("; + for(int i=0;i seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5_2f(FGrid_2f); RNG5_2f.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4_2f(UGrid_2f); RNG4_2f.SeedFixedIntegers(seeds4); + + std::cout << "Generating hot 2f gauge configuration" << std::endl; + LatticeGaugeField Umu_2f(UGrid_2f); + SU::HotConfiguration(RNG4_2f,Umu_2f); + + std::cout << "Copying 2f->1f gauge field" << std::endl; + LatticeGaugeField Umu_1f(UGrid_1f); + copy2fTo1fGaugeField(Umu_1f, Umu_2f, mu); + + typedef GparityWilsonImplR FermionImplPolicy2f; + typedef GparityDomainWallFermionD FermionAction2f; + typedef typename FermionAction2f::FermionField FermionField2f; + + typedef WilsonImplR FermionImplPolicy1f; + typedef DomainWallFermionD FermionAction1f; + typedef typename FermionAction1f::FermionField FermionField1f; + + std::cout << "Generating eta 2f" << std::endl; + FermionField2f 
eta_2f(FGrid_2f); + gaussian(RNG5_2f, eta_2f); + + RealD scale = std::sqrt(0.5); + eta_2f=eta_2f*scale; + + std::cout << "Copying 2f->1f eta" << std::endl; + FermionField1f eta_1f(FGrid_1f); + copy2fTo1fFermionField(eta_1f, eta_2f, mu); + + Real beta = 2.13; + Real light_mass = 0.01; + Real strange_mass = 0.032; + Real pv_mass = 1.0; + RealD M5 = 1.8; + + //Setup the Dirac operators + std::cout << "Initializing Dirac operators" << std::endl; + + FermionAction2f::ImplParams Params_2f; + Params_2f.twists[mu] = 1; + Params_2f.twists[Nd-1] = 1; //APBC in time direction + + //note 'Num' and 'Den' here refer to the determinant ratio, not the operator ratio in the pseudofermion action where the two are inverted + //to my mind the Pauli Villars and 'denominator' are synonymous but the Grid convention has this as the 'Numerator' operator in the RHMC implementation + FermionAction2f NumOp_2f(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f, *UrbGrid_2f, light_mass,M5,Params_2f); + FermionAction2f DenOp_2f(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f, *UrbGrid_2f, pv_mass, M5,Params_2f); + + FermionAction1f::ImplParams Params_1f; + Params_1f.boundary_phases[mu] = -1; //antiperiodic in doubled lattice in GP direction + Params_1f.boundary_phases[Nd-1] = -1; + + FermionAction1f NumOp_1f(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f, *UrbGrid_1f, light_mass,M5,Params_1f); + FermionAction1f DenOp_1f(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f, *UrbGrid_1f, pv_mass, M5,Params_1f); + + //Test the replication routines by running a CG on eta + double StoppingCondition = 1e-10; + double MaxCGIterations = 30000; + ConjugateGradient CG_2f(StoppingCondition,MaxCGIterations); + ConjugateGradient CG_1f(StoppingCondition,MaxCGIterations); + + NumOp_1f.ImportGauge(Umu_1f); + NumOp_2f.ImportGauge(Umu_2f); + + FermionField1f test_1f(FGrid_1f); + FermionField2f test_2f(FGrid_2f); + + MdagMLinearOperator Linop_1f(NumOp_1f); + MdagMLinearOperator Linop_2f(NumOp_2f); + + CG_1f(Linop_1f, eta_1f, test_1f); + CG_2f(Linop_2f, eta_2f, test_2f); + RealD test_1f_norm = norm2(test_1f); + RealD test_2f_norm = norm2(test_2f); + + std::cout << "Verification of replication routines: " << test_1f_norm << " " << test_2f_norm << " " << test_1f_norm - test_2f_norm << std::endl; + + +#if 1 + typedef GeneralEvenOddRatioRationalPseudoFermionAction Action2f; + typedef GeneralEvenOddRatioRationalPseudoFermionAction Action1f; + + RationalActionParams rational_params; + rational_params.inv_pow = 2; + rational_params.lo = 1e-5; + rational_params.hi = 32; + rational_params.md_degree = 16; + rational_params.action_degree = 16; + + Action2f action_2f(DenOp_2f, NumOp_2f, rational_params); + Action1f action_1f(DenOp_1f, NumOp_1f, rational_params); +#else + typedef TwoFlavourEvenOddRatioPseudoFermionAction Action2f; + typedef TwoFlavourEvenOddRatioPseudoFermionAction Action1f; + + Action2f action_2f(DenOp_2f, NumOp_2f, CG_2f, CG_2f); + Action1f action_1f(DenOp_1f, NumOp_1f, CG_1f, CG_1f); +#endif + + + std::cout << "Action refresh" << std::endl; + action_2f.refresh(Umu_2f, eta_2f); + action_1f.refresh(Umu_1f, eta_1f); + + std::cout << "Action compute post heatbath" << std::endl; + RealD S_2f = action_2f.S(Umu_2f); + RealD S_1f = action_1f.S(Umu_1f); + + std::cout << "Action comparison post heatbath" << std::endl; + std::cout << S_2f << " " << S_1f << " " << S_2f-S_1f << std::endl; + + //Change the gauge field between refresh and action eval else the matrix and inverse matrices all cancel and we just get |eta|^2 + SU::HotConfiguration(RNG4_2f,Umu_2f); + 
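+  //Rebuild the doubled 1f lattice from the updated 2f configuration so both
+  //actions continue to see the same physical gauge field.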
copy2fTo1fGaugeField(Umu_1f, Umu_2f, mu); + + //Now compute the action with the new gauge field + std::cout << "Action compute post gauge field update" << std::endl; + S_2f = action_2f.S(Umu_2f); + S_1f = action_1f.S(Umu_1f); + + std::cout << "Action comparison post gauge field update" << std::endl; + std::cout << S_2f << " " << S_1f << " " << S_2f-S_1f << std::endl; + + Grid_finalize(); +} // main + + diff --git a/tests/hmc/Test_hmc_EODWFRatio.cc b/tests/hmc/Test_hmc_EODWFRatio.cc index 93469ffe..ff8521cb 100644 --- a/tests/hmc/Test_hmc_EODWFRatio.cc +++ b/tests/hmc/Test_hmc_EODWFRatio.cc @@ -43,7 +43,7 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef GenericHMCRunner HMCWrapper; // Uses the default minimum norm typedef WilsonImplR FermionImplPolicy; - typedef DomainWallFermionR FermionAction; + typedef DomainWallFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; @@ -136,16 +136,9 @@ int main(int argc, char **argv) { TheHMC.ReadCommandLine(argc, argv); // these can be parameters from file // Reset performance counters - NumOp.ZeroCounters(); - DenOp.ZeroCounters(); TheHMC.Run(); // no smearing // TheHMC.Run(SmearingPolicy); // for smearing - std::cout << GridLogMessage << "Numerator report, Pauli-Villars term : " << std::endl; - NumOp.Report(); - std::cout << GridLogMessage << "Denominator report, Dw(m) term (includes CG) : " << std::endl; - DenOp.Report(); - Grid_finalize(); } // main diff --git a/tests/hmc/Test_hmc_EODWFRatio_Gparity.cc b/tests/hmc/Test_hmc_EODWFRatio_Gparity.cc index 9ca0b0a0..f98d0edc 100644 --- a/tests/hmc/Test_hmc_EODWFRatio_Gparity.cc +++ b/tests/hmc/Test_hmc_EODWFRatio_Gparity.cc @@ -42,7 +42,7 @@ int main(int argc, char **argv) { typedef ConjugateHMCRunner HMCWrapper; // Uses the default minimum norm typedef GparityWilsonImplR FermionImplPolicy; - typedef GparityDomainWallFermionR FermionAction; + typedef GparityDomainWallFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; @@ -132,15 +132,9 @@ int main(int argc, char **argv) { TheHMC.ReadCommandLine(argc, argv); // these can be parameters from file // Reset performance counters - NumOp.ZeroCounters(); - DenOp.ZeroCounters(); TheHMC.Run(); // no smearing // TheHMC.Run(SmearingPolicy); // for smearing - std::cout << GridLogMessage << "Numerator report, Pauli-Villars term : " << std::endl; - NumOp.Report(); - std::cout << GridLogMessage << "Denominator report, Dw(m) term (includes CG) : " << std::endl; - DenOp.Report(); Grid_finalize(); diff --git a/tests/hmc/Test_hmc_EOMobiusRatio.cc b/tests/hmc/Test_hmc_EOMobiusRatio.cc index 63b4d4fa..0e0a6611 100644 --- a/tests/hmc/Test_hmc_EOMobiusRatio.cc +++ b/tests/hmc/Test_hmc_EOMobiusRatio.cc @@ -83,7 +83,7 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef GenericHMCRunner HMCWrapper; // Uses the default minimum norm typedef WilsonImplR FermionImplPolicy; - typedef MobiusFermionR FermionAction; + typedef MobiusFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; // Serialiser typedef Grid::XmlReader Serialiser; @@ -211,8 +211,6 @@ int main(int argc, char **argv) { */ // Reset performance counters - NumOp.ZeroCounters(); - DenOp.ZeroCounters(); if (ApplySmearing){ SmearingParameters SmPar(Reader); @@ -225,11 +223,6 @@ int main(int argc, char **argv) { TheHMC.Run(); // no smearing } - std::cout << GridLogMessage << "Numerator report, Pauli-Villars term : " << std::endl; - NumOp.Report(); - std::cout << GridLogMessage << 
"Denominator report, Dw(m) term (includes CG) : " << std::endl; - DenOp.Report(); - Grid_finalize(); } // main diff --git a/tests/hmc/Test_hmc_EOMobiusRatioManyFlavour.cc b/tests/hmc/Test_hmc_EOMobiusRatioManyFlavour.cc index 790433f2..3f29ae62 100644 --- a/tests/hmc/Test_hmc_EOMobiusRatioManyFlavour.cc +++ b/tests/hmc/Test_hmc_EOMobiusRatioManyFlavour.cc @@ -89,7 +89,7 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef GenericHMCRunner HMCWrapper; // Uses the default minimum norm typedef WilsonImplR FermionImplPolicy; - typedef MobiusFermionR FermionAction; + typedef MobiusFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; // Serialiser typedef Grid::XmlReader Serialiser; @@ -226,8 +226,6 @@ int main(int argc, char **argv) { */ // Reset performance counters - NumOp.ZeroCounters(); - DenOp.ZeroCounters(); if (ApplySmearing){ SmearingParameters SmPar(Reader); @@ -240,10 +238,6 @@ int main(int argc, char **argv) { TheHMC.Run(); // no smearing } - std::cout << GridLogMessage << "Numerator report, Pauli-Villars term : " << std::endl; - NumOp.Report(); - std::cout << GridLogMessage << "Denominator report, Dw(m) term (includes CG) : " << std::endl; - DenOp.Report(); Grid_finalize(); } // main diff --git a/tests/hmc/Test_hmc_EOWilsonCloverFermionGauge.cc b/tests/hmc/Test_hmc_EOWilsonCloverFermionGauge.cc index 6b9b70b5..f6485f20 100644 --- a/tests/hmc/Test_hmc_EOWilsonCloverFermionGauge.cc +++ b/tests/hmc/Test_hmc_EOWilsonCloverFermionGauge.cc @@ -39,7 +39,7 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef GenericHMCRunner HMCWrapper; // Uses the default minimum norm typedef WilsonImplR FermionImplPolicy; - typedef WilsonCloverFermionR FermionAction; + typedef WilsonCloverFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; diff --git a/tests/hmc/Test_hmc_EOWilsonFermionGauge.cc b/tests/hmc/Test_hmc_EOWilsonFermionGauge.cc index 3643c0ad..092e66d1 100644 --- a/tests/hmc/Test_hmc_EOWilsonFermionGauge.cc +++ b/tests/hmc/Test_hmc_EOWilsonFermionGauge.cc @@ -40,7 +40,7 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef GenericHMCRunner HMCWrapper; // Uses the default minimum norm typedef WilsonImplR FermionImplPolicy; - typedef WilsonFermionR FermionAction; + typedef WilsonFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; diff --git a/tests/hmc/Test_hmc_EOWilsonRatio.cc b/tests/hmc/Test_hmc_EOWilsonRatio.cc index 675bc605..406aa34d 100644 --- a/tests/hmc/Test_hmc_EOWilsonRatio.cc +++ b/tests/hmc/Test_hmc_EOWilsonRatio.cc @@ -42,7 +42,7 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef GenericHMCRunner HMCWrapper; // Uses the default minimum norm typedef WilsonImplR FermionImplPolicy; - typedef WilsonFermionR FermionAction; + typedef WilsonFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; diff --git a/tests/hmc/Test_hmc_GparityIwasakiGauge.cc b/tests/hmc/Test_hmc_GparityIwasakiGauge.cc index 7f74d5d8..d79404a0 100644 --- a/tests/hmc/Test_hmc_GparityIwasakiGauge.cc +++ b/tests/hmc/Test_hmc_GparityIwasakiGauge.cc @@ -41,7 +41,7 @@ int main(int argc, char **argv) { typedef ConjugateHMCRunner HMCWrapper; // Uses the default minimum norm typedef GparityWilsonImplR FermionImplPolicy; - typedef GparityDomainWallFermionR FermionAction; + typedef GparityDomainWallFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; @@ -58,7 +58,7 @@ int main(int 
argc, char **argv) { CheckpointerParameters CPparams; CPparams.config_prefix = "ckpoint_EODWF_lat"; CPparams.rng_prefix = "ckpoint_EODWF_rng"; - CPparams.saveInterval = 5; + CPparams.saveInterval = 1; CPparams.format = "IEEE64BIG"; TheHMC.Resources.LoadNerscCheckpointer(CPparams); @@ -79,7 +79,7 @@ int main(int argc, char **argv) { // that have a complex construction // standard RealD beta = 2.6 ; - const int nu = 3; + const int nu = 1; std::vector twists(Nd,0); twists[nu] = 1; ConjugateGimplD::setDirections(twists); diff --git a/tests/hmc/Test_hmc_GparityWilsonGauge.cc b/tests/hmc/Test_hmc_GparityWilsonGauge.cc index b8c078fe..76901235 100644 --- a/tests/hmc/Test_hmc_GparityWilsonGauge.cc +++ b/tests/hmc/Test_hmc_GparityWilsonGauge.cc @@ -42,7 +42,7 @@ int main(int argc, char **argv) { typedef ConjugateHMCRunner HMCWrapper; // Uses the default minimum norm typedef GparityWilsonImplR FermionImplPolicy; - typedef GparityDomainWallFermionR FermionAction; + typedef GparityDomainWallFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; diff --git a/tests/hmc/Test_hmc_Mobius2p1f.cc b/tests/hmc/Test_hmc_Mobius2p1f.cc index 508f5b5e..8c97fbb5 100644 --- a/tests/hmc/Test_hmc_Mobius2p1f.cc +++ b/tests/hmc/Test_hmc_Mobius2p1f.cc @@ -39,7 +39,7 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef WilsonImplR FermionImplPolicy; - typedef MobiusFermionR FermionAction; + typedef MobiusFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; typedef Grid::XmlReader Serialiser; @@ -148,14 +148,14 @@ int main(int argc, char **argv) { // Level1.push_back(&StrangePseudoFermion); // DJM: setup for EOFA ratio (Shamir) - // DomainWallEOFAFermionR Strange_Op_L(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, strange_mass, strange_mass, pv_mass, 0.0, -1, M5); - // DomainWallEOFAFermionR Strange_Op_R(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, pv_mass, strange_mass, pv_mass, -1.0, 1, M5); + // DomainWallEOFAFermionD Strange_Op_L(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, strange_mass, strange_mass, pv_mass, 0.0, -1, M5); + // DomainWallEOFAFermionD Strange_Op_R(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, pv_mass, strange_mass, pv_mass, -1.0, 1, M5); // ExactOneFlavourRatioPseudoFermionAction EOFA(Strange_Op_L, Strange_Op_R, CG, OFRp, true); // Level1.push_back(&EOFA); // DJM: setup for EOFA ratio (Mobius) - MobiusEOFAFermionR Strange_Op_L(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); - MobiusEOFAFermionR Strange_Op_R(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); + MobiusEOFAFermionD Strange_Op_L(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); + MobiusEOFAFermionD Strange_Op_R(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); ExactOneFlavourRatioPseudoFermionAction EOFA(Strange_Op_L, Strange_Op_R, CG, OFRp, true); Level1.push_back(&EOFA); diff --git a/tests/hmc/Test_hmc_ScalarActionNxN.cc b/tests/hmc/Test_hmc_ScalarActionNxN.cc index 726ecd4a..119a39dc 100644 --- a/tests/hmc/Test_hmc_ScalarActionNxN.cc +++ b/tests/hmc/Test_hmc_ScalarActionNxN.cc @@ -34,7 +34,7 @@ class ScalarActionParameters : Serializable { double, lambda, double, g); - ScalarActionParameters() = default; + ScalarActionParameters() {}; template ScalarActionParameters(Reader& Reader){ @@ -45,7 +45,6 @@ class ScalarActionParameters : Serializable { } using namespace Grid; - ; 
template class MagMeas : public HmcObservable { diff --git a/tests/hmc/Test_hmc_WC2ASFG_Production.cc b/tests/hmc/Test_hmc_WC2ASFG_Production.cc index 0bbf4ece..90f43ede 100644 --- a/tests/hmc/Test_hmc_WC2ASFG_Production.cc +++ b/tests/hmc/Test_hmc_WC2ASFG_Production.cc @@ -28,7 +28,7 @@ directory /* END LEGAL */ #include - +#ifdef ENABLE_FERMION_REPS namespace Grid{ struct FermionParameters: Serializable { GRID_SERIALIZABLE_CLASS_MEMBERS(FermionParameters, @@ -80,7 +80,7 @@ int main(int argc, char **argv) // Typedefs to simplify notation typedef GenericHMCRunnerHirep HMCWrapper; // Uses the default minimum norm typedef WilsonTwoIndexAntiSymmetricImplR FermionImplPolicy; // gauge field implemetation for the pseudofermions - typedef WilsonCloverTwoIndexAntiSymmetricFermionR FermionAction; // type of lattice fermions (Wilson, DW, ...) + typedef WilsonCloverTwoIndexAntiSymmetricFermionD FermionAction; // type of lattice fermions (Wilson, DW, ...) typedef typename FermionAction::FermionField FermionField; //typedef Grid::JSONReader Serialiser; typedef Grid::XmlReader Serialiser; @@ -210,4 +210,6 @@ int main(int argc, char **argv) Grid_finalize(); } // main - +#else +int main(int argc, char **argv){} +#endif diff --git a/tests/hmc/Test_hmc_WC2SFG_Production.cc b/tests/hmc/Test_hmc_WC2SFG_Production.cc index 64a3f1cb..16ca05a0 100644 --- a/tests/hmc/Test_hmc_WC2SFG_Production.cc +++ b/tests/hmc/Test_hmc_WC2SFG_Production.cc @@ -29,6 +29,7 @@ directory #include +#ifdef ENABLE_FERMION_REPS namespace Grid{ struct FermionParameters: Serializable { GRID_SERIALIZABLE_CLASS_MEMBERS(FermionParameters, @@ -81,7 +82,7 @@ int main(int argc, char **argv) // Typedefs to simplify notation typedef GenericHMCRunnerHirep HMCWrapper; // Uses the default minimum norm typedef WilsonTwoIndexSymmetricImplR FermionImplPolicy; // gauge field implemetation for the pseudofermions - typedef WilsonCloverTwoIndexSymmetricFermionR FermionAction; // type of lattice fermions (Wilson, DW, ...) + typedef WilsonCloverTwoIndexSymmetricFermionD FermionAction; // type of lattice fermions (Wilson, DW, ...) 
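+  // This test now compiles only when Grid is configured with the higher
+  // fermion representations enabled (the ENABLE_FERMION_REPS guard added at
+  // the top of the file); otherwise the #else branch at the bottom supplies a
+  // stub main() so the build target still links.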
typedef typename FermionAction::FermionField FermionField; //typedef Grid::JSONReader Serialiser; typedef Grid::XmlReader Serialiser; @@ -211,3 +212,6 @@ int main(int argc, char **argv) Grid_finalize(); } // main +#else +int main(int argc, char **argv){} +#endif diff --git a/tests/hmc/Test_hmc_WCFG_Production.cc b/tests/hmc/Test_hmc_WCFG_Production.cc index cebe3791..bd02886d 100644 --- a/tests/hmc/Test_hmc_WCFG_Production.cc +++ b/tests/hmc/Test_hmc_WCFG_Production.cc @@ -79,7 +79,7 @@ int main(int argc, char **argv) // Typedefs to simplify notation typedef GenericHMCRunner HMCWrapper; // Uses the default minimum norm typedef WilsonImplR FermionImplPolicy; - typedef WilsonCloverFermionR FermionAction; + typedef WilsonCloverFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; typedef Grid::XmlReader Serialiser; diff --git a/tests/hmc/Test_hmc_WCMixedRepFG_Production.cc b/tests/hmc/Test_hmc_WCMixedRepFG_Production.cc index 211900be..3be63a15 100644 --- a/tests/hmc/Test_hmc_WCMixedRepFG_Production.cc +++ b/tests/hmc/Test_hmc_WCMixedRepFG_Production.cc @@ -32,6 +32,7 @@ directory #include "Grid/Grid.h" +#ifdef ENABLE_FERMION_REPS namespace Grid{ struct FermionParameters: Serializable { GRID_SERIALIZABLE_CLASS_MEMBERS(FermionParameters, @@ -84,11 +85,11 @@ int main(int argc, char **argv) { typedef GenericHMCRunnerHirep HMCWrapper; typedef WilsonImplR FundImplPolicy; - typedef WilsonCloverFermionR FundFermionAction; + typedef WilsonCloverFermionD FundFermionAction; typedef typename FundFermionAction::FermionField FundFermionField; typedef WilsonTwoIndexAntiSymmetricImplR ASymmImplPolicy; - typedef WilsonCloverTwoIndexAntiSymmetricFermionR ASymmFermionAction; + typedef WilsonCloverTwoIndexAntiSymmetricFermionD ASymmFermionAction; typedef typename ASymmFermionAction::FermionField ASymmFermionField; typedef Grid::XmlReader Serialiser; @@ -222,3 +223,6 @@ int main(int argc, char **argv) { Grid_finalize(); } // main +#else +int main(int argc, char **argv){} +#endif diff --git a/tests/hmc/Test_hmc_WCadjFG_Production.cc b/tests/hmc/Test_hmc_WCadjFG_Production.cc index 5cf4bac1..f0e2742d 100644 --- a/tests/hmc/Test_hmc_WCadjFG_Production.cc +++ b/tests/hmc/Test_hmc_WCadjFG_Production.cc @@ -29,6 +29,7 @@ directory #include +#ifdef ENABLE_FERMION_REPS namespace Grid{ struct FermionParameters: Serializable { GRID_SERIALIZABLE_CLASS_MEMBERS(FermionParameters, @@ -81,7 +82,7 @@ int main(int argc, char **argv) // Typedefs to simplify notation typedef GenericHMCRunnerHirep HMCWrapper; // Uses the default minimum norm typedef WilsonAdjImplR FermionImplPolicy; // gauge field implemetation for the pseudofermions - typedef WilsonCloverAdjFermionR FermionAction; // type of lattice fermions (Wilson, DW, ...) + typedef WilsonCloverAdjFermionD FermionAction; // type of lattice fermions (Wilson, DW, ...) 
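+  // As in the other Hirep tests, the representation content travels with the
+  // runner: GenericHMCRunnerHirep is instantiated over the representation list
+  // so that the pseudofermion action above can live in the adjoint representation.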
typedef typename FermionAction::FermionField FermionField; typedef Grid::XmlReader Serialiser; @@ -211,3 +212,6 @@ int main(int argc, char **argv) } // main +#else +int main(int argc, char **argv){} +#endif diff --git a/tests/hmc/Test_hmc_WilsonAdjointFermionGauge.cc b/tests/hmc/Test_hmc_WilsonAdjointFermionGauge.cc index cc56cae3..3d601d25 100644 --- a/tests/hmc/Test_hmc_WilsonAdjointFermionGauge.cc +++ b/tests/hmc/Test_hmc_WilsonAdjointFermionGauge.cc @@ -31,9 +31,10 @@ directory /* END LEGAL */ #include "Grid/Grid.h" +#ifdef ENABLE_FERMION_REPS + int main(int argc, char **argv) { using namespace Grid; - ; // Here change the allowed (higher) representations typedef Representations< FundamentalRepresentation, AdjointRepresentation > TheRepresentations; @@ -46,7 +47,7 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef GenericHMCRunnerHirep HMCWrapper; typedef WilsonAdjImplR FermionImplPolicy; // gauge field implemetation for the pseudofermions - typedef WilsonAdjFermionR FermionAction; // type of lattice fermions (Wilson, DW, ...) + typedef WilsonAdjFermionD FermionAction; // type of lattice fermions (Wilson, DW, ...) typedef typename FermionAction::FermionField FermionField; //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: @@ -127,3 +128,6 @@ int main(int argc, char **argv) { } // main +#else +int main(int argc, char **argv){} +#endif diff --git a/tests/hmc/Test_hmc_WilsonCloverFermionGauge.cc b/tests/hmc/Test_hmc_WilsonCloverFermionGauge.cc index 9e27e3ec..149e6c5c 100644 --- a/tests/hmc/Test_hmc_WilsonCloverFermionGauge.cc +++ b/tests/hmc/Test_hmc_WilsonCloverFermionGauge.cc @@ -41,7 +41,7 @@ int main(int argc, char **argv) // Typedefs to simplify notation typedef GenericHMCRunner HMCWrapper; // Uses the default minimum norm typedef WilsonImplR FermionImplPolicy; - typedef WilsonCloverFermionR FermionAction; + typedef WilsonCloverFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: diff --git a/tests/hmc/Test_hmc_WilsonFermionGauge.cc b/tests/hmc/Test_hmc_WilsonFermionGauge.cc index cc1f2474..a0c43c51 100644 --- a/tests/hmc/Test_hmc_WilsonFermionGauge.cc +++ b/tests/hmc/Test_hmc_WilsonFermionGauge.cc @@ -42,7 +42,7 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef GenericHMCRunner HMCWrapper; // Uses the default minimum norm typedef WilsonImplR FermionImplPolicy; - typedef WilsonFermionR FermionAction; + typedef WilsonFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; diff --git a/tests/hmc/Test_hmc_WilsonGauge.cc b/tests/hmc/Test_hmc_WilsonGauge.cc index 4ef0e658..28feadf3 100644 --- a/tests/hmc/Test_hmc_WilsonGauge.cc +++ b/tests/hmc/Test_hmc_WilsonGauge.cc @@ -69,8 +69,10 @@ int main(int argc, char **argv) TopologyObsParameters TopParams; TopParams.interval = 5; TopParams.do_smearing = true; - TopParams.Smearing.steps = 200; - TopParams.Smearing.step_size = 0.01; + TopParams.Smearing.init_step_size = 0.01; + TopParams.Smearing.tolerance = 1e-5; + // TopParams.Smearing.steps = 200; + // TopParams.Smearing.step_size = 0.01; TopParams.Smearing.meas_interval = 50; TopParams.Smearing.maxTau = 2.0; TheHMC.Resources.AddObservable(TopParams); diff --git a/tests/hmc/Test_hmc_WilsonMixedRepresentationsFermionGauge.cc b/tests/hmc/Test_hmc_WilsonMixedRepresentationsFermionGauge.cc index 3b8cdda6..66a325f2 100644 --- a/tests/hmc/Test_hmc_WilsonMixedRepresentationsFermionGauge.cc +++ 
b/tests/hmc/Test_hmc_WilsonMixedRepresentationsFermionGauge.cc @@ -33,6 +33,7 @@ directory +#ifdef ENABLE_FERMION_REPS int main(int argc, char **argv) { #ifndef GRID_CUDA @@ -51,9 +52,9 @@ int main(int argc, char **argv) { typedef GenericHMCRunnerHirep HMCWrapper; typedef WilsonAdjImplR AdjImplPolicy; // gauge field implemetation for the pseudofermions - typedef WilsonAdjFermionR AdjFermionAction; // type of lattice fermions (Wilson, DW, ...) + typedef WilsonAdjFermionD AdjFermionAction; // type of lattice fermions (Wilson, DW, ...) typedef WilsonTwoIndexSymmetricImplR SymmImplPolicy; - typedef WilsonTwoIndexSymmetricFermionR SymmFermionAction; + typedef WilsonTwoIndexSymmetricFermionD SymmFermionAction; typedef typename AdjFermionAction::FermionField AdjFermionField; @@ -138,3 +139,6 @@ int main(int argc, char **argv) { } // main +#else +int main(int argc, char **argv){} +#endif diff --git a/tests/hmc/Test_hmc_WilsonRatio.cc b/tests/hmc/Test_hmc_WilsonRatio.cc index 3e3cac7e..e134dd83 100644 --- a/tests/hmc/Test_hmc_WilsonRatio.cc +++ b/tests/hmc/Test_hmc_WilsonRatio.cc @@ -41,7 +41,7 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef GenericHMCRunner HMCWrapper; // Uses the default minimum norm typedef WilsonImplR FermionImplPolicy; - typedef WilsonFermionR FermionAction; + typedef WilsonFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; diff --git a/tests/hmc/Test_hmc_WilsonTMFermionGauge.cc b/tests/hmc/Test_hmc_WilsonTMFermionGauge.cc index 5928efbe..9916580e 100644 --- a/tests/hmc/Test_hmc_WilsonTMFermionGauge.cc +++ b/tests/hmc/Test_hmc_WilsonTMFermionGauge.cc @@ -42,7 +42,7 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef GenericHMCRunner HMCWrapper; // Uses the default minimum norm typedef WilsonImplR FermionImplPolicy; - typedef WilsonTMFermionR FermionAction; + typedef WilsonTMFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; diff --git a/tests/hmc/Test_hmc_WilsonTwoIndexSymmetricFermionGauge.cc b/tests/hmc/Test_hmc_WilsonTwoIndexSymmetricFermionGauge.cc index 387842f7..4769e396 100644 --- a/tests/hmc/Test_hmc_WilsonTwoIndexSymmetricFermionGauge.cc +++ b/tests/hmc/Test_hmc_WilsonTwoIndexSymmetricFermionGauge.cc @@ -29,6 +29,7 @@ directory /* END LEGAL */ #include "Grid/Grid.h" +#ifdef ENABLE_FERMION_REPS int main(int argc, char **argv) { using namespace Grid; ; @@ -45,7 +46,7 @@ int main(int argc, char **argv) { typedef GenericHMCRunnerHirep HMCWrapper; typedef WilsonTwoIndexSymmetricImplR FermionImplPolicy; // gauge field implemetation for the pseudofermions - typedef WilsonTwoIndexSymmetricFermionR FermionAction; // type of lattice fermions (Wilson, DW, ...) + typedef WilsonTwoIndexSymmetricFermionD FermionAction; // type of lattice fermions (Wilson, DW, ...) 
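+  // Naming note for this change set: the old "...FermionR" aliases resolved to
+  // the precision selected at configure time, whereas "...FermionD" and
+  // "...FermionF" name the double- and single-precision operators explicitly.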
typedef typename FermionAction::FermionField FermionField; //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: @@ -127,3 +128,6 @@ int main(int argc, char **argv) { } // main +#else +int main(int argc, char **argv){} +#endif diff --git a/tests/hmc/Test_rhmc_EOWilson1p1.cc b/tests/hmc/Test_rhmc_EOWilson1p1.cc index 51a966b1..1e0975ca 100644 --- a/tests/hmc/Test_rhmc_EOWilson1p1.cc +++ b/tests/hmc/Test_rhmc_EOWilson1p1.cc @@ -41,7 +41,7 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef GenericHMCRunner HMCWrapper; // Uses the default minimum norm typedef WilsonImplR FermionImplPolicy; - typedef WilsonFermionR FermionAction; + typedef WilsonFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; diff --git a/tests/hmc/Test_rhmc_EOWilsonRatio.cc b/tests/hmc/Test_rhmc_EOWilsonRatio.cc index 44fb6d47..06d54215 100644 --- a/tests/hmc/Test_rhmc_EOWilsonRatio.cc +++ b/tests/hmc/Test_rhmc_EOWilsonRatio.cc @@ -42,7 +42,7 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef GenericHMCRunner HMCWrapper; // Uses the default minimum norm typedef WilsonImplR FermionImplPolicy; - typedef WilsonFermionR FermionAction; + typedef WilsonFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; diff --git a/tests/hmc/Test_rhmc_Wilson1p1.cc b/tests/hmc/Test_rhmc_Wilson1p1.cc index 93b748d2..2935092c 100644 --- a/tests/hmc/Test_rhmc_Wilson1p1.cc +++ b/tests/hmc/Test_rhmc_Wilson1p1.cc @@ -42,7 +42,7 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef GenericHMCRunner HMCWrapper; // Uses the default minimum norm typedef WilsonImplR FermionImplPolicy; - typedef WilsonFermionR FermionAction; + typedef WilsonFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; diff --git a/tests/hmc/Test_rhmc_WilsonRatio.cc b/tests/hmc/Test_rhmc_WilsonRatio.cc index 4896d329..60bb7641 100644 --- a/tests/hmc/Test_rhmc_WilsonRatio.cc +++ b/tests/hmc/Test_rhmc_WilsonRatio.cc @@ -41,7 +41,7 @@ int main(int argc, char **argv) { // Typedefs to simplify notation typedef GenericHMCRunner HMCWrapper; // Uses the default minimum norm typedef WilsonImplR FermionImplPolicy; - typedef WilsonFermionR FermionAction; + typedef WilsonFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; diff --git a/tests/lanczos/Test_WCMultiRep_lanczos.cc b/tests/lanczos/Test_WCMultiRep_lanczos.cc index 58759c96..0bfc75be 100644 --- a/tests/lanczos/Test_WCMultiRep_lanczos.cc +++ b/tests/lanczos/Test_WCMultiRep_lanczos.cc @@ -28,19 +28,21 @@ directory /* END LEGAL */ #include +#ifdef ENABLE_FERMION_REPS + using namespace std; using namespace Grid; -//typedef WilsonCloverFermionR FermionOp; -//typedef typename WilsonFermionR::FermionField FermionField; +//typedef WilsonCloverFermionD FermionOp; +//typedef typename WilsonFermionD::FermionField FermionField; typedef WilsonImplR FundImplPolicy; -typedef WilsonCloverFermionR FundFermionAction; +typedef WilsonCloverFermionD FundFermionAction; typedef typename FundFermionAction::FermionField FundFermionField; typedef WilsonTwoIndexAntiSymmetricImplR ASymmImplPolicy; -typedef WilsonCloverTwoIndexAntiSymmetricFermionR ASymmFermionAction; +typedef WilsonCloverTwoIndexAntiSymmetricFermionD ASymmFermionAction; typedef typename ASymmFermionAction::FermionField ASymmFermionField; @@ -175,3 +177,6 @@ NerscHmcCheckpointer Checkpoint(CPparams); Grid_finalize(); } +#else +int main(int argc,char **argv){ return 0;}; +#endif diff 
--git a/tests/lanczos/Test_compressed_lanczos.cc b/tests/lanczos/Test_compressed_lanczos.cc index d7d0d52d..28df3f99 100644 --- a/tests/lanczos/Test_compressed_lanczos.cc +++ b/tests/lanczos/Test_compressed_lanczos.cc @@ -188,8 +188,8 @@ int main (int argc, char ** argv) { std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " Ls: " << Ls << std::endl; // ZMobius EO Operator - ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.); - SchurDiagTwoOperator HermOp(Ddwf); + ZMobiusFermionD Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.); + SchurDiagTwoOperator HermOp(Ddwf); // Eigenvector storage LanczosParams fine =Params.FineParams; diff --git a/tests/lanczos/Test_compressed_lanczos_gparity.cc b/tests/lanczos/Test_compressed_lanczos_gparity.cc new file mode 100644 index 00000000..ca353d61 --- /dev/null +++ b/tests/lanczos/Test_compressed_lanczos_gparity.cc @@ -0,0 +1,485 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_compressed_lanczos_gparity.cc + + Copyright (C) 2017 + +Author: Christopher Kelly +Author: Leans heavily on Christoph Lehner's code +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +/* + * Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features + * in Grid that were intended to be used to support blocked Aggregates, from + */ +#include +#include +#include + +using namespace std; +using namespace Grid; + +//For the CPS configurations we have to manually seed the RNG and deal with an incorrect factor of 2 in the plaquette metadata +void readConfiguration(LatticeGaugeFieldD &U, + const std::string &config, + bool is_cps_cfg = false){ + + if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = false; + + typedef GaugeStatistics GaugeStats; + + FieldMetaData header; + NerscIO::readConfiguration(U, header, config); + + if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = true; +} + +//Lanczos parameters in CPS conventions +struct CPSLanczosParams : Serializable { +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(CPSLanczosParams, + RealD, alpha, + RealD, beta, + int, ch_ord, + int, N_use, + int, N_get, + int, N_true_get, + RealD, stop_rsd, + int, maxits); + + //Translations + ChebyParams getChebyParams() const{ + ChebyParams out; + out.alpha = beta*beta; //aka lo + out.beta = alpha*alpha; //aka hi + out.Npoly = ch_ord+1; + return out; + } + int Nstop() const{ return N_true_get; } + int Nm() const{ return N_use; } + int Nk() const{ return N_get; } +}; + +//Maybe this class should be in the main library? +template +class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos +{ +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseField; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid, + LinearOperatorBase &FineOp, + int checkerboard) + // Base constructor + : LocalCoherenceLanczos(FineGrid,CoarseGrid,FineOp,checkerboard) + {}; + + void checkpointFine(std::string evecs_file,std::string evals_file) + { + assert(this->subspace.size()==nbasis); + emptyUserRecord record; + Grid::ScidacWriter WR(this->_FineGrid->IsBoss()); + WR.open(evecs_file); + for(int k=0;ksubspace[k],record); + } + WR.close(); + + XmlWriter WRx(evals_file); + write(WRx,"evals",this->evals_fine); + } + + void checkpointFineRestore(std::string evecs_file,std::string evals_file) + { + this->evals_fine.resize(nbasis); + this->subspace.resize(nbasis,this->_FineGrid); + + std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<evals_fine); + + if(this->evals_fine.size() < nbasis) assert(0 && "Not enough fine evals to complete basis"); + if(this->evals_fine.size() > nbasis){ //allow the use of precomputed evecs with a larger #evecs + std::cout << GridLogMessage << "Truncating " << this->evals_fine.size() << " evals to basis size " << nbasis << std::endl; + this->evals_fine.resize(nbasis); + } + + std::cout << GridLogIRL<< "checkpointFineRestore: Reading evecs from "<subspace[k].Checkerboard()=this->_checkerboard; + RD.readScidacFieldRecord(this->subspace[k],record); + + } + RD.close(); + } + + void checkpointCoarse(std::string evecs_file,std::string evals_file) + { + int n = this->evec_coarse.size(); + emptyUserRecord record; + Grid::ScidacWriter WR(this->_CoarseGrid->IsBoss()); + WR.open(evecs_file); + for(int k=0;kevec_coarse[k],record); + } + WR.close(); + + XmlWriter WRx(evals_file); + write(WRx,"evals",this->evals_coarse); + } + + 
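+  //Typical round trip (sketch, using the file-name stubs adopted in the
+  //runTest driver below): a first run saves the fine basis with
+  //  checkpointFine(stub+"_evecs.scidac", stub+"_evals.xml");
+  //a later run recovers it via checkpointFineRestore(...) before the coarse
+  //IRL, and checkpointCoarse/checkpointCoarseRestore play the same roles for
+  //the coarse evecs.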
void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec) + { + std::cout << "resizing coarse vecs to " << nvec<< std::endl; + this->evals_coarse.resize(nvec); + this->evec_coarse.resize(nvec,this->_CoarseGrid); + std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evals from "<evals_coarse); + + assert(this->evals_coarse.size()==nvec); + emptyUserRecord record; + std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evecs from "<evec_coarse[k],record); + } + RD.close(); + } +}; + +struct Options{ + std::vector blockSize; + std::vector GparityDirs; + int Ls; + RealD mass; + RealD M5; + RealD mobius_scale; + std::string config; + bool is_cps_cfg; + + double coarse_relax_tol; + int smoother_ord; + + CPSLanczosParams fine; + CPSLanczosParams coarse; + + bool write_fine = false; + std::string write_fine_file; + + bool read_fine = false; + std::string read_fine_file; + + bool write_coarse = false; + std::string write_coarse_file; + + bool read_coarse = false; + std::string read_coarse_file; + + + Options(){ + blockSize = std::vector ({2,2,2,2,2}); + GparityDirs = std::vector ({1,1,1}); //1 for each GP direction + + Ls = 12; + mass = 0.01; + M5 = 1.8; + is_cps_cfg = false; + mobius_scale = 2.0; + + fine.alpha = 2; + fine.beta = 0.1; + fine.ch_ord = 100; + fine.N_use = 70; + fine.N_get = 60; + fine.N_true_get = 60; + fine.stop_rsd = 1e-8; + fine.maxits = 10000; + + coarse.alpha = 2; + coarse.beta = 0.1; + coarse.ch_ord = 100; + coarse.N_use = 200; + coarse.N_get = 190; + coarse.N_true_get = 190; + coarse.stop_rsd = 1e-8; + coarse.maxits = 10000; + + coarse_relax_tol = 1e5; + smoother_ord = 20; + + write_fine = false; + read_fine = false; + write_coarse = false; + read_coarse = false; + } +}; + +template +void runTest(const Options &opt){ + //Fine grids + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(opt.Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(opt.Ls,UGrid); + + //Setup G-parity BCs + assert(Nd == 4); + std::vector dirs4(4); + for(int i=0;i<3;i++) dirs4[i] = opt.GparityDirs[i]; + dirs4[3] = 0; //periodic gauge BC in time + + std::cout << GridLogMessage << "Gauge BCs: " << dirs4 << std::endl; + ConjugateGimplD::setDirections(dirs4); //gauge BC + + GparityWilsonImplD::ImplParams Params; + for(int i=0;i SchurOp(action); + + typedef GparityWilsonImplD::SiteSpinor SiteSpinor; + + const CPSLanczosParams &fine = opt.fine; + const CPSLanczosParams &coarse = opt.coarse; + + std::cout << GridLogMessage << "Keep " << fine.N_true_get << " fine vectors" << std::endl; + std::cout << GridLogMessage << "Keep " << coarse.N_true_get << " coarse vectors" << std::endl; + assert(coarse.N_true_get >= fine.N_true_get); + + assert(nbasis<=fine.N_true_get); + LocalCoherenceLanczosScidac _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,SchurOp,Odd); + std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl; + + //Compute and/or read fine evecs + if(opt.read_fine){ + _LocalCoherenceLanczos.checkpointFineRestore(opt.read_fine_file + "_evecs.scidac", opt.read_fine_file + "_evals.xml"); + }else{ + std::cout << GridLogMessage << "Performing fine grid IRL" << std::endl; + std::cout << GridLogMessage << "Using Chebyshev alpha=" << fine.alpha << " beta=" << fine.beta << " ord=" << fine.ch_ord << std::endl; 
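+      // Note: getChebyParams() converts these CPS-convention numbers into Grid's
+      // Chebyshev window as lo = beta*beta, hi = alpha*alpha, Npoly = ch_ord+1;
+      // e.g. the defaults alpha=2, beta=0.1, ch_ord=100 give Npoly=101 acting on
+      // the interval [0.01, 4].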
+ _LocalCoherenceLanczos.calcFine(fine.getChebyParams(), + fine.Nstop(),fine.Nk(),fine.Nm(), + fine.stop_rsd,fine.maxits,0,0); + if(opt.write_fine){ + std::cout << GridLogIRL<<"Checkpointing Fine evecs"< cheb_smoother(smoother); + + FermionField evec(FrbGrid); + FermionField evec_sm(FrbGrid); //smoothed + FermionField tmp(FrbGrid); + RealD eval; + + for(int i=0;i " << std::endl; + std::cout << GridLogMessage << " should have the format a.b.c where a,b,c are 0,1 depending on whether there are G-parity BCs in that direction" << std::endl; + std::cout << GridLogMessage << "Options:" << std::endl; + std::cout << GridLogMessage << "--Ls : Set Ls (default 12)" << std::endl; + std::cout << GridLogMessage << "--mass : Set the mass (default 0.01)" << std::endl; + std::cout << GridLogMessage << "--block : Set the block size. Format should be a.b.c.d.e where a-e are the block extents (default 2.2.2.2.2)" << std::endl; + std::cout << GridLogMessage << "--is_cps_cfg : Indicate that the configuration was generated with CPS where until recently the stored plaquette was wrong by a factor of 2" << std::endl; + std::cout << GridLogMessage << "--write_irl_templ: Write a template for the parameters file of the Lanczos to \"irl_templ.xml\"" << std::endl; + std::cout << GridLogMessage << "--read_irl_fine : Read the parameters file for the fine Lanczos" << std::endl; + std::cout << GridLogMessage << "--read_irl_coarse : Read the parameters file for the coarse Lanczos" << std::endl; + std::cout << GridLogMessage << "--write_fine : Write fine evecs/evals to filename starting with the stub" << std::endl; + std::cout << GridLogMessage << "--read_fine : Read fine evecs/evals from filename starting with the stub" << std::endl; + std::cout << GridLogMessage << "--write_coarse : Write coarse evecs/evals to filename starting with the stub" << std::endl; + std::cout << GridLogMessage << "--read_coarse : Read coarse evecs/evals from filename starting with the stub" << std::endl; + std::cout << GridLogMessage << "--smoother_ord : Set the Chebyshev order of the smoother (default 20)" << std::endl; + std::cout << GridLogMessage << "--coarse_relax_tol : Set the relaxation parameter for evaluating the residual of the reconstructed eigenvectors outside of the basis (default 1e5)" << std::endl; + std::cout << GridLogMessage << "--basis_size : Select the basis size from 100,200,300,350 (default 100)" << std::endl; + Grid_finalize(); + return 1; + } + opt.config = argv[1]; + GridCmdOptionIntVector(argv[2], opt.GparityDirs); + assert(opt.GparityDirs.size() == 3); + + for(int i=3;i> opt.mass; + std::cout << GridLogMessage << "Set quark mass to " << opt.mass << std::endl; + }else if(sarg == "--block"){ + GridCmdOptionIntVector(argv[i+1], opt.blockSize); + assert(opt.blockSize.size() == 5); + std::cout << GridLogMessage << "Set block size to "; + for(int q=0;q<5;q++) std::cout << opt.blockSize[q] << " "; + std::cout << std::endl; + }else if(sarg == "--is_cps_cfg"){ + opt.is_cps_cfg = true; + }else if(sarg == "--write_irl_templ"){ + XmlWriter writer("irl_templ.xml"); + write(writer,"Params", opt.fine); + Grid_finalize(); + return 0; + }else if(sarg == "--read_irl_fine"){ + std::cout << GridLogMessage << "Reading fine IRL params from " << argv[i+1] << std::endl; + XmlReader reader(argv[i+1]); + read(reader, "Params", opt.fine); + }else if(sarg == "--read_irl_coarse"){ + std::cout << GridLogMessage << "Reading coarse IRL params from " << argv[i+1] << std::endl; + XmlReader reader(argv[i+1]); + read(reader, "Params", opt.coarse); + }else
if(sarg == "--write_fine"){ + opt.write_fine = true; + opt.write_fine_file = argv[i+1]; + }else if(sarg == "--read_fine"){ + opt.read_fine = true; + opt.read_fine_file = argv[i+1]; + }else if(sarg == "--write_coarse"){ + opt.write_coarse = true; + opt.write_coarse_file = argv[i+1]; + }else if(sarg == "--read_coarse"){ + opt.read_coarse = true; + opt.read_coarse_file = argv[i+1]; + }else if(sarg == "--smoother_ord"){ + std::istringstream ss(argv[i+1]); ss >> opt.smoother_ord; + std::cout << GridLogMessage << "Set smoother order to " << opt.smoother_ord << std::endl; + }else if(sarg == "--coarse_relax_tol"){ + std::istringstream ss(argv[i+1]); ss >> opt.coarse_relax_tol; + std::cout << GridLogMessage << "Set coarse IRL relaxation parameter to " << opt.coarse_relax_tol << std::endl; + }else if(sarg == "--mobius_scale"){ + std::istringstream ss(argv[i+1]); ss >> opt.mobius_scale; + std::cout << GridLogMessage << "Set Mobius scale to " << opt.mobius_scale << std::endl; + }else if(sarg == "--basis_size"){ + basis_size = std::stoi(argv[i+1]); + std::cout << GridLogMessage << "Set basis size to " << basis_size << std::endl; + } + } + + switch(basis_size){ + case 100: + runTest<100>(opt); break; + case 200: + runTest<200>(opt); break; + case 300: + runTest<300>(opt); break; + case 350: + runTest<350>(opt); break; + default: + std::cout << GridLogMessage << "Unsupported basis size " << basis_size << std::endl; + assert(0); + } + + Grid_finalize(); +} + diff --git a/tests/lanczos/Test_dwf_block_lanczos.README b/tests/lanczos/Test_dwf_block_lanczos.README new file mode 100644 index 00000000..179f9037 --- /dev/null +++ b/tests/lanczos/Test_dwf_block_lanczos.README @@ -0,0 +1,73 @@ +#Example script +DIR=/gpfs/alpine/phy157/proj-shared/phy157dwf/chulwoo/Grid/BL/build/tests/lanczos +BIN=${DIR}/Test_dwf_block_lanczos + +VOL='--grid 16.16.16.32 ' +GRID='--mpi 1.1.1.4 ' +CONF='--gconf ckpoint_lat.IEEE64BIG.2000 ' +OPT='--mass 0.01 --M5 1.8 --phase in.params --omega in.params --shm 4096' +#BL='--rbl 16.1024.128.1000.10 --split 1.1.4.4 --check_int 100 --resid 1.0e-5 --cheby_l 0.007 --cheby_u 7 --cheby_n 51' +BL='--rbl 4.128.16.100.10 --split 1.1.1.4 --check_int 25 --resid 1.0e-5 --cheby_l 0.007 --cheby_u 7 --cheby_n 51' + +ARGS=${CONF}" "${OPT}" "${BL}" "${VOL}" "${GRID} +export APP="${BIN} ${ARGS}" +echo APP=${APP} +#export JS="jsrun --nrs 32 -a4 -g4 -c42 -dpacked -b packed:7 --smpiargs="-gpu" " +export JS="jsrun --nrs 1 -a4 -g4 -c42 -dpacked -b packed:10 --smpiargs="-gpu" " +$JS $APP + +#sample in.param + +boundary_phase 0 1 0 +boundary_phase 1 1 0 +boundary_phase 2 1 0 +boundary_phase 3 -1 0 + +omega 0 0.5 0 +omega 1 0.5 0 +omega 2 0.5 0 +omega 3 0.5 0 +omega 4 0.5 0 +omega 5 0.5 0 +omega 6 0.5 0 +omega 7 0.5 0 +omega 8 0.5 0 +omega 9 0.5 0 +omega 10 0.5 0 +omega 11 0.5 0 + + +#output + +Grid : Message : 1.717474 s : Gauge Configuration ckpoint_lat.IEEE64BIG.2000 +Grid : Message : 1.717478 s : boundary_phase[0] = (1,0) +Grid : Message : 1.717497 s : boundary_phase[1] = (1,0) +Grid : Message : 1.717500 s : boundary_phase[2] = (1,0) +Grid : Message : 1.717503 s : boundary_phase[3] = (-1,0) +Grid : Message : 1.717506 s : Ls 12 +Grid : Message : 1.717507 s : mass 0.01 +Grid : Message : 1.717510 s : M5 1.8 +Grid : Message : 1.717512 s : mob_b 1.5 +Grid : Message : 1.717514 s : omega[0] = (0.5,0) +Grid : Message : 1.717517 s : omega[1] = (0.5,0) +Grid : Message : 1.717520 s : omega[2] = (0.5,0) +Grid : Message : 1.717523 s : omega[3] = (0.5,0) +Grid : Message : 1.717526 s : omega[4] = (0.5,0) +Grid : Message : 
1.717529 s : omega[5] = (0.5,0) +Grid : Message : 1.717532 s : omega[6] = (0.5,0) +Grid : Message : 1.717535 s : omega[7] = (0.5,0) +Grid : Message : 1.717538 s : omega[8] = (0.5,0) +Grid : Message : 1.717541 s : omega[9] = (0.5,0) +Grid : Message : 1.717544 s : omega[10] = (0.5,0) +Grid : Message : 1.717547 s : omega[11] = (0.5,0) +Grid : Message : 1.717550 s : Nu 4 +Grid : Message : 1.717551 s : Nk 128 +Grid : Message : 1.717552 s : Np 16 +Grid : Message : 1.717553 s : Nm 288 +Grid : Message : 1.717554 s : Nstop 100 +Grid : Message : 1.717555 s : Ntest 25 +Grid : Message : 1.717557 s : MaxIter 10 +Grid : Message : 1.717558 s : resid 1e-05 +Grid : Message : 1.717560 s : Cheby Poly 0.007,7,51 + + diff --git a/tests/lanczos/Test_dwf_block_lanczos.cc b/tests/lanczos/Test_dwf_block_lanczos.cc new file mode 100644 index 00000000..671f2fa6 --- /dev/null +++ b/tests/lanczos/Test_dwf_block_lanczos.cc @@ -0,0 +1,410 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_block_lanczos.cc + + Copyright (C) 2022 + +Author: Peter Boyle +Author: Yong-Chull Jang +Author: Chulwoo Jung + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include +#include + +using namespace std; +using namespace Grid; +//using namespace Grid::QCD; + +//typedef typename GparityDomainWallFermionR::FermionField FermionField; +typedef typename ZMobiusFermionF::FermionField FermionField; + +RealD AllZero(RealD x){ return 0.;} + +class CmdJobParams +{ + public: + std::string gaugefile; + + int Ls; + double mass; + double M5; + double mob_b; + std::vector omega; + std::vector boundary_phase; + std::vector mpi_split; + + LanczosType Impl; + int Nu; + int Nk; + int Np; + int Nm; + int Nstop; + int Ntest; + int MaxIter; + double resid; + + double low; + double high; + int order; + + CmdJobParams() + : gaugefile("Hot"), + Ls(8), mass(0.01), M5(1.8), mob_b(1.5), + Impl(LanczosType::irbl),mpi_split(4,1), + Nu(4), Nk(200), Np(200), Nstop(100), Ntest(1), MaxIter(10), resid(1.0e-8), + low(0.2), high(5.5), order(11) + {Nm=Nk+Np;}; + + void Parse(char **argv, int argc); +}; + + +void CmdJobParams::Parse(char **argv,int argc) +{ + std::string arg; + std::vector vi; + double re,im; + int expect, idx; + std::string vstr; + std::ifstream pfile; + + if( GridCmdOptionExists(argv,argv+argc,"--gconf") ){ + gaugefile = GridCmdOptionPayload(argv,argv+argc,"--gconf"); + } + + if( GridCmdOptionExists(argv,argv+argc,"--phase") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--phase"); + pfile.open(arg); + assert(pfile); + expect = 0; + while( pfile >> vstr ) { + if ( vstr.compare("boundary_phase") == 0 ) { + pfile >> vstr; + GridCmdOptionInt(vstr,idx); + assert(expect==idx); + pfile >> vstr; + GridCmdOptionFloat(vstr,re); + pfile >> vstr; + GridCmdOptionFloat(vstr,im); + boundary_phase.push_back({re,im}); + expect++; + } + } + pfile.close(); + } else { + for (int i=0; i<4; ++i) boundary_phase.push_back({1.,0.}); + } + + if( GridCmdOptionExists(argv,argv+argc,"--omega") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--omega"); + pfile.open(arg); + assert(pfile); + Ls = 0; + while( pfile >> vstr ) { + if ( vstr.compare("omega") == 0 ) { + pfile >> vstr; + GridCmdOptionInt(vstr,idx); + assert(Ls==idx); + pfile >> vstr; + GridCmdOptionFloat(vstr,re); + pfile >> vstr; + GridCmdOptionFloat(vstr,im); + omega.push_back({re,im}); + Ls++; + } + } + pfile.close(); + } else { + if( GridCmdOptionExists(argv,argv+argc,"--Ls") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--Ls"); + GridCmdOptionInt(arg,Ls); + } + } + + if( GridCmdOptionExists(argv,argv+argc,"--mass") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--mass"); + GridCmdOptionFloat(arg,mass); + } + + if( GridCmdOptionExists(argv,argv+argc,"--M5") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--M5"); + GridCmdOptionFloat(arg,M5); + } + + if( GridCmdOptionExists(argv,argv+argc,"--mob_b") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--mob_b"); + GridCmdOptionFloat(arg,mob_b); + } + + if( GridCmdOptionExists(argv,argv+argc,"--irbl") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--irbl"); + GridCmdOptionIntVector(arg,vi); + Nu = vi[0]; + Nk = vi[1]; + Np = vi[2]; + Nstop = vi[3]; + MaxIter = vi[4]; + // ypj[fixme] mode overriding message is needed. 
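+    // The --irbl payload is five dot-separated integers, Nu.Nk.Np.Nstop.MaxIter,
+    // parsed by GridCmdOptionIntVector; the --rbl example in
+    // Test_dwf_block_lanczos.README (4.128.16.100.10) uses the same field order.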
+ Impl = LanczosType::irbl; + Nm = Nk+Np; + } + + // block Lanczos with explicit extension of its dimensions + if( GridCmdOptionExists(argv,argv+argc,"--rbl") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--rbl"); + GridCmdOptionIntVector(arg,vi); + Nu = vi[0]; + Nk = vi[1]; + Np = vi[2]; // vector space is enlarged by adding Np vectors + Nstop = vi[3]; + MaxIter = vi[4]; + // ypj[fixme] mode overriding message is needed. + Impl = LanczosType::rbl; + Nm = Nk+Np*MaxIter; + } + +#if 1 + // block Lanczos with explicit extension of its dimensions + if( GridCmdOptionExists(argv,argv+argc,"--split") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--split"); + GridCmdOptionIntVector(arg,vi); + for(int i=0;i seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGridF); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + // ypj [note] why seed RNG5 again? bug? In this case, run with a default seed(). + GridParallelRNG RNG5rb(FrbGridF); RNG5rb.SeedFixedIntegers(seeds5); + + LatticeGaugeField Umu(UGrid); + std::vector U(4,UGrid); + LatticeGaugeFieldF UmuF(UGridF); + std::vector UF(4,UGridF); + + if ( JP.gaugefile.compare("Hot") == 0 ) { + SU3::HotConfiguration(RNG4, Umu); + } else { + FieldMetaData header; + NerscIO::readConfiguration(Umu,header,JP.gaugefile); + // ypj [fixme] additional checks for the loaded configuration? + } + precisionChange (UmuF,Umu); + + for(int mu=0;mu(Umu,mu); + } + + RealD mass = JP.mass; + RealD M5 = JP.M5; + +// ypj [fixme] flexible support for a various Fermions +// RealD mob_b = JP.mob_b; // Gparity +// std::vector omega; // ZMobius + +// GparityMobiusFermionD ::ImplParams params; +// std::vector twists({1,1,1,0}); +// params.twists = twists; +// GparityMobiusFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params); +// SchurDiagTwoOperator HermOp(Ddwf); + + +// int mrhs = JP.Nu; + int Ndir=4; + auto mpi_layout = GridDefaultMpi(); + std::vector mpi_split (Ndir,1); +#if 0 + int tmp=mrhs, dir=0; + std::cout << GridLogMessage << "dir= "<_processor,re); + src_tmp=re; + pickCheckerboard(Odd,src[i],src_tmp); + } + RNG5.Report(); +} else { + std::cout << GridLogMessage << "Using RNG5rb"< evec(JP.Nm,FrbGridF); + for(int i=0;i<1;++i){ + std::cout << GridLogMessage << i <<" / "<< JP.Nm <<" grid pointer "<< evec[i].Grid() << std::endl; + }; + + int Nconv; + IRBL.calc(eval,evec,src,Nconv,JP.Impl); + + + Grid_finalize(); +} diff --git a/tests/lanczos/Test_dwf_block_lanczos.cc.double b/tests/lanczos/Test_dwf_block_lanczos.cc.double new file mode 100644 index 00000000..c71b80ec --- /dev/null +++ b/tests/lanczos/Test_dwf_block_lanczos.cc.double @@ -0,0 +1,401 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_block_lanczos.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include +#include + +using namespace std; +using namespace Grid; +//using namespace Grid::QCD; + +//typedef typename GparityDomainWallFermionR::FermionField FermionField; +typedef typename ZMobiusFermionR::FermionField FermionField; + +RealD AllZero(RealD x){ return 0.;} + +class CmdJobParams +{ + public: + std::string gaugefile; + + int Ls; + double mass; + double M5; + double mob_b; + std::vector omega; + std::vector boundary_phase; + std::vector mpi_split; + + LanczosType Impl; + int Nu; + int Nk; + int Np; + int Nm; + int Nstop; + int Ntest; + int MaxIter; + double resid; + + double low; + double high; + int order; + + CmdJobParams() + : gaugefile("Hot"), + Ls(8), mass(0.01), M5(1.8), mob_b(1.5), + Impl(LanczosType::irbl),mpi_split(4,1), + Nu(4), Nk(200), Np(200), Nstop(100), Ntest(1), MaxIter(10), resid(1.0e-8), + low(0.2), high(5.5), order(11) + {Nm=Nk+Np;}; + + void Parse(char **argv, int argc); +}; + + +void CmdJobParams::Parse(char **argv,int argc) +{ + std::string arg; + std::vector vi; + double re,im; + int expect, idx; + std::string vstr; + std::ifstream pfile; + + if( GridCmdOptionExists(argv,argv+argc,"--gconf") ){ + gaugefile = GridCmdOptionPayload(argv,argv+argc,"--gconf"); + } + + if( GridCmdOptionExists(argv,argv+argc,"--phase") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--phase"); + pfile.open(arg); + assert(pfile); + expect = 0; + while( pfile >> vstr ) { + if ( vstr.compare("boundary_phase") == 0 ) { + pfile >> vstr; + GridCmdOptionInt(vstr,idx); + assert(expect==idx); + pfile >> vstr; + GridCmdOptionFloat(vstr,re); + pfile >> vstr; + GridCmdOptionFloat(vstr,im); + boundary_phase.push_back({re,im}); + expect++; + } + } + pfile.close(); + } else { + for (int i=0; i<4; ++i) boundary_phase.push_back({1.,0.}); + } + + if( GridCmdOptionExists(argv,argv+argc,"--omega") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--omega"); + pfile.open(arg); + assert(pfile); + Ls = 0; + while( pfile >> vstr ) { + if ( vstr.compare("omega") == 0 ) { + pfile >> vstr; + GridCmdOptionInt(vstr,idx); + assert(Ls==idx); + pfile >> vstr; + GridCmdOptionFloat(vstr,re); + pfile >> vstr; + GridCmdOptionFloat(vstr,im); + omega.push_back({re,im}); + Ls++; + } + } + pfile.close(); + } else { + if( GridCmdOptionExists(argv,argv+argc,"--Ls") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--Ls"); + GridCmdOptionInt(arg,Ls); + } + } + + if( GridCmdOptionExists(argv,argv+argc,"--mass") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--mass"); + GridCmdOptionFloat(arg,mass); + } + + if( GridCmdOptionExists(argv,argv+argc,"--M5") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--M5"); + GridCmdOptionFloat(arg,M5); + } + + if( GridCmdOptionExists(argv,argv+argc,"--mob_b") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--mob_b"); + GridCmdOptionFloat(arg,mob_b); + } + + if( GridCmdOptionExists(argv,argv+argc,"--irbl") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--irbl"); + GridCmdOptionIntVector(arg,vi); + Nu = vi[0]; + Nk = vi[1]; + Np = vi[2]; + Nstop = vi[3]; + MaxIter = vi[4]; + // ypj[fixme] mode overriding message is needed. 
+ Impl = LanczosType::irbl; + Nm = Nk+Np; + } + + // block Lanczos with explicit extension of its dimensions + if( GridCmdOptionExists(argv,argv+argc,"--rbl") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--rbl"); + GridCmdOptionIntVector(arg,vi); + Nu = vi[0]; + Nk = vi[1]; + Np = vi[2]; // vector space is enlarged by adding Np vectors + Nstop = vi[3]; + MaxIter = vi[4]; + // ypj[fixme] mode overriding message is needed. + Impl = LanczosType::rbl; + Nm = Nk+Np*MaxIter; + } + +#if 1 + // block Lanczos with explicit extension of its dimensions + if( GridCmdOptionExists(argv,argv+argc,"--split") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--split"); + GridCmdOptionIntVector(arg,vi); + for(int i=0;i seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + // ypj [note] why seed RNG5 again? bug? In this case, run with a default seed(). + GridParallelRNG RNG5rb(FrbGrid); RNG5rb.SeedFixedIntegers(seeds5); + + LatticeGaugeField Umu(UGrid); + std::vector U(4,UGrid); + + if ( JP.gaugefile.compare("Hot") == 0 ) { + SU3::HotConfiguration(RNG4, Umu); + } else { + FieldMetaData header; + NerscIO::readConfiguration(Umu,header,JP.gaugefile); + // ypj [fixme] additional checks for the loaded configuration? + } + + for(int mu=0;mu(Umu,mu); + } + + RealD mass = JP.mass; + RealD M5 = JP.M5; + +// ypj [fixme] flexible support for a various Fermions +// RealD mob_b = JP.mob_b; // Gparity +// std::vector omega; // ZMobius + +// GparityMobiusFermionD ::ImplParams params; +// std::vector twists({1,1,1,0}); +// params.twists = twists; +// GparityMobiusFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params); +// SchurDiagTwoOperator HermOp(Ddwf); + + +// int mrhs = JP.Nu; + int Ndir=4; + auto mpi_layout = GridDefaultMpi(); + std::vector mpi_split (Ndir,1); +#if 0 + int tmp=mrhs, dir=0; + std::cout << GridLogMessage << "dir= "<_processor,re); + src_tmp=re; + pickCheckerboard(Odd,src[i],src_tmp); + } + RNG5.Report(); +} else { + std::cout << GridLogMessage << "Using RNG5rb"< evec(JP.Nm,FrbGrid); + for(int i=0;i<1;++i){ + std::cout << GridLogMessage << i <<" / "<< JP.Nm <<" grid pointer "<< evec[i].Grid() << std::endl; + }; + + int Nconv; + IRBL.calc(eval,evec,src,Nconv,JP.Impl); + + + Grid_finalize(); +} diff --git a/tests/lanczos/Test_dwf_block_lanczos.cc.single b/tests/lanczos/Test_dwf_block_lanczos.cc.single new file mode 100644 index 00000000..7449e32a --- /dev/null +++ b/tests/lanczos/Test_dwf_block_lanczos.cc.single @@ -0,0 +1,408 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_block_lanczos.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include +#include + +using namespace std; +using namespace Grid; +//using namespace Grid::QCD; + +//typedef typename GparityDomainWallFermionR::FermionField FermionField; +typedef typename ZMobiusFermionF::FermionField FermionField; + +RealD AllZero(RealD x){ return 0.;} + +class CmdJobParams +{ + public: + std::string gaugefile; + + int Ls; + double mass; + double M5; + double mob_b; + std::vector omega; + std::vector boundary_phase; + std::vector mpi_split; + + LanczosType Impl; + int Nu; + int Nk; + int Np; + int Nm; + int Nstop; + int Ntest; + int MaxIter; + double resid; + + double low; + double high; + int order; + + CmdJobParams() + : gaugefile("Hot"), + Ls(8), mass(0.01), M5(1.8), mob_b(1.5), + Impl(LanczosType::irbl),mpi_split(4,1), + Nu(4), Nk(200), Np(200), Nstop(100), Ntest(1), MaxIter(10), resid(1.0e-8), + low(0.2), high(5.5), order(11) + {Nm=Nk+Np;}; + + void Parse(char **argv, int argc); +}; + + +void CmdJobParams::Parse(char **argv,int argc) +{ + std::string arg; + std::vector vi; + double re,im; + int expect, idx; + std::string vstr; + std::ifstream pfile; + + if( GridCmdOptionExists(argv,argv+argc,"--gconf") ){ + gaugefile = GridCmdOptionPayload(argv,argv+argc,"--gconf"); + } + + if( GridCmdOptionExists(argv,argv+argc,"--phase") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--phase"); + pfile.open(arg); + assert(pfile); + expect = 0; + while( pfile >> vstr ) { + if ( vstr.compare("boundary_phase") == 0 ) { + pfile >> vstr; + GridCmdOptionInt(vstr,idx); + assert(expect==idx); + pfile >> vstr; + GridCmdOptionFloat(vstr,re); + pfile >> vstr; + GridCmdOptionFloat(vstr,im); + boundary_phase.push_back({re,im}); + expect++; + } + } + pfile.close(); + } else { + for (int i=0; i<4; ++i) boundary_phase.push_back({1.,0.}); + } + + if( GridCmdOptionExists(argv,argv+argc,"--omega") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--omega"); + pfile.open(arg); + assert(pfile); + Ls = 0; + while( pfile >> vstr ) { + if ( vstr.compare("omega") == 0 ) { + pfile >> vstr; + GridCmdOptionInt(vstr,idx); + assert(Ls==idx); + pfile >> vstr; + GridCmdOptionFloat(vstr,re); + pfile >> vstr; + GridCmdOptionFloat(vstr,im); + omega.push_back({re,im}); + Ls++; + } + } + pfile.close(); + } else { + if( GridCmdOptionExists(argv,argv+argc,"--Ls") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--Ls"); + GridCmdOptionInt(arg,Ls); + } + } + + if( GridCmdOptionExists(argv,argv+argc,"--mass") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--mass"); + GridCmdOptionFloat(arg,mass); + } + + if( GridCmdOptionExists(argv,argv+argc,"--M5") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--M5"); + GridCmdOptionFloat(arg,M5); + } + + if( GridCmdOptionExists(argv,argv+argc,"--mob_b") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--mob_b"); + GridCmdOptionFloat(arg,mob_b); + } + + if( GridCmdOptionExists(argv,argv+argc,"--irbl") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--irbl"); + GridCmdOptionIntVector(arg,vi); + Nu = vi[0]; + Nk = vi[1]; + Np = vi[2]; + Nstop = vi[3]; + MaxIter = vi[4]; + // ypj[fixme] mode overriding message is needed. 
+ Impl = LanczosType::irbl; + Nm = Nk+Np; + } + + // block Lanczos with explicit extension of its dimensions + if( GridCmdOptionExists(argv,argv+argc,"--rbl") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--rbl"); + GridCmdOptionIntVector(arg,vi); + Nu = vi[0]; + Nk = vi[1]; + Np = vi[2]; // vector space is enlarged by adding Np vectors + Nstop = vi[3]; + MaxIter = vi[4]; + // ypj[fixme] mode overriding message is needed. + Impl = LanczosType::rbl; + Nm = Nk+Np*MaxIter; + } + +#if 1 + // block Lanczos with explicit extension of its dimensions + if( GridCmdOptionExists(argv,argv+argc,"--split") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--split"); + GridCmdOptionIntVector(arg,vi); + for(int i=0;i seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGridF); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + // ypj [note] why seed RNG5 again? bug? In this case, run with a default seed(). + GridParallelRNG RNG5rb(FrbGridF); RNG5rb.SeedFixedIntegers(seeds5); + + LatticeGaugeField Umu(UGrid); + std::vector U(4,UGrid); + LatticeGaugeFieldF UmuF(UGridF); + std::vector UF(4,UGridF); + + if ( JP.gaugefile.compare("Hot") == 0 ) { + SU3::HotConfiguration(RNG4, Umu); + } else { + FieldMetaData header; + NerscIO::readConfiguration(Umu,header,JP.gaugefile); + // ypj [fixme] additional checks for the loaded configuration? + } + precisionChange (UmuF,Umu); + + for(int mu=0;mu(Umu,mu); + } + + RealD mass = JP.mass; + RealD M5 = JP.M5; + +// ypj [fixme] flexible support for a various Fermions +// RealD mob_b = JP.mob_b; // Gparity +// std::vector omega; // ZMobius + +// GparityMobiusFermionD ::ImplParams params; +// std::vector twists({1,1,1,0}); +// params.twists = twists; +// GparityMobiusFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params); +// SchurDiagTwoOperator HermOp(Ddwf); + + +// int mrhs = JP.Nu; + int Ndir=4; + auto mpi_layout = GridDefaultMpi(); + std::vector mpi_split (Ndir,1); +#if 0 + int tmp=mrhs, dir=0; + std::cout << GridLogMessage << "dir= "<_processor,re); + src_tmp=re; + pickCheckerboard(Odd,src[i],src_tmp); + } + RNG5.Report(); +} else { + std::cout << GridLogMessage << "Using RNG5rb"< evec(JP.Nm,FrbGridF); + for(int i=0;i<1;++i){ + std::cout << GridLogMessage << i <<" / "<< JP.Nm <<" grid pointer "<< evec[i].Grid() << std::endl; + }; + + int Nconv; + IRBL.calc(eval,evec,src,Nconv,JP.Impl); + + + Grid_finalize(); +} diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc index 669a7b6d..7a84a465 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc @@ -188,8 +188,8 @@ int main (int argc, char ** argv) { std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " Ls: " << Ls << std::endl; // ZMobius EO Operator - ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.); - SchurDiagTwoOperator HermOp(Ddwf); + ZMobiusFermionD Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.); + SchurDiagTwoOperator HermOp(Ddwf); // Eigenvector storage LanczosParams fine =Params.FineParams; diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg_synthetic.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg_synthetic.cc index f3cb567c..e82a9741 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg_synthetic.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg_synthetic.cc @@ -301,8 
+301,8 @@ int main (int argc, char ** argv) { std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " Ls: " << Ls << std::endl; // ZMobius EO Operator - ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.); - SchurDiagTwoOperator HermOp(Ddwf); + ZMobiusFermionD Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.); + SchurDiagTwoOperator HermOp(Ddwf); // Eigenvector storage LanczosParams fine =Params.FineParams; diff --git a/tests/lanczos/Test_dwf_lanczos.cc b/tests/lanczos/Test_dwf_lanczos.cc index 00d29ec0..d10c62d3 100644 --- a/tests/lanczos/Test_dwf_lanczos.cc +++ b/tests/lanczos/Test_dwf_lanczos.cc @@ -31,71 +31,121 @@ using namespace std; using namespace Grid; ; -typedef typename GparityDomainWallFermionR::FermionField FermionField; +template +struct Setup{}; -RealD AllZero(RealD x){ return 0.;} +template<> +struct Setup{ + static GparityMobiusFermionF* getAction(LatticeGaugeFieldF &Umu, + GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){ + RealD mass=0.00054; + RealD M5=1.8; + RealD mob_b=1.5; + GparityMobiusFermionD ::ImplParams params; + std::vector twists({1,1,1,0}); + params.twists = twists; + return new GparityMobiusFermionF(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params); + } +}; -int main (int argc, char ** argv) -{ - Grid_init(&argc,&argv); +template<> +struct Setup{ + static DomainWallFermionF* getAction(LatticeGaugeFieldF &Umu, + GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){ + RealD mass=0.00054; + RealD M5=1.8; + return new DomainWallFermionF(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + } +}; - const int Ls=8; +template<> +struct Setup{ + static MobiusFermionF* getAction(LatticeGaugeFieldF &Umu, + GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){ + RealD mass=0.00054; + RealD M5=1.8; + RealD mob_b=1.5; + std::vector boundary = {1,1,1,-1}; + MobiusFermionF::ImplParams Params(boundary); + + std::cout << GridLogMessage << "mass "< +void run(){ + typedef typename Action::FermionField FermionField; + const int Ls=12; GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); - printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n",UGrid,UrbGrid,FGrid,FrbGrid); +// printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n",UGrid,UrbGrid,FGrid,FrbGrid); + + GridCartesian* UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi()); + GridRedBlackCartesian* UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF); + GridCartesian* FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls, UGridF); + GridRedBlackCartesian* FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGridF); + std::vector seeds4({1,2,3,4}); std::vector seeds5({5,6,7,8}); - GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); - GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); - GridParallelRNG RNG5rb(FrbGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG5(FGridF);
RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGridF); RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG RNG5rb(FrbGridF); RNG5.SeedFixedIntegers(seeds5); LatticeGaugeField Umu(UGrid); - SU::HotConfiguration(RNG4, Umu); +// SU::HotConfiguration(RNG4, Umu); + FieldMetaData header; + std::string file("./config"); - std::vector U(4,UGrid); - for(int mu=0;mu(Umu,mu); - } - - RealD mass=0.01; - RealD M5=1.8; - RealD mob_b=1.5; -// DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); - GparityMobiusFermionD ::ImplParams params; - std::vector twists({1,1,1,0}); - params.twists = twists; - GparityMobiusFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params); +// int precision32 = 0; +// int tworow = 0; +// NerscIO::writeConfiguration(Umu,file,tworow,precision32); + NerscIO::readConfiguration(Umu,header,file); -// MdagMLinearOperator HermOp(Ddwf); -// SchurDiagTwoOperator HermOp(Ddwf); - SchurDiagTwoOperator HermOp(Ddwf); -// SchurDiagMooeeOperator HermOp(Ddwf); + LatticeGaugeFieldF UmuF(UGridF); + precisionChange(UmuF, Umu); - const int Nstop = 30; - const int Nk = 40; + Action *action = Setup::getAction(UmuF,FGridF,FrbGridF,UGridF,UrbGridF); + + //MdagMLinearOperator HermOp(Ddwf); +// SchurDiagTwoOperator HermOp(*action); + SchurDiagOneOperator HermOp(*action); + + const int Nstop = 150; + const int Nk = 160; const int Np = 40; const int Nm = Nk+Np; const int MaxIt= 10000; - RealD resid = 1.0e-8; + RealD resid = 1.0e-6; + std::cout << GridLogMessage << "Nstop "< Coeffs { 0.,-1.}; Polynomial PolyX(Coeffs); - Chebyshev Cheby(0.2,5.,11); + Chebyshev Cheby(0.0000006,5.5,4001); + std::cout << GridLogMessage << "Cheby(0.0000006,5.5,4001) "< OpCheby(Cheby,HermOp); - PlainHermOp Op (HermOp); + PlainHermOp Op (HermOp); ImplicitlyRestartedLanczos IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt); - - + std::vector eval(Nm); - FermionField src(FrbGrid); + FermionField src(FrbGridF); gaussian(RNG5rb,src); - std::vector evec(Nm,FrbGrid); + std::vector evec(Nm,FrbGridF); for(int i=0;i<1;i++){ std::cout << GridLogMessage <(); + }else if(action == "DWF"){ + run(); + }else if(action == "Mobius"){ + run(); + }else{ + std::cout << "Unknown action" << std::endl; + exit(1); + } + Grid_finalize(); } diff --git a/tests/lanczos/Test_evec_compression.cc b/tests/lanczos/Test_evec_compression.cc new file mode 100644 index 00000000..1636ea3a --- /dev/null +++ b/tests/lanczos/Test_evec_compression.cc @@ -0,0 +1,582 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_evec_compression.cc + + Copyright (C) 2017 + +Author: Christopher Kelly +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +/* + * + * This test generates eigenvectors using the Lanczos algorithm then attempts to use local coherence compression + * to express those vectors in terms of a basis formed from a subset. This test is useful for finding the optimal + * blocking and basis size for performing a Local Coherence Lanczos + */ +#include +#include +#include + +using namespace std; +using namespace Grid; + +//For the CPS configurations we have to manually seed the RNG and deal with an incorrect factor of 2 in the plaquette metadata +template +void readConfiguration(LatticeGaugeFieldD &U, + const std::string &config, + bool is_cps_cfg = false){ + + if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = false; + + typedef GaugeStatistics GaugeStats; + + FieldMetaData header; + NerscIO::readConfiguration(U, header, config); + + if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = true; +} + +//Lanczos parameters in CPS conventions +struct CPSLanczosParams : Serializable { +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(CPSLanczosParams, + RealD, alpha, + RealD, beta, + int, ch_ord, + int, N_use, + int, N_get, + int, N_true_get, + RealD, stop_rsd, + int, maxits); + + //Translations + ChebyParams getChebyParams() const{ + ChebyParams out; + out.alpha = beta*beta; //aka lo + out.beta = alpha*alpha; //aka hi + out.Npoly = ch_ord+1; + return out; + } + int Nstop() const{ return N_true_get; } + int Nm() const{ return N_use; } + int Nk() const{ return N_get; } +}; + + +template +class LocalCoherenceCompressor{ +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice CoarseField; + typedef Lattice FineField; + + void compress(std::vector &basis, + std::vector &compressed_evecs, + const std::vector &evecs_in, + GridBase *FineGrid, + GridBase *CoarseGrid){ + int nevecs = evecs_in.size(); + assert(nevecs > nbasis); + + //Construct the basis + basis.resize(nbasis, FineGrid); + for(int b=0;b &basis, const std::vector &compressed_evecs) const{ + blockPromote(compressed_evecs[i],evec,basis); + } + + //Test uncompressed eigenvectors of Linop.HermOp to precision 'base_tolerance' for i=nbasis + //Because the uncompressed evec has a lot of high mode noise (unimportant for deflation) we apply a smoother before testing. 
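+  //Schematically (a sketch of the check, not additional functionality): for each i
+  //we form v_i = smoother(uncompress(compressed_evec_i)) and require the residual
+  //of (H - eval_i) v_i to sit below base_tolerance for i < nbasis, relaxed by the
+  //factor 'relax' for the reconstructed evecs outside the basis (i >= nbasis).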
+ //The Chebyshev used by the Lanczos should be sufficient as a smoother + bool testCompression(LinearOperatorBase &Linop, OperatorFunction &smoother, + const std::vector &basis, const std::vector &compressed_evecs, const std::vector &evals, + const RealD base_tolerance, const RealD relax){ + std::cout << GridLogMessage << "Testing quality of uncompressed evecs (after smoothing)" << std::endl; + + GridBase* FineGrid = basis[0].Grid(); + GridBase* CoarseGrid = compressed_evecs[0].Grid(); + + bool fail = false; + FineField evec(FineGrid), Mevec(FineGrid), evec_sm(FineGrid); + for(int i=0;i tol) fail = true; + } + return fail; + } + + //Compare uncompressed evecs to original evecs + void compareEvecs(const std::vector &basis, const std::vector &compressed_evecs, const std::vector &orig_evecs){ + std::cout << GridLogMessage << "Comparing uncompressed evecs to original evecs" << std::endl; + + GridBase* FineGrid = basis[0].Grid(); + GridBase* CoarseGrid = compressed_evecs[0].Grid(); + + FineField evec(FineGrid), diff(FineGrid); + for(int i=0;i +void compareBlockPromoteTimings(const std::vector > &basis, const std::vector > > &compressed_evecs){ + typedef iVector CoarseSiteVector; + typedef Lattice CoarseScalar; + typedef Lattice CoarseField; + typedef Lattice FineField; + + GridStopWatch timer; + + GridBase* FineGrid = basis[0].Grid(); + GridBase* CoarseGrid = compressed_evecs[0].Grid(); + + FineField v1(FineGrid), v2(FineGrid); + + //Start with a cold start + for(int i=0;i blockSize; + std::vector GparityDirs; + + bool write_fine; + std::string write_fine_file; + bool read_fine; + std::string read_fine_file; + + int basis_size; + + Args(){ + blockSize = {2,2,2,2,2}; + GparityDirs = {1,1,1}; //1 for each GP direction + + Ls = 12; + mass = 0.01; + M5 = 1.8; + is_cps_cfg = false; + mobius_scale = 2; + + fine.alpha = 2; + fine.beta = 0.1; + fine.ch_ord = 100; + fine.N_use = 70; + fine.N_get = 60; + fine.N_true_get = 60; + fine.stop_rsd = 1e-8; + fine.maxits = 10000; + + coarse_relax_tol = 1e5; + + write_fine = false; + read_fine = false; + + basis_size = 100; + } +}; + + +GparityWilsonImplD::ImplParams setupGparityParams(const std::vector &GparityDirs){ + //Setup G-parity BCs + assert(Nd == 4); + std::vector dirs4(4); + for(int i=0;i<3;i++) dirs4[i] = GparityDirs[i]; + dirs4[3] = 0; //periodic gauge BC in time + + std::cout << GridLogMessage << "Gauge BCs: " << dirs4 << std::endl; + ConjugateGimplD::setDirections(dirs4); //gauge BC + + GparityWilsonImplD::ImplParams Params; + for(int i=0;i +void run_b(ActionType &action, const std::string &config, const Args &args){ + //Fine grids + GridCartesian * UGrid = (GridCartesian*)action.GaugeGrid(); + GridRedBlackCartesian * UrbGrid = (GridRedBlackCartesian*)action.GaugeRedBlackGrid(); + GridCartesian * FGrid = (GridCartesian*)action.FermionGrid(); + GridRedBlackCartesian * FrbGrid = (GridRedBlackCartesian*)action.FermionRedBlackGrid(); + + //Setup the coarse grids + auto fineLatt = GridDefaultLatt(); + Coordinate coarseLatt(4); + for (int d=0;d<4;d++){ + coarseLatt[d] = fineLatt[d]/args.blockSize[d]; assert(coarseLatt[d]*args.blockSize[d]==fineLatt[d]); + } + + std::cout << GridLogMessage<< " 5d coarse lattice is "; + for (int i=0;i<4;i++){ + std::cout << coarseLatt[i]<<"x"; + } + int cLs = args.Ls/args.blockSize[4]; assert(cLs*args.blockSize[4]==args.Ls); + std::cout << cLs< CoarseSiteVector; + typedef Lattice CoarseScalar; + typedef Lattice CoarseField; + + typedef typename ActionType::FermionField FermionField; + + SchurDiagTwoOperator 
SchurOp(action); + + typedef typename ActionType::SiteSpinor SiteSpinor; + + const CPSLanczosParams &fine = args.fine; + + //Do the fine Lanczos + std::vector evals; + std::vector evecs; + + if(args.read_fine){ + evals.resize(fine.N_true_get); + evecs.resize(fine.N_true_get, FrbGrid); + + std::string evals_file = args.read_fine_file + "_evals.xml"; + std::string evecs_file = args.read_fine_file + "_evecs.scidac"; + + std::cout << GridLogIRL<< "Reading evals from "< Cheby(fine.getChebyParams()); + FunctionHermOp ChebyOp(Cheby,SchurOp); + PlainHermOp Op(SchurOp); + + evals.resize(Nm); + evecs.resize(Nm,FrbGrid); + + ImplicitlyRestartedLanczos IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,0,0); + + FermionField src(FrbGrid); + typedef typename FermionField::scalar_type Scalar; + src=Scalar(1.0); + src.Checkerboard() = Odd; + + int Nconv; + IRL.calc(evals, evecs,src,Nconv,false); + if(Nconv < Nstop) assert(0 && "Fine lanczos failed to converge the required number of evecs"); //algorithm doesn't consider this a failure + if(Nconv > Nstop){ + //Yes this potentially throws away some evecs but it is better than having a random number of evecs between Nstop and Nm! + evals.resize(Nstop); + evecs.resize(Nstop, FrbGrid); + } + + if(args.write_fine){ + std::string evals_file = args.write_fine_file + "_evals.xml"; + std::string evecs_file = args.write_fine_file + "_evecs.scidac"; + + std::cout << GridLogIRL<< "Writing evecs to "<IsBoss()); + WR.open(evecs_file); + for(int k=0;k compressor; + std::vector basis(nbasis,FrbGrid); + std::vector compressed_evecs(evecs.size(),CoarseGrid5); + + compressor.compress(basis, compressed_evecs, evecs, FrbGrid, CoarseGrid5); + + compareBlockPromoteTimings(basis, compressed_evecs); + + //Compare uncompressed and original evecs + compressor.compareEvecs(basis, compressed_evecs, evecs); + + //Create the smoother + Chebyshev smoother(fine.getChebyParams()); + + //Test the quality of the uncompressed evecs (testCompression returns true on failure) + assert( !compressor.testCompression(SchurOp, smoother, basis, compressed_evecs, evals, fine.stop_rsd, args.coarse_relax_tol) ); +} + +template +void run(ActionType &action, const std::string &config, const Args &args){ + switch(args.basis_size){ + case 50: + return run_b<50>(action,config,args); + case 100: + return run_b<100>(action,config,args); + case 150: + return run_b<150>(action,config,args); + case 200: + return run_b<200>(action,config,args); + case 250: + return run_b<250>(action,config,args); + case 300: + return run_b<300>(action,config,args); + case 350: + return run_b<350>(action,config,args); + case 400: + return run_b<400>(action,config,args); + default: + assert(0 && "Unsupported basis size: allowed values are 50,100,150,200,250,300,350,400"); + } +} + + + + +//Note: because we rely upon physical properties we must use a "real" gauge configuration +int main (int argc, char ** argv) { + Grid_init(&argc,&argv); + GridLogIRL.TimingMode(1); + + if(argc < 3){ + std::cout << GridLogMessage << "Usage: " << std::endl; + std::cout << GridLogMessage << " should have the format a.b.c where a,b,c are 0,1 depending on whether there are G-parity BCs in that direction" << std::endl; + std::cout << GridLogMessage << "Options:" << std::endl; + std::cout << GridLogMessage << "--Ls : Set Ls (default 12)" << std::endl; + std::cout << GridLogMessage << "--mass : Set the mass (default 0.01)" << std::endl; + std::cout << GridLogMessage << "--block : Set the block size.
Format should be a.b.c.d.e where a-e are the block extents (default 2.2.2.2.2)" << std::endl; + std::cout << GridLogMessage << "--is_cps_cfg : Indicate that the configuration was generated with CPS where until recently the stored plaquette was wrong by a factor of 2" << std::endl; + std::cout << GridLogMessage << "--write_irl_templ: Write a template for the parameters file of the Lanczos to \"irl_templ.xml\"" << std::endl; + std::cout << GridLogMessage << "--read_irl_fine : Read the parameters file for the fine Lanczos" << std::endl; + std::cout << GridLogMessage << "--write_fine : Write fine evecs/evals to filename starting with the stub" << std::endl; + std::cout << GridLogMessage << "--read_fine : Read fine evecs/evals from filename starting with the stub" << std::endl; + std::cout << GridLogMessage << "--coarse_relax_tol : Set the relaxation parameter for evaluating the residual of the reconstructed eigenvectors outside of the basis (default 1e5)" << std::endl; + std::cout << GridLogMessage << "--action : Set the action from 'DWF', 'Mobius' (default Mobius)" << std::endl; + std::cout << GridLogMessage << "--mobius_scale : Set the Mobius scale b+c (default 2)" << std::endl; + std::cout << GridLogMessage << "--basis_size : Set the basis size from 50,100,150,200,250,300,350,400 (default 100)" << std::endl; + + Grid_finalize(); + return 1; + } + std::string config = argv[1]; + + Args args; + GridCmdOptionIntVector(argv[2], args.GparityDirs); + assert(args.GparityDirs.size() == 3); + + std::string action_s = "Mobius"; + + for(int i=3;i> args.mass; + std::cout << GridLogMessage << "Set quark mass to " << args.mass << std::endl; + }else if(sarg == "--block"){ + GridCmdOptionIntVector(argv[i+1], args.blockSize); + assert(args.blockSize.size() == 5); + std::cout << GridLogMessage << "Set block size to "; + for(int q=0;q<5;q++) std::cout << args.blockSize[q] << " "; + std::cout << std::endl; + }else if(sarg == "--is_cps_cfg"){ + args.is_cps_cfg = true; + }else if(sarg == "--write_irl_templ"){ + XmlWriter writer("irl_templ.xml"); + write(writer,"Params",args.fine); + Grid_finalize(); + return 0; + }else if(sarg == "--read_irl_fine"){ + std::cout << GridLogMessage << "Reading fine IRL params from " << argv[i+1] << std::endl; + XmlReader reader(argv[i+1]); + read(reader, "Params", args.fine); + }else if(sarg == "--write_fine"){ + args.write_fine = true; + args.write_fine_file = argv[i+1]; + }else if(sarg == "--read_fine"){ + args.read_fine = true; + args.read_fine_file = argv[i+1]; + }else if(sarg == "--coarse_relax_tol"){ + std::istringstream ss(argv[i+1]); ss >> args.coarse_relax_tol; + std::cout << GridLogMessage << "Set coarse IRL relaxation parameter to " << args.coarse_relax_tol << std::endl; + }else if(sarg == "--action"){ + action_s = argv[i+1]; + std::cout << "Action set to " << action_s << std::endl; + }else if(sarg == "--mobius_scale"){ + std::istringstream ss(argv[i+1]); ss >> args.mobius_scale; + std::cout << GridLogMessage << "Set Mobius scale to " << args.mobius_scale << std::endl; + }else if(sarg == "--basis_size"){ + args.basis_size = std::stoi(argv[i+1]); + std::cout << GridLogMessage << "Set basis size to " << args.basis_size << std::endl; + } + } + + //Fine grids + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(args.Ls,UGrid); + GridRedBlackCartesian *
FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(args.Ls,UGrid); + + LatticeGaugeField Umu(UGrid); + + bool is_gparity = false; + for(auto g : args.GparityDirs) if(g) is_gparity = true; + + double bmc = 1.; + double b = (args.mobius_scale + bmc)/2.; // b = 1/2 [ (b+c) + (b-c) ] + double c = (args.mobius_scale - bmc)/2.; // c = 1/2 [ (b+c) - (b-c) ] + + if(is_gparity){ + GparityWilsonImplD::ImplParams Params = setupGparityParams(args.GparityDirs); + readConfiguration(Umu, config, args.is_cps_cfg); //Read the gauge field + + if(action_s == "DWF"){ + GparityDomainWallFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, args.mass, args.M5, Params); + run(action, config, args); + }else if(action_s == "Mobius"){ + GparityMobiusFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, args.mass, args.M5, b, c, Params); + run(action, config, args); + } + }else{ + WilsonImplD::ImplParams Params = setupParams(); + readConfiguration(Umu, config, args.is_cps_cfg); //Read the gauge field + + if(action_s == "DWF"){ + DomainWallFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, args.mass, args.M5, Params); + run(action, config, args); + }else if(action_s == "Mobius"){ + MobiusFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, args.mass, args.M5, b, c, Params); + run(action, config, args); + } + } + + Grid_finalize(); +} diff --git a/tests/lanczos/Test_wilson_lanczos.cc b/tests/lanczos/Test_wilson_lanczos.cc index af21fb1d..4814e8c6 100644 --- a/tests/lanczos/Test_wilson_lanczos.cc +++ b/tests/lanczos/Test_wilson_lanczos.cc @@ -32,8 +32,8 @@ using namespace std; using namespace Grid; ; -typedef WilsonFermionR FermionOp; -typedef typename WilsonFermionR::FermionField FermionField; +typedef WilsonFermionD FermionOp; +typedef typename WilsonFermionD::FermionField FermionField; RealD AllZero(RealD x) { return 0.; } diff --git a/tests/smearing/Test_WilsonFlow.cc b/tests/smearing/Test_WilsonFlow.cc index f339959a..e0726f87 100644 --- a/tests/smearing/Test_WilsonFlow.cc +++ b/tests/smearing/Test_WilsonFlow.cc @@ -96,13 +96,16 @@ int main(int argc, char **argv) { std::cout << GridLogMessage << "Initial plaquette: " << WilsonLoops::avgPlaquette(Umu) << std::endl; - WilsonFlow WF(WFPar.steps, WFPar.step_size, WFPar.meas_interval); + int t=WFPar.maxTau; + WilsonFlowAdaptive WF(WFPar.step_size, WFPar.maxTau, + 1.0e-4, + WFPar.meas_interval); - WF.smear_adaptive(Uflow, Umu, WFPar.maxTau); + WF.smear(Uflow, Umu); RealD WFlow_plaq = WilsonLoops::avgPlaquette(Uflow); RealD WFlow_TC = WilsonLoops::TopologicalCharge(Uflow); - RealD WFlow_T0 = WF.energyDensityPlaquette(Uflow); + RealD WFlow_T0 = WF.energyDensityPlaquette(t,Uflow); std::cout << GridLogMessage << "Plaquette "<< conf << " " << WFlow_plaq << std::endl; std::cout << GridLogMessage << "T0 "<< conf << " " << WFlow_T0 << std::endl; std::cout << GridLogMessage << "TopologicalCharge "<< conf << " " << WFlow_TC << std::endl; diff --git a/tests/smearing/Test_WilsonFlow_adaptive.cc b/tests/smearing/Test_WilsonFlow_adaptive.cc new file mode 100644 index 00000000..23123eb9 --- /dev/null +++ b/tests/smearing/Test_WilsonFlow_adaptive.cc @@ -0,0 +1,153 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/hmc/Test_WilsonFlow_adaptive.cc + +Copyright (C) 2017 + +Author: Christopher Kelly + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software 
Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +using namespace Grid; + +//Linearly interpolate between two nearest times +RealD interpolate(const RealD t_int, const std::vector > &data){ + RealD tdiff1=1e32; int t1_idx=-1; + RealD tdiff2=1e32; int t2_idx=-1; + + for(int i=0;i seeds({1, 2, 3, 4, 5}); + GridSerialRNG sRNG; + GridParallelRNG pRNG(&Grid); + pRNG.SeedFixedIntegers(seeds); + + LatticeGaugeField U(&Grid); + SU::HotConfiguration(pRNG, U); + + int Nstep = 300; + RealD epsilon = 0.01; + RealD maxTau = Nstep*epsilon; + RealD tolerance = 1e-4; + + for(int i=1;i> tolerance; + } + } + std::cout << "Adaptive smear tolerance " << tolerance << std::endl; + + //Setup iterative Wilson flow + WilsonFlow wflow(epsilon,Nstep); + wflow.resetActions(); + + std::vector > meas_orig; + + wflow.addMeasurement(1, [&wflow,&meas_orig](int step, RealD t, const LatticeGaugeField &U){ + std::cout << GridLogMessage << "[WilsonFlow] Computing Cloverleaf energy density for step " << step << std::endl; + meas_orig.push_back( {t, wflow.energyDensityCloverleaf(t,U)} ); + }); + + //Setup adaptive Wilson flow + WilsonFlowAdaptive wflow_ad(epsilon,maxTau,tolerance); + wflow_ad.resetActions(); + + std::vector > meas_adaptive; + + wflow_ad.addMeasurement(1, [&wflow_ad,&meas_adaptive](int step, RealD t, const LatticeGaugeField &U){ + std::cout << GridLogMessage << "[WilsonFlow] Computing Cloverleaf energy density for step " << step << std::endl; + meas_adaptive.push_back( {t, wflow_ad.energyDensityCloverleaf(t,U)} ); + }); + + //Run + LatticeGaugeFieldD Vtmp(U.Grid()); + wflow.smear(Vtmp, U); //basic smear + + Vtmp = Zero(); + wflow_ad.smear(Vtmp, U); + + //Output values for plotting + { + std::ofstream out("wflow_t2E_orig.dat"); + out.precision(16); + for(auto const &e: meas_orig){ + out << e.first << " " << e.second << std::endl; + } + } + { + std::ofstream out("wflow_t2E_adaptive.dat"); + out.precision(16); + for(auto const &e: meas_adaptive){ + out << e.first << " " << e.second << std::endl; + } + } + + //Compare at times available with adaptive smearing + for(int i=0;i MCR(1.0e-8,10000); - MdagMLinearOperator HermPosDefOp(Dcf); + MdagMLinearOperator HermPosDefOp(Dcf); MCR(HermPosDefOp,src,result); - HermitianLinearOperator HermIndefOp(Dcf); + HermitianLinearOperator HermIndefOp(Dcf); MCR(HermIndefOp,src,result); Grid_finalize(); diff --git a/tests/solver/Test_coarse_even_odd.cc b/tests/solver/Test_coarse_even_odd.cc index c7127121..9d2f8c22 100644 --- a/tests/solver/Test_coarse_even_odd.cc +++ b/tests/solver/Test_coarse_even_odd.cc @@ -108,8 +108,8 @@ int main(int argc, char** argv) { RealD mass = -0.30; RealD csw = 1.9192; - WilsonCloverFermionR Dwc(Umu, *Grid_f, *RBGrid_f, mass, csw, csw); - MdagMLinearOperator MdagMOp_Dwc(Dwc); + WilsonCloverFermionD Dwc(Umu, *Grid_f, *RBGrid_f, mass, csw, csw); + MdagMLinearOperator 
MdagMOp_Dwc(Dwc); ///////////////////////////////////////////////////////////////////////////// // Type definitions // diff --git a/tests/solver/Test_contfrac_cg.cc b/tests/solver/Test_contfrac_cg.cc index afabae4c..52599d07 100644 --- a/tests/solver/Test_contfrac_cg.cc +++ b/tests/solver/Test_contfrac_cg.cc @@ -102,21 +102,21 @@ int main (int argc, char ** argv) std::cout<(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + OverlapWilsonContFracTanhFermionD Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0); + TestCGinversions(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dcfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + OverlapWilsonContFracZolotarevFermionD Dcfz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,6.0); + TestCGinversions(Dcfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dpf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + OverlapWilsonPartialFractionTanhFermionD Dpf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0); + TestCGinversions(Dpf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); std::cout<(Dpfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + OverlapWilsonPartialFractionZolotarevFermionD Dpfz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,6.0); + TestCGinversions(Dpfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); Grid_finalize(); diff --git a/tests/solver/Test_dwf_cg_prec.cc b/tests/solver/Test_dwf_cg_prec.cc index debb736a..f4e346bf 100644 --- a/tests/solver/Test_dwf_cg_prec.cc +++ b/tests/solver/Test_dwf_cg_prec.cc @@ -79,7 +79,7 @@ int main(int argc, char** argv) { RealD mass = 0.01; RealD M5 = 1.8; - DomainWallFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5); + DomainWallFermionD Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5); LatticeFermion src_o(FrbGrid); LatticeFermion result_o(FrbGrid); @@ -88,7 +88,7 @@ int main(int argc, char** argv) { GridStopWatch CGTimer; - SchurDiagMooeeOperator HermOpEO(Ddwf); + SchurDiagMooeeOperator HermOpEO(Ddwf); ConjugateGradient CG(1.0e-5, 10000, 0);// switch off the assert CGTimer.Start(); @@ -98,8 +98,5 @@ int main(int argc, char** argv) { std::cout << GridLogMessage << "Total CG time : " << CGTimer.Elapsed() << std::endl; - std::cout << GridLogMessage << "######## Dhop calls summary" << std::endl; - Ddwf.Report(); - Grid_finalize(); } diff --git a/tests/solver/Test_dwf_cg_schur.cc b/tests/solver/Test_dwf_cg_schur.cc index 6541e73d..bcc0cc40 100644 --- a/tests/solver/Test_dwf_cg_schur.cc +++ b/tests/solver/Test_dwf_cg_schur.cc @@ -70,7 +70,7 @@ int main (int argc, char ** argv) RealD mass=0.1; RealD M5=1.8; - DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); ConjugateGradient CG(1.0e-8,10000); SchurRedBlackDiagMooeeSolve SchurSolver(CG); diff --git a/tests/solver/Test_dwf_cg_unprec.cc b/tests/solver/Test_dwf_cg_unprec.cc index c867ccf3..58614c49 100644 --- a/tests/solver/Test_dwf_cg_unprec.cc +++ b/tests/solver/Test_dwf_cg_unprec.cc @@ -70,9 +70,9 @@ int main (int argc, char ** argv) RealD mass=0.1; RealD M5=1.8; - DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); - MdagMLinearOperator HermOp(Ddwf); + MdagMLinearOperator HermOp(Ddwf); ConjugateGradient CG(1.0e-6,10000); CG(HermOp,src,result); diff --git a/tests/solver/Test_dwf_cr_unprec.cc b/tests/solver/Test_dwf_cr_unprec.cc index 8c8583ba..4d67231d 100644 --- a/tests/solver/Test_dwf_cr_unprec.cc +++ 
b/tests/solver/Test_dwf_cr_unprec.cc @@ -77,12 +77,12 @@ int main (int argc, char ** argv) RealD mass=0.5; RealD M5=1.8; - DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); - MdagMLinearOperator HermOp(Ddwf); + MdagMLinearOperator HermOp(Ddwf); MCR(HermOp,src,result); - Gamma5R5HermitianLinearOperator g5HermOp(Ddwf); + Gamma5R5HermitianLinearOperator g5HermOp(Ddwf); MCR(g5HermOp,src,result); diff --git a/tests/solver/Test_dwf_fpgcr.cc b/tests/solver/Test_dwf_fpgcr.cc index 42cc8de1..3d779474 100644 --- a/tests/solver/Test_dwf_fpgcr.cc +++ b/tests/solver/Test_dwf_fpgcr.cc @@ -77,12 +77,12 @@ int main (int argc, char ** argv) RealD mass=0.5; RealD M5=1.8; - DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); std::cout< HermOp(Ddwf); + MdagMLinearOperator HermOp(Ddwf); TrivialPrecon simple; PrecGeneralisedConjugateResidual PGCR(1.0e-6,10000,HermOp,simple,4,160); @@ -92,7 +92,7 @@ int main (int argc, char ** argv) std::cout< g5HermOp(Ddwf); + Gamma5R5HermitianLinearOperator g5HermOp(Ddwf); PrecGeneralisedConjugateResidual PGCR5(1.0e-6,10000,g5HermOp,simple,4,160); result=Zero(); PGCR5(src,result); diff --git a/tests/solver/Test_dwf_hdcr.cc b/tests/solver/Test_dwf_hdcr.cc index ba77dffa..31b58284 100644 --- a/tests/solver/Test_dwf_hdcr.cc +++ b/tests/solver/Test_dwf_hdcr.cc @@ -254,7 +254,7 @@ int main (int argc, char ** argv) std::cout< Subspace; typedef CoarsenedMatrix CoarseOperator; @@ -263,7 +263,7 @@ int main (int argc, char ** argv) std::cout< HermDefOp(Ddwf); + MdagMLinearOperator HermDefOp(Ddwf); Subspace Aggregates(Coarse5d,FGrid,0); @@ -290,7 +290,7 @@ int main (int argc, char ** argv) typedef CoarsenedMatrix Level1Op; typedef CoarsenedMatrix,nbasisc> Level2Op; - Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); + Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); Level1Op LDOp(*Coarse5d,*Coarse5dRB,1); LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates); @@ -351,16 +351,16 @@ int main (int argc, char ** argv) std::cout< , NormalEquations > TwoLevelMG; + typedef MultiGridPreconditioner , NormalEquations > TwoLevelMG; typedef MultiGridPreconditioner,nbasisc,Level1Op, DeflatedGuesser, NormalEquations > CoarseMG; - typedef MultiGridPreconditioner, LinearFunction > ThreeLevelMG; + typedef MultiGridPreconditioner, LinearFunction > ThreeLevelMG; // MultiGrid preconditioner acting on the coarse space <-> coarsecoarse space ChebyshevSmoother CoarseSmoother(0.1,12.0,3,L1LinOp,LDOp); - ChebyshevSmoother FineSmoother(0.5,60.0,10,HermIndefOp,Ddwf); + ChebyshevSmoother FineSmoother(0.5,60.0,10,HermIndefOp,Ddwf); // MirsSmoother CoarseCGSmoother(0.1,0.1,4,L1LinOp,LDOp); - // MirsSmoother FineCGSmoother(0.0,0.01,8,HermIndefOp,Ddwf); + // MirsSmoother FineCGSmoother(0.0,0.01,8,HermIndefOp,Ddwf); CoarseMG Level2Precon (CoarseAggregates, L2Op, L1LinOp,LDOp, diff --git a/tests/solver/Test_dwf_hdcr_16_rb.cc b/tests/solver/Test_dwf_hdcr_16_rb.cc index 4682272d..ae8e7ae5 100644 --- a/tests/solver/Test_dwf_hdcr_16_rb.cc +++ b/tests/solver/Test_dwf_hdcr_16_rb.cc @@ -268,7 +268,7 @@ int main (int argc, char ** argv) std::cout< Subspace; typedef CoarsenedMatrix CoarseOperator; @@ -277,7 +277,7 @@ int main (int argc, char ** argv) std::cout< HermDefOp(Ddwf); + MdagMLinearOperator HermDefOp(Ddwf); Subspace Aggregates(Coarse5d,FGrid,0); @@ -311,7 +311,7 @@ int main (int argc, char ** argv) typedef CoarsenedMatrix Level1Op; typedef 
CoarsenedMatrix,nbasisc> Level2Op; - Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); + Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); GridRedBlackCartesian * Coarse4dRB = SpaceTimeGrid::makeFourDimRedBlackGrid(Coarse4d); @@ -338,11 +338,11 @@ int main (int argc, char ** argv) std::cout< , SolverWrapper > TwoLevelMG; + typedef MultiGridPreconditioner , SolverWrapper > TwoLevelMG; typedef MultiGridPreconditioner,nbasisc,Level1Op, DeflatedGuesser, NormalEquations > CoarseMG; - typedef MultiGridPreconditioner, LinearFunction > ThreeLevelMG; + typedef MultiGridPreconditioner, LinearFunction > ThreeLevelMG; - ChebyshevSmoother FineSmoother(0.5,60.0,12,HermIndefOp,Ddwf); + ChebyshevSmoother FineSmoother(0.5,60.0,12,HermIndefOp,Ddwf); std::cout< HermOpEO(Ddwf); + SchurDiagMooeeOperator HermOpEO(Ddwf); // pCG(HermOpEO,src_o,result_o); std::cout< Subspace; typedef CoarsenedMatrix CoarseOperator; @@ -259,7 +259,7 @@ int main (int argc, char ** argv) std::cout< HermDefOp(Ddwf); + MdagMLinearOperator HermDefOp(Ddwf); Subspace Aggregates(Coarse5d,FGrid,0); @@ -292,7 +292,7 @@ int main (int argc, char ** argv) typedef CoarsenedMatrix Level1Op; typedef CoarsenedMatrix,nbasisc> Level2Op; - Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); + Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); GridRedBlackCartesian * Coarse4dRB = SpaceTimeGrid::makeFourDimRedBlackGrid(Coarse4d); @@ -391,18 +391,18 @@ int main (int argc, char ** argv) std::cout< , NormalEquations > TwoLevelMG; - typedef MultiGridPreconditioner , NormalEquations > TwoLevelMG; + // typedef MultiGridPreconditioner , NormalEquations > TwoLevelMG; + typedef MultiGridPreconditioner , NormalEquations > TwoLevelMG; typedef MultiGridPreconditioner,nbasisc,Level1Op, DeflatedGuesser, NormalEquations > CoarseMG; - typedef MultiGridPreconditioner, LinearFunction > ThreeLevelMG; + typedef MultiGridPreconditioner, LinearFunction > ThreeLevelMG; - ChebyshevSmoother FineSmoother(0.25,60.0,12,HermIndefOp,Ddwf); + ChebyshevSmoother FineSmoother(0.25,60.0,12,HermIndefOp,Ddwf); /* // MultiGrid preconditioner acting on the coarse space <-> coarsecoarse space ChebyshevSmoother CoarseSmoother(0.1,15.0,3,L1LinOp,LDOp); // MirsSmoother CoarseCGSmoother(0.1,0.1,4,L1LinOp,LDOp); - // MirsSmoother FineCGSmoother(0.0,0.01,8,HermIndefOp,Ddwf); + // MirsSmoother FineCGSmoother(0.0,0.01,8,HermIndefOp,Ddwf); CoarseMG Level2Precon (CoarseAggregates, L2Op, L1LinOp,LDOp, @@ -463,7 +463,7 @@ int main (int argc, char ** argv) LatticeFermion result_o(FrbGrid); pickCheckerboard(Odd,src_o,src); result_o=Zero(); - SchurDiagMooeeOperator HermOpEO(Ddwf); + SchurDiagMooeeOperator HermOpEO(Ddwf); // pCG(HermOpEO,src_o,result_o); std::cout< Subspace; typedef CoarsenedMatrix CoarseOperator; @@ -297,7 +297,7 @@ int main (int argc, char ** argv) std::cout< HermDefOp(Ddwf); + MdagMLinearOperator HermDefOp(Ddwf); Subspace Aggregates(Coarse5d,FGrid,0); @@ -332,7 +332,7 @@ int main (int argc, char ** argv) std::cout< Level1Op; - Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); + Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); Level1Op LDOp(*Coarse5d,*Coarse5dRB,1); LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates); @@ -375,21 +375,21 @@ int main (int argc, char ** argv) std::cout< , NormalEquations > TwoLevelMG; + typedef MultiGridPreconditioner , NormalEquations > TwoLevelMG; // MultiGrid preconditioner acting on the coarse space <-> coarsecoarse space - // ChebyshevSmoother FineSmoother(0.5,60.0,14,HermIndefOp,Ddwf); // 72 iter 63s - // ChebyshevSmoother 
FineSmoother(0.1,60.0,20,HermIndefOp,Ddwf); // 66 iter 69s - // ChebyshevSmoother FineSmoother(0.5,60.0,20,HermIndefOp,Ddwf); // 63 iter 65 s - // ChebyshevSmoother FineSmoother(1.0,60.0,20,HermIndefOp,Ddwf); // 69, 70 - // ChebyshevSmoother FineSmoother(1.0,60.0,14,HermIndefOp,Ddwf); // 77 + // ChebyshevSmoother FineSmoother(0.5,60.0,14,HermIndefOp,Ddwf); // 72 iter 63s + // ChebyshevSmoother FineSmoother(0.1,60.0,20,HermIndefOp,Ddwf); // 66 iter 69s + // ChebyshevSmoother FineSmoother(0.5,60.0,20,HermIndefOp,Ddwf); // 63 iter 65 s + // ChebyshevSmoother FineSmoother(1.0,60.0,20,HermIndefOp,Ddwf); // 69, 70 + // ChebyshevSmoother FineSmoother(1.0,60.0,14,HermIndefOp,Ddwf); // 77 - // ChebyshevSmoother FineSmoother(0.5,60.0,10,HermIndefOp,Ddwf); // 23 iter 15.9s - // ChebyshevSmoother FineSmoother(0.5,60.0,14,HermIndefOp,Ddwf); // 20, 16.9s - ChebyshevSmoother FineSmoother(0.5,60.0,12,HermIndefOp,Ddwf); // 21, 15.6s + // ChebyshevSmoother FineSmoother(0.5,60.0,10,HermIndefOp,Ddwf); // 23 iter 15.9s + // ChebyshevSmoother FineSmoother(0.5,60.0,14,HermIndefOp,Ddwf); // 20, 16.9s + ChebyshevSmoother FineSmoother(0.5,60.0,12,HermIndefOp,Ddwf); // 21, 15.6s - // MirsSmoother FineCGSmoother(0.05,0.01,20,HermIndefOp,Ddwf); - // RedBlackSmoother FineRBSmoother(0.00,0.001,100,Ddwf); + // MirsSmoother FineCGSmoother(0.05,0.01,20,HermIndefOp,Ddwf); + // RedBlackSmoother FineRBSmoother(0.00,0.001,100,Ddwf); // Wrap the 2nd level solver in a MultiGrid preconditioner acting on the fine space // ZeroGuesser CoarseZeroGuesser; @@ -416,7 +416,7 @@ int main (int argc, char ** argv) ConjugateGradient FineCG(1.0e-8,10000); - SchurDiagMooeeOperator FineDiagMooee(Ddwf); // M_ee - Meo Moo^-1 Moe + SchurDiagMooeeOperator FineDiagMooee(Ddwf); // M_ee - Meo Moo^-1 Moe LatticeFermion f_src_e(FrbGrid); f_src_e=1.0; LatticeFermion f_res_e(FrbGrid); f_res_e=Zero(); FineCG(FineDiagMooee,f_src_e,f_res_e); diff --git a/tests/solver/Test_dwf_hdcr_48_rb.cc b/tests/solver/Test_dwf_hdcr_48_rb.cc index 2b76681e..25ac1dac 100644 --- a/tests/solver/Test_dwf_hdcr_48_rb.cc +++ b/tests/solver/Test_dwf_hdcr_48_rb.cc @@ -264,7 +264,7 @@ int main (int argc, char ** argv) std::cout< Subspace; typedef CoarsenedMatrix CoarseOperator; @@ -273,7 +273,7 @@ int main (int argc, char ** argv) std::cout< HermDefOp(Ddwf); + MdagMLinearOperator HermDefOp(Ddwf); Subspace Aggregates(Coarse5d,FGrid,0); @@ -306,7 +306,7 @@ int main (int argc, char ** argv) typedef CoarsenedMatrix Level1Op; typedef CoarsenedMatrix,nbasisc> Level2Op; - Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); + Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); GridRedBlackCartesian * Coarse4dRB = SpaceTimeGrid::makeFourDimRedBlackGrid(Coarse4d); @@ -332,9 +332,9 @@ int main (int argc, char ** argv) std::cout< , SolverWrapper > TwoLevelMG; + typedef MultiGridPreconditioner , SolverWrapper > TwoLevelMG; typedef MultiGridPreconditioner,nbasisc,Level1Op, DeflatedGuesser, NormalEquations > CoarseMG; - typedef MultiGridPreconditioner, LinearFunction > ThreeLevelMG; + typedef MultiGridPreconditioner, LinearFunction > ThreeLevelMG; std::cout< FineSmoother(los[l],60.0,ords[o],HermIndefOp,Ddwf); + ChebyshevSmoother FineSmoother(los[l],60.0,ords[o],HermIndefOp,Ddwf); ZeroGuesser CoarseZeroGuesser; ConjugateGradient CoarseCG(tols[t],10000); SchurRedBlackDiagMooeeSolve CoarseRBCG(CoarseCG); @@ -376,7 +376,7 @@ int main (int argc, char ** argv) LatticeFermion result_o(FrbGrid); pickCheckerboard(Odd,src_o,src); result_o=Zero(); - SchurDiagMooeeOperator HermOpEO(Ddwf); + 
SchurDiagMooeeOperator HermOpEO(Ddwf); pCG(HermOpEO,src_o,result_o); std::cout< Subspace; typedef CoarsenedMatrix CoarseOperator; @@ -257,7 +257,7 @@ int main (int argc, char ** argv) std::cout< HermDefOp(Ddwf); + MdagMLinearOperator HermDefOp(Ddwf); Subspace Aggregates(Coarse5d,FGrid,0); @@ -290,7 +290,7 @@ int main (int argc, char ** argv) typedef CoarsenedMatrix Level1Op; typedef CoarsenedMatrix,nbasisc> Level2Op; - Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); + Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); GridRedBlackCartesian * Coarse4dRB = SpaceTimeGrid::makeFourDimRedBlackGrid(Coarse4d); @@ -386,18 +386,18 @@ int main (int argc, char ** argv) std::cout< , NormalEquations > TwoLevelMG; - typedef MultiGridPreconditioner , NormalEquations > TwoLevelMG; + // typedef MultiGridPreconditioner , NormalEquations > TwoLevelMG; + typedef MultiGridPreconditioner , NormalEquations > TwoLevelMG; typedef MultiGridPreconditioner,nbasisc,Level1Op, DeflatedGuesser, NormalEquations > CoarseMG; - typedef MultiGridPreconditioner, LinearFunction > ThreeLevelMG; + typedef MultiGridPreconditioner, LinearFunction > ThreeLevelMG; - ChebyshevSmoother FineSmoother(0.25,60.0,12,HermIndefOp,Ddwf); + ChebyshevSmoother FineSmoother(0.25,60.0,12,HermIndefOp,Ddwf); /* // MultiGrid preconditioner acting on the coarse space <-> coarsecoarse space ChebyshevSmoother CoarseSmoother(0.1,15.0,3,L1LinOp,LDOp); // MirsSmoother CoarseCGSmoother(0.1,0.1,4,L1LinOp,LDOp); - // MirsSmoother FineCGSmoother(0.0,0.01,8,HermIndefOp,Ddwf); + // MirsSmoother FineCGSmoother(0.0,0.01,8,HermIndefOp,Ddwf); CoarseMG Level2Precon (CoarseAggregates, L2Op, L1LinOp,LDOp, @@ -458,7 +458,7 @@ int main (int argc, char ** argv) LatticeFermion result_o(FrbGrid); pickCheckerboard(Odd,src_o,src); result_o=Zero(); - SchurDiagMooeeOperator HermOpEO(Ddwf); + SchurDiagMooeeOperator HermOpEO(Ddwf); pCG(HermOpEO,src_o,result_o); std::cout< + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +//using namespace std; +using namespace Grid; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + const int Ls=12; + + std::cout << GridLogMessage << "::::: NB: to enable a quick bit reproducibility check use the --checksums flag. 
" << std::endl; + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + GridCartesian * UGrid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_f); + GridCartesian * FGrid_f = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_f); + GridRedBlackCartesian * FrbGrid_f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_f); + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeFermionD src(FGrid); random(RNG5,src); + LatticeFermionD result(FGrid); result=Zero(); + LatticeGaugeFieldD Umu(UGrid); + LatticeGaugeFieldF Umu_f(UGrid_f); + + SU::HotConfiguration(RNG4,Umu); + + precisionChange(Umu_f,Umu); + + RealD mass=0.1; + RealD M5=1.8; + DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + DomainWallFermionF Ddwf_f(Umu_f,*FGrid_f,*FrbGrid_f,*UGrid_f,*UrbGrid_f,mass,M5); + + LatticeFermionD src_o(FrbGrid); + LatticeFermionD result_o(FrbGrid); + LatticeFermionD result_o_2(FrbGrid); + pickCheckerboard(Odd,src_o,src); + result_o.Checkerboard() = Odd; + result_o = Zero(); + result_o_2.Checkerboard() = Odd; + result_o_2 = Zero(); + + SchurDiagMooeeOperator HermOpEO(Ddwf); + SchurDiagMooeeOperator HermOpEO_f(Ddwf_f); + + std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl; + MixedPrecisionConjugateGradient mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO); + double t1,t2,flops; + double MdagMsiteflops = 1452; // Mobius (real coeffs) + // CG overhead: 8 inner product, 4+8 axpy_norm, 4+4 linear comb (2 of) + double CGsiteflops = (8+4+8+4+4)*Nc*Ns ; + std:: cout << " MdagM site flops = "<< 4*MdagMsiteflops< CG(1.0e-8,10000); + result_o_2 = Zero(); + t1=usecond(); + CG(HermOpEO,src_o,result_o_2); + t2=usecond(); + iters = CG.IterationsToComplete; + flops = MdagMsiteflops*4*FrbGrid->gSites()*iters; + flops+= CGsiteflops*FrbGrid->gSites()*iters; + + std::cout << " DoublePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.< HermOp(Ddwf); - MdagMLinearOperator HermOpCk(Dchk); + MdagMLinearOperator HermOp(Ddwf); + MdagMLinearOperator HermOpCk(Dchk); ConjugateGradient CG((1.0e-5/(me+1)),10000); s_res = Zero(); CG(HermOp,s_src,s_res); diff --git a/tests/solver/Test_dwf_mrhs_cg_mpi.cc b/tests/solver/Test_dwf_mrhs_cg_mpi.cc index d0a32460..1a679f45 100644 --- a/tests/solver/Test_dwf_mrhs_cg_mpi.cc +++ b/tests/solver/Test_dwf_mrhs_cg_mpi.cc @@ -34,9 +34,9 @@ using namespace Grid; int main (int argc, char ** argv) { - typedef typename DomainWallFermionR::FermionField FermionField; - typedef typename DomainWallFermionR::ComplexField ComplexField; - typename DomainWallFermionR::ImplParams params; + typedef typename DomainWallFermionD::FermionField FermionField; + typedef typename DomainWallFermionD::ComplexField ComplexField; + typename DomainWallFermionD::ImplParams params; double stp=1.0e-5; const int Ls=4; @@ -189,15 +189,15 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << " Building the solvers"< HermOp(Ddwf); - MdagMLinearOperator HermOpCk(Dchk); + 
MdagMLinearOperator HermOp(Ddwf); + MdagMLinearOperator HermOpCk(Dchk); ConjugateGradient CG((stp),10000); s_res = Zero(); CG(HermOp,s_src,s_res); diff --git a/tests/solver/Test_dwf_mrhs_cg_mpieo.cc b/tests/solver/Test_dwf_mrhs_cg_mpieo.cc index 1906619f..0dcab577 100644 --- a/tests/solver/Test_dwf_mrhs_cg_mpieo.cc +++ b/tests/solver/Test_dwf_mrhs_cg_mpieo.cc @@ -34,9 +34,9 @@ using namespace Grid; int main (int argc, char ** argv) { - typedef typename DomainWallFermionR::FermionField FermionField; - typedef typename DomainWallFermionR::ComplexField ComplexField; - typename DomainWallFermionR::ImplParams params; + typedef typename DomainWallFermionD::FermionField FermionField; + typedef typename DomainWallFermionD::ComplexField ComplexField; + typename DomainWallFermionD::ImplParams params; const int Ls=4; @@ -124,15 +124,15 @@ int main (int argc, char ** argv) /////////////////////////////////////////////////////////////// RealD mass=0.01; RealD M5=1.8; - DomainWallFermionR Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5); - DomainWallFermionR Ddwf(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5); + DomainWallFermionD Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5); + DomainWallFermionD Ddwf(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5); std::cout << GridLogMessage << "****************************************************************** "< HermOp(Ddwf); - MdagMLinearOperator HermOpCk(Dchk); + MdagMLinearOperator HermOp(Ddwf); + MdagMLinearOperator HermOpCk(Dchk); ConjugateGradient CG((1.0e-8/(me+1)),10000); s_res = Zero(); CG(HermOp,s_src,s_res); diff --git a/tests/solver/Test_dwf_multigrid.cc b/tests/solver/Test_dwf_multigrid.cc index e670b358..1cd83375 100644 --- a/tests/solver/Test_dwf_multigrid.cc +++ b/tests/solver/Test_dwf_multigrid.cc @@ -397,8 +397,8 @@ int main (int argc, char ** argv) std::cout< Subspace; typedef CoarsenedMatrix CoarseOperator; @@ -407,7 +407,7 @@ int main (int argc, char ** argv) std::cout< HermDefOp(Ddwf); + MdagMLinearOperator HermDefOp(Ddwf); Subspace Aggregates(Coarse5d,FGrid,0); @@ -435,8 +435,8 @@ int main (int argc, char ** argv) typedef CoarsenedMatrix Level1Op; typedef CoarsenedMatrix,nbasisc> Level2Op; - Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); - Gamma5R5HermitianLinearOperator HermIndefOpPV(Dpv); + Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); + Gamma5R5HermitianLinearOperator HermIndefOpPV(Dpv); std::cout< FineCG(tol,MaxIt); // GeneralisedMinimalResidual FineGMRES(tol,MaxIt,20); - MdagMLinearOperator FineMdagM(Ddwf); // M^\dag M - PVdagMLinearOperator FinePVdagM(Ddwf,Dpv);// M_{pv}^\dag M - SchurDiagMooeeOperator FineDiagMooee(Ddwf); // M_ee - Meo Moo^-1 Moe - SchurDiagOneOperator FineDiagOne(Ddwf); // 1 - M_ee^{-1} Meo Moo^{-1} Moe e + MdagMLinearOperator FineMdagM(Ddwf); // M^\dag M + PVdagMLinearOperator FinePVdagM(Ddwf,Dpv);// M_{pv}^\dag M + SchurDiagMooeeOperator FineDiagMooee(Ddwf); // M_ee - Meo Moo^-1 Moe + SchurDiagOneOperator FineDiagOne(Ddwf); // 1 - M_ee^{-1} Meo Moo^{-1} Moe e MdagMLinearOperator CoarseMdagM(LDOp); PVdagMLinearOperator CoarsePVdagM(LDOp,LDOpPV); @@ -552,7 +552,7 @@ int main (int argc, char ** argv) std::cout< CoarseMgridCG(0.001,1000); - ChebyshevSmoother FineSmoother(0.5,60.0,10,HermIndefOp,Ddwf); + ChebyshevSmoother FineSmoother(0.5,60.0,10,HermIndefOp,Ddwf); typedef HDCRPreconditioner > TwoLevelHDCR; TwoLevelHDCR TwoLevelPrecon(Aggregates, diff --git a/tests/solver/Test_dwf_multishift_mixedprec.cc b/tests/solver/Test_dwf_multishift_mixedprec.cc new file mode 100644 index 00000000..bdede459 --- 
/dev/null +++ b/tests/solver/Test_dwf_multishift_mixedprec.cc @@ -0,0 +1,184 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_multishift_mixedprec.cc + + Copyright (C) 2015 + +Author: Christopher Kelly + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace Grid; + +template +void run_test(int argc, char ** argv, const typename SpeciesD::ImplParams ¶ms){ + const int Ls = 16; + GridCartesian* UGrid_d = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexD::Nsimd()), GridDefaultMpi()); + GridRedBlackCartesian* UrbGrid_d = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_d); + GridCartesian* FGrid_d = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid_d); + GridRedBlackCartesian* FrbGrid_d = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid_d); + + GridCartesian* UGrid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi()); + GridRedBlackCartesian* UrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_f); + GridCartesian* FGrid_f = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid_f); + GridRedBlackCartesian* FrbGrid_f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid_f); + + typedef typename SpeciesD::FermionField FermionFieldD; + typedef typename SpeciesF::FermionField FermionFieldF; + + std::vector seeds4({1, 2, 3, 4}); + std::vector seeds5({5, 6, 7, 8}); + GridParallelRNG RNG5(FGrid_d); + RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid_d); + RNG4.SeedFixedIntegers(seeds4); + + FermionFieldD src_d(FGrid_d); + random(RNG5, src_d); + + LatticeGaugeFieldD Umu_d(UGrid_d); + + //CPS-created G-parity ensembles have a factor of 2 error in the plaquette that causes the read to fail unless we workaround it + bool gparity_plaquette_fix = false; + for(int i=1;i(Umu_d, metadata, file); + + if(gparity_plaquette_fix){ + metadata.plaquette *= 2.; //correct header value + + //Get the true plaquette + FieldMetaData tmp; + GaugeStatisticsType gs; gs(Umu_d, tmp); + + std::cout << "After correction: plaqs " << tmp.plaquette << " " << metadata.plaquette << std::endl; + assert(fabs(tmp.plaquette -metadata.plaquette ) < 1.0e-5 ); + } + + cfg_loaded=true; + break; + } + } + + if(!cfg_loaded) + SU::HotConfiguration(RNG4, Umu_d); + + LatticeGaugeFieldF Umu_f(UGrid_f); + precisionChange(Umu_f, Umu_d); + + std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " Ls: " << Ls << std::endl; + + RealD mass = 0.01; + RealD M5 = 1.8; + SpeciesD Ddwf_d(Umu_d, *FGrid_d, *FrbGrid_d, *UGrid_d, *UrbGrid_d, mass, M5, params); + SpeciesF 
Ddwf_f(Umu_f, *FGrid_f, *FrbGrid_f, *UGrid_f, *UrbGrid_f, mass, M5, params); + + FermionFieldD src_o_d(FrbGrid_d); + pickCheckerboard(Odd, src_o_d, src_d); + + SchurDiagMooeeOperator HermOpEO_d(Ddwf_d); + SchurDiagMooeeOperator HermOpEO_f(Ddwf_f); + + AlgRemez remez(1e-4, 64, 50); + int order = 15; + remez.generateApprox(order, 1, 2); //sqrt + + MultiShiftFunction shifts(remez, 1e-10, false); + + int relup_freq = 50; + double t1=usecond(); + ConjugateGradientMultiShiftMixedPrec mcg(10000, shifts, FrbGrid_f, HermOpEO_f, relup_freq); + + std::vector results_o_d(order, FrbGrid_d); + mcg(HermOpEO_d, src_o_d, results_o_d); + double t2=usecond(); + + //Crosscheck double and mixed prec results + ConjugateGradientMultiShift dmcg(10000, shifts); + std::vector results_o_d_2(order, FrbGrid_d); + dmcg(HermOpEO_d, src_o_d, results_o_d_2); + double t3=usecond(); + + std::cout << GridLogMessage << "Comparison of mixed prec results to double prec results |mixed - double|^2 :" << std::endl; + FermionFieldD tmp(FrbGrid_d); + for(int i=0;i= 0 && gpdir <= 2); //spatial! + gparity = true; + } + } + if(gparity){ + std::cout << "Running test with G-parity BCs in " << gpdir << " direction" << std::endl; + GparityWilsonImplParams params; + params.twists[gpdir] = 1; + + std::vector conj_dirs(Nd,0); + conj_dirs[gpdir] = 1; + ConjugateGimplD::setDirections(conj_dirs); + + run_test(argc,argv,params); + }else{ + std::cout << "Running test with periodic BCs" << std::endl; + WilsonImplParams params; + run_test(argc,argv,params); + } + + Grid_finalize(); +} diff --git a/tests/solver/Test_dwf_qmr_unprec.cc b/tests/solver/Test_dwf_qmr_unprec.cc index 370e7409..eeb20c95 100644 --- a/tests/solver/Test_dwf_qmr_unprec.cc +++ b/tests/solver/Test_dwf_qmr_unprec.cc @@ -66,17 +66,17 @@ int main (int argc, char ** argv) RealD mass=0.0; RealD M5=-1.8; - DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); - Gamma5R5HermitianLinearOperator g5HermOp(Ddwf); + Gamma5R5HermitianLinearOperator g5HermOp(Ddwf); QMR(g5HermOp,src,result); GMR(g5HermOp,src,result); - NonHermitianLinearOperator NonHermOp(Ddwf); + NonHermitianLinearOperator NonHermOp(Ddwf); QMR(NonHermOp,src,result); GMR(NonHermOp,src,result); - MdagMLinearOperator HermOp(Ddwf); + MdagMLinearOperator HermOp(Ddwf); ConjugateGradient CG(1.0e-8,10000); CG(HermOp,src,result); diff --git a/tests/solver/Test_dwf_relupcg_prec.cc b/tests/solver/Test_dwf_relupcg_prec.cc new file mode 100644 index 00000000..1d8c022a --- /dev/null +++ b/tests/solver/Test_dwf_relupcg_prec.cc @@ -0,0 +1,143 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/solver/Test_dwf_relupcg_prec.cc + + Copyright (C) 2015 + +Author: Christopher Kelly +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
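// Aside (toy sketch in plain C++; none of the names below are Grid API):
// the multishift test above crosschecks ConjugateGradientMultiShiftMixedPrec
// against the double-precision ConjugateGradientMultiShift by measuring
// |mixed - double|^2 per pole. An equivalent operator-level check is the
// shifted residual (A + sigma_i) x_i - b, shown here for a toy diagonal
// SPD operator:
#include <cstdio>
#include <vector>

int main() {
  const int N = 8;
  std::vector<double> A(N), b(N, 1.0);
  for (int k = 0; k < N; k++) A[k] = 1.0 + k;              // positive toy spectrum
  std::vector<double> poles = {0.01, 0.1, 1.0};            // shifts sigma_i
  for (double sigma : poles) {
    std::vector<double> x(N);
    for (int k = 0; k < N; k++) x[k] = b[k] / (A[k] + sigma); // exact shifted solve
    double r2 = 0.0;
    for (int k = 0; k < N; k++) {
      double r = (A[k] + sigma) * x[k] - b[k];             // shifted residual
      r2 += r * r;
    }
    std::printf("sigma=%g  |r|^2=%g\n", sigma, r2);        // ~0 for every shift
  }
  return 0;
}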
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + double relup_delta = 0.2; + for(int i=1;i> relup_delta; + std::cout << GridLogMessage << "Set reliable update Delta to " << relup_delta << std::endl; + } + } + + const int Ls=12; + + { + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + GridCartesian * UGrid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_f); + GridCartesian * FGrid_f = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_f); + GridRedBlackCartesian * FrbGrid_f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_f); + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeFermionD src(FGrid); random(RNG5,src); + LatticeFermionD result(FGrid); result=Zero(); + LatticeGaugeFieldD Umu(UGrid); + LatticeGaugeFieldF Umu_f(UGrid_f); + + SU::HotConfiguration(RNG4,Umu); + + precisionChange(Umu_f,Umu); + + RealD mass=0.1; + RealD M5=1.8; + DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + DomainWallFermionF Ddwf_f(Umu_f,*FGrid_f,*FrbGrid_f,*UGrid_f,*UrbGrid_f,mass,M5); + + LatticeFermionD src_o(FrbGrid); + LatticeFermionD result_o(FrbGrid); + LatticeFermionD result_o_2(FrbGrid); + pickCheckerboard(Odd,src_o,src); + result_o.Checkerboard() = Odd; + result_o = Zero(); + result_o_2.Checkerboard() = Odd; + result_o_2 = Zero(); + + SchurDiagMooeeOperator HermOpEO(Ddwf); + SchurDiagMooeeOperator HermOpEO_f(Ddwf_f); + + std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl; + ConjugateGradientReliableUpdate mCG(1e-8, 10000, relup_delta, FrbGrid_f, HermOpEO_f, HermOpEO); + double t1,t2,flops; + double MdagMsiteflops = 1452; // Mobius (real coeffs) + // CG overhead: 8 inner product, 4+8 axpy_norm, 4+4 linear comb (2 of) + double CGsiteflops = (8+4+8+4+4)*Nc*Ns ; + std:: cout << " MdagM site flops = "<< 4*MdagMsiteflops< CG(1.0e-8,10000); + for(int i=0;i<1;i++){ + result_o_2 = Zero(); + t1=usecond(); + CG(HermOpEO,src_o,result_o_2); + t2=usecond(); + iters = CG.IterationsToComplete; + flops = MdagMsiteflops*4*FrbGrid->gSites()*iters; + flops+= CGsiteflops*FrbGrid->gSites()*iters; + + std::cout << " DoublePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.< +Author: Peter Boyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
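// Aside (toy sketch, not Grid's implementation): Test_dwf_relupcg_prec.cc
// above drives ConjugateGradientReliableUpdate(tol, maxit, delta, grid_f,
// HermOp_f, HermOp_d). The reliable-update idea, reduced to a double-only
// toy on a diagonal SPD matrix: whenever the recursive |r|^2 has fallen by
// the factor delta since the last check, recompute the true residual
// b - A x and restart the recurrence from it. In the mixed-precision
// version the recurrences run in single precision and only this correction
// is done in double; a production version also refreshes the search
// direction, which this sketch omits.
#include <cstdio>
#include <vector>

using Vec = std::vector<double>;

static void matmul(const Vec& a, const Vec& x, Vec& y) {   // diagonal "A"
  for (size_t k = 0; k < x.size(); k++) y[k] = a[k] * x[k];
}
static double dot(const Vec& x, const Vec& y) {
  double s = 0.0;
  for (size_t k = 0; k < x.size(); k++) s += x[k] * y[k];
  return s;
}

int main() {
  const int N = 64;
  const double tol2 = 1.0e-16, delta = 0.1;
  Vec A(N), b(N, 1.0), x(N, 0.0), r(b), p(r), Ap(N);
  for (int k = 0; k < N; k++) A[k] = 1.0 + k;
  double r2 = dot(r, r), r2_mark = r2;
  for (int it = 0; it < 1000 && r2 > tol2; it++) {
    matmul(A, p, Ap);
    double alpha = r2 / dot(p, Ap);
    for (int k = 0; k < N; k++) { x[k] += alpha * p[k]; r[k] -= alpha * Ap[k]; }
    double r2n = dot(r, r);
    if (r2n < delta * r2_mark) {                           // reliable-update trigger
      matmul(A, x, Ap);
      for (int k = 0; k < N; k++) r[k] = b[k] - Ap[k];     // true residual
      r2n = dot(r, r);
      r2_mark = r2n;
      std::printf("iter %d: reliable update, |r|^2 = %g\n", it, r2n);
    }
    double beta = r2n / r2;
    r2 = r2n;
    for (int k = 0; k < N; k++) p[k] = r[k] + beta * p[k];
  }
  std::printf("final |r|^2 = %g\n", r2);
  return 0;
}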
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +using namespace std; +using namespace Grid; + ; + +int main (int argc, char** argv) +{ + Grid_init(&argc, &argv); + + Coordinate latt_size = GridDefaultLatt(); + Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + Coordinate mpi_layout = GridDefaultMpi(); + + const int Ls = 8; + + GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi()); + GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid); + GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid); + + // Want a different conf at every run + // First create an instance of an engine. + std::random_device rnd_device; + // Specify the engine and distribution. + std::mt19937 mersenne_engine(rnd_device()); + std::uniform_int_distribution dist(1, 100); + + auto gen = std::bind(dist, mersenne_engine); + std::vector seeds4(4); + generate(begin(seeds4), end(seeds4), gen); + + //std::vector seeds4({1,2,3,5}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + int threads = GridThread::GetThreads(); + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + + LatticeFermion phi (FGrid); gaussian(RNG5, phi); + LatticeFermion Mphi (FGrid); + LatticeFermion MphiPrime (FGrid); + + LatticeGaugeField U(UGrid); + SU::HotConfiguration(RNG4,U); + + //////////////////////////////////// + // Unmodified matrix element + //////////////////////////////////// + RealD b = 2.5; + RealD c = 1.5; + RealD mf = 0.01; + RealD mb = 1.0; + RealD M5 = 1.8; + MobiusEOFAFermionD Lop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, 0.0, -1, M5, b, c); + MobiusEOFAFermionD Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5, b, c); + OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-10, 12); + ConjugateGradient CG(1.0e-10, 5000); + ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, CG, CG, CG, CG, Params, false); + + GridSerialRNG sRNG; sRNG.SeedFixedIntegers(seeds4); + + + //Random field + LatticeFermion eta(FGrid); + gaussian(RNG5,eta); + + //Check left inverse + LatticeFermion Meta(FGrid); + Meofa.Meofa(U, eta, Meta); + + LatticeFermion MinvMeta(FGrid); + Meofa.MeofaInv(U, Meta, MinvMeta); + + LatticeFermion diff = MinvMeta - eta; + + std::cout << GridLogMessage << "eta: " << norm2(eta) << " M*eta: " << norm2(Meta) << " M^{-1}*M*eta: " << norm2(MinvMeta) << " M^{-1}*M*eta - eta: " << norm2(diff) << " (expect 0)" << std::endl; + assert(norm2(diff) < 1e-8); + + //Check right inverse + LatticeFermion MinvEta(FGrid); + Meofa.MeofaInv(U, eta, MinvEta); + + LatticeFermion MMinvEta(FGrid); + Meofa.Meofa(U, MinvEta, MMinvEta); + + diff = MMinvEta - eta; + + 
std::cout << GridLogMessage << "eta: " << norm2(eta) << " M^{-1}*eta: " << norm2(MinvEta) << " M*M^{-1}*eta: " << norm2(MMinvEta) << " M*M^{-1}*eta - eta: " << norm2(diff) << " (expect 0)" << std::endl; + assert(norm2(diff) < 1e-8); + + std::cout << GridLogMessage << "Done" << std::endl; + Grid_finalize(); +} diff --git a/tests/solver/Test_hw_multigrid.cc b/tests/solver/Test_hw_multigrid.cc index 66c88883..fd30bca7 100644 --- a/tests/solver/Test_hw_multigrid.cc +++ b/tests/solver/Test_hw_multigrid.cc @@ -292,9 +292,9 @@ int main (int argc, char ** argv) std::cout< Subspace; typedef CoarsenedMatrix CoarseOperator; @@ -304,7 +304,7 @@ int main (int argc, char ** argv) std::cout< SubspaceOp(Dw); + MdagMLinearOperator SubspaceOp(Dw); Subspace Aggregates4D(Coarse4d,UGrid,0); Subspace Aggregates5D(Coarse5d,FGrid,0); @@ -335,7 +335,7 @@ int main (int argc, char ** argv) std::cout< Level1Op; - NonHermitianLinearOperator LinOpDwf(Ddwf); + NonHermitianLinearOperator LinOpDwf(Ddwf); Level1Op LDOp (*Coarse5d,*Coarse5dRB,0); diff --git a/tests/solver/Test_hw_multigrid_mixed_48.cc b/tests/solver/Test_hw_multigrid_mixed_48.cc index 0e8d6a17..3a31ddbe 100644 --- a/tests/solver/Test_hw_multigrid_mixed_48.cc +++ b/tests/solver/Test_hw_multigrid_mixed_48.cc @@ -395,7 +395,7 @@ public: Geometry geom; GridBase *Coarse5D; GridBase *Coarse4D; - CartesianStencil Stencil; + CartesianStencil Stencil; CoarsenedMatrix &Dw; GridBase * Grid(void) { return Coarse5D; }; // this is all the linalg routines need to know @@ -409,7 +409,7 @@ public: Coarse5D(&CoarseGrid5), Dw(_Dw), geom(CoarseGrid5._ndimension), - Stencil( &CoarseGrid5,geom.npoint,Even,geom.directions,geom.displacements,0) + Stencil( &CoarseGrid5,geom.npoint,Even,geom.directions,geom.displacements,DefaultImplParams()) { }; @@ -981,9 +981,9 @@ int main (int argc, char ** argv) RealD mass=0.00078; - WilsonFermionR Dw(Umu,*UGrid,*UrbGrid,-M5); - DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); - DomainWallFermionR Dpv (Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,1.0,M5); + WilsonFermionD Dw(Umu,*UGrid,*UrbGrid,-M5); + DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + DomainWallFermionD Dpv (Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,1.0,M5); typedef Aggregation Subspace; typedef CoarsenedMatrix CoarseOperator; @@ -994,21 +994,21 @@ int main (int argc, char ** argv) std::cout< MdagM_Dw(Dw_null); + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.75); // 600 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.80); // 800 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.82); // 1023 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.85); // 1428 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.87); // 1900 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.90); // 3900 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.92); // 6200 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.94); // 8882 iters + WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.95); // 9170 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.96); // 8882 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.97); // 8406 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.99); // 6900 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-1.01); // 6397 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-1.00); // 5900 iters + MdagMLinearOperator MdagM_Dw(Dw_null); std::cout< Level1Op4; typedef CoarseCayleyFermion Level1Op5; Level1Op4 c_Dw (*Coarse4d,0); - NonHermitianLinearOperator LinOpDw(Dw); + 
NonHermitianLinearOperator LinOpDw(Dw); c_Dw.CoarsenOperator(UGrid,LinOpDw,Aggregates4D); // contains the M5 from Dw(-M5) // c_Dw.Test(Aggregates4D,UGrid,LinOpDw); @@ -1127,8 +1127,8 @@ int main (int argc, char ** argv) ConjugateGradient CoarseCG(tol,MaxIt); ConjugateGradient FineCG(tol,MaxIt); - NonHermitianLinearOperator FineM(Ddwf); - MdagMLinearOperator FineMdagM(Ddwf); // M^\dag M + NonHermitianLinearOperator FineM(Ddwf); + MdagMLinearOperator FineMdagM(Ddwf); // M^\dag M NonHermitianLinearOperator CoarseM(c_Dwf); MdagMLinearOperator CoarseMdagM(c_Dwf); @@ -1233,39 +1233,39 @@ typedef HDCRPreconditioner,nbasisc,NormalEquations // Wrap the 2nd level solver in a MultiGrid preconditioner acting on the fine space - // ChebyshevSmoother FineSmoother1(0.5,60.0,14,FineM,Ddwf); // 26 iter, 39s - // ChebyshevSmoother FineSmoother2(0.5,60.0,14,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(0.5,60.0,14,FineM,Ddwf); // 26 iter, 39s + // ChebyshevSmoother FineSmoother2(0.5,60.0,14,FineM,Ddwf); - // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // 25 iter, 38s - // ChebyshevSmoother FineSmoother2(0.5,60.0,16,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // 25 iter, 38s + // ChebyshevSmoother FineSmoother2(0.5,60.0,16,FineM,Ddwf); - // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // 23 iter, 39s - // ChebyshevSmoother FineSmoother2(0.5,60.0,20,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // 23 iter, 39s + // ChebyshevSmoother FineSmoother2(0.5,60.0,20,FineM,Ddwf); - // ChebyshevSmoother FineSmoother1(0.5,60.0,10,FineM,Ddwf);24 iter, 44s - // ChebyshevSmoother FineSmoother2(0.5,60.0,24,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(0.5,60.0,10,FineM,Ddwf);24 iter, 44s + // ChebyshevSmoother FineSmoother2(0.5,60.0,24,FineM,Ddwf); - // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // odd convergence tail at 10^-9 ish - // ChebyshevSmoother FineSmoother2(0.1,60.0,24,FineM,Ddwf); // 33 iter, waas O(10-9 by 26) + // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // odd convergence tail at 10^-9 ish + // ChebyshevSmoother FineSmoother2(0.1,60.0,24,FineM,Ddwf); // 33 iter, waas O(10-9 by 26) - // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // 25 iter, 39s - // ChebyshevSmoother FineSmoother2(0.5,60.0,18,FineM,Ddwf); // + // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // 25 iter, 39s + // ChebyshevSmoother FineSmoother2(0.5,60.0,18,FineM,Ddwf); // - ChebyshevSmoother FineSmoother1(0.5,60.0,16,FineM,Ddwf); - ChebyshevSmoother FineSmoother2(0.5,60.0,16,FineM,Ddwf); // + ChebyshevSmoother FineSmoother1(0.5,60.0,16,FineM,Ddwf); + ChebyshevSmoother FineSmoother2(0.5,60.0,16,FineM,Ddwf); // - // ChebyshevSmoother FineSmoother1(0.5,60.0,11,FineM,Ddwf); // 33 iter, 49s - // ChebyshevSmoother FineSmoother2(0.5,60.0,11,FineM,Ddwf); - // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // 26 iter, 37s - // ChebyshevSmoother FineSmoother2(0.5,60.0,12,FineM,Ddwf); - // ChebyshevSmoother FineSmoother1(0.4,60.0,12,FineM,Ddwf); // iter 26 no change in final residual - // ChebyshevSmoother FineSmoother2(0.4,60.0,12,FineM,Ddwf); - // ChebyshevSmoother FineSmoother1(0.3,60.0,12,FineM,Ddwf); // 27 iter 39s. 
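// Aside (scalar toy, not Grid code): the commented-out lines in this region
// record a scan over ChebyshevSmoother(lo, hi, order, ...) settings with
// their observed iteration counts and timings. Underneath sits the
// Chebyshev three-term recurrence: map an eigenvalue lambda in [lo,hi] to
// y in [-1,1] and use T_0 = 1, T_1 = y, T_{k+1} = 2 y T_k - T_{k-1}.
// Inside the window |T_k(y)| <= 1, while outside it grows like
// cosh(k*acosh|y|) -- the property the polynomial smoother construction
// exploits:
#include <cstdio>

int main() {
  const double lo = 0.5, hi = 60.0;   // window, as in the scans here
  const int order = 12;
  for (double lambda : {0.1, 0.5, 5.0, 30.0, 60.0}) {
    double y  = (2.0 * lambda - (hi + lo)) / (hi - lo);
    double Tm = 1.0, T = y;           // T_0, T_1
    for (int k = 1; k < order; k++) {
      double Tp = 2.0 * y * T - Tm;   // T_{k+1}
      Tm = T; T = Tp;
    }
    std::printf("lambda=%5.2f  T_%d(y)=%g\n", lambda, order, T);
  }
  return 0;
}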
- // ChebyshevSmoother FineSmoother2(0.3,60.0,12,FineM,Ddwf); - // ChebyshevSmoother FineSmoother1(0.3,60.0,13,FineM,Ddwf); // 26 iter, but slower - // ChebyshevSmoother FineSmoother2(0.3,60.0,13,FineM,Ddwf); - // ChebyshevSmoother FineSmoother1(1.0,60.0,12,FineM,Ddwf); // 34 iter, slower - // ChebyshevSmoother FineSmoother2(1.0,60.0,12,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(0.5,60.0,11,FineM,Ddwf); // 33 iter, 49s + // ChebyshevSmoother FineSmoother2(0.5,60.0,11,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // 26 iter, 37s + // ChebyshevSmoother FineSmoother2(0.5,60.0,12,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(0.4,60.0,12,FineM,Ddwf); // iter 26 no change in final residual + // ChebyshevSmoother FineSmoother2(0.4,60.0,12,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(0.3,60.0,12,FineM,Ddwf); // 27 iter 39s. + // ChebyshevSmoother FineSmoother2(0.3,60.0,12,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(0.3,60.0,13,FineM,Ddwf); // 26 iter, but slower + // ChebyshevSmoother FineSmoother2(0.3,60.0,13,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(1.0,60.0,12,FineM,Ddwf); // 34 iter, slower + // ChebyshevSmoother FineSmoother2(1.0,60.0,12,FineM,Ddwf); ThreeLevelMG ThreeLevelPrecon(Aggregates4D, FineM, diff --git a/tests/solver/Test_hw_multigrid_mixed_48_rb.cc b/tests/solver/Test_hw_multigrid_mixed_48_rb.cc index e7ceb022..0f18893e 100644 --- a/tests/solver/Test_hw_multigrid_mixed_48_rb.cc +++ b/tests/solver/Test_hw_multigrid_mixed_48_rb.cc @@ -395,7 +395,7 @@ public: Geometry geom; GridBase *Coarse5D; GridBase *Coarse4D; - CartesianStencil Stencil; + CartesianStencil Stencil; CoarsenedMatrix &Dw; GridBase * Grid(void) { return Coarse5D; }; // this is all the linalg routines need to know @@ -409,7 +409,7 @@ public: Coarse5D(&CoarseGrid5), Dw(_Dw), geom(CoarseGrid5._ndimension), - Stencil( &CoarseGrid5,geom.npoint,Even,geom.directions,geom.displacements,0) + Stencil( &CoarseGrid5,geom.npoint,Even,geom.directions,geom.displacements,DefaultImplParams()) { }; @@ -1005,9 +1005,9 @@ int main (int argc, char ** argv) RealD mass=0.00078; - WilsonFermionR Dw(Umu,*UGrid,*UrbGrid,-M5); - DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); - DomainWallFermionR Dpv (Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,1.0,M5); + WilsonFermionD Dw(Umu,*UGrid,*UrbGrid,-M5); + DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + DomainWallFermionD Dpv (Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,1.0,M5); typedef Aggregation Subspace; typedef CoarsenedMatrix CoarseOperator; @@ -1018,21 +1018,21 @@ int main (int argc, char ** argv) std::cout< MdagM_Dw(Dw_null); + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.75); // 600 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.80); // 800 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.82); // 1023 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.85); // 1428 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.87); // 1900 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.90); // 3900 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.92); // 6200 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.94); // 8882 iters + WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.95); // 9170 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.96); // 8882 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.97); // 8406 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-0.99); // 6900 iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-1.01); // 6397 
iters + // WilsonFermionD Dw_null(Umu,*UGrid,*UrbGrid,-1.00); // 5900 iters + MdagMLinearOperator MdagM_Dw(Dw_null); std::cout< Level1Op4; typedef CoarseCayleyFermion Level1Op5; Level1Op4 c_Dw (*Coarse4d,0); - NonHermitianLinearOperator LinOpDw(Dw); + NonHermitianLinearOperator LinOpDw(Dw); c_Dw.CoarsenOperator(UGrid,LinOpDw,Aggregates4D); // contains the M5 from Dw(-M5) // c_Dw.Test(Aggregates4D,UGrid,LinOpDw); @@ -1148,8 +1148,8 @@ int main (int argc, char ** argv) ConjugateGradient CoarseCG(tol,MaxIt); ConjugateGradient FineCG(tol,MaxIt); - NonHermitianLinearOperator FineM(Ddwf); - MdagMLinearOperator FineMdagM(Ddwf); // M^\dag M + NonHermitianLinearOperator FineM(Ddwf); + MdagMLinearOperator FineMdagM(Ddwf); // M^\dag M NonHermitianLinearOperator CoarseM(c_Dwf); MdagMLinearOperator CoarseMdagM(c_Dwf); @@ -1272,38 +1272,38 @@ typedef HDCRPreconditioner,nbasisc,LinearFunction< // Wrap the 2nd level solver in a MultiGrid preconditioner acting on the fine space - // ChebyshevSmoother FineSmoother1(0.5,60.0,14,FineM,Ddwf); // 26 iter, 39s - // ChebyshevSmoother FineSmoother2(0.5,60.0,14,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(0.5,60.0,14,FineM,Ddwf); // 26 iter, 39s + // ChebyshevSmoother FineSmoother2(0.5,60.0,14,FineM,Ddwf); - // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // 25 iter, 38s - // ChebyshevSmoother FineSmoother2(0.5,60.0,16,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // 25 iter, 38s + // ChebyshevSmoother FineSmoother2(0.5,60.0,16,FineM,Ddwf); - // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // 23 iter, 39s - // ChebyshevSmoother FineSmoother2(0.5,60.0,20,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // 23 iter, 39s + // ChebyshevSmoother FineSmoother2(0.5,60.0,20,FineM,Ddwf); - // ChebyshevSmoother FineSmoother1(0.5,60.0,10,FineM,Ddwf);24 iter, 44s - // ChebyshevSmoother FineSmoother2(0.5,60.0,24,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(0.5,60.0,10,FineM,Ddwf);24 iter, 44s + // ChebyshevSmoother FineSmoother2(0.5,60.0,24,FineM,Ddwf); - // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // odd convergence tail at 10^-9 ish - // ChebyshevSmoother FineSmoother2(0.1,60.0,24,FineM,Ddwf); // 33 iter, waas O(10-9 by 26) + // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // odd convergence tail at 10^-9 ish + // ChebyshevSmoother FineSmoother2(0.1,60.0,24,FineM,Ddwf); // 33 iter, waas O(10-9 by 26) - // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // 25 iter, 39s - // ChebyshevSmoother FineSmoother2(0.5,60.0,18,FineM,Ddwf); // + // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // 25 iter, 39s + // ChebyshevSmoother FineSmoother2(0.5,60.0,18,FineM,Ddwf); // - ChebyshevSmoother FineSmoother(f_lo,f_hi,f_ord,FineM,Ddwf); + ChebyshevSmoother FineSmoother(f_lo,f_hi,f_ord,FineM,Ddwf); - // ChebyshevSmoother FineSmoother1(0.5,60.0,11,FineM,Ddwf); // 33 iter, 49s - // ChebyshevSmoother FineSmoother2(0.5,60.0,11,FineM,Ddwf); - // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // 26 iter, 37s - // ChebyshevSmoother FineSmoother2(0.5,60.0,12,FineM,Ddwf); - // ChebyshevSmoother FineSmoother1(0.4,60.0,12,FineM,Ddwf); // iter 26 no change in final residual - // ChebyshevSmoother FineSmoother2(0.4,60.0,12,FineM,Ddwf); - // ChebyshevSmoother FineSmoother1(0.3,60.0,12,FineM,Ddwf); // 27 iter 39s. 
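// Aside (plain C++, illustration only): these mixed-precision multigrid
// tests move fields between double and single with precisionChange(...).
// The single-precision accumulation floor that motivates the periodic
// double-precision corrections is easy to demonstrate: a float accumulator
// stalls once the increment drops below half an ulp of the running sum.
#include <cstdio>

int main() {
  float  sf = 0.0f;
  double sd = 0.0;
  for (int i = 0; i < 10000000; i++) { sf += 1.0e-7f; sd += 1.0e-7; }
  std::printf("float  sum = %.9g   (exact answer 1)\n", sf);
  std::printf("double sum = %.17g\n", sd);
  return 0;
}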
- // ChebyshevSmoother FineSmoother2(0.3,60.0,12,FineM,Ddwf); - // ChebyshevSmoother FineSmoother1(0.3,60.0,13,FineM,Ddwf); // 26 iter, but slower - // ChebyshevSmoother FineSmoother2(0.3,60.0,13,FineM,Ddwf); - // ChebyshevSmoother FineSmoother1(1.0,60.0,12,FineM,Ddwf); // 34 iter, slower - // ChebyshevSmoother FineSmoother2(1.0,60.0,12,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(0.5,60.0,11,FineM,Ddwf); // 33 iter, 49s + // ChebyshevSmoother FineSmoother2(0.5,60.0,11,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(0.5,60.0,12,FineM,Ddwf); // 26 iter, 37s + // ChebyshevSmoother FineSmoother2(0.5,60.0,12,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(0.4,60.0,12,FineM,Ddwf); // iter 26 no change in final residual + // ChebyshevSmoother FineSmoother2(0.4,60.0,12,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(0.3,60.0,12,FineM,Ddwf); // 27 iter 39s. + // ChebyshevSmoother FineSmoother2(0.3,60.0,12,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(0.3,60.0,13,FineM,Ddwf); // 26 iter, but slower + // ChebyshevSmoother FineSmoother2(0.3,60.0,13,FineM,Ddwf); + // ChebyshevSmoother FineSmoother1(1.0,60.0,12,FineM,Ddwf); // 34 iter, slower + // ChebyshevSmoother FineSmoother2(1.0,60.0,12,FineM,Ddwf); ThreeLevelMG ThreeLevelPrecon(Aggregates4D, FineM, diff --git a/tests/solver/Test_mobius_bcg.cc b/tests/solver/Test_mobius_bcg.cc index 8092d61c..a54b4a05 100644 --- a/tests/solver/Test_mobius_bcg.cc +++ b/tests/solver/Test_mobius_bcg.cc @@ -33,9 +33,9 @@ using namespace Grid; int main (int argc, char ** argv) { - typedef typename MobiusFermionR::FermionField FermionField; - typedef typename MobiusFermionR::ComplexField ComplexField; - typename MobiusFermionR::ImplParams params; + typedef typename MobiusFermionD::FermionField FermionField; + typedef typename MobiusFermionD::ComplexField ComplexField; + typename MobiusFermionD::ImplParams params; const int Ls=12; @@ -158,15 +158,15 @@ int main (int argc, char ** argv) RealD mobius_factor=32./12.; RealD mobius_b=0.5*(mobius_factor+1.); RealD mobius_c=0.5*(mobius_factor-1.); - MobiusFermionR Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5,mobius_b,mobius_c,params); - MobiusFermionR Ddwf(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5,mobius_b,mobius_c,params); + MobiusFermionD Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5,mobius_b,mobius_c,params); + MobiusFermionD Ddwf(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5,mobius_b,mobius_c,params); std::cout << GridLogMessage << "****************************************************************** "< HermOp(Ddwf); - MdagMLinearOperator HermOpCk(Dchk); + MdagMLinearOperator HermOp(Ddwf); + MdagMLinearOperator HermOpCk(Dchk); ConjugateGradient CG((stp),100000); s_res = Zero(); diff --git a/tests/solver/Test_mobius_bcg_nosplit.cc b/tests/solver/Test_mobius_bcg_nosplit.cc index de02b1e3..f33a40ea 100644 --- a/tests/solver/Test_mobius_bcg_nosplit.cc +++ b/tests/solver/Test_mobius_bcg_nosplit.cc @@ -35,9 +35,9 @@ using namespace Grid; int main (int argc, char ** argv) { - typedef typename DomainWallFermionR::FermionField FermionField; - typedef typename DomainWallFermionR::ComplexField ComplexField; - typename DomainWallFermionR::ImplParams params; + typedef typename DomainWallFermionD::FermionField FermionField; + typedef typename DomainWallFermionD::ComplexField ComplexField; + typename DomainWallFermionD::ImplParams params; const int Ls=16; @@ -106,13 +106,13 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << " Building the solvers"< HermOp(Ddwf); + MdagMLinearOperator HermOp(Ddwf); 
ConjugateGradient CG((stp),100000); for(int rhs=0;rhs<1;rhs++){ diff --git a/tests/solver/Test_mobius_bcg_phys_nosplit.cc b/tests/solver/Test_mobius_bcg_phys_nosplit.cc index 2fe573ce..76a6f7e1 100644 --- a/tests/solver/Test_mobius_bcg_phys_nosplit.cc +++ b/tests/solver/Test_mobius_bcg_phys_nosplit.cc @@ -35,9 +35,9 @@ using namespace Grid; int main (int argc, char ** argv) { - typedef typename DomainWallFermionR::FermionField FermionField; - typedef typename DomainWallFermionR::ComplexField ComplexField; - typename DomainWallFermionR::ImplParams params; + typedef typename DomainWallFermionD::FermionField FermionField; + typedef typename DomainWallFermionD::ComplexField ComplexField; + typename DomainWallFermionD::ImplParams params; const int Ls=16; @@ -107,7 +107,7 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << " Building the solvers"< HermOp(Ddwf); + MdagMLinearOperator HermOp(Ddwf); ConjugateGradient CG((stp),100000); for(int rhs=0;rhs<1;rhs++){ diff --git a/tests/solver/Test_mobius_bcg_prec_nosplit.cc b/tests/solver/Test_mobius_bcg_prec_nosplit.cc index 3ac0d42b..f793893c 100644 --- a/tests/solver/Test_mobius_bcg_prec_nosplit.cc +++ b/tests/solver/Test_mobius_bcg_prec_nosplit.cc @@ -35,9 +35,9 @@ using namespace Grid; int main (int argc, char ** argv) { - typedef typename DomainWallFermionR::FermionField FermionField; - typedef typename DomainWallFermionR::ComplexField ComplexField; - typename DomainWallFermionR::ImplParams params; + typedef typename DomainWallFermionD::FermionField FermionField; + typedef typename DomainWallFermionD::ComplexField ComplexField; + typename DomainWallFermionD::ImplParams params; const int Ls=16; @@ -106,13 +106,13 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << " Building the solvers"< HermOp(Ddwf); + MdagMLinearOperator HermOp(Ddwf); ConjugateGradient CG((stp),100000); for(int rhs=0;rhs<1;rhs++){ diff --git a/tests/solver/Test_split_grid.cc b/tests/solver/Test_split_grid.cc index 85626c8e..39441c82 100644 --- a/tests/solver/Test_split_grid.cc +++ b/tests/solver/Test_split_grid.cc @@ -34,9 +34,9 @@ using namespace Grid; int main (int argc, char ** argv) { - typedef typename DomainWallFermionR::FermionField FermionField; - typedef typename DomainWallFermionR::ComplexField ComplexField; - typename DomainWallFermionR::ImplParams params; + typedef typename DomainWallFermionD::FermionField FermionField; + typedef typename DomainWallFermionD::ComplexField ComplexField; + typename DomainWallFermionD::ImplParams params; const int Ls=4; @@ -117,15 +117,15 @@ int main (int argc, char ** argv) /////////////////////////////////////////////////////////////// RealD mass=0.01; RealD M5=1.8; - DomainWallFermionR Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5); - DomainWallFermionR Ddwf(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5); + DomainWallFermionD Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5); + DomainWallFermionD Ddwf(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5); std::cout << GridLogMessage << "****************************************************************** "< HermOp(Ddwf); - MdagMLinearOperator HermOpCk(Dchk); + MdagMLinearOperator HermOp(Ddwf); + MdagMLinearOperator HermOpCk(Dchk); ConjugateGradient CG((1.0e-8/(me+1)),10000); s_res = Zero(); CG(HermOp,s_src,s_res); diff --git a/tests/solver/Test_staggered_block_cg_prec.cc b/tests/solver/Test_staggered_block_cg_prec.cc index c5306e85..4cb7801e 100644 --- a/tests/solver/Test_staggered_block_cg_prec.cc +++ b/tests/solver/Test_staggered_block_cg_prec.cc @@ -46,9 
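// Aside (toy dense sketch of the MdagMLinearOperator + ConjugateGradient
// pairing used throughout these tests; none of it is Grid code): M itself
// is not Hermitian, but M^dag M is Hermitian positive definite, so CG can
// solve the normal equations M^dag M x = M^dag b -- for the real-valued
// toy below, M^dag reduces to M^T. Each iteration costs two applications
// of M and squares the condition number relative to M, which is why the
// surrounding tests also explore red-black preconditioning and multigrid.
#include <cstdio>
#include <vector>

using Mat = std::vector<std::vector<double>>;
using Vec = std::vector<double>;

static Vec mul(const Mat& M, const Vec& v) {               // M v
  Vec y(M.size(), 0.0);
  for (size_t i = 0; i < M.size(); i++)
    for (size_t j = 0; j < v.size(); j++) y[i] += M[i][j] * v[j];
  return y;
}
static Vec mulT(const Mat& M, const Vec& v) {              // M^T v
  Vec y(M[0].size(), 0.0);
  for (size_t i = 0; i < M.size(); i++)
    for (size_t j = 0; j < y.size(); j++) y[j] += M[i][j] * v[i];
  return y;
}
static double dot(const Vec& a, const Vec& b) {
  double s = 0.0;
  for (size_t k = 0; k < a.size(); k++) s += a[k] * b[k];
  return s;
}

int main() {
  Mat M = {{4, 1, 0}, {1, 3, 1}, {0, 2, 5}};               // invertible, non-symmetric
  Vec b = {1, 2, 3};
  Vec rhs = mulT(M, b);                                    // M^T b
  Vec x(3, 0.0), r(rhs), p(r);
  double r2 = dot(r, r);
  for (int it = 0; it < 100 && r2 > 1.0e-24; it++) {       // CG on M^T M
    Vec Ap = mulT(M, mul(M, p));
    double alpha = r2 / dot(p, Ap);
    for (int k = 0; k < 3; k++) { x[k] += alpha * p[k]; r[k] -= alpha * Ap[k]; }
    double r2n = dot(r, r);
    double beta = r2n / r2;
    r2 = r2n;
    for (int k = 0; k < 3; k++) p[k] = r[k] + beta * p[k];
  }
  Vec Mx = mul(M, x);
  double res2 = 0.0;
  for (int k = 0; k < 3; k++) res2 += (Mx[k] - b[k]) * (Mx[k] - b[k]);
  std::printf("|Mx - b|^2 = %g\n", res2);                  // ~0
  return 0;
}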
 
 int main (int argc, char ** argv)
 {
-  typedef typename ImprovedStaggeredFermion5DR::FermionField FermionField;
-  typedef typename ImprovedStaggeredFermion5DR::ComplexField ComplexField;
-  typename ImprovedStaggeredFermion5DR::ImplParams params;
+  typedef typename ImprovedStaggeredFermion5DD::FermionField FermionField;
+  typedef typename ImprovedStaggeredFermion5DD::ComplexField ComplexField;
+  typename ImprovedStaggeredFermion5DD::ImplParams params;
 
   const int Ls=8;
 
@@ -98,8 +98,8 @@ int main (int argc, char ** argv)
   RealD c1=9.0/8.0;
   RealD c2=-1.0/24.0;
   RealD u0=1.0;
-  ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,c1,c2,u0);
-  SchurStaggeredOperator<ImprovedStaggeredFermion5DR,FermionField> HermOp(Ds);
+  ImprovedStaggeredFermion5DD Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,c1,c2,u0);
+  SchurStaggeredOperator<ImprovedStaggeredFermion5DD,FermionField> HermOp(Ds);
 
   ConjugateGradient<FermionField> CG(1.0e-8,10000);
   int blockDim = 0;
@@ -111,8 +111,8 @@ int main (int argc, char ** argv)
   std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
-  ImprovedStaggeredFermionR Ds4d(Umu,Umu,*UGrid,*UrbGrid,mass,c1,c2,u0);
-  SchurStaggeredOperator<ImprovedStaggeredFermionR,FermionField> HermOp4d(Ds4d);
+  ImprovedStaggeredFermionD Ds4d(Umu,Umu,*UGrid,*UrbGrid,mass,c1,c2,u0);
+  SchurStaggeredOperator<ImprovedStaggeredFermionD,FermionField> HermOp4d(Ds4d);
   FermionField src4d(UGrid); random(pRNG,src4d);
   FermionField src4d_o(UrbGrid); pickCheckerboard(Odd,src4d_o,src4d);
   FermionField result4d_o(UrbGrid);
@@ -135,7 +135,6 @@ int main (int argc, char ** argv)
   std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
diff --git a/tests/solver/Test_staggered_block_cg_unprec.cc b/tests/solver/Test_staggered_block_cg_unprec.cc
--- a/tests/solver/Test_staggered_block_cg_unprec.cc
+++ b/tests/solver/Test_staggered_block_cg_unprec.cc
@@ ... @@
 #include <Grid/Grid.h>
 
 using namespace std;
 using namespace Grid;
- ;
-
-template<class d>
-struct scal {
-  d internal;
-};
 
 Gamma::Algebra Gmu [] = {
   Gamma::Algebra::GammaX,
@@ -46,9 +40,9 @@ struct scal {
 
 int main (int argc, char ** argv)
 {
-  typedef typename ImprovedStaggeredFermion5DR::FermionField FermionField;
-  typedef typename ImprovedStaggeredFermion5DR::ComplexField ComplexField;
-  typename ImprovedStaggeredFermion5DR::ImplParams params;
+  typedef typename ImprovedStaggeredFermion5DD::FermionField FermionField;
+  typedef typename ImprovedStaggeredFermion5DD::ComplexField ComplexField;
+  typename ImprovedStaggeredFermion5DD::ImplParams params;
 
   const int Ls=8;
 
@@ -83,8 +77,8 @@ int main (int argc, char ** argv)
     volume=volume*latt_size[mu];
   }
 
-  ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,c1,c2,u0);
-  MdagMLinearOperator<ImprovedStaggeredFermion5DR,FermionField> HermOp(Ds);
+  ImprovedStaggeredFermion5DD Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,c1,c2,u0);
+  MdagMLinearOperator<ImprovedStaggeredFermion5DD,FermionField> HermOp(Ds);
 
   ConjugateGradient<FermionField> CG(1.0e-8,10000);
   int blockDim = 0;
@@ -95,8 +89,8 @@ int main (int argc, char ** argv)
   std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
-  ImprovedStaggeredFermionR Ds4d(Umu,Umu,*UGrid,*UrbGrid,mass,c1,c2,u0);
-  MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp4d(Ds4d);
+  ImprovedStaggeredFermionD Ds4d(Umu,Umu,*UGrid,*UrbGrid,mass,c1,c2,u0);
+  MdagMLinearOperator<ImprovedStaggeredFermionD,FermionField> HermOp4d(Ds4d);
   FermionField src4d(UGrid); random(pRNG,src4d);
   FermionField result4d(UGrid); result4d=Zero();
 
@@ -120,7 +114,6 @@ int main (int argc, char ** argv)
   std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
diff --git a/tests/solver/Test_staggered_cagmres_unprec.cc b/tests/solver/Test_staggered_cagmres_unprec.cc
--- a/tests/solver/Test_staggered_cagmres_unprec.cc
+++ b/tests/solver/Test_staggered_cagmres_unprec.cc
@@ ... @@
-  MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
+  MdagMLinearOperator<ImprovedStaggeredFermionD,FermionField> HermOp(Ds);
 
   CommunicationAvoidingGeneralisedMinimalResidual<FermionField> CAGMRES(1.0e-8, 10000, 25);
   CAGMRES(HermOp,src,result);
diff --git a/tests/solver/Test_staggered_cg_prec.cc b/tests/solver/Test_staggered_cg_prec.cc
index 854ef632..bc80da09 100644
--- a/tests/solver/Test_staggered_cg_prec.cc
+++ b/tests/solver/Test_staggered_cg_prec.cc
@@ -47,8 +47,8 @@ struct scal {
 
 int main (int argc, char ** argv)
 {
-  typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
-  typename ImprovedStaggeredFermionR::ImplParams params;
+  typedef typename ImprovedStaggeredFermionD::FermionField FermionField;
+  typename ImprovedStaggeredFermionD::ImplParams params;
 
   Grid_init(&argc,&argv);
 
@@ -74,14 +74,14 @@ int main (int argc, char ** argv)
   RealD c1=9.0/8.0;
   RealD c2=-1.0/24.0;
   RealD u0=1.0;
-  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
+  ImprovedStaggeredFermionD Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
 
   FermionField res_o(&RBGrid);
   FermionField src_o(&RBGrid);
   pickCheckerboard(Odd,src_o,src);
   res_o=Zero();
 
-  SchurStaggeredOperator<ImprovedStaggeredFermionR,FermionField> HermOpEO(Ds);
+  SchurStaggeredOperator<ImprovedStaggeredFermionD,FermionField> HermOpEO(Ds);
   ConjugateGradient<FermionField> CG(1.0e-8,10000);
   double t1=usecond();
   CG(HermOpEO,src_o,res_o);
diff --git a/tests/solver/Test_staggered_cg_schur.cc b/tests/solver/Test_staggered_cg_schur.cc
index d8e5bdd4..5d7d073e 100644
--- a/tests/solver/Test_staggered_cg_schur.cc
+++ b/tests/solver/Test_staggered_cg_schur.cc
@@ -45,8 +45,8 @@ struct scal {
 
 int main (int argc, char ** argv)
 {
-  typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
-  typename ImprovedStaggeredFermionR::ImplParams params;
+  typedef typename ImprovedStaggeredFermionD::FermionField FermionField;
+  typename ImprovedStaggeredFermionD::ImplParams params;
 
   Grid_init(&argc,&argv);
 
   Coordinate latt_size = GridDefaultLatt();
@@ -68,7 +68,7 @@ int main (int argc, char ** argv)
   RealD c1=9.0/8.0;
   RealD c2=-1.0/24.0;
   RealD u0=1.0;
-  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
+  ImprovedStaggeredFermionD Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
 
   ConjugateGradient<FermionField> CG(1.0e-8,10000);
   SchurRedBlackStaggeredSolve<FermionField> SchurSolver(CG);
diff --git a/tests/solver/Test_staggered_cg_unprec.cc b/tests/solver/Test_staggered_cg_unprec.cc
index e023b910..466f1d04 100644
--- a/tests/solver/Test_staggered_cg_unprec.cc
+++ b/tests/solver/Test_staggered_cg_unprec.cc
@@ -47,9 +47,9 @@ struct scal {
 
 int main (int argc, char ** argv)
 {
-  typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
-  typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
-  typename ImprovedStaggeredFermionR::ImplParams params;
+  typedef typename ImprovedStaggeredFermionD::FermionField FermionField;
+  typedef typename ImprovedStaggeredFermionD::ComplexField ComplexField;
+  typename ImprovedStaggeredFermionD::ImplParams params;
 
   Grid_init(&argc,&argv);
 
@@ -76,9 +76,9 @@ int main (int argc, char ** argv)
   RealD c1=9.0/8.0;
   RealD c2=-1.0/24.0;
   RealD u0=1.0;
-  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
+  ImprovedStaggeredFermionD Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
 
-  MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
+  MdagMLinearOperator<ImprovedStaggeredFermionD,FermionField> HermOp(Ds);
   ConjugateGradient<FermionField> CG(1.0e-6,10000);
   CG(HermOp,src,result);
diff --git a/tests/solver/Test_staggered_fcagmres_prec.cc b/tests/solver/Test_staggered_fcagmres_prec.cc
index 692d688e..7587748e 100644
--- a/tests/solver/Test_staggered_fcagmres_prec.cc
+++ b/tests/solver/Test_staggered_fcagmres_prec.cc
@@ -33,9 +33,9 @@ using namespace Grid;
 
 int main (int argc, char ** argv)
 {
-  typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
-  typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
-  typename ImprovedStaggeredFermionR::ImplParams params;
+  typedef typename ImprovedStaggeredFermionD::FermionField FermionField;
+  typedef typename ImprovedStaggeredFermionD::ComplexField ComplexField;
+  typename ImprovedStaggeredFermionD::ImplParams params;
 
   Grid_init(&argc,&argv);
 
@@ -62,9 +62,9 @@ int main (int argc, char ** argv)
   RealD c1=9.0/8.0;
   RealD c2=-1.0/24.0;
   RealD u0=1.0;
-  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
+  ImprovedStaggeredFermionD Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
 
-  MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
+  MdagMLinearOperator<ImprovedStaggeredFermionD,FermionField> HermOp(Ds);
 
   TrivialPrecon<FermionField> simple;
diff --git a/tests/solver/Test_staggered_fgmres_prec.cc b/tests/solver/Test_staggered_fgmres_prec.cc
index fe6da67c..a3c65057 100644
--- a/tests/solver/Test_staggered_fgmres_prec.cc
+++ b/tests/solver/Test_staggered_fgmres_prec.cc
@@ -33,9 +33,9 @@ using namespace Grid;
 
 int main (int argc, char ** argv)
 {
-  typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
-  typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
-  typename ImprovedStaggeredFermionR::ImplParams params;
+  typedef typename ImprovedStaggeredFermionD::FermionField FermionField;
+  typedef typename ImprovedStaggeredFermionD::ComplexField ComplexField;
+  typename ImprovedStaggeredFermionD::ImplParams params;
 
   Grid_init(&argc,&argv);
 
@@ -62,9 +62,9 @@ int main (int argc, char ** argv)
   RealD c1=9.0/8.0;
   RealD c2=-1.0/24.0;
   RealD u0=1.0;
-  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
+  ImprovedStaggeredFermionD Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
 
-  MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
+  MdagMLinearOperator<ImprovedStaggeredFermionD,FermionField> HermOp(Ds);
 
   TrivialPrecon<FermionField> simple;
diff --git a/tests/solver/Test_staggered_gmres_unprec.cc b/tests/solver/Test_staggered_gmres_unprec.cc
index ec9d4608..abfeab75 100644
--- a/tests/solver/Test_staggered_gmres_unprec.cc
+++ b/tests/solver/Test_staggered_gmres_unprec.cc
@@ -33,9 +33,9 @@ using namespace Grid;
 
 int main (int argc, char ** argv)
 {
-  typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
-  typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
-  typename ImprovedStaggeredFermionR::ImplParams params;
+  typedef typename ImprovedStaggeredFermionD::FermionField FermionField;
+  typedef typename ImprovedStaggeredFermionD::ComplexField ComplexField;
+  typename ImprovedStaggeredFermionD::ImplParams params;
 
   Grid_init(&argc,&argv);
 
@@ -62,9 +62,9 @@ int main (int argc, char ** argv)
   RealD c1=9.0/8.0;
   RealD c2=-1.0/24.0;
   RealD u0=1.0;
-  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
+  ImprovedStaggeredFermionD Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
 
-  MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
+  MdagMLinearOperator<ImprovedStaggeredFermionD,FermionField> HermOp(Ds);
 
   GeneralisedMinimalResidual<FermionField> GMRES(1.0e-8, 10000, 25);
   GMRES(HermOp,src,result);
diff --git a/tests/solver/Test_staggered_mr_unprec.cc b/tests/solver/Test_staggered_mr_unprec.cc
index ddbb8de3..1cdd60f9 100644
--- a/tests/solver/Test_staggered_mr_unprec.cc
+++ b/tests/solver/Test_staggered_mr_unprec.cc
@@ -33,9 +33,9 @@ using namespace Grid;
 
 int main (int argc, char ** argv)
 {
-  typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
-  typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
-  typename ImprovedStaggeredFermionR::ImplParams params;
+  typedef typename ImprovedStaggeredFermionD::FermionField FermionField;
+  typedef typename ImprovedStaggeredFermionD::ComplexField ComplexField;
+  typename ImprovedStaggeredFermionD::ImplParams params;
 
   Grid_init(&argc,&argv);
 
@@ -62,9 +62,9 @@ int main (int argc, char ** argv)
   RealD c1=9.0/8.0;
   RealD c2=-1.0/24.0;
   RealD u0=1.0;
-  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
+  ImprovedStaggeredFermionD Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
 
-  MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
+  MdagMLinearOperator<ImprovedStaggeredFermionD,FermionField> HermOp(Ds);
 
   MinimalResidual<FermionField> MR(1.0e-8,10000,0.8);
   MR(HermOp,src,result);
diff --git a/tests/solver/Test_staggered_multishift.cc b/tests/solver/Test_staggered_multishift.cc
index 856f0b87..9f6b37d6 100644
--- a/tests/solver/Test_staggered_multishift.cc
+++ b/tests/solver/Test_staggered_multishift.cc
@@ -46,8 +46,8 @@ struct scal {
 
 int main (int argc, char ** argv)
 {
-  typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
-  typename ImprovedStaggeredFermionR::ImplParams params;
+  typedef typename ImprovedStaggeredFermionD::FermionField FermionField;
+  typename ImprovedStaggeredFermionD::ImplParams params;
 
   Grid_init(&argc,&argv);
 
@@ -90,8 +90,8 @@ int main (int argc, char ** argv)
   RealD c2=-1.0/24.0;
   RealD u0=1.0;
 
-  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
-  SchurStaggeredOperator<ImprovedStaggeredFermionR,FermionField> HermOpEO(Ds);
+  ImprovedStaggeredFermionD Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
+  SchurStaggeredOperator<ImprovedStaggeredFermionD,FermionField> HermOpEO(Ds);
 
   FermionField src(&Grid); random(pRNG,src);
   FermionField src_o(&RBGrid);
diff --git a/tests/solver/Test_wilson_cagmres_unprec.cc b/tests/solver/Test_wilson_cagmres_unprec.cc
index 226d0719..80381a27 100644
--- a/tests/solver/Test_wilson_cagmres_unprec.cc
+++ b/tests/solver/Test_wilson_cagmres_unprec.cc
@@ -55,9 +55,9 @@ int main (int argc, char ** argv)
   }
 
   RealD mass=0.5;
-  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermionD Dw(Umu,Grid,RBGrid,mass);
 
-  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
+  MdagMLinearOperator<WilsonFermionD,LatticeFermion> HermOp(Dw);
 
   CommunicationAvoidingGeneralisedMinimalResidual<LatticeFermion> CAGMRES(1.0e-8, 10000, 25);
   CAGMRES(HermOp,src,result);
diff --git a/tests/solver/Test_wilson_cg_prec.cc b/tests/solver/Test_wilson_cg_prec.cc
index a28e014e..cb480e8c 100644
--- a/tests/solver/Test_wilson_cg_prec.cc
+++ b/tests/solver/Test_wilson_cg_prec.cc
@@ -69,7 +69,7 @@ int main (int argc, char ** argv)
   }
 
   RealD mass=0.5;
-  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermionD Dw(Umu,Grid,RBGrid,mass);
 
   //  HermitianOperator HermOp(Dw);
   //  ConjugateGradient CG(1.0e-8,10000);
@@ -80,7 +80,7 @@ int main (int argc, char ** argv)
   pickCheckerboard(Odd,src_o,src);
   result_o=Zero();
 
-  SchurDiagMooeeOperator<WilsonFermionR,LatticeFermion> HermOpEO(Dw);
+  SchurDiagMooeeOperator<WilsonFermionD,LatticeFermion> HermOpEO(Dw);
   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
   CG(HermOpEO,src_o,result_o);
diff --git a/tests/solver/Test_wilson_cg_schur.cc b/tests/solver/Test_wilson_cg_schur.cc
index 97482131..601eb6b2 100644
--- a/tests/solver/Test_wilson_cg_schur.cc
+++ b/tests/solver/Test_wilson_cg_schur.cc
@@ -64,7 +64,7 @@ int main (int argc, char ** argv)
   LatticeFermion resid(&Grid);
 
   RealD mass=0.5;
-  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermionD Dw(Umu,Grid,RBGrid,mass);
 
   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
   SchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CG);
diff --git a/tests/solver/Test_wilson_cg_unprec.cc b/tests/solver/Test_wilson_cg_unprec.cc
index 07f6ba7b..f1ecebd3 100644
--- a/tests/solver/Test_wilson_cg_unprec.cc
+++ b/tests/solver/Test_wilson_cg_unprec.cc
@@ -68,9 +68,9 @@ int main (int argc, char ** argv)
   }
 
   RealD mass=0.5;
-  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermionD Dw(Umu,Grid,RBGrid,mass);
 
-  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
+  MdagMLinearOperator<WilsonFermionD,LatticeFermion> HermOp(Dw);
   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
   CG(HermOp,src,result);
diff --git a/tests/solver/Test_wilson_cr_unprec.cc b/tests/solver/Test_wilson_cr_unprec.cc
index 67510a23..a8b49afd 100644
--- a/tests/solver/Test_wilson_cr_unprec.cc
+++ b/tests/solver/Test_wilson_cr_unprec.cc
@@ -70,9 +70,9 @@ int main (int argc, char ** argv)
   }
 
   RealD mass=0.5;
-  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermionD Dw(Umu,Grid,RBGrid,mass);
 
-  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
+  MdagMLinearOperator<WilsonFermionD,LatticeFermion> HermOp(Dw);
 
   ConjugateResidual<LatticeFermion> MCR(1.0e-8,10000);
diff --git a/tests/solver/Test_wilson_fcagmres_prec.cc b/tests/solver/Test_wilson_fcagmres_prec.cc
index d2a1acf4..66f9f518 100644
--- a/tests/solver/Test_wilson_fcagmres_prec.cc
+++ b/tests/solver/Test_wilson_fcagmres_prec.cc
@@ -55,9 +55,9 @@ int main (int argc, char ** argv)
   }
 
   RealD mass=0.5;
-  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermionD Dw(Umu,Grid,RBGrid,mass);
 
-  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
+  MdagMLinearOperator<WilsonFermionD,LatticeFermion> HermOp(Dw);
 
   TrivialPrecon<LatticeFermion> simple;
diff --git a/tests/solver/Test_wilson_fgmres_prec.cc b/tests/solver/Test_wilson_fgmres_prec.cc
index 02d8f9f2..61368636 100644
--- a/tests/solver/Test_wilson_fgmres_prec.cc
+++ b/tests/solver/Test_wilson_fgmres_prec.cc
@@ -55,9 +55,9 @@ int main (int argc, char ** argv)
   }
 
   RealD mass=0.5;
-  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermionD Dw(Umu,Grid,RBGrid,mass);
 
-  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
+  MdagMLinearOperator<WilsonFermionD,LatticeFermion> HermOp(Dw);
 
   TrivialPrecon<LatticeFermion> simple;
diff --git a/tests/solver/Test_wilson_gmres_unprec.cc b/tests/solver/Test_wilson_gmres_unprec.cc
index e52c047f..5f2728ce 100644
--- a/tests/solver/Test_wilson_gmres_unprec.cc
+++ b/tests/solver/Test_wilson_gmres_unprec.cc
@@ -55,9 +55,9 @@ int main (int argc, char ** argv)
   }
 
   RealD mass=0.5;
-  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermionD Dw(Umu,Grid,RBGrid,mass);
 
-  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
+  MdagMLinearOperator<WilsonFermionD,LatticeFermion> HermOp(Dw);
 
   GeneralisedMinimalResidual<LatticeFermion> GMRES(1.0e-8, 10000, 25);
   GMRES(HermOp,src,result);
diff --git a/tests/solver/Test_wilson_mg.cc b/tests/solver/Test_wilson_mg.cc
index 99a16e19..875bf32a 100644
--- a/tests/solver/Test_wilson_mg.cc
+++ b/tests/solver/Test_wilson_mg.cc
@@ -77,16 +77,16 @@ int main(int argc, char **argv) {
   // Note: We do chiral doubling, so actually only nbasis/2 full basis vectors are used
   const int nbasis = 40;
 
-  WilsonFermionR Dw(Umu, *FGrid, *FrbGrid, mass);
+  WilsonFermionD Dw(Umu, *FGrid, *FrbGrid, mass);
 
-  MdagMLinearOperator<WilsonFermionR,LatticeFermion> MdagMOpDw(Dw);
+  MdagMLinearOperator<WilsonFermionD,LatticeFermion> MdagMOpDw(Dw);
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Testing Multigrid for Wilson" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
   TrivialPrecon<LatticeFermion> TrivialPrecon;
-  auto MGPreconDw = createMGInstance<vSpinColourVector, vTComplex, nbasis, WilsonFermionR>(mgParams, levelInfo, Dw, Dw);
+  auto MGPreconDw = createMGInstance<vSpinColourVector, vTComplex, nbasis, WilsonFermionD>(mgParams, levelInfo, Dw, Dw);
 
   MGPreconDw->setup();
diff --git a/tests/solver/Test_wilson_mr_unprec.cc b/tests/solver/Test_wilson_mr_unprec.cc
index fef83794..c71392e4 100644
--- a/tests/solver/Test_wilson_mr_unprec.cc
+++ b/tests/solver/Test_wilson_mr_unprec.cc
@@ -55,9 +55,9 @@ int main (int argc, char ** argv)
   }
 
   RealD mass=0.5;
-  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermionD Dw(Umu,Grid,RBGrid,mass);
 
-  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
+  MdagMLinearOperator<WilsonFermionD,LatticeFermion> HermOp(Dw);
 
   MinimalResidual<LatticeFermion> MR(1.0e-8,10000,0.8);
   MR(HermOp,src,result);
diff --git a/tests/solver/Test_wilson_qmr_unprec.cc b/tests/solver/Test_wilson_qmr_unprec.cc
index c0b42a28..0cd132e4 100644
--- a/tests/solver/Test_wilson_qmr_unprec.cc
+++ b/tests/solver/Test_wilson_qmr_unprec.cc
@@ -56,9 +56,9 @@ int main (int argc, char ** argv)
   QuasiMinimalResidual<LatticeFermion> QMR(1.0e-8,10000);
 
   RealD mass=0.0;
-  WilsonFermionR Dw(Umu,*Grid,*rbGrid,mass);
+  WilsonFermionD Dw(Umu,*Grid,*rbGrid,mass);
 
-  NonHermitianLinearOperator<WilsonFermionR,LatticeFermion> NonHermOp(Dw);
+  NonHermitianLinearOperator<WilsonFermionD,LatticeFermion> NonHermOp(Dw);
 
   QMR(NonHermOp,src,result);
 
   Grid_finalize();
diff --git a/tests/solver/Test_wilsonclover_bicgstab_prec.cc b/tests/solver/Test_wilsonclover_bicgstab_prec.cc
index b382b1bb..d265e687 100644
--- a/tests/solver/Test_wilsonclover_bicgstab_prec.cc
+++ b/tests/solver/Test_wilsonclover_bicgstab_prec.cc
@@ -70,14 +70,14 @@ int main (int argc, char ** argv)
   RealD mass = -0.1;
   RealD csw_r = 1.0;
   RealD csw_t = 1.0;
-  WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t);
+  WilsonCloverFermionD Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t);
 
   LatticeFermion src_o(&RBGrid);
   LatticeFermion result_o(&RBGrid);
   pickCheckerboard(Odd, src_o, src);
   result_o = Zero();
 
-  NonHermitianSchurDiagMooeeOperator<WilsonCloverFermionR,LatticeFermion> HermOp(Dw);
+  NonHermitianSchurDiagMooeeOperator<WilsonCloverFermionD,LatticeFermion> HermOp(Dw);
   BiCGSTAB<LatticeFermion> CG(1.0e-8,10000);
   CG(HermOp, src_o, result_o);
diff --git a/tests/solver/Test_wilsonclover_bicgstab_schur.cc b/tests/solver/Test_wilsonclover_bicgstab_schur.cc
index f09d7cd1..38bfdb72 100644
--- a/tests/solver/Test_wilsonclover_bicgstab_schur.cc
+++ b/tests/solver/Test_wilsonclover_bicgstab_schur.cc
@@ -70,7 +70,7 @@ int main (int argc, char ** argv)
   RealD mass = -0.1;
   RealD csw_r = 1.0;
   RealD csw_t = 1.0;
-  WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t);
+  WilsonCloverFermionD Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t);
 
   BiCGSTAB<LatticeFermion> CG(1.0e-8,10000);
   NonHermitianSchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CG);
diff --git a/tests/solver/Test_wilsonclover_bicgstab_unprec.cc b/tests/solver/Test_wilsonclover_bicgstab_unprec.cc
index f546a744..48f194b0 100644
--- a/tests/solver/Test_wilsonclover_bicgstab_unprec.cc
+++ b/tests/solver/Test_wilsonclover_bicgstab_unprec.cc
@@ -70,9 +70,9 @@ int main (int argc, char ** argv)
   RealD mass = -0.1;
   RealD csw_r = 1.0;
   RealD csw_t = 1.0;
-  WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t);
+  WilsonCloverFermionD Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t);
 
-  NonHermitianLinearOperator<WilsonCloverFermionR,LatticeFermion> HermOp(Dw);
+  NonHermitianLinearOperator<WilsonCloverFermionD,LatticeFermion> HermOp(Dw);
   BiCGSTAB<LatticeFermion> CG(1.0e-8,10000);
   CG(HermOp,src,result);
diff --git a/tests/solver/Test_wilsonclover_cagmres_unprec.cc b/tests/solver/Test_wilsonclover_cagmres_unprec.cc
index a8818168..8b264139 100644
--- a/tests/solver/Test_wilsonclover_cagmres_unprec.cc
+++ b/tests/solver/Test_wilsonclover_cagmres_unprec.cc
@@ -44,8 +44,8 @@ int main (int argc, char ** argv)
   std::vector<int> seeds({1,2,3,4});
   GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
 
-  typedef typename WilsonCloverFermionR::FermionField FermionField;
-  typename WilsonCloverFermionR::ImplParams params;
+  typedef typename WilsonCloverFermionD::FermionField FermionField;
+  typename WilsonCloverFermionD::ImplParams params;
   WilsonAnisotropyCoefficients anis;
 
   FermionField src(&Grid); random(pRNG,src);
@@ -61,9 +61,9 @@ int main (int argc, char ** argv)
   RealD mass = 0.5;
   RealD csw_r = 1.0;
   RealD csw_t = 1.0;
-  WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
+  WilsonCloverFermionD Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
 
-  MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc);
+  MdagMLinearOperator<WilsonCloverFermionD,FermionField> HermOp(Dwc);
 
   CommunicationAvoidingGeneralisedMinimalResidual<FermionField> CAGMRES(1.0e-8, 10000, 25);
   CAGMRES(HermOp,src,result);
diff --git a/tests/solver/Test_wilsonclover_cg_prec.cc b/tests/solver/Test_wilsonclover_cg_prec.cc
index abf64a1f..a0b3a712 100644
--- a/tests/solver/Test_wilsonclover_cg_prec.cc
+++ b/tests/solver/Test_wilsonclover_cg_prec.cc
@@ -72,10 +72,10 @@ int main (int argc, char ** argv)
   RealD csw_r = 1.0;
   RealD csw_t = 1.0;
   RealD cF = 1.0;
-  WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t);
-  CompactWilsonCloverFermionR Dw_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0);
-  WilsonExpCloverFermionR Dwe(Umu, Grid, RBGrid, mass, csw_r, csw_t);
-  CompactWilsonExpCloverFermionR Dwe_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0);
+  WilsonCloverFermionD Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t);
+  CompactWilsonCloverFermionD Dw_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0);
+  WilsonExpCloverFermionD Dwe(Umu, Grid, RBGrid, mass, csw_r, csw_t);
+  CompactWilsonExpCloverFermionD Dwe_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0);
 
   //  HermitianOperator HermOp(Dw);
@@ -89,22 +89,22 @@ int main (int argc, char ** argv)
   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
 
   std::cout << GridLogMessage << "Testing Wilson Clover" << std::endl;
-  SchurDiagMooeeOperator<WilsonCloverFermionR,LatticeFermion> HermOpEO(Dw);
+  SchurDiagMooeeOperator<WilsonCloverFermionD,LatticeFermion> HermOpEO(Dw);
   result_o=Zero();
   CG(HermOpEO,src_o,result_o);
 
   std::cout << GridLogMessage << "Testing Compact Wilson Clover" << std::endl;
-  SchurDiagMooeeOperator<CompactWilsonCloverFermionR,LatticeFermion> HermOpEO_compact(Dw_compact);
+  SchurDiagMooeeOperator<CompactWilsonCloverFermionD,LatticeFermion> HermOpEO_compact(Dw_compact);
   result_o=Zero();
   CG(HermOpEO_compact,src_o,result_o);
 
   std::cout << GridLogMessage << "Testing Wilson Exp Clover" << std::endl;
-  SchurDiagMooeeOperator<WilsonExpCloverFermionR,LatticeFermion> HermOpEO_exp(Dwe);
+  SchurDiagMooeeOperator<WilsonExpCloverFermionD,LatticeFermion> HermOpEO_exp(Dwe);
   result_o=Zero();
   CG(HermOpEO_exp,src_o,result_o);
 
   std::cout << GridLogMessage << "Testing Compact Wilson Exp Clover" << std::endl;
-  SchurDiagMooeeOperator<CompactWilsonExpCloverFermionR,LatticeFermion> HermOpEO_exp_compact(Dwe_compact);
+  SchurDiagMooeeOperator<CompactWilsonExpCloverFermionD,LatticeFermion> HermOpEO_exp_compact(Dwe_compact);
   result_o=Zero();
   CG(HermOpEO_exp_compact,src_o,result_o);
diff --git a/tests/solver/Test_wilsonclover_cg_schur.cc b/tests/solver/Test_wilsonclover_cg_schur.cc
index 50d06af7..50a1c4a6 100644
--- a/tests/solver/Test_wilsonclover_cg_schur.cc
+++ b/tests/solver/Test_wilsonclover_cg_schur.cc
@@ -72,22 +72,22 @@ int main (int argc, char ** argv)
   RealD cF = 1.0;
 
   std::cout << GridLogMessage << "Testing Wilson Clover" << std::endl;
-  WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t);
+  WilsonCloverFermionD Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t);
   result=Zero();
   SchurSolver(Dw,src,result);
 
   std::cout << GridLogMessage << "Testing Compact Wilson Clover" << std::endl;
-  CompactWilsonCloverFermionR Dw_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0);
+  CompactWilsonCloverFermionD Dw_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0);
   result=Zero();
   SchurSolver(Dw_compact,src,result);
 
   std::cout << GridLogMessage << "Testing Wilson Exp Clover" << std::endl;
-  WilsonExpCloverFermionR Dwe(Umu, Grid, RBGrid, mass, csw_r, csw_t);
+  WilsonExpCloverFermionD Dwe(Umu, Grid, RBGrid, mass, csw_r, csw_t);
   result=Zero();
   SchurSolver(Dwe,src,result);
 
   std::cout << GridLogMessage << "Testing Compact Wilson Exp Clover" << std::endl;
-  CompactWilsonExpCloverFermionR Dwe_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0);
+  CompactWilsonExpCloverFermionD Dwe_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0);
   result=Zero();
   SchurSolver(Dwe_compact,src,result);
diff --git a/tests/solver/Test_wilsonclover_cg_unprec.cc b/tests/solver/Test_wilsonclover_cg_unprec.cc
index 2a859f11..25cf07ee 100644
--- a/tests/solver/Test_wilsonclover_cg_unprec.cc
+++ b/tests/solver/Test_wilsonclover_cg_unprec.cc
@@ -71,31 +71,31 @@ int main (int argc, char ** argv)
   RealD csw_r = 1.0;
   RealD csw_t = 1.0;
   RealD cF = 1.0;
-  WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t);
-  CompactWilsonCloverFermionR Dw_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0);
-  WilsonExpCloverFermionR Dwe(Umu, Grid, RBGrid, mass, csw_r, csw_t);
-  CompactWilsonExpCloverFermionR Dwe_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0);
+  WilsonCloverFermionD Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t);
+  CompactWilsonCloverFermionD Dw_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0);
+  WilsonExpCloverFermionD Dwe(Umu, Grid, RBGrid, mass, csw_r, csw_t);
+  CompactWilsonExpCloverFermionD Dwe_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0);
 
   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
 
   std::cout << GridLogMessage << "Testing Wilson Clover" << std::endl;
-  MdagMLinearOperator<WilsonCloverFermionR,LatticeFermion> HermOp(Dw);
+  MdagMLinearOperator<WilsonCloverFermionD,LatticeFermion> HermOp(Dw);
   result=Zero();
   CG(HermOp,src,result);
 
   std::cout << GridLogMessage << "Testing Compact Wilson Clover" << std::endl;
-  MdagMLinearOperator<CompactWilsonCloverFermionR,LatticeFermion> HermOp_compact(Dw_compact);
+  MdagMLinearOperator<CompactWilsonCloverFermionD,LatticeFermion> HermOp_compact(Dw_compact);
   result=Zero();
   CG(HermOp_compact,src,result);
 
   std::cout << GridLogMessage << "Testing Wilson Exp Clover" << std::endl;
-  MdagMLinearOperator<WilsonExpCloverFermionR,LatticeFermion> HermOp_exp(Dwe);
+  MdagMLinearOperator<WilsonExpCloverFermionD,LatticeFermion> HermOp_exp(Dwe);
   result=Zero();
   CG(HermOp_exp,src,result);
 
   std::cout << GridLogMessage << "Testing Compact Wilson Exp Clover" << std::endl;
-  MdagMLinearOperator<CompactWilsonExpCloverFermionR,LatticeFermion> HermOp_exp_compact(Dwe_compact);
+  MdagMLinearOperator<CompactWilsonExpCloverFermionD,LatticeFermion> HermOp_exp_compact(Dwe_compact);
   result=Zero();
   CG(HermOp_exp_compact,src,result);
diff --git a/tests/solver/Test_wilsonclover_fcagmres_prec.cc b/tests/solver/Test_wilsonclover_fcagmres_prec.cc
index 1a294821..77b2afff 100644
--- a/tests/solver/Test_wilsonclover_fcagmres_prec.cc
+++ b/tests/solver/Test_wilsonclover_fcagmres_prec.cc
@@ -44,8 +44,8 @@ int main (int argc, char ** argv)
   std::vector<int> seeds({1,2,3,4});
   GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
 
-  typedef typename WilsonCloverFermionR::FermionField FermionField;
-  typename WilsonCloverFermionR::ImplParams params;
+  typedef typename WilsonCloverFermionD::FermionField FermionField;
+  typename WilsonCloverFermionD::ImplParams params;
   WilsonAnisotropyCoefficients anis;
 
   FermionField src(&Grid); random(pRNG,src);
@@ -61,9 +61,9 @@ int main (int argc, char ** argv)
   RealD mass = 0.5;
   RealD csw_r = 1.0;
   RealD csw_t = 1.0;
-  WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
+  WilsonCloverFermionD Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
 
-  MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc);
+  MdagMLinearOperator<WilsonCloverFermionD,FermionField> HermOp(Dwc);
 
   TrivialPrecon<FermionField> simple;
diff --git a/tests/solver/Test_wilsonclover_fgmres_prec.cc b/tests/solver/Test_wilsonclover_fgmres_prec.cc
index 15bb4136..0f48871f 100644
--- a/tests/solver/Test_wilsonclover_fgmres_prec.cc
+++ b/tests/solver/Test_wilsonclover_fgmres_prec.cc
@@ -44,8 +44,8 @@ int main (int argc, char ** argv)
   std::vector<int> seeds({1,2,3,4});
   GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
 
-  typedef typename WilsonCloverFermionR::FermionField FermionField;
-  typename WilsonCloverFermionR::ImplParams params;
+  typedef typename WilsonCloverFermionD::FermionField FermionField;
+  typename WilsonCloverFermionD::ImplParams params;
   WilsonAnisotropyCoefficients anis;
 
   FermionField src(&Grid); random(pRNG,src);
@@ -61,9 +61,9 @@ int main (int argc, char ** argv)
   RealD mass = 0.5;
   RealD csw_r = 1.0;
   RealD csw_t = 1.0;
-  WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
+  WilsonCloverFermionD Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
 
-  MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc);
+  MdagMLinearOperator<WilsonCloverFermionD,FermionField> HermOp(Dwc);
 
   TrivialPrecon<FermionField> simple;
diff --git a/tests/solver/Test_wilsonclover_gmres_unprec.cc b/tests/solver/Test_wilsonclover_gmres_unprec.cc
index 00f33382..b660d716 100644
--- a/tests/solver/Test_wilsonclover_gmres_unprec.cc
+++ b/tests/solver/Test_wilsonclover_gmres_unprec.cc
@@ -44,8 +44,8 @@ int main (int argc, char ** argv)
   std::vector<int> seeds({1,2,3,4});
   GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
 
-  typedef typename WilsonCloverFermionR::FermionField FermionField;
-  typename WilsonCloverFermionR::ImplParams params;
+  typedef typename WilsonCloverFermionD::FermionField FermionField;
+  typename WilsonCloverFermionD::ImplParams params;
   WilsonAnisotropyCoefficients anis;
 
   FermionField src(&Grid); random(pRNG,src);
@@ -61,9 +61,9 @@ int main (int argc, char ** argv)
   RealD mass = 0.5;
   RealD csw_r = 1.0;
   RealD csw_t = 1.0;
-  WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
+  WilsonCloverFermionD Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
 
-  MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc);
+  MdagMLinearOperator<WilsonCloverFermionD,FermionField> HermOp(Dwc);
 
   GeneralisedMinimalResidual<FermionField> GMRES(1.0e-8, 10000, 25);
   GMRES(HermOp,src,result);
diff --git a/tests/solver/Test_wilsonclover_mg.cc b/tests/solver/Test_wilsonclover_mg.cc
index 605d225d..1b0e8bb7 100644
--- a/tests/solver/Test_wilsonclover_mg.cc
+++ b/tests/solver/Test_wilsonclover_mg.cc
@@ -80,16 +80,16 @@ int main(int argc, char **argv) {
   // Note: We do chiral doubling, so actually only nbasis/2 full basis vectors are used
   const int nbasis = 40;
 
-  WilsonCloverFermionR Dwc(Umu, *FGrid, *FrbGrid, mass, csw_r, csw_t);
+  WilsonCloverFermionD Dwc(Umu, *FGrid, *FrbGrid, mass, csw_r, csw_t);
 
-  MdagMLinearOperator<WilsonCloverFermionR,LatticeFermion> MdagMOpDwc(Dwc);
+  MdagMLinearOperator<WilsonCloverFermionD,LatticeFermion> MdagMOpDwc(Dwc);
 
   std::cout << GridLogMessage << "**************************************************" << std::endl;
   std::cout << GridLogMessage << "Testing Multigrid for Wilson Clover" << std::endl;
   std::cout << GridLogMessage << "**************************************************" << std::endl;
 
   TrivialPrecon<LatticeFermion> TrivialPrecon;
-  auto MGPreconDwc = createMGInstance<vSpinColourVector, vTComplex, nbasis, WilsonCloverFermionR>(mgParams, levelInfo, Dwc, Dwc);
+  auto MGPreconDwc = createMGInstance<vSpinColourVector, vTComplex, nbasis, WilsonCloverFermionD>(mgParams, levelInfo, Dwc, Dwc);
 
   MGPreconDwc->setup();
diff --git a/tests/solver/Test_wilsonclover_mr_unprec.cc b/tests/solver/Test_wilsonclover_mr_unprec.cc
index ab49ec1f..be721236 100644
--- a/tests/solver/Test_wilsonclover_mr_unprec.cc
+++ b/tests/solver/Test_wilsonclover_mr_unprec.cc
@@ -44,8 +44,8 @@ int main (int argc, char ** argv)
   std::vector<int> seeds({1,2,3,4});
   GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
 
-  typedef typename WilsonCloverFermionR::FermionField FermionField;
-  typename WilsonCloverFermionR::ImplParams params;
+  typedef typename WilsonCloverFermionD::FermionField FermionField;
+  typename WilsonCloverFermionD::ImplParams params;
   WilsonAnisotropyCoefficients anis;
 
   FermionField src(&Grid); random(pRNG,src);
@@ -61,9 +61,9 @@ int main (int argc, char ** argv)
   RealD mass = 0.5;
   RealD csw_r = 1.0;
   RealD csw_t = 1.0;
-  WilsonCloverFermionR Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
+  WilsonCloverFermionD Dwc(Umu,Grid,RBGrid,mass,csw_r,csw_t,anis,params);
 
-  MdagMLinearOperator<WilsonCloverFermionR,FermionField> HermOp(Dwc);
+  MdagMLinearOperator<WilsonCloverFermionD,FermionField> HermOp(Dwc);
 
   MinimalResidual<FermionField> MR(1.0e-8,10000,0.8);
   MR(HermOp,src,result);
diff --git a/tests/solver/Test_zmobius_cg_prec.cc b/tests/solver/Test_zmobius_cg_prec.cc
index 6b007afc..7f1f98b8 100644
--- a/tests/solver/Test_zmobius_cg_prec.cc
+++ b/tests/solver/Test_zmobius_cg_prec.cc
@@ -101,7 +101,7 @@ int main(int argc, char** argv) {
   omegas.push_back( std::complex<double>(0.0686324988446592,-0.0550658530827402) );
 #endif
 
-  ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, omegas,1.,0.);
+  ZMobiusFermionD Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, omegas,1.,0.);
 
   LatticeFermion src_o(FrbGrid);
   LatticeFermion result_o(FrbGrid);
@@ -110,7 +110,7 @@ int main(int argc, char** argv) {
 
   GridStopWatch CGTimer;
 
-  SchurDiagMooeeOperator<ZMobiusFermionR,LatticeFermion> HermOpEO(Ddwf);
+  SchurDiagMooeeOperator<ZMobiusFermionD,LatticeFermion> HermOpEO(Ddwf);
   ConjugateGradient<LatticeFermion> CG(1.0e-8, 10000, 0);// switch off the assert
 
   CGTimer.Start();
@@ -121,7 +121,6 @@ int main(int argc, char** argv) {
             << std::endl;
 
   std::cout << GridLogMessage << "######## Dhop calls summary" << std::endl;
-  Ddwf.Report();
 
   Grid_finalize();
 }