diff --git a/Grid/qcd/action/fermion/WilsonFermion.h b/Grid/qcd/action/fermion/WilsonFermion.h index b7afc43a..8bfe6c1a 100644 --- a/Grid/qcd/action/fermion/WilsonFermion.h +++ b/Grid/qcd/action/fermion/WilsonFermion.h @@ -141,7 +141,7 @@ class WilsonFermion : public WilsonKernels, public WilsonFermionStatic { GridRedBlackCartesian &Hgrid, RealD _mass, const ImplParams &p = ImplParams(), const WilsonAnisotropyCoefficients &anis = WilsonAnisotropyCoefficients() ); - + // DoubleStore impl dependent void ImportGauge(const GaugeField &_Umu); diff --git a/documentation/_build/latex/Grid.pdf b/documentation/_build/latex/Grid.pdf index 8b9af186..17cdf6c2 100644 Binary files a/documentation/_build/latex/Grid.pdf and b/documentation/_build/latex/Grid.pdf differ diff --git a/documentation/interfacing.rst b/documentation/interfacing.rst index 3fd0c8a3..5eddf059 100644 --- a/documentation/interfacing.rst +++ b/documentation/interfacing.rst @@ -10,7 +10,7 @@ examples. MPI initialization ------------------- +-------------------- Grid supports threaded MPI sends and receives and, if running with more than one thread, requires the MPI_THREAD_MULTIPLE mode of message @@ -21,7 +21,7 @@ appropriate initialization call is:: assert(MPI_THREAD_MULTIPLE == provided); Grid Initialization -------------------- +--------------------- Grid itself is initialized with a call:: @@ -38,12 +38,14 @@ The following Grid procedures are useful for verifying that Grid is properly initialized. 
============================================================= =========================================================================================================== - Grid procedure returns + Grid procedure returns ============================================================= =========================================================================================================== - std::vector GridDefaultLatt(); lattice size - std::vector GridDefaultSimd(int Nd,vComplex::Nsimd()); SIMD layout - std::vector GridDefaultMpi(); MPI layout - int Grid::GridThread::GetThreads(); number of threads + std::vector GridDefaultLatt(); lattice size + std::vector GridDefaultSimd(int Nd,vComplex::Nsimd()); SIMD layout + std::vector GridDefaultMpi(); MPI layout + int Grid::GridThread::GetThreads(); number of threads +============================================================= =========================================================================================================== + MPI coordination ---------------- @@ -96,7 +98,7 @@ returns a rank that agrees with Grid's `peRank`. Mapping fields between Grid and user layouts -------------------------------------------- +--------------------------------------------- In order to map data between layouts, it is important to know how the lattice sites are distributed across the processor grid. A @@ -177,15 +179,15 @@ Grid 5D fermion field `cv5`. 
**Example**:: - GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(); - GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid) typename ImprovedStaggeredFermion5D::FermionField cv5(FrbGrid); + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid) typename ImprovedStaggeredFermion5D::FermionField cv5(FrbGrid); - std::vector r(4); - indexToCoords(idx,r); - std::vector r5(1,0); - for( int d = 0; d < 4; d++ ) r5.push_back(r[d]); + std::vector r(4); + indexToCoords(idx,r); + std::vector r5(1,0); + for( int d = 0; d < 4; d++ ) r5.push_back(r[d]); - for( int j = 0; j < Ls; j++ ){ + for( int j = 0; j < Ls; j++ ){ r5[0] = j; ColourVector cVec; for(int col=0; colcv), r5); - } + } diff --git a/documentation/manual.rst b/documentation/manual.rst index 7d8765a8..8e0e79d7 100644 --- a/documentation/manual.rst +++ b/documentation/manual.rst @@ -69,7 +69,7 @@ a programme is simply written as a series of statements, addressing entire latti Implementation details may be provided to explain how the code works, but are not strictly part of the API. -**Example** +**Example** For example, as an implementation detail, in a single programme multiple data (SPMD) message passing supercomputer the main programme is trivially replicated on each computing node. The data parallel operations are called *collectively* by all nodes. Any scalar values returned by the various reduction routines are the same on each node, resulting in (for example) the same decision being made by all nodes to terminate an iterative solver on the same iteration. @@ -90,7 +90,8 @@ or any codes directly interacting with the * Stencil -will make use of facilities provided by to assist the creation of high performance code. The internal data layout complexities +will make use of facilities provided by to assist the creation of high performance code. 
+The internal data layout complexities will be exposed to some degree and the interfaces are subject to change without notice as HPC architectures change. Since some of the internal implementation details are needed to explain the design strategy of grid these will be @@ -585,18 +586,18 @@ to Grid to choose the specific word size. Type definitions are provided in qcd/QCD.h to give the internal index structures of QCD codes. For example:: - template using iSinglet = iScalar > >; - template using iSpinMatrix = iScalar, Ns> >; - template using iColourMatrix = iScalar > > ; - template using iSpinColourMatrix = iScalar, Ns> >; - template using iLorentzColourMatrix = iVector >, Nd > ; - template using iDoubleStoredColourMatrix = iVector >, Nds > ; - template using iSpinVector = iScalar, Ns> >; - template using iColourVector = iScalar > >; - template using iSpinColourVector = iScalar, Ns> >; - template using iHalfSpinVector = iScalar, Nhs> >; - template using iHalfSpinColourVector = iScalar, Nhs> >; - + template + using iSinglet = iScalar > >; + using iSpinMatrix = iScalar, Ns> >; + using iColourMatrix = iScalar > > ; + using iSpinColourMatrix = iScalar, Ns> >; + using iLorentzColourMatrix = iVector >, Nd > ; + using iDoubleStoredColourMatrix = iVector >, Nds > ; + using iSpinVector = iScalar, Ns> >; + using iColourVector = iScalar > >; + using iSpinColourVector = iScalar, Ns> >; + using iHalfSpinVector = iScalar, Nhs> >; + using iHalfSpinColourVector = iScalar, Nhs> >; Giving the type table: @@ -614,7 +615,7 @@ Scalar Scalar Matrix Matrix ComplexD SpinColourMatrixD The types are implemented via a recursive tensor nesting system. -**Example** +**Example** Here, the prefix "i" indicates for internal use, preserving the template nature of the class. 
Final types are declared with vtype selected to be both scalar and vector, appropriate to a @@ -1021,26 +1022,6 @@ This enables the coordinates to be manipulated without heap allocation or thread and avoids introducing STL functions into GPU code, but does so at the expense of introducing a maximum dimensionality. This limit is easy to change (lib/util/Coordinate.h). -**Internals** - -The processor Grid is defined by data values in the Communicator object:: - - int _Nprocessors; // How many in all - std::vector _processors; // Which dimensions get relayed out over processors lanes. - int _processor; // linear processor rank - std::vector _processor_coor; // linear processor coordinate - unsigned long _ndimension; - Grid_MPI_Comm communicator; - -The final of these is potentially an MPI Cartesian communicator, mapping some total number of processors -to an N-dimensional coordinate system. This is used by Grid to geometrically decompose the subvolumes of a -lattice field across processing elements. Grid is aware of multiple ranks per node and attempts to ensure -that the geometrical decomposition keeps as many neigbours as possible on the same node. This is done -by reordering the ranks in the constructor of a Communicator object once the topology requested has -been indicated, via an internal call to the method OptimalCommunicator(). The reordering is chosen -by Grid to trick MPI, which makes a simple lexicographic assignment of ranks to coordinate, to ensure -that the simple lexicographic assignment of the reordered ranks is the optimal choice. MPI does not do this -by default and substantial improvements arise from this design choice. Grids ------------- @@ -1135,6 +1116,26 @@ are provided to communicate fields between different communicators (e.g. 
between Grid_split (Umu,s_Umu); Grid_split (src,s_src); +**Internals** + +The processor Grid is defined by data values in the Communicator object:: + + int _Nprocessors; // How many in all + std::vector _processors; // Which dimensions get relayed out over processors lanes. + int _processor; // linear processor rank + std::vector _processor_coor; // linear processor coordinate + unsigned long _ndimension; + Grid_MPI_Comm communicator; + +The final of these is potentially an MPI Cartesian communicator, mapping some total number of processors +to an N-dimensional coordinate system. This is used by Grid to geometrically decompose the subvolumes of a +lattice field across processing elements. Grid is aware of multiple ranks per node and attempts to ensure +that the geometrical decomposition keeps as many neigbours as possible on the same node. This is done +by reordering the ranks in the constructor of a Communicator object once the topology requested has +been indicated, via an internal call to the method OptimalCommunicator(). The reordering is chosen +by Grid to trick MPI, which makes a simple lexicographic assignment of ranks to coordinate, to ensure +that the simple lexicographic assignment of the reordered ranks is the optimal choice. MPI does not do this +by default and substantial improvements arise from this design choice. Lattice containers ----------------------------------------- @@ -1565,62 +1566,6 @@ lattice site :math:`x_\mu = 1` in the rhs to :math:`x_\mu = 0` in the result. } -CovariantCshift -^^^^^^^^^^^^^^^^^^^^ - -Covariant Cshift operations are provided for common cases of the boundary condition. These may be further optimised -in future:: - - template - Lattice CovShiftForward(const Lattice &Link, int mu, - const Lattice &field); - - template - Lattice CovShiftBackward(const Lattice &Link, int mu, - const Lattice &field); - -Boundary conditions -^^^^^^^^^^^^^^^^^^^^ - -The covariant shift routines occur in namespaces PeriodicBC and ConjugateBC. 
The correct covariant shift -for the boundary condition is passed into the gauge actions and wilson loops via an -"Impl" template policy class. - -The relevant staples, plaquettes, and loops are formed by using the provided method:: - - Impl::CovShiftForward - Impl::CovShiftBackward - -etc... This makes physics code transform appropriately with externally supplied rules about -treating the boundary. - -**Example** (lib/qcd/util/WilsonLoops.h):: - - static void dirPlaquette(GaugeMat &plaq, const std::vector &U, - const int mu, const int nu) { - // ___ - //| | - //|<__| - plaq = Gimpl::CovShiftForward(U[mu],mu, - Gimpl::CovShiftForward(U[nu],nu, - Gimpl::CovShiftBackward(U[mu],mu, - Gimpl::CovShiftIdentityBackward(U[nu], nu)))); - } - -Currently provided predefined implementations are (qcd/action/gauge/GaugeImplementations.h):: - - typedef PeriodicGaugeImpl PeriodicGimplR; // Real.. whichever prec - typedef PeriodicGaugeImpl PeriodicGimplF; // Float - typedef PeriodicGaugeImpl PeriodicGimplD; // Double - - typedef PeriodicGaugeImpl PeriodicGimplAdjR; // Real.. whichever prec - typedef PeriodicGaugeImpl PeriodicGimplAdjF; // Float - typedef PeriodicGaugeImpl PeriodicGimplAdjD; // Double - - typedef ConjugateGaugeImpl ConjugateGimplR; // Real.. whichever prec - typedef ConjugateGaugeImpl ConjugateGimplF; // Float - typedef ConjugateGaugeImpl ConjugateGimplD; // Double - Inter-grid transfer operations @@ -1694,11 +1639,6 @@ Growing a lattice by a multiple factor, with periodic replication:: That latter is useful to, for example, pre-thermalise a smaller volume and then grow the volume in HMC. It was written while debugging G-parity boundary conditions. 
-Input/Output facilities ---------------------------------------------- - - - Random number generators ========================================= @@ -1732,18 +1672,32 @@ The interface is as follows:: class GridSerialRNG { GridSerialRNG(); void SeedFixedIntegers(const std::vector &seeds); + void SeedUniqueString(const std::string &s); } class GridParallelRNG { GridParallelRNG(GridBase *grid); void SeedFixedIntegers(const std::vector &seeds); + void SeedUniqueString(const std::string &s); } - template void random(GridParallelRNG &rng,Lattice &l) { rng.fill(l,rng._uniform); } - template void gaussian(GridParallelRNG &rng,Lattice &l) { rng.fill(l,rng._gaussian); } +* Seeding - template void random(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._uniform ); } - template void gaussian(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._gaussian ); } +The SeedUniqueString uses a 256bit SHA from the OpenSSL library to construct integer seeds. +The reason for this is to enable reproducible seeding in the measurement sector of physics codes. +For example, labelling a random draw by a string representation of the physics information, and then +appending the trajectory number, will give a unique set of seeds for each measurement on each trajectory. +This string based functionality is probably not expected to be used in a lattice evolution, except for +possibly the initial state. Subsequent evolution should checkpoint and restore lattice RNG state using +the interfaces below. + +Random numbers may be drawn as follows:: + + void random(GridParallelRNG &rng,Lattice &l) { rng.fill(l,rng._uniform); } + void gaussian(GridParallelRNG &rng,Lattice &l) { rng.fill(l,rng._gaussian); } + + void random(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._uniform ); } + void gaussian(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._gaussian ); } * Serial RNG's are used to assign scalar fields. 
@@ -2435,13 +2389,52 @@ Class:: class AlgRemez -Iterative ------------- +Iterative solvers and algorithms +----------------------------------- We document a number of iterative algorithms of topical relevance to Lattice Gauge theory. These are written for application to arbitrary fields and arbitrary operators using type templating, by implementating them as arbitrary OperatorFunctions. +Most of these algorithms operate on a generic matrix class, which +derives from LinearOperatorBase. + +Linear operators +^^^^^^^^^^^^^^^^^ + +By sharing the class for Sparse Matrix across multiple operator wrappers, we can share code +between RB and non-RB variants. Sparse matrix is an abstract fermion action def, and then +the LinearOperator wrappers implement the specialisation of "Op" and "AdjOp" to the cases minimising +replication of code. + +algorithms/LinearOperator.h + +Class:: + + template class LinearOperatorBase { + public: + + // Support for coarsening to a multigrid + virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base + virtual void OpDir (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base + + virtual void Op (const Field &in, Field &out) = 0; // Abstract base + virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base + virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) = 0; + virtual void HermOp(const Field &in, Field &out)=0; + }; + +The specific operators are:: + + template class MdagMLinearOperator : public LinearOperatorBase + template class ShiftedMdagMLinearOperator : public LinearOperatorBase + template class HermitianLinearOperator : public LinearOperatorBase + template class SchurOperatorBase : public LinearOperatorBase + template class SchurDiagOneRH : public SchurOperatorBase + template class SchurDiagOneLH : public SchurOperatorBase + template class SchurStaggeredOperator : public SchurOperatorBase + + Conjugate Gradient ^^^^^^^^^^^^^^^^^^^ @@ -2558,41 +2551,6 @@ 
Solve method:: void calc(std::vector& eval, std::vector& evec, const Field& src, int& Nconv, bool reverse=false) -Linear operators -^^^^^^^^^^^^^^^^^ - -By sharing the class for Sparse Matrix across multiple operator wrappers, we can share code -between RB and non-RB variants. Sparse matrix is an abstract fermion action def, and then -the LinearOperator wrappers implement the specialisation of "Op" and "AdjOp" to the cases minimising -replication of code. - -algorithms/LinearOperator.h - -Class:: - - template class LinearOperatorBase { - public: - - // Support for coarsening to a multigrid - virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base - virtual void OpDir (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base - - virtual void Op (const Field &in, Field &out) = 0; // Abstract base - virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base - virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) = 0; - virtual void HermOp(const Field &in, Field &out)=0; - }; - -The specific operators are: - - template class MdagMLinearOperator : public LinearOperatorBase - template class ShiftedMdagMLinearOperator : public LinearOperatorBase - template class HermitianLinearOperator : public LinearOperatorBase - template class SchurOperatorBase : public LinearOperatorBase - template class SchurDiagOneRH : public SchurOperatorBase - template class SchurDiagOneLH : public SchurOperatorBase - template class SchurStaggeredOperator : public SchurOperatorBase - Schur decomposition ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2647,37 +2605,15 @@ Grid Dirac algebra, spin projectors, and Gamma multiplication table. 
The spin basis is: -* :math:`\gamma_x` -==== == == == - ( 0 0 0 i) - ( 0 0 i 0) - ( 0 -i 0 0) - (-i 0 0 0) -==== == == == - -* :math:`\gamma_y` -==== == == == - ( 0 0 0 -1) - ( 0 0 1 0) - ( 0 1 0 0) - (-1 0 0 0) -==== == == == - -* :math:`\gamma_z` -==== == == == - ( 0 0 i 0) - ( 0 0 0 -i) - (-i 0 0 0) - ( 0 i 0 0) -==== == == == +.. math:: \gamma_x= \left(\begin{array}{cccc} 0& 0& 0& i\\ 0& 0& i& 0\\ 0&-i& 0& 0\\ -i& 0& 0& 0 \end{array}\right) -* :math:`\gamma_t` -==== == == == - ( 0 0 1 0) - ( 0 0 0 1) - ( 1 0 0 0) - ( 0 1 0 0) -==== == == == +.. math:: \gamma_y= \left(\begin{array}{cccc} 0& 0& 0&-1\\ 0& 0& 1& 0\\ 0& 1& 0& 0\\ -1& 0& 0& 0 \end{array}\right) + +.. math:: \gamma_z= \left(\begin{array}{cccc} 0& 0& i& 0\\ 0& 0& 0&-i\\ -i& 0& 0& 0\\ 0& i& 0& 0 \end{array}\right) + +.. math:: \gamma_t= \left(\begin{array}{cccc} 0& 0& 1& 0\\ 0& 0& 0& 1\\ 1& 0& 0& 0\\ 0& 1& 0& 0 \end{array}\right) + +.. math:: \gamma_5= \left(\begin{array}{cccc} 1& 0& 0& 0\\ 0& 1& 0& 0\\ 0& 0&-1 &0\\ 0& 0& 0&-1 \end{array}\right) These can be accessed via a strongly typed enumeration to avoid multiplication by zeros. The values are considered opaque, and symbolic names must be used. 
@@ -2700,7 +2636,7 @@ These are signed (prefixes like MinusIdentity also work):: Gamma::Algebra::SigmaYZ Gamma::Algebra::SigmaZT -** Example ** +**Example** They can be used, for example (benchmarks/Benchmark_wilson.cc):: @@ -2733,73 +2669,523 @@ They can be used, for example (benchmarks/Benchmark_wilson.cc):: Two spin projection is also possible on non-lattice fields, and used to build high performance routines such as the Wilson kernel:: - template strong_inline void spProjXp (iVector &hspin,const iVector &fspin) - template strong_inline void spProjYp (iVector &hspin,const iVector &fspin) - template strong_inline void spProjZp (iVector &hspin,const iVector &fspin) - template strong_inline void spProjTp (iVector &hspin,const iVector &fspin) - template strong_inline void spProj5p (iVector &hspin,const iVector &fspin) - template strong_inline void spProjXm (iVector &hspin,const iVector &fspin) - template strong_inline void spProjYm (iVector &hspin,const iVector &fspin) - template strong_inline void spProjZm (iVector &hspin,const iVector &fspin) - template strong_inline void spProjTm (iVector &hspin,const iVector &fspin) - template strong_inline void spProj5m (iVector &hspin,const iVector &fspin) + template void spProjXp (iVector &hspin,const iVector &fspin) + template void spProjYp (iVector &hspin,const iVector &fspin) + template void spProjZp (iVector &hspin,const iVector &fspin) + template void spProjTp (iVector &hspin,const iVector &fspin) + template void spProj5p (iVector &hspin,const iVector &fspin) + template void spProjXm (iVector &hspin,const iVector &fspin) + template void spProjYm (iVector &hspin,const iVector &fspin) + template void spProjZm (iVector &hspin,const iVector &fspin) + template void spProjTm (iVector &hspin,const iVector &fspin) + template void spProj5m (iVector &hspin,const iVector &fspin) and there are associated reconstruction routines for assembling four spinors from these two spinors:: - template strong_inline void spReconXp (iVector 
&fspin,const iVector &hspin) - template strong_inline void spReconYp (iVector &fspin,const iVector &hspin) - template strong_inline void spReconZp (iVector &fspin,const iVector &hspin) - template strong_inline void spReconTp (iVector &fspin,const iVector &hspin) - template strong_inline void spRecon5p (iVector &fspin,const iVector &hspin) - template strong_inline void spReconXm (iVector &fspin,const iVector &hspin) - template strong_inline void spReconYm (iVector &fspin,const iVector &hspin) - template strong_inline void spReconZm (iVector &fspin,const iVector &hspin) - template strong_inline void spReconTm (iVector &fspin,const iVector &hspin) - template strong_inline void spRecon5m (iVector &fspin,const iVector &hspin) + template void spReconXp (iVector &fspin,const iVector &hspin) + template void spReconYp (iVector &fspin,const iVector &hspin) + template void spReconZp (iVector &fspin,const iVector &hspin) + template void spReconTp (iVector &fspin,const iVector &hspin) + template void spRecon5p (iVector &fspin,const iVector &hspin) + template void spReconXm (iVector &fspin,const iVector &hspin) + template void spReconYm (iVector &fspin,const iVector &hspin) + template void spReconZm (iVector &fspin,const iVector &hspin) + template void spReconTm (iVector &fspin,const iVector &hspin) + template void spRecon5m (iVector &fspin,const iVector &hspin) - template strong_inline void accumReconXp (iVector &fspin,const iVector &hspin) - template strong_inline void accumReconYp (iVector &fspin,const iVector &hspin) - template strong_inline void accumReconZp (iVector &fspin,const iVector &hspin) - template strong_inline void accumReconTp (iVector &fspin,const iVector &hspin) - template strong_inline void accumRecon5p (iVector &fspin,const iVector &hspin) - template strong_inline void accumReconXm (iVector &fspin,const iVector &hspin) - template strong_inline void accumReconYm (iVector &fspin,const iVector &hspin) - template strong_inline void accumReconZm (iVector 
&fspin,const iVector &hspin) - template strong_inline void accumReconTm (iVector &fspin,const iVector &hspin) - template strong_inline void accumRecon5m (iVector &fspin,const iVector &hspin) + template void accumReconXp (iVector &fspin,const iVector &hspin) + template void accumReconYp (iVector &fspin,const iVector &hspin) + template void accumReconZp (iVector &fspin,const iVector &hspin) + template void accumReconTp (iVector &fspin,const iVector &hspin) + template void accumRecon5p (iVector &fspin,const iVector &hspin) + template void accumReconXm (iVector &fspin,const iVector &hspin) + template void accumReconYm (iVector &fspin,const iVector &hspin) + template void accumReconZm (iVector &fspin,const iVector &hspin) + template void accumReconTm (iVector &fspin,const iVector &hspin) + template void accumRecon5m (iVector &fspin,const iVector &hspin) +These ca SU(N) -------- +A generic Nc qcd/utils/SUn.h is provided. This defines a template class:: + + template class SU ; + +The most important external methods are:: + + static void printGenerators(void) ; + template static void generator(int lieIndex, iSUnMatrix &ta) ; + + static void SubGroupHeatBath(GridSerialRNG &sRNG, GridParallelRNG &pRNG, RealD beta, // coeff multiplying staple in action (with no 1/Nc) + LatticeMatrix &link, + const LatticeMatrix &barestaple, // multiplied by action coeffs so th + int su2_subgroup, int nheatbath, LatticeInteger &wheremask); + + static void GaussianFundamentalLieAlgebraMatrix(GridParallelRNG &pRNG, + LatticeMatrix &out, + Real scale = 1.0) ; + static void GaugeTransform( GaugeField &Umu, GaugeMat &g) + static void RandomGaugeTransform(GridParallelRNG &pRNG, GaugeField &Umu, GaugeMat &g); + + static void HotConfiguration(GridParallelRNG &pRNG, GaugeField &out) ; + static void TepidConfiguration(GridParallelRNG &pRNG,GaugeField &out); + static void ColdConfiguration(GaugeField &out); + + static void taProj( const LatticeMatrixType &in, LatticeMatrixType &out); + static void 
taExp(const LatticeMatrixType &x, LatticeMatrixType &ex) ; + + static int su2subgroups(void) ; // returns how many subgroups + + +Specific instantiations are defined:: + + typedef SU<2> SU2; + typedef SU<3> SU3; + typedef SU<4> SU4; + typedef SU<5> SU5; + +For example, Quenched QCD updating may be run as (tests/core/Test_quenched_update.cc):: + + for(int sweep=0;sweep<1000;sweep++){ + + RealD plaq = ColourWilsonLoops::avgPlaquette(Umu); + + std::cout<(Umu,mu); + + for( int subgroup=0;subgroup(Umu,link,mu); + + //reunitarise link; + ProjectOnGroup(Umu); + } + } + } + + Space time grids ---------------- -Random configurations and random gauge transforms ---------------------------------------------------- - - -Wilson loops --------------- Lattice actions ========================================= -.. todo:: CD: The whole section needs to be completed, of course +We discuss in some detail the implementation of the lattice actions. +The action is a sum of terms, each of which must inherit from and provide the following interface. + +lib/qcd/action/ActionBase.h:: + + class Action + { + + public: + bool is_smeared = false; + // Heatbath? + virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG) = 0; // refresh pseudofermions + virtual RealD S(const GaugeField& U) = 0; // evaluate the action + virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0; // evaluate the action derivative + virtual std::string action_name() = 0; // return the action name + virtual std::string LogParameters() = 0; // prints action parameters + virtual ~Action(){} + }; + +Fermion Lattice actions are defined in the qcd/action/fermion subdirectory and in the +qcd/action/gauge subdirectories. For Hybrid Monte Carlo and derivative sampling algorithm +Pseudofermoin actions are defined in the qcd/action/pseudofermion subdirectory. 
+ +The simplest lattice action is the Wilson plaquette action, and we start by considering the Wilson loops +facilities as this is illustrative of the implementation policy design approach. + +Wilson loops +-------------- + +Wilson loops are common objects in Lattice Gauge Theory. +A utility class is provided to assist assembling these as they occur both in common observable construction and +also in actions such as the Wilson plaquette and the rectangle actions. + +Since derivatives with respect to gauge links are required for evolution codes, non-closed staples of +various types are also provided. The gauge actions are all assembled consistently from the Wilson loops +class. + +**Implementation policies** + +The Wilson loops class is templated to take an implementation policy class parameter. The covariant Cshift is inherited from +the policy and implements boundary conditions, such as periodic, anti-periodic or charge conjugate. In this +way the Wilson loop code can automatically transform with the boundary condition and give the right plaquette, +force terms etc... for the boundary conditions passed in external to the class. + + +This implementation policy class is called an "impl", and a class that bundles together all the required +rules to assemble a gauge action is called a Gimpl. + +There are several facilities provided by a Gimpl. + +These include Boundary conditions and consequently a CovariantCshift. + +CovariantCshift +^^^^^^^^^^^^^^^^^^ + +Covariant Cshift operations are provided for common cases of the boundary condition. These may be further optimised +in future:: + + template + Lattice CovShiftForward(const Lattice &Link, int mu, + const Lattice &field); + + template + Lattice CovShiftBackward(const Lattice &Link, int mu, + const Lattice &field); + +Boundary conditions +^^^^^^^^^^^^^^^^^^^^^^^^ + +The covariant shift routines occur in namespaces PeriodicBC and ConjugateBC. 
The correct covariant shift +for the boundary condition is passed into the gauge actions and wilson loops via an +"Impl" template policy class. + +The relevant staples, plaquettes, and loops are formed by using the provided method:: + + Impl::CovShiftForward + Impl::CovShiftBackward + +etc... This makes physics code transform appropriately with externally supplied rules about +treating the boundary. + +**Example** (lib/qcd/util/WilsonLoops.h):: + + static void dirPlaquette(GaugeMat &plaq, const std::vector &U, + const int mu, const int nu) { + // ___ + //| | + //|<__| + plaq = Gimpl::CovShiftForward(U[mu],mu, + Gimpl::CovShiftForward(U[nu],nu, + Gimpl::CovShiftBackward(U[mu],mu, + Gimpl::CovShiftIdentityBackward(U[nu], nu)))); + } + +Currently provided predefined implementations are (qcd/action/gauge/GaugeImplementations.h):: + + typedef PeriodicGaugeImpl PeriodicGimplR; // Real.. whichever prec + typedef PeriodicGaugeImpl PeriodicGimplF; // Float + typedef PeriodicGaugeImpl PeriodicGimplD; // Double + + typedef PeriodicGaugeImpl PeriodicGimplAdjR; // Real.. whichever prec + typedef PeriodicGaugeImpl PeriodicGimplAdjF; // Float + typedef PeriodicGaugeImpl PeriodicGimplAdjD; // Double + + typedef ConjugateGaugeImpl ConjugateGimplR; // Real.. whichever prec + typedef ConjugateGaugeImpl ConjugateGimplF; // Float + typedef ConjugateGaugeImpl ConjugateGimplD; // Double + +Gauge Actions +--------------- + +lib/qcd/action/gauge/Photon.h defines the U(1) field:: + + class Photon + +using Fourier techniques. 
+ +lib/qcd/action/gauge/WilsonGaugeAction.h defines the standard plaquette action:: + + template + class WilsonGaugeAction : public Action ; + +This action is suitable to use in a Hybrid Monte Carlo evolution as an action term and has constructor:: + + WilsonGaugeAction(RealD beta_); + +lib/qcd/action/gauge/PlaqPlusRectangleAction.h defines the standard plaquette plus rectangle class of action:: + + template + class PlaqPlusRectangleAction : public Action ; + +The constructor is:: + + PlaqPlusRectangleAction(RealD b,RealD c); + +Due to varying conventions, convenience wrappers are provided:: + + template class RBCGaugeAction : public PlaqPlusRectangleAction; + template class IwasakiGaugeAction : public RBCGaugeAction ; + template class SymanzikGaugeAction : public RBCGaugeAction ; + template class DBW2GaugeAction : public RBCGaugeAction ; + +With convenience constructors to set the rectangle coefficient automatically to popular values:: + + SymanzikGaugeAction(RealD beta) : RBCGaugeAction(beta,-1.0/12.0) {}; + IwasakiGaugeAction(RealD beta) : RBCGaugeAction(beta,-0.331) {}; + DBW2GaugeAction(RealD beta) : RBCGaugeAction(beta,-1.4067) {}; - WilsonCloverFermionR Dwc(Umu, Grid, RBGrid, mass, csw_r, csw_t, anis, params); - -Gauge --------- Fermion -------- +These classes all make use of a Fermion Implementation (Fimpl) policy class to provide +things like boundary conditions, covariant transportation rules etc. 
+ +All Fermion operator actions inherit from a common base class, + +that conforms to the CheckerBoardedSparseMatrix interface and constrains these objects to conform to the +interface expected by general algorithms in Grid:: + + ///////////////////////////////////////////////////////////////////////////////////////////// + // Interface defining what I expect of a general sparse matrix, such as a Fermion action + ///////////////////////////////////////////////////////////////////////////////////////////// + template class SparseMatrixBase { + public: + virtual GridBase *Grid(void) =0; + // Full checkerboar operations + virtual RealD M (const Field &in, Field &out)=0; + virtual RealD Mdag (const Field &in, Field &out)=0; + virtual void MdagM(const Field &in, Field &out,RealD &ni,RealD &no) { + Field tmp (in._grid); + ni=M(in,tmp); + no=Mdag(tmp,out); + } + virtual void Mdiag (const Field &in, Field &out)=0; + virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0; + }; + + ///////////////////////////////////////////////////////////////////////////////////////////// + // Interface augmented by a red black sparse matrix, such as a Fermion action + ///////////////////////////////////////////////////////////////////////////////////////////// + template class CheckerBoardedSparseMatrixBase : public SparseMatrixBase { + public: + virtual GridBase *RedBlackGrid(void)=0; + // half checkerboard operaions + virtual void Meooe (const Field &in, Field &out)=0; + virtual void Mooee (const Field &in, Field &out)=0; + virtual void MooeeInv (const Field &in, Field &out)=0; + + virtual void MeooeDag (const Field &in, Field &out)=0; + virtual void MooeeDag (const Field &in, Field &out)=0; + virtual void MooeeInvDag (const Field &in, Field &out)=0; + + }; + +The base class for Fermion Operators inherits from these:: + + template + class FermionOperator : public CheckerBoardedSparseMatrixBase, public Impl + +These all make use of an implementation template class, and the 
possible implementations include:: + + typedef WilsonImpl WilsonImplF; // Float + typedef WilsonImpl WilsonImplD; // Double + +Staggered fermions make use of a spin index free field via the StaggeredImpl:: + + typedef StaggeredImpl StaggeredImplF; // Float + typedef StaggeredImpl StaggeredImplD; // Double + +A number of alternate, non-fundamental Fermion representations are supported. Note that the Fermion +action code is common to each of these, demonstrating the utility of the template Fimpl classes for separating +the code that varies from the invariant sections:: + + typedef WilsonImpl WilsonAdjImplF; // Float + typedef WilsonImpl WilsonAdjImplD; // Double + + typedef WilsonImpl WilsonTwoIndexSymmetricImplF; // Float + typedef WilsonImpl WilsonTwoIndexSymmetricImplD; // Double + + typedef WilsonImpl WilsonTwoIndexAntiSymmetricImplF; // Float + typedef WilsonImpl WilsonTwoIndexAntiSymmetricImplD; // Double + +G-parity boundary conditions are supported, and an additional flavour index is inserted on the Fermion field via the Gparity implementation:: + + typedef GparityWilsonImpl GparityWilsonImplF; // Float + typedef GparityWilsonImpl GparityWilsonImplD; // Double + +ZMobius Fermions use complex rather than real action coefficients and are supported via an alternate implementation:: + + typedef WilsonImpl ZWilsonImplF; // Float + typedef WilsonImpl ZWilsonImplD; // Double + + +Some example constructor calls are given below for Wilson and Clover fermions:: + + template class WilsonFermion; + +With constructor:: + + WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, RealD _mass, + const ImplParams &p = ImplParams(), + const WilsonAnisotropyCoefficients &anis = WilsonAnisotropyCoefficients() ); + +and:: + + template class WilsonCloverFermion : public WilsonFermion; + +with constructor:: + + WilsonCloverFermion(GaugeField &_Umu, GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, + const RealD _mass, + const RealD _csw_r = 0.0, + 
const RealD _csw_t = 0.0, + const WilsonAnisotropyCoefficients &clover_anisotropy = WilsonAnisotropyCoefficients(), + const ImplParams &impl_p = ImplParams()); + +Additional parameters allow for anisotropic versions to be created, which take default values for +the isotropic case. + +The constructor signatures can be found in the header files in qcd/action/fermion/. +A complete list of the 4D ultralocal Fermion types is:: + + WilsonFermion WilsonFermionF; + WilsonFermion WilsonAdjFermionF; + WilsonFermion WilsonTwoIndexSymmetricFermionF; + WilsonFermion WilsonTwoIndexAntiSymmetricFermionF; + WilsonTMFermion WilsonTMFermionF; + WilsonCloverFermion WilsonCloverFermionF; + WilsonCloverFermion WilsonCloverAdjFermionF; + WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionF; + WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionF; + ImprovedStaggeredFermion ImprovedStaggeredFermionF; + +Cayley form chiral fermions (incl. domain wall):: + + DomainWallFermion DomainWallFermionF; + DomainWallEOFAFermion DomainWallEOFAFermionF; + MobiusFermion MobiusFermionF; + MobiusEOFAFermion MobiusEOFAFermionF; + ZMobiusFermion ZMobiusFermionF; + ScaledShamirFermion ScaledShamirFermionF; + MobiusZolotarevFermion MobiusZolotarevFermionF; + ShamirZolotarevFermion ShamirZolotarevFermionF; + OverlapWilsonCayleyTanhFermion OverlapWilsonCayleyTanhFermionF; + OverlapWilsonCayleyZolotarevFermion OverlapWilsonCayleyZolotarevFermionF; + +Continued fraction overlap:: + + OverlapWilsonContFracTanhFermion OverlapWilsonContFracTanhFermionF; + OverlapWilsonContFracZolotarevFermion OverlapWilsonContFracZolotarevFermionF; + +Partial fraction overlap:: + + OverlapWilsonPartialFractionTanhFermion OverlapWilsonPartialFractionTanhFermionF; + OverlapWilsonPartialFractionZolotarevFermion OverlapWilsonPartialFractionZolotarevFermionF; + +Gparity cases; a partial list is defined until tested:: + + WilsonFermion GparityWilsonFermionF; + DomainWallFermion GparityDomainWallFermionF; + 
DomainWallEOFAFermion GparityDomainWallEOFAFermionF; + + WilsonTMFermion GparityWilsonTMFermionF; + MobiusFermion GparityMobiusFermionF; + MobiusEOFAFermion GparityMobiusEOFAFermionF; + +For each action, the suffix "F" can be replaced with "D" to obtain a double precision version. More generally, +it is possible to perform communications with a different precision from computation. +The number of combinations is rather large to list, but in the above listing the substitution is the +obvious one. + +========== ================ ================== +Suffix Computation Communication +========== ================ ================== +F fp32 fp32 +D fp64 fp64 +R default default +FH fp32 fp16 +DF fp64 fp32 +RL default lower +========== ================ ================== + + Pseudofermion --------------- +Pseudofermion actions are defined in qcd/action/pseudofermion/ . +These action terms are built from template classes:: + + // Base even odd HMC on the normal Mee based schur decomposition. + // + // M = (Mee Meo) = (1 0 ) (Mee 0 ) (1 Mee^{-1} Meo) + // (Moe Moo) (Moe Mee^-1 1 ) (0 Moo-Moe Mee^-1 Meo) (0 1 ) + // + // Determinant is det of middle factor. This assumes Mee is indept of U. + template class SchurDifferentiableOperator ; + + // S = phi^dag (Mdag M)^-1 phi + template class TwoFlavourPseudoFermionAction ; + + // S = phi^dag V (Mdag M)^-1 Vdag phi + template class TwoFlavourRatioPseudoFermionAction ; + + // S = phi^dag (Mdag M)^-1 phi (odd) + // + phi^dag (Mdag M)^-1 phi (even) + template class TwoFlavourEvenOddPseudoFermionAction; + + // S = phi^dag V (Mdag M)^-1 Vdag phi + template class TwoFlavourEvenOddRatioPseudoFermionAction ; + + +Rational HMC pseudofermion terms:: + + // S_f = chi^dag * N(M^dag*M)/D(M^dag*M) * chi + // + // Here, M is some operator + // N and D makeup the rat. 
poly + template class OneFlavourRationalPseudoFermionAction; + + // S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi + // + // Here P/Q \sim R_{1/4} ~ (V^dagV)^{1/4} + // Here N/D \sim R_{-1/2} ~ (M^dagM)^{-1/2} + template class OneFlavourRatioRationalPseudoFermionAction; + + + // S = phi^dag (Mdag M)^-1/2 phi + template class OneFlavourEvenOddRationalPseudoFermionAction; + + // S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi + // + // Here P/Q \sim R_{1/4} ~ (V^dagV)^{1/4} + // Here N/D \sim R_{-1/2} ~ (M^dagM)^{-1/2} + template class OneFlavourEvenOddRatioRationalPseudoFermionAction; + +The relevant Fermion operators are constructed externally, +and references are passed in to these object constructors. Thus, they work for any Fermion operator and the code +can be shared. For example, one of the constructors is given as:: + + TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator &_NumOp, + FermionOperator &_DenOp, + OperatorFunction & DS, + OperatorFunction & AS) : + + +The exact one flavour algorithm for Domain Wall Fermions is present but is not documented here:: + + #include + HMC =========================================