diff --git a/.gitignore b/.gitignore index 45c15053..f8b7afa9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ +# Exclude directories _site .sass-cache .jekyll-metadata +pdf + +# Exclude backup files +*~ diff --git a/_config.yml b/_config.yml index 04f61ea4..fd2e0236 100644 --- a/_config.yml +++ b/_config.yml @@ -9,7 +9,8 @@ locale : "en" title : "GRID" title_separator : "|" -name : "Our team" +name : "University of Edinburgh" +authors : "Peter Boyle, Guido Cossu, Antonin Portelli, Azusa Yamaguchi" description : "Data parallel C++ mathematical object library." url : 'https://paboyle.github.io' # the base hostname & protocol for your site e.g. "https://mmistakes.github.io" baseurl : '/Grid' # the subpath of your site, e.g. "/blog" @@ -79,7 +80,7 @@ analytics: author: name : "PPT Group" avatar : "bio-photo.jpg" - bio : "Our group is amazing." + bio : "Some info" location : "Edinburgh" email : "guido.cossu@ed.ac.uk" uri : @@ -228,4 +229,16 @@ tag_archive: compress_html: clippings: all ignore: - envs: development \ No newline at end of file + envs: development + + +output: web +defaults: + - + scope: + path: "" + type: "pages" + values: + layout: "single" + comments: false + search: true diff --git a/_data/navigation.yml b/_data/navigation.yml index b13d5a34..64c15b11 100644 --- a/_data/navigation.yml +++ b/_data/navigation.yml @@ -1,4 +1,5 @@ # main links links + main: - title: "Documentation" icon: "octicon octicon-book vertical-middle" @@ -15,54 +16,107 @@ main: docs: + + - title: + output: pdf + type: frontmatter + children: + - title: + url: /titlepage.html + output: pdf + type: frontmatter + - title: + url: /tocpage.html + output: pdf + type: frontmatter + + - title: "Introduction" + output: pdf + children: + - title: "GRID" + url: /docs/ + output: pdf + - title: Getting Started + output: pdf children: - title: "Quick-Start Guide" url: /docs/quick-start-guide/ + output: web, pdf - title: "Communication interfaces" url: /docs/comm_interfaces/ + output: 
web, pdf - title: "Architecture targets" url: /docs/simd_targets/ - - title: "Building on KNL" - url: /docs/knl_build/ + output: web, pdf + - title: "Building on Intel and AMD targets" + url: /docs/general_build/ + output: web, pdf - title: Using GRID + output: pdf children: - title: "Running on KNL" url: /docs/running_knl/ + output: web, pdf - title: Development + output: pdf children: + - title: "Execution model" + url: /docs/execution-model/ + output: web, pdf - title: "Reporting an issue" url: /docs/bug_report/ + output: web, pdf - title: "Continuous integration" url: /docs/travis/ + output: web, pdf - title: API description + output: pdf children: - - title: "Structure" - url: /docs/structure/ - disable: "yes" - - title: "SIMD classes" - url: /docs/simd/ - disable: "yes" - - title: "Grid classes" - url: /docs/grid/ - disable: "yes" + - title: "Introduction" + url: /docs/API/introduction.html + output: web, pdf + disable: "no" - title: "Tensor classes" - url: /docs/tensor/ - disable: "yes" + url: /docs/API/tensor_classes.html + output: web, pdf + disable: "no" + - title: "Vectorisation" + url: /docs/API/vectorisation.html + output: web, pdf + disable: "no" + - title: "Coordinates" + url: /docs/API/coordinates.html + output: web, pdf + disable: "no" + - title: "Grids" + url: /docs/API/grids.html + output: web, pdf + disable: "no" + - title: "Lattice containers" + url: /docs/API/lattice_containers.html + output: web, pdf + disable: "no" + - title: "Random number generators" + url: /docs/API/random_number_generators.html + output: web, pdf + disable: "no" - title: "Lattice classes" url: /docs/lattice/ disable: "yes" - title: "Lattice theories" + output: pdf children: - title : "Theories" url: /docs/theories/ disable: "yes" + output: web - title: "HMC" url: /docs/theories/hmc/ disable: "no" - \ No newline at end of file + output: web,pdf diff --git a/_includes/head_print.html b/_includes/head_print.html new file mode 100644 index 00000000..46b543f7 --- /dev/null 
+++ b/_includes/head_print.html @@ -0,0 +1,38 @@ +{% include base_path %} + + + + + + +{% if page.homepage == true %} {{site.title}} {% elsif page.title %}{{ page.title }}{% endif %} + + + + + + + + + + + + + + + + + diff --git a/_layouts/default_print.html b/_layouts/default_print.html new file mode 100644 index 00000000..f3345f6d --- /dev/null +++ b/_layouts/default_print.html @@ -0,0 +1,19 @@ + + + + + {% include head_print.html %} + + + + +
+ +
+ {{content}} +
+
+ + + \ No newline at end of file diff --git a/_layouts/page_print.html b/_layouts/page_print.html new file mode 100644 index 00000000..40527ca2 --- /dev/null +++ b/_layouts/page_print.html @@ -0,0 +1,18 @@ +--- +layout: default_print +comments: false +--- + + + +
+

{{ page.excerpt }}

+
+ +
+ + {% if page.summary %} +
{{page.summary}}
+ {% endif %} + {{ content }} +
diff --git a/_layouts/single.html b/_layouts/single.html index 54a18f0d..9a1405c0 100644 --- a/_layouts/single.html +++ b/_layouts/single.html @@ -14,6 +14,11 @@ layout: default {% endunless %} {% endif %} + + +
{% include sidebar.html %} diff --git a/_pages/docs/API/coordinates.md b/_pages/docs/API/coordinates.md new file mode 100644 index 00000000..832adf8e --- /dev/null +++ b/_pages/docs/API/coordinates.md @@ -0,0 +1,34 @@ +--- +title : "API Documentation" +author_profile: false +excerpt: "Coordinates" +header: + overlay_color: "#5DADE2" +permalink: /docs/API/coordinates.html +sidebar: + nav : docs +--- + +The Grid is defined on an N-dimensional set of integer coordinates. + +The maximum dimension is eight, and indexes in this space make use of the `Coordinate` class. +The coordinate class shares a similar interface to `std::vector`, but contains all data within the +object, and has a fixed maximum length (template parameter). + +**Example**: + +```c++ +const int Nd=4; +Coordinate point(Nd); + +for(int i=0;i using iSpinMatrix = iScalar, Ns> >; + typedef iSpinMatrix SpinMatrixF; //scalar + typedef iSpinMatrix vSpinMatrixF;//vectorised + typedef Lattice LatticeSpinMatrixF; +``` + +The full range of QCD relevant lattice objects is given below. 
+ +|-----------|------------|----------|-----------|---------------|---------------------------------|--------------------------| +| Lattice | Lorentz | Spin | Colour | scalar_type | Field | Synonym | +|-----------|------------|----------|-----------|---------------|---------------------------------|--------------------------| +|`Vector` | `Scalar` | `Scalar` | `Scalar` | `RealD` | `LatticeRealD` | N/A | +|`Vector` | `Scalar` | `Scalar` | `Scalar` | `ComplexD` | `LatticeComplexD` | N/A | +|`Vector` | `Scalar` | `Scalar` | `Matrix` | `ComplexD` | `LatticeColourMatrixD` | `LatticeGaugeLink` | +|`Vector` | `Vector` | `Scalar` | `Matrix` | `ComplexD` | `LatticeLorentzColourMatrixD` | `LatticeGaugeFieldD` | +|`Vector` | `Scalar` | `Vector` | `Vector` | `ComplexD` | `LatticeSpinColourVectorD` | `LatticeFermionD` | +|`Vector` | `Scalar` | `Vector` | `Vector` | `ComplexD` | `LatticeHalfSpinColourVectorD` | `LatticeHalfFermionD` | +|`Vector` | `Scalar` | `Matrix` | `Matrix` | `ComplexD` | `LatticeSpinColourMatrixD` | `LatticePropagatorD` | +|-----------|------------|----------|-----------|---------------|---------------------------------|--------------------------| + +Additional single precision variants are defined with the suffix `F`. +Other lattice objects can be defined using the sort of typedefs shown above if needed. + +### Opaque containers + +The layout within the container is complicated to enable maximum opportunity for vectorisation, and +is opaque from the point of view of the API definition. The key implementation observation is that +so long as data parallel operations are performed and adjacent SIMD lanes correspond to well separated +lattice sites, then identical operations are performed on all SIMD lanes and enable good vectorisation. 
+ +Because the layout is opaque, import and export routines from naturally ordered x,y,z,t arrays +are provided (`lib/lattice/Lattice_transfer.h`): + +```c++ + unvectorizeToLexOrdArray(std::vector &out, const Lattice &in); + vectorizeFromLexOrdArray(std::vector &in , Lattice &out); +``` + +The Lexicographic order of data in the external vector fields is defined by (`lib/util/Lexicographic.h`): + +```c++ + Lexicographic::IndexFromCoor(const Coordinate &lcoor, int &lex,Coordinate *local_dims); +``` + +This ordering is $$x + L_x * y + L_x*L_y*z + L_x*L_y*L_z *t$$ + +Peek and poke routines are provided to perform single site operations. These operations are +extremely low performance and are not intended for algorithm development or performance critical code. + +The following are "collective" operations and involve communication between nodes. All nodes receive the same +result by broadcast from the owning node: + +```c++ + void peekSite(sobj &s,const Lattice &l,const Coordinate &site); + void pokeSite(const sobj &s,Lattice &l,const Coordinate &site); +``` + +The following are executed independently by each node: + +```c++ + void peekLocalSite(sobj &s,const Lattice &l,Coordinate &site); + void pokeLocalSite(const sobj &s,Lattice &l,Coordinate &site); +``` + +Lattices of one tensor type may be transformed into lattices of another tensor type by +peeking and poking specific indices in a data parallel manner: + +```c++ + template // Vector data parallel index peek + auto PeekIndex(const Lattice &lhs,int i); + + template // Matrix data parallel index peek + auto PeekIndex(const Lattice &lhs,int i,int j); + + template // Vector poke + void PokeIndex(Lattice &lhs,const Lattice<> & rhs,int i) + + template // Matrix poke + void PokeIndex(Lattice &lhs,const Lattice<> & rhs,int i,int j) +``` + +The inconsistent capitalisation on the letter P is due to an obscure bug in g++ that has not to +our knowledge been fixed in any version. The bug was reported in 2016. 
+ +### Global Reduction operations + +Reduction operations for any lattice field are provided. The result is identical on each computing node +that is part of the relevant Grid communicator: + +```c++ + template + RealD norm2(const Lattice &arg); + + template + ComplexD innerProduct(const Lattice &left,const Lattice &right); + + template + vobj sum(const Lattice &arg) +``` + +### Site local reduction operations + +Internal indices may be reduced, site by site, using the following routines: + +```c++ + template + auto localNorm2 (const Lattice &rhs) + + template + auto localInnerProduct (const Lattice &lhs,const Lattice &rhs) +``` + +### Outer product + +A site local outer product is defined: + +```c++ + template + auto outerProduct (const Lattice &lhs,const Lattice &rhs) +``` + +### Slice operations + +Slice operations are defined to operate on one lower dimension than the full lattice. The omitted dimension +is the parameter orthogdim: + +```c++ + template + void sliceSum(const Lattice &Data, + std::vector &result, + int orthogdim); + + template + void sliceInnerProductVector( std::vector & result, + const Lattice &lhs, + const Lattice &rhs, + int orthogdim); + + template + void sliceNorm (std::vector &sn, + const Lattice &rhs, + int orthogdim); +``` + +### Data parallel expression template engine + +The standard arithmetic operators and some data parallel library functions are implemented site by site +on lattice types. + +Operations may only ever combine lattice objects that have been constructed from the **same** grid pointer. + +**Example**: + +```c++ + LatticeFermionD A(&grid); + LatticeFermionD B(&grid); + LatticeFermionD C(&grid); + + A = B - C; +``` + +Such operations are said to be **conformable** and the lattices are guaranteed to have the same dimensions +and both MPI and SIMD decomposition because they are based on the same grid object. 
The conformability check +is lightweight and simply requires the same grid pointers be passed to the lattice objects. The data members +of the grid objects are not compared. + +Conformable lattice fields may be combined with appropriate scalar types in expressions. The implemented +rules follow those already documented for the tensor types. + + + + +### Unary operators and functions + +The following sitewise unary operations are defined: + +|-----------------------|---------------------------------------------| +| Operation | Description | +|-----------------------|---------------------------------------------| +|`operator-` | negate | +|`adj` | Hermitian conjugate | +|`conjugate` | complex conjugate | +|`trace` | sitewise trace | +|`transpose` | sitewise transpose | +|`Ta` | take traceless anti-Hermitian part | +|`ProjectOnGroup` | reunitarise or orthogonalise | +|`real` | take the real part | +|`imag` | take the imaginary part | +|`toReal` | demote complex to real | +|`toComplex` | promote real to complex | +|`timesI` | elementwise +i mult (0 is not multiplied) | +|`timesMinusI` | elementwise -i mult (0 is not multiplied) | +|`abs` | elementwise absolute value | +|`sqrt` | elementwise square root | +|`rsqrt` | elementwise reciprocal square root | +|`sin` | elementwise sine | +|`cos` | elementwise cosine | +|`asin` | elementwise inverse sine | +|`acos` | elementwise inverse cosine | +|`log` | elementwise logarithm | +|`exp` | elementwise exponentiation | +|`operator!` | Logical negation of integer field | +|`Not` | Logical negation of integer field | +|-----------------------|---------------------------------------------| + +The following sitewise functions taking additional parameters are defined: + +```c++ + template Lattice pow(const Lattice &rhs_i,RealD y); + + template Lattice mod(const Lattice &rhs_i,Integer y); + + template Lattice div(const Lattice &rhs_i,Integer y); + + template Lattice + expMat(const Lattice &rhs_i, RealD alpha, Integer Nexp = 
DEFAULT_MAT_EXP); +``` + +### Binary operators + +The following binary operators are defined: + +``` + operator+ + operator- + operator* + operator/ +``` + +Logical operators are defined on LatticeInteger types: + +``` + operator& + operator| + operator&& + operator|| +``` + +### Ternary operator, logical operations and where + +Within the data parallel level of the API the only way to perform operations +that are differentiated between sites is to use predicated execution. + +The predicate takes the form of a `LatticeInteger` which is conformable with both +the `iftrue` and `iffalse` arguments: + +```c++ + template void where(const Lattice &pred, + Lattice &iftrue, + Lattice &iffalse); +``` +This is the data parallel analogue of the C++ ternary operator: + +```c++ + a = b ? c : d; +``` + +In order to create the predicate in a coordinate dependent fashion it is often useful +to use the lattice coordinates. + +The `LatticeCoordinate` function: + +```c++ + template LatticeCoordinate(Lattice &coor,int dir); +``` + +fills an `Integer` field with the coordinate in the N-th dimension. +A usage example is given below. + +**Example**: + +```c++ + int dir =3; + int block=4; + LatticeInteger coor(FineGrid); + + LatticeCoordinate(coor,dir); + + result = where(mod(coor,block)==(block-1),x,z); +``` + +(Other usage cases of LatticeCoordinate include the generation of plane wave momentum phases.) + +### Site local fused operations + +The biggest limitation of expression template engines is that the optimisation +visibility is a single assignment statement in the original source code. + +There is no scope for loop fusion between multiple statements. +Multi-loop fusion gives scope for greater cache locality. + +Two primitives for hardware aware parallel loops are provided. +These will operate directly on the site objects which are expanded by a factor +of the vector length (in our struct of array datatypes). 
+ +Since the mapping of sites +to data lanes is opaque, these vectorised loops +are *only* appropriate for optimisation of site local operations. + +### View objects + +Due to an obscure aspect of the way that Nvidia handle device C++11 lambda functions, +it is necessary to disable the indexing of a Lattice object. + +Rather, a reference to a lattice object must be first obtained. + +The reference is copyable to a GPU, and is able to be indexed on either accelerator code, +or by host code. + +In order to prevent people developing code that dereferences Lattice objects in a way that +works on CPU compilation, but fails on GPU compilation, we have decided to remove the ability +to index a lattice object on CPU code. + +As a result of Nvidia's constraints, all accesses to lattice objects are required to be made +through a View object. + +In the following, the type is `LatticeView`, however it is wise to use the C++11 auto keyword +to avoid naming the type. See code examples below. + + +### thread_loops + +The first parallel primitive is the `thread_loop`. + +**Example**: + +```c++ + LatticeField r(grid); + LatticeField x(grid); + LatticeField p(grid); + LatticeField mmp(grid); + auto r_v = r.View(); + auto x_v = x.View(); + auto p_v = p.View(); + auto mmp_v = mmp.View(); + thread_loop(s , r_v, { + r_v[s] = r_v[s] - a * mmp_v[s]; + x_v[s] = x_v[s] + a*p_v[s]; + p_v[s] = p_v[s]*b + r_v[s]; + }); +``` + +### accelerator_loops + +The second parallel primitive is the `accelerator_loop`. + +**Example**: + +```c++ + LatticeField r(grid); + LatticeField x(grid); + LatticeField p(grid); + LatticeField mmp(grid); + auto r_v = r.View(); + auto x_v = x.View(); + auto p_v = p.View(); + auto mmp_v = mmp.View(); + accelerator_loop(s , r_v, { + r_v[s] = r_v[s] - a * mmp_v[s]; + x_v[s] = x_v[s] + a*p_v[s]; + p_v[s] = p_v[s]*b + r_v[s]; + }); +``` + + +### Cshift + +Site shifting operations are provided using the Cshift function: + +```c++ + template + Lattice Cshift(const Lattice &rhs,int 
dimension,int shift) +``` + +This shifts the whole vector by any distance shift in the appropriate dimension. + +For the avoidance of doubt on direction conventions, a positive shift moves the +lattice site $$x_\mu = 1$$ in the rhs to $$x_\mu = 0$$ in the result. + +**Example** (`benchmarks/Benchmark_wilson.cc`): + +```c++ + { // Naive wilson implementation + ref = Zero(); + for(int mu=0;mu + Lattice CovShiftForward(const Lattice &Link, int mu, + const Lattice &field); + + template + Lattice CovShiftBackward(const Lattice &Link, int mu, + const Lattice &field); +``` + +### Boundary conditions + +The covariant shift routines occur in namespaces PeriodicBC and ConjugateBC. The correct covariant shift +for the boundary condition is passed into the gauge actions and wilson loops via an +"Impl" template policy class. + +The relevant staples, plaquettes, and loops are formed by using the provided methods: + +```c++ + Impl::CovShiftForward + Impl::CovShiftBackward +``` + +etc... This makes physics code transform appropriately with externally supplied rules about +treating the boundary. + +**Example** (`lib/qcd/util/WilsonLoops.h`): + +```c++ + static void dirPlaquette(GaugeMat &plaq, const std::vector &U, + const int mu, const int nu) { + // ___ + //| | + //|<__| + plaq = Gimpl::CovShiftForward(U[mu],mu, + Gimpl::CovShiftForward(U[nu],nu, + Gimpl::CovShiftBackward(U[mu],mu, + Gimpl::CovShiftIdentityBackward(U[nu], nu)))); + } +``` + +### Inter-grid transfer operations + +Transferring between different checkerboards of the same global lattice: + +```c++ + template void pickCheckerboard(int cb,Lattice &half,const Lattice &full); + template void setCheckerboard(Lattice &full,const Lattice &half); +``` + +These are used to set up Schur red-black decomposed solvers, for example. 
+ +Multi-grid projection between a fine and coarse grid: + +```c++ + template + void blockProject(Lattice > &coarseData, + const Lattice &fineData, + const std::vector > &Basis); +``` + +Multi-grid promotion to a finer grid: + +```c++ + template + void blockPromote(const Lattice > &coarseData, + Lattice &fineData, + const std::vector > &Basis) +``` + +Support for sub-block Linear algebra: + +```c++ + template + void blockZAXPY(Lattice &fineZ, + const Lattice &coarseA, + const Lattice &fineX, + const Lattice &fineY) + + template + void blockInnerProduct(Lattice &CoarseInner, + const Lattice &fineX, + const Lattice &fineY) + + template + void blockNormalise(Lattice &ip,Lattice &fineX) + + template + void blockSum(Lattice &coarseData,const Lattice &fineData) + + template + void blockOrthogonalise(Lattice &ip,std::vector > &Basis) +``` + +Conversion between different SIMD layouts: + +```c++ + template + void localConvert(const Lattice &in,Lattice &out) +``` + +Slices between a grid of dimension N and a grid of dimension N+1: + +```c++ + template + void InsertSlice(const Lattice &lowDim,Lattice & higherDim,int slice, int orthog) + + template + void ExtractSlice(Lattice &lowDim,const Lattice & higherDim,int slice, int orthog) +``` + +Growing a lattice by a multiple factor, with periodic replication: + +```c++ + template + void Replicate(Lattice &coarse,Lattice & fine) +``` + +The latter is useful to, for example, pre-thermalise a smaller volume and then grow the volume in HMC. +It was written while debugging G-parity boundary conditions. 
diff --git a/_pages/docs/API/random_number_generators.md b/_pages/docs/API/random_number_generators.md new file mode 100644 index 00000000..b6b4a18a --- /dev/null +++ b/_pages/docs/API/random_number_generators.md @@ -0,0 +1,107 @@ +--- +title : "API Documentation" +author_profile: false +excerpt: "Random number generators" +header: + overlay_color: "#5DADE2" +permalink: /docs/API/random_number_generators.html +sidebar: + nav : docs +--- + +Grid provides three configure time options for the random number generator engine. + +* `sitmo` +* `ranlux48` +* `mt19937` + +The selection is controlled by the `--enable-rng=