1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-14 13:57:07 +01:00

Compare commits

..

608 Commits

Author SHA1 Message Date
ac1d655de8 Const correctness patch 2018-11-19 10:38:36 +00:00
3023287fd9 Hadrons: 3-index RO access to Eigen disk vector 2018-10-16 14:44:14 +01:00
b3d6805638 Merge branch 'feature/contractor' into develop 2018-10-16 11:29:37 +01:00
291bc2a1f0 IO benchmark on a list of directories 2018-10-15 17:25:08 +01:00
2f368c33fc Hadrons: copyright update 2018-10-15 15:51:45 +01:00
9592115341 Hadrons: NPR and gauge fixing linking fix 2018-10-15 15:49:42 +01:00
24c07694bc Mixed precision now supported in MADWF 2018-10-14 00:22:52 +01:00
f0229025e2 MADWF working across a range of actions 2018-10-13 19:55:03 +01:00
6de9a45a09 NPR first cut by Julia Kettle 2018-10-12 11:00:58 +01:00
03c3d495a2 First cut (non functional NPR code) developed by Julia Kettle 2018-10-12 10:59:33 +01:00
49f25e08e8 PauliVillars based 4D -> 5D reconstruction with Fourier Accelerated PV inverse
by Christoph. Differs from the one by Rudy in BFM since it vectorises the twisted
4D solves in pairs.
2018-10-11 12:35:32 +01:00
efc0c65056 Hadrons: DiskVector Eigen specialisation with binary I/O and sha256 correctness check 2018-10-08 19:02:00 +01:00
936eaac8e1 function to get the sha256 string 2018-10-08 19:00:50 +01:00
fe6a372f75 Hadrons: fixes and cleaning in the scalar SU(N) part 2018-10-08 15:14:08 +01:00
148fc052bd Hadrons: Aslash field, tested 2018-10-05 21:04:10 +01:00
c073341a10 Hadrons: more cleaning 2018-10-05 19:50:41 +01:00
78299daaac Hadrons: code cleaning 2018-10-05 16:47:52 +01:00
866449c804 Hadrons: integration of Peter's A2Autils 2018-10-05 16:42:44 +01:00
d69a52079f Merge remote-tracking branch 'gh/feature/a2a-integration' into feature/aslashfield 2018-10-05 15:39:09 +01:00
9f4f8a14a3 Hadrons: code cleaning 2018-10-05 15:38:01 +01:00
f6593dc881 Hadrons: A2A block performance counter fix 2018-10-05 15:11:01 +01:00
b46d31d4b6 MKL enable on Eigen if Grid is configured to use MKL 2018-10-05 11:29:40 +01:00
58567fc650 Hadrons: big update abstracting the block meson field routine, tested & working, performance counters broken and code dirty 2018-10-04 20:01:49 +01:00
7c57cac670 Adding A2A utils class for containing kernels. 2018-10-04 18:57:41 +01:00
d0b21bf1ff Merge branch 'feature/eigenpack-convert' into develop 2018-10-04 18:26:45 +01:00
a1825d1f59 Hadrons: final fix for multiprec eigenpacks 2018-10-04 18:25:26 +01:00
5a3e83ff7b Hadrons: new layer in eigenpacks class hierarchy 2018-10-03 14:45:01 +01:00
52569d98d8 Hadrons: multiprec eigenpack I/O fix 2018-10-03 14:24:43 +01:00
b351103c29 Hadrons: eigenpack load module with 32bit I/O 2018-10-02 21:07:56 +01:00
118cca4681 Hadrons: linking fix 2018-10-02 20:08:49 +01:00
44de727cd2 Hadrons: eigenpack support for multiprecision I/O 2018-10-02 19:51:09 +01:00
888ebc3cf9 Hadrons: better name for the EP converter 2018-10-02 15:22:18 +01:00
6c031a1b81 Merge branch 'feature/eigenpack-convert' into develop 2018-10-02 14:57:30 +01:00
02aa4bd762 Hadrons: cleaner eigenpack convert log 2018-10-02 13:43:25 +01:00
9aafa8ee60 Hadrons: eigenpack converter generalised for RB/5d grids 2018-10-02 13:34:17 +01:00
430b98b354 fix previous commit 2018-10-02 13:12:46 +01:00
84189867ef Hadrons: eigenpack converter with RB grids (to be generalised) 2018-10-02 13:05:05 +01:00
4ab8cfbe2a Hadrons: more verbose eigenpack convert 2018-10-02 12:24:45 +01:00
aadd9f4468 Eigenpack converter, to be tested, HadronsXmlRun moved to Utilities directory 2018-10-02 00:02:34 +01:00
8fbb27ce13 Hadrons: less code duplication in eigenpack IO 2018-10-01 20:15:21 +01:00
21bba95909 Hadrons: eigenpack metadata is no ignored anymore when reading 2018-10-01 19:33:45 +01:00
6448fe7121 More flexible XML control in Lime files 2018-10-01 19:32:50 +01:00
2458a11d1d Hadrons: precision cast module 2018-09-29 18:00:08 +01:00
d0ca7c3fe6 Hadrons: big update for getGrid, grids are now created automatically 2018-09-29 17:55:19 +01:00
57f899d79c Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-09-29 15:50:59 +01:00
e881a0c157 Merge commit 'beed527ea37c90fd5e19b82d326eb8adc8eba5ff' into develop 2018-09-29 15:50:21 +01:00
f411657118 JSON update 2018-09-29 15:48:05 +01:00
7458c6174b Use operator() for indexing internal indices 2018-09-27 06:42:02 +01:00
21b269d0f9 Move the Grid.pdf out of a deep directory 2018-09-27 06:36:25 +01:00
083af92ac2 Update from chulwoo ; high level link for Grid.pdf in documentation 2018-09-27 06:30:40 +01:00
2c162577b5 HMC documentation 2018-09-25 23:28:17 +01:00
b1c4e96382 Updates to actions etc.. 2018-09-24 22:10:30 +01:00
a55c6f34f3 Updated docs 2018-09-24 15:44:35 +01:00
beed527ea3 Carletons chapter 2018-09-24 15:09:51 +01:00
eaa633cf69 Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-09-21 18:16:22 +01:00
c632455129 Hadrons: meson field IO fix 2018-09-21 18:16:01 +01:00
c012899ed5 Hadrons: big update after templating of get/createGrid 2018-09-21 18:15:33 +01:00
8bab544c2f Updated manual pdf 2018-09-20 18:51:11 +01:00
76fc06a5dc Updates with todo from Carleton 2018-09-20 18:50:11 +01:00
4af6c7e7aa Hadrons: copyright update 2018-09-14 12:51:48 +01:00
f60fbcfc4d Hadrons: mixed precision CG, to be tested 2018-09-14 12:47:55 +01:00
464c81706e Hadrons: defaults Impls for different precisions 2018-09-14 12:46:43 +01:00
408130b808 Hadrons: header list fix 2018-09-10 17:38:54 +01:00
375edd1370 file forgotten in last commit 2018-09-10 17:37:29 +01:00
6d912f6c67 Hadrons: general guesser factory 2018-09-10 17:36:54 +01:00
6d1d28955e Guesser class is redundant, switching to LinearFunction 2018-09-10 17:35:54 +01:00
920b471761 Hadrons tests update 2018-09-10 15:32:13 +01:00
63c21767ba Hadrons: grids stored with hash of SIMD type (for mixed-precision setups) 2018-09-10 15:31:39 +01:00
7b6b712565 function to convert std::vector to string 2018-09-10 15:17:32 +01:00
35abd05ee9 mute Version.h cache creation 2018-09-10 15:16:59 +01:00
dd36e60f6a compilation fix for hypercube optimal communicator 2018-09-08 18:07:29 +01:00
cb6c548e21 Hadrons: code cleaning 2018-09-07 20:40:55 +01:00
02c4ccf621 Hadrons: diskvector debug message for writes 2018-09-07 20:33:49 +01:00
fd24588212 Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-09-07 20:25:11 +01:00
b800bb3ecb Hadrons: disk vector cache policy to last touch 2018-09-07 20:24:48 +01:00
f8abd0978b Hadrons copyright update 2018-09-07 20:10:07 +01:00
12c7c493bf Hadrons: disk-based container 2018-09-07 20:04:54 +01:00
c7c9072313 Documentation 2018-09-06 16:01:42 +01:00
2bf3be5fae Hadrons: copyright and code cleaning 2018-09-04 18:25:10 +01:00
3a40e4fc69 Hadrons: scalar SU(N) 2-pt guard against negative momenta components 2018-09-04 18:24:07 +01:00
2e69e03f6f Hadrons: CosmHol configs IO module 2018-09-04 18:23:28 +01:00
a09f9bb528 Hadrons: code cleaning 2018-09-04 18:22:21 +01:00
f0e341d726 Hadrons: module list generator fix 2018-09-04 18:22:04 +01:00
6f09df0daf Hadrons: A2A matrix IO fix 2018-09-02 01:46:22 +01:00
26cee605b8 Hadrons: copyright update 2018-09-01 21:30:30 +01:00
b3fa18c229 copyright script never removes authorship 2018-09-01 21:29:58 +01:00
2940c9bcfd Hadrons: dedicated IO class for A2A matrices 2018-09-01 21:09:01 +01:00
0bb532f72b more explicit clean git tree message 2018-09-01 20:02:18 +01:00
fada2aa0f7 Hadrons: precision fix 2018-09-01 20:00:12 +01:00
c193e4e675 Aslash expression in Mathematica notebook 2018-09-01 19:59:58 +01:00
3ee682f676 more Version.h fine tuning 2018-09-01 19:58:16 +01:00
d85ec3bac2 build system minor fix 2018-09-01 19:54:21 +01:00
b52d8eb1e3 better Version.h implementation 2018-09-01 19:49:13 +01:00
ee630d2e8b Hadrons: smearing plaquette output 2018-09-01 17:38:32 +01:00
2f0af79869 Hadrons: scalar SU(N) NPR update 2018-09-01 17:36:35 +01:00
1b7fb79ec0 CI fix 2018-08-28 18:26:37 +01:00
2db1a4628c build system minor fix 2018-08-28 18:26:30 +01:00
6aa047d842 Hadrons module template fix 2018-08-28 17:17:00 +01:00
8779c32ae1 Merge branch 'feature/hadrons' into develop 2018-08-28 17:10:33 +01:00
c527dc3358 CI fix 2018-08-28 17:10:08 +01:00
6b42577b6b gitignore update 2018-08-28 16:58:37 +01:00
fb3596f968 Hadrons: precision fixes 2018-08-28 16:58:23 +01:00
f3a0158213 code cleaning 2018-08-28 16:56:07 +01:00
0250aa9347 file committed in error 2018-08-28 16:55:48 +01:00
3df6743396 more build system cleaning and patch for bad include in Eigen 2018-08-28 16:54:57 +01:00
fb7d021b9d Hadrons: moving Hadrons to root directory, build system improvements 2018-08-28 15:00:40 +01:00
5f206df775 Hadrons: meson field cache friendly cache copy 2018-08-15 17:29:44 +01:00
7727e81113 Hadrons: slight improvement on previous commit 2018-08-14 20:18:47 +01:00
c4115544a5 Hadrons: application option to save graph 2018-08-14 20:03:53 +01:00
08c47328ba Hadrons: meson field kernel performance for each block 2018-08-14 17:35:42 +01:00
09001aedca Hadrons: meson fields saved in single precision 2018-08-14 17:19:38 +01:00
2c67304716 Hadrons: meson field code cleaning 2018-08-14 17:00:05 +01:00
dc6d8686de Hadrons: meson field chunked HDF5 IO 2018-08-14 16:40:29 +01:00
cc2780bea3 Hadrons: meson field parallel IO 2018-08-14 14:55:13 +01:00
6e5a2b7922 fix previous commit 2018-08-14 14:07:54 +01:00
f4878d3a13 Hadrons: meson field threaded cache copy 2018-08-14 14:02:37 +01:00
89d2fac92e Hadrons: copyright update 2018-08-14 12:19:14 +01:00
f2d3e41cf2 Hadrons: meson field: HDF5 perf, gamma input and Eigen tensors allocated by Grid 2018-08-13 20:18:33 +01:00
3c27bb36d4 Hadrons: direct timer access 2018-08-13 20:17:45 +01:00
603d59f389 Hadrons: code cleaning 2018-08-13 20:17:24 +01:00
07a0ef3f95 Hadrons: global measurement time profile 2018-08-13 16:44:57 +01:00
503259f9c9 Hadrons: meson field HDF5 IO done and tested 2018-08-12 16:52:12 +01:00
5be6a51044 Hadrons: meson fields code cleaning and momentum phases 2018-08-11 15:13:43 +01:00
ac69f042b1 Hadrons: module RNG uniquely seeded with <run id> + <module name> + <trajectory> 2018-08-10 18:27:00 +01:00
133d5c2e34 Merge branch 'develop' into feature/hadrons 2018-08-10 16:36:40 +01:00
2a94244890 configure: --with-openssl option and LIME is now mandatory 2018-08-10 16:36:11 +01:00
a15a2dfd29 Merge branch 'develop' into feature/hadrons 2018-08-10 16:08:22 +01:00
093bb02633 Hadrons: execute message for time diluted noise 2018-08-10 16:07:48 +01:00
99a85116f8 Hadrons: module and VM instrumentation 2018-08-10 16:07:30 +01:00
27cdb79063 Sha used to seed from a unique string 2018-08-10 15:11:01 +01:00
f4cbfd63ff Hadrons: more meson field cleaning, needs IO now 2018-08-09 18:39:58 +01:00
2b794b6aa7 Hadrons: module generating random lattices for testing purposes 2018-08-09 17:16:42 +01:00
d0244a059f Hadrons: cleaning cleaning... 2018-08-09 00:38:17 +01:00
dcdd891d7d Hadrons: precision fix 2018-08-09 00:13:53 +01:00
6d2df9de79 Hadrons: even more cleaning 2018-08-08 23:15:55 +01:00
41d4e37bae Hadrons: more cleaning 2018-08-08 19:04:44 +01:00
ee5c0cc9b6 Hadrons: code cleaning 2018-08-08 18:45:06 +01:00
0a4020eb4d Hadrons: copyright fix 2018-08-07 18:42:52 +01:00
b2de26589b Hadrons: code cleaning and copyright update 2018-08-07 18:40:48 +01:00
0677adb4dd Hadrons: overhaul of A2A for production 2018-08-07 18:27:59 +01:00
231cc95be6 Hadrons: eigenvalues precision fix 2018-08-07 18:27:19 +01:00
639f9cab82 Hadrons: schedule loading fix 2018-08-07 18:26:49 +01:00
4eac4e575e Hadrons: meson fields indentation fix 2018-08-06 12:42:25 +01:00
3f0f92cda6 Hadrons: first cleaning/integration of A2A/meson fields 2018-08-06 12:11:52 +01:00
d2650e89bd Hadrons: VM exception for object type (solves infinite loop in scheduler) 2018-08-06 12:11:00 +01:00
2962123cba Hadrons: diluted noise polish 2018-08-05 01:44:37 +01:00
830168ec37 Hadrons: first try at diluted noise class (tested) 2018-08-04 12:32:58 +01:00
584c921ca0 Eigen support fix (use of Grid as a library was broken) 2018-08-03 21:07:58 +01:00
81347b4d16 gitignore update 2018-08-03 19:58:52 +01:00
2cfa0b0e6b Merge pull request #174 from fionnoh/a2a_basics
A2A basics
2018-08-03 16:32:14 +01:00
fa5dee76b1 Included Peter's A2AMeson field and Eigen changes 2018-08-03 15:15:54 +01:00
8d1679c6b8 Merge branch 'feature/hadrons-a2a' of https://github.com/paboyle/Grid into a2a_basics 2018-08-03 15:12:24 +01:00
3791a38f7c Optimised the MesonField a bit more 2018-08-01 08:27:27 +01:00
142f7b0c86 Updated the A2A Meson Field module 2018-07-31 15:58:02 +01:00
891ad66eab Included changes to Hadrons RBPrecCG solver needed for subtraction of guess 2018-07-31 11:26:07 +01:00
60c43151c5 Merge branch 'feature/hadrons-a2a' of https://github.com/paboyle/Grid into feature/hadrons-a2a 2018-07-31 01:09:02 +01:00
e036800261 Eigen fix 2018-07-31 01:08:42 +01:00
62900def36 Merge branch 'feature/hadrons-a2a' of https://github.com/paboyle/Grid into feature/hadrons-a2a 2018-07-31 00:36:26 +01:00
e3a309a73f Eigen happiness 2018-07-31 00:35:17 +01:00
ad6c1c0c4e The basics of what is needed in Grid and Hadrons for the A2A class and module, with none of the contraction or MF code. 2018-07-30 18:40:50 +01:00
00b92a91b5 Optimising 2018-07-28 23:46:22 +01:00
65533741f7 7 moms 2018-07-28 16:17:47 +01:00
dc0259fbda Merge pull request #173 from fionnoh/feature/hadrons-a2a
Changes to meson field benchmark. Now includes the gammas in the fina…
2018-07-27 23:03:56 +01:00
131a6785d4 Merge branch 'feature/hadrons-a2a' into feature/hadrons-a2a 2018-07-27 23:03:42 +01:00
44f4f5c8e2 Momentum loop 2018-07-27 23:00:16 +01:00
2679df034f Changes to meson field benchmark. Now includes the gammas in the final part of the naive method, both methods compute
lhs^dag*Gamma*rhs (previously Gamma*lhs^dag*rhs), and checks results.
2018-07-27 18:31:10 +01:00
bf71162b97 Hadrons: backtrace on abort 2018-07-26 19:20:12 +01:00
299e828d83 Merge branch 'develop' into feature/hadrons 2018-07-26 16:49:49 +01:00
ef5452cddf Hadrons: smarter memory profiler 2018-07-26 16:47:45 +01:00
80de748737 Hadrons: new exceptions which can save a integer 2018-07-26 16:47:25 +01:00
71e1006ba8 Updated meson field benchmark for dirac structures 2018-07-26 09:09:29 +01:00
00f31ae83f Merge pull request #163 from goracle/unstaged
Add printing of whether there are unstaged changes in the git hash print
2018-07-25 19:00:00 +00:00
cce339deaf Merge pull request #172 from fionnoh/feature/hadrons
feature/hadrons -> feature/hadrons-a2a
2018-07-25 17:20:19 +00:00
24128ff109 Changes needed for MF benchmark to work with comms correctly 2018-07-23 15:51:37 +01:00
34e9d3f0ca Moved the creation and resizing of the v and w high modes from the A2A class to the A2A module and made them an output of the module. This means that they have to be inputs of the contration modules and they will freed from memory if they are no longer needed. 2018-07-22 14:40:31 +01:00
c995788259 Added ImportUnphysicalFermion and included appropriate logic for 5d w vectors in A2A code 2018-07-21 00:08:11 +01:00
94c7198001 Added ZFIMPL to A2AMeson contraction 2018-07-20 23:08:22 +01:00
04d86fe9f3 Removed overly verbose print statement 2018-07-20 21:38:19 +01:00
b78074b6a0 Removed a Dminus from high mode v and removed duplication pf D_oo code 2018-07-20 16:55:24 +01:00
7dfd3cdae8 Inclusion of ExportPhysicalFermionSource that fixes a bug in the low mode w vectors 2018-07-20 15:45:43 +01:00
cecee1ef2c Merge branch 'develop' of github.com:paboyle/Grid into feature/hadrons 2018-07-20 13:37:50 +01:00
355d4b58be Merge branch 'feature/hadrons' of github.com:fionnoh/Grid into feature/hadrons 2018-07-19 16:07:54 +01:00
2c54a536f3 Moved the meson field inner product to its own header file 2018-07-19 15:56:52 +01:00
d868a45120 Cleaned up some stuff that was erroneously included in a previous "trash" commit. Leaving in the mySliceInnerProdct function for now as it speeds up mesonfield creation quite a lot for 24^3 tests 2018-07-16 16:19:59 +01:00
9deae8c962 A2A meson field contraction code 2018-07-16 14:18:45 +01:00
db86cdd7bd Possible trash commit 2018-07-10 13:30:45 +01:00
ec9939c1ba Test for faster implementation of meson field inner loop
This should be possible to cache block at outer levels, global sum across nodes not performed
and deferred to caller to block them all into a big all reduce.
Nc=3 and Fermion is hard coded in an ugly way. We might think about benchmarking whether
a product without the conjugate should be made available by Grid.

It is not clear whether the explicit unroll, or the performing of conjugate on left once
was the real source of the speed up.

Gives 70-80 GF/s on my laptop (single) half that double, and 70GB/s to cache.

This is competitive with dslash and a reasonable stopping point for the optimisation. If necessary we can revisit.
2018-07-10 12:38:51 +01:00
f74617c124 Added ZFIMPL to meson field module 2018-07-03 14:04:53 +01:00
8c6a3921ed Merge remote-tracking branch 'upstream/feature/hadrons' into feature/hadrons 2018-07-03 11:35:14 +01:00
a8a15dd9d0 Hadrons: code cleaning 2018-07-02 17:52:39 +01:00
3ce68a751a Hadrons: stout smearing module 2018-07-02 17:52:04 +01:00
daa0977d01 Included a print statement that indicates that the guess is being subtracted from the solve. 2018-06-28 16:34:56 +01:00
a2929f4384 Removed A2A contraction module and replaced it with the beginnings of a meson field module 2018-06-28 16:17:26 +01:00
7fe3974c0a Included eigenPacks and action as references, not inputs, of A2A module. They now now longer need to be parameters in the meson field modules. 2018-06-28 16:14:49 +01:00
f7e86f81a0 Changes A2A class to make use of the new Solver class 2018-06-28 16:14:16 +01:00
fecec803d9 Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/hadrons 2018-06-28 16:13:43 +01:00
8fe9a13cdd Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/hadrons 2018-06-28 16:13:07 +01:00
d2c42e6f42 Hadrons: scaled DWF action 2018-06-26 14:59:33 +01:00
049cc518f4 Hadrons: introduction message 2 2018-06-25 19:08:39 +01:00
2e1c66897f Hadrons: introduction message 2018-06-25 19:08:22 +01:00
adcef36189 Hadrons: Möbius DWF action 2018-06-25 15:58:35 +01:00
2f121c41c9 Commiting reation of meson field code before a merge with the upstream branch feature/hadrons 2018-06-25 12:20:46 +01:00
e0ed7e300f Hadrons: spurious Dminus removed 2018-06-22 16:33:43 +02:00
485207901b Merge branch 'develop' into feature/hadrons 2018-06-22 16:15:32 +02:00
c760f0a4c3 Hadrons: remove make_5D/4D functions and FreeProp fix 2018-06-22 16:12:46 +02:00
c84eeedec3 Hadrons: GaugeProp module for z-Wilson actions 2018-06-22 15:53:22 +02:00
1ac3526f33 Small changes to the A2A header and module 2018-06-22 12:29:42 +01:00
0de090ee74 Temporarily added in the contraction code that produced the working 2-pt function. This is commited for reference only and will be removed in the next push. 2018-06-22 12:28:41 +01:00
91405de3f7 Hadrons: new solver exposing fermion matrix and generic source/solve import/export 2018-06-22 12:14:37 +02:00
8fccda301a Fixed a bug where the guess was always subtracted after the solve and included appropriate weights for the sources in the one case we're looking at now. More work needs to be done to make the 5d/4d source logic less brittle. 2018-06-21 16:36:59 +01:00
7a0abfac89 Restructured the class that computes and returns the A2A vectors. 2018-06-21 16:36:06 +01:00
ae37fda699 A more elegant way to subtract guesses from solve and a bool check before verifying residual 2018-06-20 16:07:40 +01:00
b5fc5e2030 All to all module update that hit a promising milestone. Commiting for a reference for future changes. 2018-06-20 10:59:07 +01:00
8db0ef9736 Merge pull request #168 from jch1g10/feature/qed-fvol
Feature/qed fvol
2018-06-08 20:09:06 +02:00
95d4b46446 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-06-08 11:30:29 +01:00
5dfd216a34 Better thread safety 2018-06-04 21:08:44 +01:00
c2e8d0aa88 Solve g++ problem on the lanczos test 2018-06-04 18:34:15 +01:00
0fe5aeffbb Merge branch 'feature/hadrons' into feature/qed-fvol 2018-06-04 16:59:43 +01:00
7fbc469046 Merge branch 'develop' into feature/hadrons 2018-06-04 16:58:30 +01:00
bf96a4bdbf Merge branch 'master' into develop 2018-06-04 14:03:11 +01:00
84685c9bc3 Overflow fix 2018-06-04 13:42:07 +01:00
a8d4156997 Added a Hadrons module that computes the all-to-all v and w vectors 2018-05-31 17:18:58 +01:00
c18074869b Changes to Hadrons SchurRB solver to allow for a subtract_guess boolean to be passed 2018-05-31 17:17:16 +01:00
f4c6d39238 CHanges made to SchurRB solvers to allow for the subtraction of a guess after solve 2018-05-31 17:16:20 +01:00
200d35b38a Merge branch 'develop' into feature/hadrons 2018-05-28 11:52:47 +02:00
eb52e84d09 Merge branch 'feature/hadrons' of github.com:paboyle/Grid into feature/hadrons 2018-05-28 11:50:27 +02:00
72abc34764 Merge pull request #166 from guelpers/feature/hadrons
Feature/hadrons
2018-05-28 11:49:46 +02:00
e3164d4c7b Hadrons: env function to get volume in double 2018-05-28 11:39:17 +02:00
f5db386c55 Change MODULE_REGISTER_NS -> MODULE_REGISTER in UnitEM, ScalarVP and VPCounterTerms 2018-05-22 16:16:21 +01:00
294ee70a7a Merge branch 'feature/hadrons' into feature/qed-fvol
# Conflicts:
#	extras/Hadrons/modules.inc
#	lib/qcd/action/gauge/Photon.h
2018-05-21 18:02:41 +01:00
013ea4e8d1 Merge branch 'feature/staggered-comms-compute' into develop 2018-05-21 13:11:56 +01:00
7fbbb31a50 Merge branch 'develop' into feature/staggered-comms-compute
Conflicts:
	lib/qcd/action/fermion/ImprovedStaggeredFermion.cc
2018-05-21 13:07:29 +01:00
0e127b1fc7 New file single prec test 2018-05-21 12:57:13 +01:00
68c028b0a6 Comment 2018-05-21 12:54:25 +01:00
255d4992e1 Hadrons: stochastic scalar SU(N) free field fix 2018-05-18 20:49:55 +01:00
a0d399e5ce Hadrons: yet other attempts at EMT NPR 2018-05-18 20:49:26 +01:00
fd3b2e945a Hadrons: don't right result with empty stem 2018-05-18 20:48:24 +01:00
b999984501 Merge branch 'develop' into feature/hadrons 2018-05-15 13:53:57 +01:00
7836cc2d74 No checksum output on log for scidac 2018-05-15 10:10:08 +01:00
a61e0df54b Travis fix for Lime 2018-05-14 19:56:12 +01:00
9d835afa35 Attempt at solving the FP exception in the QED code 2018-05-14 19:05:54 +01:00
5e3be47117 Hadrons: scalar SU(N) various fixes 2018-05-14 18:58:39 +01:00
48de706dd5 Merge branch 'develop' into feature/hadrons 2018-05-11 18:06:40 +01:00
f871fb0c6d check file is opened correctly in the Lime reader 2018-05-11 18:06:28 +01:00
93771f3099 Hadrons: scalar SU(N) stochastic free field 2018-05-10 22:29:48 +01:00
8cb205725b Merge branch 'develop' into feature/hadrons 2018-05-09 23:56:35 +01:00
9ad580d82f Hadrons: format fix 2018-05-07 21:38:15 +01:00
899f961d0d Hadrons: eigenvalue metadata saved with 16 significant digits 2018-05-07 21:37:03 +01:00
54d789204f more general implementation of the precision interface for serialisers 2018-05-07 21:17:46 +01:00
25828746f3 XML precision scientific with 16 digits by default 2018-05-07 21:04:31 +01:00
f362c00739 Hadrons: better handling of automatic directory creation 2018-05-07 19:43:40 +01:00
25d1cadd3b Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-05-07 18:55:09 +01:00
c24d53bbd1 Further debug of RNG I/O 2018-05-07 18:55:05 +01:00
2017e4e3b4 Hadrons: more verbose directory creation error 2018-05-07 18:12:22 +01:00
27a4d4c951 Hadrons: multi-file eigenpack in separate directory 2018-05-07 17:52:54 +01:00
2f92721249 Merge branch 'develop' into feature/hadrons 2018-05-07 17:26:47 +01:00
3c7a4106ed Trap for deadly empty comm thread option 2018-05-07 17:26:39 +01:00
3252059daf Hadrons: multi-file support for eigenpacks 2018-05-07 17:25:36 +01:00
6eed167f0c Merge branch 'release/0.8.1' 2018-05-04 17:34:11 +01:00
4ad0df6fde Bump volume for Gerardo 2018-05-04 17:33:23 +01:00
661381e881 Merge branch 'develop' into feature/hadrons 2018-05-04 14:52:17 +01:00
68a5079f33 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-05-04 14:13:54 +01:00
8634e19f1b Update 2018-05-04 14:13:35 +01:00
9ada378e38 Add timing 2018-05-04 10:58:01 +01:00
9d9692d439 Fix double vs float in boundary phases 2018-05-03 16:40:16 +01:00
0659ae4014 Merge branch 'develop' into feature/hadrons 2018-05-03 16:20:22 +01:00
bfbf2f1fa0 no threaded stencil benchmark if OpenMP is not supported 2018-05-03 16:20:01 +01:00
dd6b796a01 Hadrons: scalar SU(N) volume factor fix 2018-05-03 16:19:17 +01:00
52a856b4a8 FreeProp module for Hadrons 2018-05-03 12:33:20 +01:00
04190ee7f3 5D free propagator for DWF and boundary conditions for free propagators 2018-05-03 12:31:36 +01:00
587bfcc0f4 Add Timing 2018-05-03 12:10:31 +01:00
2700992ef5 Merge remote-tracking branch 'upstream/feature/hadrons' into feature/hadrons 2018-05-03 10:01:52 +01:00
8c658de179 Compressor speed up (a little); streaming stores 2018-05-02 17:52:16 +01:00
ba37d51ee9 Debugging the RNG IO 2018-05-02 15:32:06 +01:00
4f4181c54a Merge branch 'feature/staggered-comms-compute' of https://github.com/paboyle/Grid into feature/staggered-comms-compute 2018-05-02 14:59:13 +01:00
4d4ac2517b Adding Scalar field theory example for Scidac format 2018-05-02 14:36:32 +01:00
e568c24d1d Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-05-02 14:29:25 +01:00
b458326744 Checkpointer module update 2018-05-02 14:29:22 +01:00
6e7d5e2243 HMC: added Scidac checkpointer and support for metadata 2018-05-02 14:28:59 +01:00
b35169f1dd MultiShift for Staggered 2018-05-02 14:22:37 +01:00
441ad7498d add Iterative counter 2018-05-02 14:21:30 +01:00
6f6c5c549a Split off gparity 2018-05-02 14:11:23 +01:00
1584e17b54 Revert to fast versoin 2018-05-02 14:10:55 +01:00
12982a4455 Hypercube optimisation 2018-05-02 14:10:21 +01:00
172f412102 shmget reintroduce 2018-05-02 14:07:41 +01:00
a64497265d TIming 2018-05-02 14:07:28 +01:00
ca639c195f Merge branch 'develop' into feature/hadrons 2018-05-01 14:07:32 +01:00
edc28dcfbf Hadrons: scalar SU(N) 2-pt fix 2018-05-01 14:02:31 +01:00
c45f24a1b5 Improvements for tesseract 2018-04-30 21:50:00 +01:00
aaf37ee4d7 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-04-27 11:45:13 +01:00
1dddd17e3c Benchmark improvements from tesseract 2018-04-27 11:44:46 +01:00
661f1d3e8e Merge branch 'release/0.8.0' into develop 2018-04-27 11:22:33 +01:00
edcf9b9293 Merge branch 'release/0.8.0' 2018-04-27 11:13:19 +01:00
fe6860b4dd Update with LIME library guard 2018-04-27 08:57:34 +01:00
d6406b13e1 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-04-27 07:52:56 +01:00
e369d7306d Rename 2018-04-27 07:51:44 +01:00
9f8d63e104 Roll over version 2018-04-27 07:51:12 +01:00
9b0240d101 Hot start test 2018-04-27 07:50:51 +01:00
b27f0e5a53 Control over IO 2018-04-27 07:50:15 +01:00
75e4483407 Stronger convergence test 2018-04-27 07:49:57 +01:00
0734e9ddd4 Debugging Scatter_plane_simple 2018-04-27 14:39:01 +09:00
809b1cdd58 Bug fix for MPI running ; introduced last night 2018-04-27 05:19:10 +01:00
1be8089604 Clean compile 2018-04-26 23:42:45 +01:00
3e0eff6468 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-04-26 23:00:46 +01:00
7ecc47ac89 Quenched test compile 2018-04-26 23:00:28 +01:00
e9f1ac09de static 2018-04-26 23:00:08 +01:00
fa0d8feff4 Performance of CovariantCshift now non-embarrassing. 2018-04-26 17:56:27 +01:00
49b8501fd4 Merge branch 'develop' into feature/hadrons 2018-04-26 17:33:50 +01:00
d47484717e Hadrons: scalar SU(N) result handling improvement 2018-04-26 17:32:37 +01:00
05b44aef6b Merge branch 'develop' of https://github.com/paboyle/Grid into develop
Conflicts:
	benchmarks/Benchmark_su3.cc
2018-04-26 15:38:49 +01:00
03e9832efa Use macros for bare openmp 2018-04-26 14:50:02 +01:00
28a375d35d Force static 2018-04-26 14:49:42 +01:00
3b06381745 Guard bare openmp statemetn with ifdef 2018-04-26 14:48:57 +01:00
91a0a3f820 Improvement 2018-04-26 14:48:35 +01:00
8f44c799a6 Saving the benchmarking tests for Cshift 2018-04-26 14:48:03 +01:00
96272f3841 Merge staggered fix linear operator and reduction 2018-04-26 10:33:19 +01:00
5c936d88a0 Merge branch 'feature/staggered-comms-compute' of https://github.com/paboyle/Grid into feature/staggered-comms-compute 2018-04-26 10:18:37 +01:00
1c64ee926e Faster staggered operator with m^2 term trivial used 2018-04-26 10:17:49 +01:00
2cbb72a81c Provide info if EE term is trivial (m^2 factor)
Better timing in staggered 4d case
2018-04-26 10:10:07 +01:00
31d83ee046 Enable special treatment of constEE cases 2018-04-26 10:08:46 +01:00
a9e8758a01 Improvements to staggered tests timings 2018-04-26 10:08:05 +01:00
3e125c5b61 Faster linalg on CG optimised against staggered
Sum overhead is bigger for staggered
2018-04-26 10:07:19 +01:00
eac6ec4b5e Faster reductions, important on single node staggered 2018-04-26 10:03:57 +01:00
213f8db6a2 Microsecond resultion 2018-04-26 10:01:39 +01:00
6358f35b7e Debug of previous commit 2018-04-26 14:18:11 +09:00
43f5a0df50 More timers in the integrator 2018-04-26 12:01:56 +09:00
c897878776 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-04-26 11:31:57 +09:00
cc6eb51e3e Hadrons: macro refactoring for library portability 2018-04-25 16:49:14 +01:00
507009089b Merge remote-tracking branch 'upstream/feature/hadrons' into feature/hadrons 2018-04-25 09:36:39 +01:00
2baf193031 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-04-25 00:14:03 +01:00
362ba0443a Cshift updates 2018-04-25 00:12:11 +01:00
276a2353df Move constructor 2018-04-25 00:11:07 +01:00
b234784c8e Hadrons: scalar SU(N) takes operator pairs now 2018-04-24 19:52:12 +01:00
6ea2a8b7ca Hadrons: scheduler shows starting value 2018-04-24 19:51:47 +01:00
c1d0359aaa Hadrons: scalar SU(N) kinetic term saves trace 2018-04-24 19:51:22 +01:00
047ee4ad0b Hadrons: scalar SU(N) cleanup 2018-04-24 19:50:58 +01:00
a13106da0c Hadrons: scalar SU(N) gradient 2018-04-24 19:50:30 +01:00
75113e6523 Hadrons: Scalar SU(N) variable name update 2018-04-24 19:49:27 +01:00
325c73d051 Hadrons: module template update 2018-04-24 19:48:54 +01:00
b25a59e95e Hadrons: mitigation of GCC/Intel compiler bug not generating defaulted destructors 2018-04-24 17:20:25 +01:00
c5b9147b53 Correction of a minor bug in the su3 benchmark 2018-04-24 08:03:57 -07:00
64ac815fd9 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-04-24 17:27:38 +09:00
a1be533329 Corrected Flop count in Benchmark su3 and expanded the Wilson flow output 2018-04-24 01:19:53 -07:00
7c4533797f Hadrons: scalar SU(N) EMT improvement term optional 2018-04-23 22:46:39 +01:00
af84fd65bb Hadrons: missing dependency message improvement 2018-04-23 22:46:17 +01:00
1a2613086a Fix print message. 2018-04-23 15:42:12 -04:00
4f110c09a5 Add printing of whether there are unstaged changes in the git hash print. 2018-04-23 15:38:23 -04:00
6764362237 Hadrons: automatic directory creation fix 2018-04-23 18:45:39 +01:00
2fa2b0e0b1 Hadrons: Application header does not include all the modules 2018-04-23 17:57:17 +01:00
b61292f735 Hadrons: recursive mkdir function 2018-04-23 17:36:43 +01:00
ce7720e221 Hadrons: copyright update 2018-04-23 17:36:20 +01:00
853a5528dc Hadrons: template modules compilation optimisation 2018-04-23 17:35:01 +01:00
169f405c9c Hadrons: tests repaired 2018-04-23 12:48:34 +01:00
c6125b01ce Hadrons: Error and Warning channels always on 2018-04-23 12:48:17 +01:00
b0b5b34bff Hadrons: custom abort with module trace 2018-04-23 12:48:00 +01:00
1c9722357d Merge branch 'develop' into feature/hadrons
# Conflicts:
#	lib/qcd/action/fermion/FermionOperator.h
2018-04-20 17:15:21 +01:00
141da3ae71 function to get tensor dimensions 2018-04-20 17:13:34 +01:00
94edf9cf8b HDF5: direct access to group for custom operations 2018-04-20 17:13:21 +01:00
c11a3ca0a7 vectorise/unvectorise in reverse order 2018-04-20 17:13:04 +01:00
870b1a85ae Think I have the physical prop interface to CF and PF overlap right, but need a strong check/regression.
Only support Hw overlap, not Ht for now. Ht needs a new Dminus implemented.
2018-04-18 14:17:49 +01:00
b5510427f9 physical fermion interface, cshift benchmark in SU3. 2018-04-18 01:43:29 +01:00
26ed65c8f8 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-04-17 12:03:32 +01:00
f7f043d8cf Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-04-17 10:57:18 +01:00
ddcaa6ad29 Master does header on Nersc 2018-04-17 10:48:33 +01:00
334da7f452 Hadrons: can trace which module is throwing an error 2018-04-13 18:45:31 +02:00
4669ecd4ba Hadrons: build improvement 2018-04-13 18:21:18 +02:00
4573b34cac Hadrons: scalar SU(N) 2-pt functions with momentum 2018-04-13 18:21:00 +02:00
17f57e85d1 Merge branch 'develop' into feature/hadrons 2018-04-06 22:53:11 +01:00
c8d4d184ee XML push fragment fix 2018-04-06 22:53:01 +01:00
17f27b1ebd Hadrons: eigenpack writer fix 2018-04-06 22:52:11 +01:00
a16bbecb8a Hadrons: more feedback 2018-04-06 19:38:20 +01:00
7c9b0dd842 Hadrons: top level name for eigenpack metadata 2018-04-06 19:32:22 +01:00
6b7228b3e6 Hadrons: better metadata for eigenpack 2018-04-06 19:29:53 +01:00
f117552334 post-merge fix 2018-04-06 18:38:46 +01:00
a21a160029 Merge branch 'develop' into feature/hadrons
# Conflicts:
#	lib/serialisation/XmlIO.cc
2018-04-06 18:34:19 +01:00
1569a374a9 XML interface polish, XML fragments can be pushed into a writer 2018-04-06 18:32:14 +01:00
eddf023b8a pugixml 1.9 update 2018-04-06 16:17:22 +01:00
6b8ffbe735 Hadrons: genetic minimum value type fix 2018-04-06 15:41:31 +01:00
81050535a5 Hadrons: truncate eigenvalues when loading partial eigenpack 2018-04-06 13:48:58 +01:00
7dcf5c90e3 Hadrons: eigenpack must be referred by solver when used 2018-04-06 13:16:28 +01:00
9ce00f26f9 not special characters in std::vector operator<< 2018-04-04 17:44:56 +01:00
85c253ed4a Test_serialisation MPI fix 2018-04-04 17:19:34 +01:00
ccfc0a5a89 Hadrons: better string representation of module parameters 2018-04-04 17:19:22 +01:00
d3f857b1c9 Hadrons: proper metadata for eigenpacks 2018-04-04 16:36:37 +01:00
fb62035aa0 Hadrons: do not create RB coarse grids 2018-04-03 19:49:11 +01:00
0260bc7705 Hadrons: eigen pack writing only for boss node 2018-04-03 18:55:46 +01:00
68e6a58f12 Hadrons: several Lanczos fixes and improvements 2018-04-03 17:42:21 +01:00
640515e3d8 Merge branch 'develop' into feature/hadrons 2018-03-30 17:43:49 +01:00
f089bf5629 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-03-30 16:17:26 +01:00
276f113f28 IO uses master boss node for metadata. 2018-03-30 16:17:05 +01:00
97c579f637 Merge branch 'develop' into feature/hadrons 2018-03-30 16:04:44 +01:00
a13c109111 deterministic initialisation of field metadata 2018-03-30 16:03:01 +01:00
ab6afd18ac Still compile if no LIME 2018-03-30 13:39:20 +01:00
5bde64d48b Barrier required in parallel when we use ftell 2018-03-30 12:41:30 +01:00
2f5add4d5f Creation of file 2018-03-30 12:30:58 +01:00
c5a885dcd6 I/O benchmark 2018-03-29 19:57:41 +01:00
a4d8512fb8 Revert "Lattice serialisation, just HDF5 for the moment"
This reverts commit 8a0cf0194f.
2018-03-27 17:55:42 +01:00
5ec903044d Serial IO code cleaning for std:: convention 2018-03-27 17:11:50 +01:00
8a0cf0194f Lattice serialisation, just HDF5 for the moment 2018-03-26 19:16:16 +01:00
1c680d4b7a Merge branch 'develop' into feature/hadrons 2018-03-26 13:52:44 +01:00
c9c073eee4 Changes in messages in test dwf mixedprec 2018-03-23 11:27:56 +00:00
f290b2e908 Fix to pass CI tests 2018-03-23 11:14:23 +00:00
5f8225461b Fencing mixedcg test propagator write. LIME is still optional in Grid 2018-03-23 10:37:58 +00:00
e9323460c7 Merge branch 'develop' into feature/hadrons 2018-03-22 10:48:37 +00:00
20e186a1e0 Merge pull request #158 from goracle/dev-pull
Make compilation faster by moving print of git hash.
2018-03-22 10:45:17 +00:00
6ef4af989b Merge pull request #159 from goracle/dev-precsafe
Add dimension check to precisionChange.
2018-03-22 10:41:53 +00:00
ccde8b817f Add dimension check to precisionChange. 2018-03-21 20:58:04 -04:00
68168bf72d Revert "Add dimension match check to precisionChange."
This reverts commit 8f601d9b39.
2018-03-21 20:51:38 -04:00
e93d0feaa7 Merge branch 'dev-pull' of github.com:goracle/Grid into dev-pull 2018-03-21 20:39:30 -04:00
8f601d9b39 Add dimension match check to precisionChange. 2018-03-21 20:38:19 -04:00
5436308e4a Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-03-21 14:26:29 +00:00
07fe7d0cbe Save file in current dir; print checksums 2018-03-21 14:26:04 +00:00
60b57706c4 Small bug fix in the shm file names 2018-03-21 13:57:30 +00:00
58c2f60b69 Merge branch 'feature/hadrons' into feature/qed-fvol 2018-03-20 20:19:18 +00:00
bfa3a7b3b0 Merge branch 'feature/hadrons' into feature/qed-fvol
# Conflicts:
#	extras/Hadrons/Modules.hpp
#	extras/Hadrons/Modules/MGauge/StochEm.cc
#	extras/Hadrons/modules.inc
2018-03-20 20:17:59 +00:00
954e38bebe Put a username in the path 2018-03-20 18:16:15 +00:00
b1a38bde7a Extra test for Gparity with plaquette action 2018-03-20 18:01:32 +00:00
2581875edc Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-03-19 18:00:08 +00:00
f212b0a963 Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/hadrons 2018-03-19 17:57:13 +00:00
62702dbcb8 Fixing bug in the Point sink causing NaNs 2018-03-19 17:56:53 +00:00
41d6cab033 Merge branch 'develop' into feature/hadrons 2018-03-19 13:30:21 +00:00
5a31e747c9 Merge commit 'd5ce66f6ab2c44a12def7b6d26df80d6e646b1fb' into feature/hadrons 2018-03-19 13:19:09 +00:00
cbc73a3fd1 Hadrons: CG guesser fix 2018-03-19 13:11:38 +00:00
6c6d43eb4e Drop RB on coarse space ; that was a mistake 2018-03-17 09:35:01 +00:00
e1dcfd3553 typo fix 2018-03-16 23:10:47 +00:00
888838473a 4GB clean the offsets in parallel IO for multifile records 2018-03-16 21:54:56 +00:00
01568b0e62 Add a new SHM option 2018-03-16 21:54:28 +00:00
d5ce66f6ab Extra SHM option 2018-03-16 21:37:03 +00:00
d86936a3de Eliminating deprecated lex_sites 2018-03-16 12:26:39 +00:00
d516938707 Hadrons: eigen packs I/O and deflation interface 2018-03-14 14:55:47 +00:00
72344d1418 Hadrons: change default Schur convention to DiagTwo 2018-03-13 17:10:54 +00:00
7ecf6ab38b Merge branch 'develop' into feature/hadrons 2018-03-13 16:11:59 +00:00
2d4d70d3ec Hadrons: LCL fixes 2018-03-13 16:10:36 +00:00
78f8d47528 Hadrons: environment access to derived objects 2018-03-13 16:10:16 +00:00
b85f987b0b Hadrons: error message channel verbose during profiling 2018-03-13 16:09:22 +00:00
f57afe2079 Hadrons: much cleaner eigenpack implementation, to be tested 2018-03-13 13:51:09 +00:00
0fb84fa34b Make compilation faster by moving print of git hash. 2018-03-12 17:03:48 -04:00
8462bbfe63 Gamma input for meson contraction with round brackets 2018-03-12 18:02:12 +00:00
229977c955 Hadrons: minor memory fix for ShiftProbe module 2018-03-09 21:56:27 +00:00
e485a07133 Hadrons: garbage collector debug output 2018-03-09 21:56:01 +00:00
0880747edb Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-03-09 20:44:42 +00:00
b801e1fcd6 fclose should be called through a call to close() 2018-03-09 20:44:10 +00:00
70ec2faa98 Hadrons: maximum iteration specified for tests and error if 0 2018-03-09 19:53:55 +00:00
2f849ee252 declaration fix 2018-03-08 23:34:00 +00:00
bb6ed44339 Merge branch 'develop' into feature/hadrons 2018-03-08 23:09:28 +00:00
360cface33 Grid tensor serialisation fully implemented and tested 2018-03-08 19:12:03 +00:00
80302e95a8 MILC Interface 2018-03-08 15:34:03 +00:00
caf2f6b274 Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-03-08 09:52:25 +00:00
c49be8988b Grid tensor serialisation 2018-03-08 09:51:22 +00:00
971c2379bd std::vector to tensor conversion + test units 2018-03-08 09:50:39 +00:00
94b0d66e4c Merge pull request #157 from goracle/dev-pull
Add print of the current git hash on Grid init.
2018-03-08 16:09:28 +09:00
5e8af396fd Add print of the current git hash on Grid init. 2018-03-07 13:11:51 -05:00
9942723189 Merge branch 'develop' into feature/hadrons
# Conflicts:
#	lib/serialisation/BaseIO.h
2018-03-07 15:22:16 +00:00
a7d19dbb64 Merge branch 'develop' of github.com:paboyle/Grid into develop
# Conflicts:
#	lib/serialisation/BaseIO.h
2018-03-07 15:13:54 +00:00
90dbe03e17 Conversion of Grid tensors to std::vector made more elegant, also pair syntax changed to (x y) to avoid issues with JSON/XML 2018-03-07 15:12:32 +00:00
8b14096990 Conversion of Grid tensors to std::vector made more elegant, also pair syntax changed to (x y) to avoid issues with JSON/XML 2018-03-07 15:12:18 +00:00
b938202081 Overlapped Comm for Wilson DhopInternal 2018-03-07 14:08:43 +00:00
e79ef469ac Merge branch 'develop' into feature/hadrons
# Conflicts:
#	lib/serialisation/BaseIO.h
2018-03-06 19:25:51 +00:00
485c5db0fe conversion of Grid tensors to nested std::vector in preparation for tensor serialisation 2018-03-06 19:22:03 +00:00
c793947209 Add overloaded Photon constructors, with default parameters for IR improvements and infinite-volume G(x=0). 2018-03-06 16:27:26 +00:00
3e9ee053a1 Merge branch 'develop' into feature/hadrons 2018-03-05 20:01:38 +00:00
dda6c69d5b Hadrons: scalar SU(N) shift probes 2018-03-05 20:00:29 +00:00
cd51b9af99 Torture yourself with namespace lookup 101 2018-03-05 19:58:13 +00:00
c399c2b44d Guido broke the charge conjugate plaquette action with premature optimisation.
This sector of the code does not matter for anything other than Guido's quenched HMC
studies, and any plaq specific optimisations should be retained in a private branch
instead of destroying the code simplicity.
2018-03-05 12:55:41 +00:00
af7de7a294 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-03-05 12:22:41 +00:00
1dc86efd26 Finalize protection 2018-03-05 12:22:18 +00:00
f32555dcc5 Merge branch 'develop' into feature/hadrons 2018-03-03 15:31:52 +00:00
30391cb2eb Merge pull request #155 from fionnoh/develop
Some changes needed for deflation interface
2018-03-03 13:43:59 +00:00
e93c883470 Hadrons: basic GraphViz visualisation 2018-03-03 13:42:36 +00:00
2e88408f5c Some changes needed for deflation interface 2018-03-02 22:27:41 +00:00
fcac5c0772 Hadrons: scalar SU(N) fixes 2018-03-02 19:20:23 +00:00
90f4000935 Hadrons: scheduler debug less verbose 2018-03-02 19:20:01 +00:00
480708b9a0 Hadrons: safer error handling for HadronsXmlRun 2018-03-02 19:19:37 +00:00
c4baf876d4 Hadrons: graph consistency check 2018-03-02 18:40:18 +00:00
2f4dac3531 Hadrons: legal update 2018-03-02 18:10:58 +00:00
3ec6890850 Merge branch 'feature/hadrons' of github.com:paboyle/Grid into feature/hadrons 2018-03-02 17:56:08 +00:00
018801d973 Hadrons: legal update 2018-03-02 17:56:00 +00:00
1d83521daa Hadrons: scalar SU(N) EMT 2018-03-02 17:55:18 +00:00
fc5670c6a4 Merge pull request #151 from guelpers/feature/hadrons
Feature/hadrons
2018-03-02 17:54:43 +00:00
d9c435e282 Hadrons: Scalar SU(N) transverse projection module 2018-03-02 17:35:12 +00:00
614a0e8277 Hadrons: Scalar SU(N) utility functions 2018-03-02 17:34:23 +00:00
aaf39222c3 update my fork and fixed conflicts 2018-03-02 17:08:08 +00:00
550142bd6a Hadrons: more code cleaning 2018-03-02 14:30:45 +00:00
c0a929aef7 Hadrons: code cleaning 2018-03-02 14:29:54 +00:00
37fe944224 Hadrons: scalar kinetic term 2018-03-02 14:14:11 +00:00
315a42843f changes requested for the pull request 2018-03-02 11:47:38 +00:00
83a101db83 Hadrons: more LCL fixes 2018-03-02 11:05:02 +00:00
c4274e1660 Hadrons: LCL cleaning 2018-03-02 10:18:33 +00:00
ba6db55cb0 Hadrons: reverse last commit 2018-03-01 23:30:58 +00:00
e5ea84d531 Hadrons: LCL: orthogonalise coarse evec 2018-03-01 19:33:11 +00:00
15767a1491 Hadrons: LCL fine convergence test 2018-03-01 18:04:08 +00:00
4d2a32ae7a Hadrons: z-Mobius message fix 2018-03-01 18:03:44 +00:00
5b937e3644 Hadrons: VM memory profiling fix 2018-03-01 17:28:38 +00:00
e418b044f7 Hadrons: code cleaning 2018-03-01 12:57:28 +00:00
b8b05f143f Hadrons: Lanczos more conservative type names 2018-03-01 12:53:16 +00:00
6ec42b4b82 LCL: external storage fix 2018-03-01 12:27:29 +00:00
abb7d4d2f5 Hadrons: z-Mobius action 2018-02-27 19:32:19 +00:00
16ebbfff29 Hadrons: Schur convention globally defined through a macro 2018-02-27 18:45:23 +00:00
4828226095 Hadrons: prettier log 2018-02-27 14:43:51 +00:00
8a049f27b8 Hadrons: Lanczos code improvement 2018-02-27 13:46:59 +00:00
43578a3eb4 Hadrons: copyright update 2018-02-26 19:24:19 +00:00
fdbd42e542 Hadrons: first implementation of local coherence Lanczos 2018-02-26 19:22:43 +00:00
e7e4cee4f3 Merge branch 'develop' into feature/hadrons 2018-02-26 15:05:05 +00:00
ec3954ff5f QedFVol: Add input parameter G(x=0) for infinite-volume photon 2018-02-23 14:53:05 +00:00
0f468e2179 OverlappedComm for Staggered 5D and 4D. 2018-02-22 12:50:09 +00:00
8e61286741 Merge branch 'develop' into feature/qed-fvol 2018-02-20 15:33:35 +00:00
4790e99817 Extra communicator free that I had missed.
Hard to audit them all as this is complex
2018-02-20 15:12:31 +00:00
2dd63aa7a4 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-02-20 14:29:26 +00:00
559a501140 Deflation interface for solvers 2018-02-20 14:29:08 +00:00
945684c470 updates for deflation in the RB solver 2018-02-20 14:28:38 +00:00
e30a80a234 Relaxed constraints on MPI thread mode when not using multiple comms threads 2018-02-15 17:13:36 +00:00
69e4ecc1d2 QedFVol: Fix single precision build error 2018-02-14 17:37:18 +00:00
5f483df16b Merge branch 'develop' into feature/qed-fvol 2018-02-14 16:35:04 +00:00
4680a977c3 QedFVol: set infinite-volume photon propagator to 1 at x=0,
so that momentum-spage photon propagator is non-negative.
Need to check whether this is sufficient for all volumes.
2018-02-14 16:30:09 +00:00
de42456171 updated my fork and conflicts fixed 2018-02-14 13:57:56 +00:00
d55212c998 restructure SeqConservedCurrent for DWF to need less memory 2018-02-14 10:45:18 +00:00
c96483e3bd Whitespace only change 2018-02-13 11:39:07 +00:00
c6e1f64573 Test for QED 2018-02-13 09:30:23 +00:00
ae31a6a760 Move deflate to right class 2018-02-13 02:11:37 +00:00
dd8f2a64fe INterface to suit hadrons on Lanczos 2018-02-13 02:08:49 +00:00
724cf02d4a QedFVol: Implement infinite-volume photon 2018-02-12 17:18:10 +00:00
7b8b2731e7 Conj error for complex coeffs 2018-02-12 16:06:31 +00:00
237a8ec918 Communicator leak fixed (I think) 2018-02-12 13:27:20 +00:00
49a0ae73eb Insertion of photon field in seqential conserved current 2018-02-12 09:36:08 +00:00
315f1146cd QedFVol: Fix output of VPCounterTerms module. 2018-02-08 20:40:45 +00:00
9f202782c5 QedFVol: Change format of scalar VP output files, and save diagrams without charge factors for consistency with ChargedProp module. 2018-02-07 20:31:50 +00:00
594a262dcc QedFVol: Remove redundant file Communicator_mpi.cc 2018-02-07 11:37:01 +00:00
7f8ca54285 Merge branch 'develop' into feature/qed-fvol 2018-02-07 10:11:00 +00:00
c5b23c367e QedFVol: Fix segmentation fault when multiple propagator modules are used. 2018-02-05 11:46:33 +00:00
b6fe03eb26 BugFix: Now the stochatic EM potential weight is generated when calling for the first time 2018-02-02 15:29:38 +00:00
f37ed4958b Implement IR improvement, with coefficients set in input file. 2018-02-02 11:56:51 +00:00
896f3a8002 Fix to MPI for Hokusai system 2018-02-01 18:51:51 +00:00
5f85473d6b QedFVol: Move Projection class into Result class 2018-02-01 16:16:13 +00:00
ac3b0ebc58 QedFVol: New structure for ChargedProp output files 2018-02-01 12:31:32 +00:00
f0fcdf75b5 Update README.md 2018-01-30 12:44:20 +01:00
53bffb83d4 Updating README with new SKL target 2018-01-30 12:42:36 +01:00
cd44e851f1 Fixing compilation error in FundtoHirep 2018-01-30 06:04:30 +01:00
fb24e3a7d2 Adding utilities for perf profiling 2018-01-29 11:11:45 +01:00
655a69259a Added support for GCC compilation for Skylake AVX512 2018-01-28 17:02:46 +01:00
4e0cf0cc28 QedFVol: Fix bug in ScalarVP.cc due to double use of temporary object. Still getting mpi3 errors when configured with enable-comms=mpi[-auto]. 2018-01-27 15:15:25 +00:00
507c4e9efc Correcting an missing semicolumn in avx512 2018-01-27 10:59:55 +01:00
cdf550845f QedFVol: Fix bugs in StochEm.cc and ChargedProp.cc (still only works without MPI). 2018-01-26 21:25:20 +00:00
3db7a5387b BROKEN: Adapted scalarVP, UnitEm and VPCounterTerms modules to new Hadrons. Currently getting an assertion error from Communicator_mpi3.cc when I try to run. 2018-01-26 16:33:48 +00:00
f8a5194c70 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-01-25 13:46:37 +01:00
cff3bae155 Adding support for general Nc in the benchmark outputs 2018-01-25 13:46:31 +01:00
90dffc73c8 Merge branch 'feature/hadrons' into feature/qed-fvol
# Conflicts:
#	extras/Hadrons/Modules.hpp
#	extras/Hadrons/Modules/MGauge/StochEm.cc
#	extras/Hadrons/Modules/MScalar/ChargedProp.cc
#	extras/Hadrons/Modules/MScalar/ChargedProp.hpp
#	extras/Hadrons/modules.inc
#	lib/communicator/Communicator_mpi.cc
2018-01-24 16:41:44 +00:00
a1151fc734 Hadrons: MPI-safe serial IO 2018-01-23 17:26:50 +00:00
ab3baeb38f Implement contractions and data output in functions; calculate diagrams S, X and 4C separately; output 2E and 2T instead of sunset_shifted, sunset_unshifted, tadpole_shifted, tadpole_unshifted; add comments. 2018-01-23 17:07:45 +00:00
389731d373 changed SeqConservedSummed.hpp to work with new hadrons interface 2018-01-23 10:11:33 +00:00
6e3ce7423e Hadrons: don't display module list at startup (too long) 2018-01-22 20:04:05 +00:00
15f15a7cfd Merge branch 'develop' into feature/hadrons
# Conflicts:
#	extras/Hadrons/Modules.hpp
#	extras/Hadrons/modules.inc
2018-01-22 20:03:36 +00:00
0e5f626226 Hadrons: module for scalar operator divergence 2018-01-22 19:38:19 +00:00
97b9c6f03d No option for interior/exterior split of asm kernels since different directions get interleaved 2018-01-22 11:04:19 +00:00
63982819c6 No option to overlap comms and compute for asm implementation since different directions are interleaved
in the kernels, introducing if else structure would be too painful
2018-01-22 11:03:39 +00:00
6fec507bef merged new hadrons interface 2018-01-22 10:09:20 +00:00
219b3bd34f Remove freeVpTensor object 2018-01-19 17:14:11 +00:00
b00d2d2c39 Correction of Representations compilation and small compilation error for Intel 17 2018-01-17 13:46:12 +00:00
f1b3e21830 Merge branch 'feature/clover' into develop 2018-01-17 10:07:42 +00:00
24162c9ead Staggered overlap comms comput 2018-01-09 13:02:52 +00:00
935cd1e173 conserved current insertion summed over Lorentzindex 2017-12-22 11:38:45 +00:00
55e39df30f tadpole insertion for DWF 2017-12-22 11:36:31 +00:00
581be32ed2 Implement infrared improvement for v=0 on-shell self-energy 2017-12-14 13:42:41 +00:00
6bc136b1d0 Add module for calculating diagrams required for HVP counter-terms 2017-12-13 17:31:01 +00:00
0c668bf46a QedFVol: Write to output files from one process only. 2017-11-07 14:46:39 +00:00
840814c776 QedFVol: Patch to fix MPI communicators error 2017-11-06 16:34:55 +00:00
95af55128e QedFVol: Redo optimisation of scalar VP (extra memory requirements were not the problem), and undo optimisation of charged propagator (which seemed to be causing HDF5 errors, although I don’t know why). 2017-11-03 18:46:16 +00:00
9f2a57e334 QedFVol: Undo optimisation of scalar VP, to reduce memory requirements 2017-11-03 13:10:11 +00:00
c645d33db5 QedFVol: Redo optimisation of charged propagator, and fix I/O bug 2017-11-03 10:59:26 +00:00
e0f1349524 QedFVol: Undo optimisation of charged propagator 2017-11-03 09:22:41 +00:00
79b761f923 Merge branch 'develop' into feature/qed-fvol
# Conflicts:
#	lib/communicator/Communicator_base.cc
2017-10-30 15:53:18 +00:00
0d4e31ca58 QedFVol: Calculate phase factors for momentum projections once per configuration only. 2017-10-30 15:46:50 +00:00
b07a354a33 QedFVol: output scalar propagator before FFT in spatial directions. 2017-10-30 14:20:44 +00:00
c433939795 QedFVol: Temporarily remove incomplete implementation of infinite-volume photon 2017-10-20 16:27:58 +01:00
b6a4c31b48 Merge branch 'feature/qed-fvol' of https://github.com/jch1g10/Grid into feature/qed-fvol 2017-10-20 16:25:07 +01:00
98b1439ff9 QedFVol: pass arbitrary input values to photon constructor in UnitEm 2017-10-20 16:24:09 +01:00
564738b1ff Add module for unit EM field 2017-10-17 14:03:57 +01:00
a80e43dbcf Added infinite-volume photon in Photon.h (not checked yet) 2017-10-11 16:44:51 -04:00
b99622d9fb QedFVol: fix problem with JSON wanting gcc 4.9 2017-09-28 13:34:33 -04:00
937c77ead2 Merge branch 'develop' into feature/qed-fvol 2017-09-28 16:25:20 +01:00
95e5a2ade3 Merge pull request #116 from jch1g10/feature/qed-fvol
Feature/qed fvol
2017-09-25 15:08:33 +01:00
91676d1dda Fix “MAP_ANONYMOUS undefined” error on OSX. 2017-09-01 15:48:30 +01:00
ac3611bb19 Merge branch 'develop' of https://github.com/paboyle/Grid into feature/qed-fvol 2017-08-29 11:53:37 +01:00
cc4afb978d Fix bug in non-zero momentum projection 2017-08-24 17:31:44 +01:00
20e92a7009 QedVFol: Allow output of scalar propagator and vacuum polarisation projected to arbitrary lattice momentum, not just zero-momentum. 2017-06-12 18:27:32 +01:00
42f0afcbfa QedFVol: Output all scalar VP diagrams separately 2017-06-09 18:08:40 +01:00
20ac13fdf3 QedFVol: add ChargedProp as an input to ScalarVP module, instead of calculating scalar propagator within ScalarVP. 2017-06-08 17:43:39 +01:00
e38612e6fa QedFVol: Update ScalarVP module for compatibility with new scalar action 2017-06-07 17:42:00 +01:00
c2b2b71c5d Merge branch 'feature/qed-fvol' of https://github.com/paboyle/Grid into feature/qed-fvol
# Conflicts:
#	extras/Hadrons/Modules.hpp
#	extras/Hadrons/modules.inc
2017-06-07 16:59:47 +01:00
009f48a904 QedFVol: Add missing factor of 2 in free vacuum polarisation 2017-06-07 16:34:09 +01:00
5cfc0180aa QedFVol: Output free VP along with charged VP. 2017-05-09 12:46:57 +01:00
914f180fa3 QedFVol: Implement exact O(alpha) vacuum polarisation. 2017-05-09 11:46:25 +01:00
6cb563a40c QedFVol: Access HVP tensor using a vector<vector<ScalarField>> instead of vector<vector<ScalarField*>> 2017-05-05 17:12:41 +01:00
db3837be22 QedFVol: Change “double” to “Real” in ScalarVP.cc 2017-05-03 13:26:49 +01:00
2f0dd83016 Calculate HVP using a single contraction of O(alpha) charged propagators. 2017-05-03 12:53:41 +01:00
3ac27e5596 QedFVol: remove unnecessary copies of free propagator from shifted sources in ScalarVP 2017-04-27 14:17:50 +01:00
bd466a55a8 QedFVol: remove charge dependence in chargedProp function of ScalarVP 2017-04-25 10:04:03 +01:00
c8e6f58e24 Fix typos in ScalarVP 2017-04-13 17:04:37 +01:00
888988ad37 Merge branch 'feature/qed-fvol' of https://github.com/paboyle/Grid into feature/qed-fvol
# Conflicts:
#	lib/qcd/action/fermion/Fermion.h
2017-04-13 15:54:40 +01:00
e4a105a30b Merge branch 'feature/qed-fvol' of https://github.com/paboyle/Grid into feature/qed-fvol 2017-04-10 16:35:01 +01:00
26ebe41fef QedFVol: Implement charged propagator calculation within ScalarVP module 2017-04-10 16:33:54 +01:00
1e496fee74 Merge branch 'develop' into feature/qed-fvol
# Conflicts:
#	lib/qcd/action/fermion/Fermion.h
2017-04-03 19:02:57 +01:00
9f755e0379 Add functions momD1 and momD2 to ScalarVP 2017-03-27 16:49:18 +01:00
4512dbdf58 Rename module ScalarFV to ScalarVP 2017-03-27 15:02:16 +01:00
483fd3cfa1 Add propagator expansion terms as inputs to ScalarFV 2017-03-27 13:24:51 +01:00
85516e9c7c Output all terms of scalar propagator separately 2017-03-24 17:13:55 +00:00
0c006fbfaa Add ScalarFV inputs to ScalarFV.hpp 2017-03-24 11:59:09 +00:00
54c10a42cc Add source and emField inputs to ScalarFV module 2017-03-24 11:42:32 +00:00
ef0fe2bcc1 Added empty ScalarFV module 2017-03-21 11:39:46 +00:00
602 changed files with 38936 additions and 10429 deletions

25
.gitignore vendored
View File

@ -83,6 +83,7 @@ ltmain.sh
.Trashes
ehthumbs.db
Thumbs.db
.dirstamp
# build directory #
###################
@ -97,11 +98,8 @@ build.sh
# Eigen source #
################
lib/Eigen/*
# FFTW source #
################
lib/fftw/*
Grid/Eigen
Eigen/*
# libtool macros #
##################
@ -112,20 +110,7 @@ m4/libtool.m4
################
gh-pages/
# Buck files #
##############
.buck*
buck-out
BUCK
make-bin-BUCK.sh
# generated sources #
#####################
lib/qcd/spin/gamma-gen/*.h
lib/qcd/spin/gamma-gen/*.cc
# vs code editor files #
########################
.vscode/
.vscode/settings.json
settings.json
Grid/qcd/spin/gamma-gen/*.h
Grid/qcd/spin/gamma-gen/*.cc

View File

@ -9,6 +9,11 @@ matrix:
- os: osx
osx_image: xcode8.3
compiler: clang
env: PREC=single
- os: osx
osx_image: xcode8.3
compiler: clang
env: PREC=double
before_install:
- export GRIDDIR=`pwd`
@ -16,9 +21,11 @@ before_install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc openssl; fi
install:
- export CWD=`pwd`
- echo $CWD
- export CC=$CC$VERSION
- export CXX=$CXX$VERSION
- echo $PATH
@ -31,17 +38,24 @@ install:
- which $CXX
- $CXX --version
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export EXTRACONF='--with-openssl=/usr/local/opt/openssl'; fi
script:
- ./bootstrap.sh
- mkdir build
- cd build
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
- mkdir lime
- cd lime
- mkdir build
- cd build
- wget http://usqcd-software.github.io/downloads/c-lime/lime-1.3.2.tar.gz
- tar xf lime-1.3.2.tar.gz
- cd lime-1.3.2
- ./configure --prefix=$CWD/build/lime/install
- make -j4
- make install
- cd $CWD/build
- ../configure --enable-precision=$PREC --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF}
- make -j4
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
- echo make clean
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
- make -j4
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
- make check

View File

@ -48,6 +48,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/serialisation/Serialisation.h>
#include <Grid/threads/Threads.h>
#include <Grid/util/Util.h>
#include <Grid/util/Sha.h>
#include <Grid/communicator/Communicator.h>
#include <Grid/cartesian/Cartesian.h>
#include <Grid/tensors/Tensors.h>

View File

@ -1,4 +1,9 @@
#pragma once
// Force Eigen to use MKL if Grid has been configured with --enable-mkl
#ifdef USE_MKL
#define EIGEN_USE_MKL_ALL
#endif
#if defined __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"

View File

@ -21,6 +21,32 @@ if BUILD_HDF5
extra_headers+=serialisation/Hdf5Type.h
endif
all: version-cache
version-cache:
@if [ `git status --porcelain | grep -v '??' | wc -l` -gt 0 ]; then\
a="uncommited changes";\
else\
a="clean";\
fi;\
echo "`git log -n 1 --format=format:"#define GITHASH \\"%H:%d $$a\\"%n" HEAD`" > vertmp;\
if [ -e version-cache ]; then\
d=`diff vertmp version-cache`;\
if [ "$${d}" != "" ]; then\
mv vertmp version-cache;\
rm -f Version.h;\
fi;\
else\
mv vertmp version-cache;\
rm -f Version.h;\
fi;\
rm -f vertmp
Version.h:
cp version-cache Version.h
.PHONY: version-cache
#
# Libraries
#
@ -30,8 +56,8 @@ include Eigen.inc
lib_LIBRARIES = libGrid.a
CCFILES += $(extra_sources)
HFILES += $(extra_headers)
HFILES += $(extra_headers) Config.h Version.h
libGrid_a_SOURCES = $(CCFILES)
libGrid_adir = $(pkgincludedir)
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h
libGrid_adir = $(includedir)/Grid
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) $(eigen_unsupp_files)

View File

@ -39,6 +39,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/approx/MultiShiftFunction.h>
#include <Grid/algorithms/approx/Forecast.h>
#include <Grid/algorithms/iterative/Deflation.h>
#include <Grid/algorithms/iterative/ConjugateGradient.h>
#include <Grid/algorithms/iterative/ConjugateResidual.h>
#include <Grid/algorithms/iterative/NormalEquations.h>

View File

@ -51,7 +51,7 @@ namespace Grid {
virtual void Op (const Field &in, Field &out) = 0; // Abstract base
virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0;
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) = 0;
virtual void HermOp(const Field &in, Field &out)=0;
};
@ -309,36 +309,59 @@ namespace Grid {
class SchurStaggeredOperator : public SchurOperatorBase<Field> {
protected:
Matrix &_Mat;
Field tmp;
RealD mass;
double tMpc;
double tIP;
double tMeo;
double taxpby_norm;
uint64_t ncall;
public:
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){};
void Report(void)
{
std::cout << GridLogMessage << " HermOpAndNorm.Mpc "<< tMpc/ncall<<" usec "<<std::endl;
std::cout << GridLogMessage << " HermOpAndNorm.IP "<< tIP /ncall<<" usec "<<std::endl;
std::cout << GridLogMessage << " Mpc.MeoMoe "<< tMeo/ncall<<" usec "<<std::endl;
std::cout << GridLogMessage << " Mpc.axpby_norm "<< taxpby_norm/ncall<<" usec "<<std::endl;
}
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())
{
assert( _Mat.isTrivialEE() );
mass = _Mat.Mass();
tMpc=0;
tIP =0;
tMeo=0;
taxpby_norm=0;
ncall=0;
}
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
GridLogIterative.TimingMode(1);
std::cout << GridLogIterative << " HermOpAndNorm "<<std::endl;
ncall++;
tMpc-=usecond();
n2 = Mpc(in,out);
std::cout << GridLogIterative << " HermOpAndNorm.Mpc "<<std::endl;
tMpc+=usecond();
tIP-=usecond();
ComplexD dot= innerProduct(in,out);
std::cout << GridLogIterative << " HermOpAndNorm.innerProduct "<<std::endl;
tIP+=usecond();
n1 = real(dot);
}
virtual void HermOp(const Field &in, Field &out){
std::cout << GridLogIterative << " HermOp "<<std::endl;
Mpc(in,out);
ncall++;
tMpc-=usecond();
_Mat.Meooe(in,out);
_Mat.Meooe(out,tmp);
tMpc+=usecond();
taxpby_norm-=usecond();
axpby(out,-1.0,mass*mass,tmp,in);
taxpby_norm+=usecond();
}
virtual RealD Mpc (const Field &in, Field &out) {
Field tmp(in._grid);
Field tmp2(in._grid);
std::cout << GridLogIterative << " HermOp.Mpc "<<std::endl;
_Mat.Mooee(in,out);
_Mat.Mooee(out,tmp);
std::cout << GridLogIterative << " HermOp.MooeeMooee "<<std::endl;
tMeo-=usecond();
_Mat.Meooe(in,out);
_Mat.Meooe(out,tmp2);
std::cout << GridLogIterative << " HermOp.MeooeMeooe "<<std::endl;
RealD nn=axpy_norm(out,-1.0,tmp2,tmp);
std::cout << GridLogIterative << " HermOp.axpy_norm "<<std::endl;
_Mat.Meooe(out,tmp);
tMeo+=usecond();
taxpby_norm-=usecond();
RealD nn=axpby_norm(out,-1.0,mass*mass,tmp,in);
taxpby_norm+=usecond();
return nn;
}
virtual RealD MpcDag (const Field &in, Field &out){

View File

@ -54,6 +54,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);
@ -69,7 +70,6 @@ class ConjugateGradient : public OperatorFunction<Field> {
Linop.HermOpAndNorm(psi, mmp, d, b);
r = src - mmp;
p = r;
@ -96,38 +96,44 @@ class ConjugateGradient : public OperatorFunction<Field> {
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
GridStopWatch LinalgTimer;
GridStopWatch InnerTimer;
GridStopWatch AxpyNormTimer;
GridStopWatch LinearCombTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
for (k = 1; k <= MaxIterations*1000; k++) {
c = cp;
MatrixTimer.Start();
Linop.HermOpAndNorm(p, mmp, d, qq);
Linop.HermOp(p, mmp);
MatrixTimer.Stop();
LinalgTimer.Start();
// RealD qqck = norm2(mmp);
// ComplexD dck = innerProduct(p,mmp);
InnerTimer.Start();
ComplexD dc = innerProduct(p,mmp);
InnerTimer.Stop();
d = dc.real();
a = c / d;
b_pred = a * (a * qq - d) / c;
AxpyNormTimer.Start();
cp = axpy_norm(r, -a, mmp, r);
AxpyNormTimer.Stop();
b = cp / c;
// Fuse these loops ; should be really easy
psi = a * p + psi;
p = p * b + r;
LinearCombTimer.Start();
parallel_for(int ss=0;ss<src._grid->oSites();ss++){
vstream(psi[ss], a * p[ss] + psi[ss]);
vstream(p [ss], b * p[ss] + r[ss]);
}
LinearCombTimer.Stop();
LinalgTimer.Stop();
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
<< " residual " << cp << " target " << rsq << std::endl;
std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl;
std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl;
// Stopping condition
if (cp <= rsq) {
@ -144,10 +150,13 @@ class ConjugateGradient : public OperatorFunction<Field> {
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
std::cout << GridLogMessage << "Time breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "Time breakdown "<<std::endl;
std::cout << GridLogPerformance << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "\tInner " << InnerTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);

View File

@ -43,6 +43,7 @@ namespace Grid {
public:
RealD Tolerance;
Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
int verbose;
MultiShiftFunction shifts;
@ -163,7 +164,16 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
for(int s=0;s<nshift;s++) {
axpby(psi[s],0.,-bs[s]*alpha[s],src,src);
}
///////////////////////////////////////
// Timers
///////////////////////////////////////
GridStopWatch AXPYTimer;
GridStopWatch ShiftTimer;
GridStopWatch QRTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
// Iteration loop
int k;
@ -171,7 +181,9 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
for (k=1;k<=MaxIterations;k++){
a = c /cp;
AXPYTimer.Start();
axpy(p,a,p,r);
AXPYTimer.Stop();
// Note to self - direction ps is iterated seperately
// for each shift. Does not appear to have any scope
@ -180,6 +192,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
// However SAME r is used. Could load "r" and update
// ALL ps[s]. 2/3 Bandwidth saving
// New Kernel: Load r, vector of coeffs, vector of pointers ps
AXPYTimer.Start();
for(int s=0;s<nshift;s++){
if ( ! converged[s] ) {
if (s==0){
@ -190,22 +203,34 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
}
}
}
AXPYTimer.Stop();
cp=c;
MatrixTimer.Start();
//Linop.HermOpAndNorm(p,mmp,d,qq); // d is used
// The below is faster on KNL
Linop.HermOp(p,mmp);
d=real(innerProduct(p,mmp));
Linop.HermOpAndNorm(p,mmp,d,qq);
MatrixTimer.Stop();
AXPYTimer.Start();
axpy(mmp,mass[0],p,mmp);
AXPYTimer.Stop();
RealD rn = norm2(p);
d += rn*mass[0];
bp=b;
b=-cp/d;
AXPYTimer.Start();
c=axpy_norm(r,b,mmp,r);
AXPYTimer.Stop();
// Toggle the recurrence history
bs[0] = b;
iz = 1-iz;
ShiftTimer.Start();
for(int s=1;s<nshift;s++){
if((!converged[s])){
RealD z0 = z[s][1-iz];
@ -215,6 +240,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
}
}
ShiftTimer.Stop();
for(int s=0;s<nshift;s++){
int ss = s;
@ -257,6 +283,9 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
if ( all_converged ){
SolverTimer.Stop();
std::cout<<GridLogMessage<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
std::cout<<GridLogMessage<< "CGMultiShift: Checking solutions"<<std::endl;
@ -269,8 +298,19 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
RealD cn = norm2(src);
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
}
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tAXPY " << AXPYTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMarix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tShift " << ShiftTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return;
}
}
// ugly hack
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;

View File

@ -0,0 +1,104 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_DEFLATION_H
#define GRID_DEFLATION_H
namespace Grid {
template<class Field>
class ZeroGuesser: public LinearFunction<Field> {
public:
virtual void operator()(const Field &src, Field &guess) { guess = zero; };
};
template<class Field>
class SourceGuesser: public LinearFunction<Field> {
public:
virtual void operator()(const Field &src, Field &guess) { guess = src; };
};
////////////////////////////////
// Fine grid deflation
////////////////////////////////
template<class Field>
class DeflatedGuesser: public LinearFunction<Field> {
private:
const std::vector<Field> &evec;
const std::vector<RealD> &eval;
public:
DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};
virtual void operator()(const Field &src,Field &guess) {
guess = zero;
assert(evec.size()==eval.size());
auto N = evec.size();
for (int i=0;i<N;i++) {
const Field& tmp = evec[i];
axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
}
guess.checkerboard = src.checkerboard;
}
};
template<class FineField, class CoarseField>
class LocalCoherenceDeflatedGuesser: public LinearFunction<FineField> {
private:
const std::vector<FineField> &subspace;
const std::vector<CoarseField> &evec_coarse;
const std::vector<RealD> &eval_coarse;
public:
LocalCoherenceDeflatedGuesser(const std::vector<FineField> &_subspace,
const std::vector<CoarseField> &_evec_coarse,
const std::vector<RealD> &_eval_coarse)
: subspace(_subspace),
evec_coarse(_evec_coarse),
eval_coarse(_eval_coarse)
{
}
void operator()(const FineField &src,FineField &guess) {
int N = (int)evec_coarse.size();
CoarseField src_coarse(evec_coarse[0]._grid);
CoarseField guess_coarse(evec_coarse[0]._grid); guess_coarse = zero;
blockProject(src_coarse,src,subspace);
for (int i=0;i<N;i++) {
const CoarseField & tmp = evec_coarse[i];
axpy(guess_coarse,TensorRemove(innerProduct(tmp,src_coarse)) / eval_coarse[i],tmp,guess_coarse);
}
blockPromote(guess_coarse,guess,subspace);
guess.checkerboard = src.checkerboard;
};
};
}
#endif

View File

@ -57,8 +57,9 @@ void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, i
parallel_region
{
std::vector < vobj > B(Nm); // Thread private
std::vector < vobj , commAllocator<vobj> > B(Nm); // Thread private
parallel_for_internal(int ss=0;ss < grid->oSites();ss++){
for(int j=j0; j<j1; ++j) B[j]=0.;
@ -149,19 +150,6 @@ void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, boo
basisReorderInPlace(_v,sort_vals,idx);
}
// PAB: faster to compute the inner products first then fuse loops.
// If performance critical can improve.
template<class Field>
void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
result = zero;
assert(_v.size()==eval.size());
int N = (int)_v.size();
for (int i=0;i<N;i++) {
Field& tmp = _v[i];
axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
}
}
/////////////////////////////////////////////////////////////
// Implicitly restarted lanczos
/////////////////////////////////////////////////////////////
@ -181,6 +169,7 @@ enum IRLdiagonalisation {
template<class Field> class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester<Field>
{
public:
LinearFunction<Field> &_HermOp;
ImplicitlyRestartedLanczosHermOpTester(LinearFunction<Field> &HermOp) : _HermOp(HermOp) { };
int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox)
@ -243,6 +232,7 @@ class ImplicitlyRestartedLanczos {
/////////////////////////
public:
//////////////////////////////////////////////////////////////////
// PAB:
//////////////////////////////////////////////////////////////////
@ -490,15 +480,13 @@ until convergence
Field B(grid); B.checkerboard = evec[0].checkerboard;
// power of two search pattern; not every evalue in eval2 is assessed.
int allconv =1;
for(int jj = 1; jj<=Nstop; jj*=2){
int j = Nstop-jj;
RealD e = eval2_copy[j]; // Discard the evalue
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
if( _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
if ( j > Nconv ) {
Nconv=j+1;
jj=Nstop; // Terminate the scan
}
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
allconv=0;
}
}
// Do evec[0] for good measure
@ -506,8 +494,10 @@ until convergence
int j=0;
RealD e = eval2_copy[0];
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox);
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) allconv=0;
}
if ( allconv ) Nconv = Nstop;
// test if we converged, if so, terminate
std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl;
// if( Nconv>=Nstop || beta_k < betastp){

View File

@ -28,7 +28,10 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */
#ifndef GRID_LOCAL_COHERENCE_IRL_H
#define GRID_LOCAL_COHERENCE_IRL_H
namespace Grid {
struct LanczosParams : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams,
@ -45,6 +48,7 @@ struct LanczosParams : Serializable {
struct LocalCoherenceLanczosParams : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
bool, saveEvecs,
bool, doFine,
bool, doFineRead,
bool, doCoarse,
@ -70,21 +74,24 @@ public:
typedef Lattice<Fobj> FineField;
LinearOperatorBase<FineField> &_Linop;
Aggregation<Fobj,CComplex,nbasis> &_Aggregate;
std::vector<FineField> &subspace;
ProjectedHermOp(LinearOperatorBase<FineField>& linop, Aggregation<Fobj,CComplex,nbasis> &aggregate) :
_Linop(linop),
_Aggregate(aggregate) { };
ProjectedHermOp(LinearOperatorBase<FineField>& linop, std::vector<FineField> & _subspace) :
_Linop(linop), subspace(_subspace)
{
assert(subspace.size() >0);
};
void operator()(const CoarseField& in, CoarseField& out) {
GridBase *FineGrid = subspace[0]._grid;
int checkerboard = subspace[0].checkerboard;
FineField fin (FineGrid); fin.checkerboard= checkerboard;
FineField fout(FineGrid); fout.checkerboard = checkerboard;
GridBase *FineGrid = _Aggregate.FineGrid;
FineField fin(FineGrid);
FineField fout(FineGrid);
_Aggregate.PromoteFromSubspace(in,fin); std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl;
_Linop.HermOp(fin,fout); std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl;
_Aggregate.ProjectToSubspace(out,fout); std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl;
blockPromote(in,fin,subspace); std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl;
_Linop.HermOp(fin,fout); std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl;
blockProject(out,fout,subspace); std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl;
}
};
@ -99,24 +106,27 @@ public:
OperatorFunction<FineField> & _poly;
LinearOperatorBase<FineField> &_Linop;
Aggregation<Fobj,CComplex,nbasis> &_Aggregate;
std::vector<FineField> &subspace;
ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,LinearOperatorBase<FineField>& linop,
Aggregation<Fobj,CComplex,nbasis> &aggregate) :
ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,
LinearOperatorBase<FineField>& linop,
std::vector<FineField> & _subspace) :
_poly(poly),
_Linop(linop),
_Aggregate(aggregate) { };
subspace(_subspace)
{ };
void operator()(const CoarseField& in, CoarseField& out) {
GridBase *FineGrid = _Aggregate.FineGrid;
FineField fin(FineGrid) ;fin.checkerboard =_Aggregate.checkerboard;
FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard;
_Aggregate.PromoteFromSubspace(in,fin); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl;
GridBase *FineGrid = subspace[0]._grid;
int checkerboard = subspace[0].checkerboard;
FineField fin (FineGrid); fin.checkerboard =checkerboard;
FineField fout(FineGrid);fout.checkerboard =checkerboard;
blockPromote(in,fin,subspace); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl;
_poly(_Linop,fin,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl;
_Aggregate.ProjectToSubspace(out,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl;
blockProject(out,fout,subspace); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl;
}
};
@ -132,19 +142,23 @@ class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanc
LinearFunction<CoarseField> & _Poly;
OperatorFunction<FineField> & _smoother;
LinearOperatorBase<FineField> &_Linop;
Aggregation<Fobj,CComplex,nbasis> &_Aggregate;
RealD _coarse_relax_tol;
RealD _coarse_relax_tol;
std::vector<FineField> &_subspace;
ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField> &Poly,
OperatorFunction<FineField> &smoother,
LinearOperatorBase<FineField> &Linop,
Aggregation<Fobj,CComplex,nbasis> &Aggregate,
std::vector<FineField> &subspace,
RealD coarse_relax_tol=5.0e3)
: _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly), _coarse_relax_tol(coarse_relax_tol) { };
: _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace),
_coarse_relax_tol(coarse_relax_tol)
{ };
int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
{
CoarseField v(B);
RealD eval_poly = eval;
// Apply operator
_Poly(B,v);
@ -168,14 +182,13 @@ class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanc
}
int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
{
GridBase *FineGrid = _Aggregate.FineGrid;
int checkerboard = _Aggregate.checkerboard;
GridBase *FineGrid = _subspace[0]._grid;
int checkerboard = _subspace[0].checkerboard;
FineField fB(FineGrid);fB.checkerboard =checkerboard;
FineField fv(FineGrid);fv.checkerboard =checkerboard;
_Aggregate.PromoteFromSubspace(B,fv);
blockPromote(B,fv,_subspace);
_smoother(_Linop,fv,fB);
RealD eval_poly = eval;
@ -217,27 +230,67 @@ protected:
int _checkerboard;
LinearOperatorBase<FineField> & _FineOp;
// FIXME replace Aggregation with vector of fine; the code reuse is too small for
// the hassle and complexity of cross coupling.
Aggregation<Fobj,CComplex,nbasis> _Aggregate;
std::vector<RealD> evals_fine;
std::vector<RealD> evals_coarse;
std::vector<CoarseField> evec_coarse;
std::vector<RealD> &evals_fine;
std::vector<RealD> &evals_coarse;
std::vector<FineField> &subspace;
std::vector<CoarseField> &evec_coarse;
private:
std::vector<RealD> _evals_fine;
std::vector<RealD> _evals_coarse;
std::vector<FineField> _subspace;
std::vector<CoarseField> _evec_coarse;
public:
LocalCoherenceLanczos(GridBase *FineGrid,
GridBase *CoarseGrid,
LinearOperatorBase<FineField> &FineOp,
int checkerboard) :
GridBase *CoarseGrid,
LinearOperatorBase<FineField> &FineOp,
int checkerboard) :
_CoarseGrid(CoarseGrid),
_FineGrid(FineGrid),
_Aggregate(CoarseGrid,FineGrid,checkerboard),
_FineOp(FineOp),
_checkerboard(checkerboard)
_checkerboard(checkerboard),
evals_fine (_evals_fine),
evals_coarse(_evals_coarse),
subspace (_subspace),
evec_coarse(_evec_coarse)
{
evals_fine.resize(0);
evals_coarse.resize(0);
};
void Orthogonalise(void ) { _Aggregate.Orthogonalise(); }
//////////////////////////////////////////////////////////////////////////
// Alternate constructore, external storage for use by Hadrons module
//////////////////////////////////////////////////////////////////////////
LocalCoherenceLanczos(GridBase *FineGrid,
GridBase *CoarseGrid,
LinearOperatorBase<FineField> &FineOp,
int checkerboard,
std::vector<FineField> &ext_subspace,
std::vector<CoarseField> &ext_coarse,
std::vector<RealD> &ext_eval_fine,
std::vector<RealD> &ext_eval_coarse
) :
_CoarseGrid(CoarseGrid),
_FineGrid(FineGrid),
_FineOp(FineOp),
_checkerboard(checkerboard),
evals_fine (ext_eval_fine),
evals_coarse(ext_eval_coarse),
subspace (ext_subspace),
evec_coarse (ext_coarse)
{
evals_fine.resize(0);
evals_coarse.resize(0);
};
void Orthogonalise(void ) {
CoarseScalar InnerProd(_CoarseGrid);
std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
blockOrthogonalise(InnerProd,subspace);
std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
blockOrthogonalise(InnerProd,subspace);
};
template<typename T> static RealD normalise(T& v)
{
@ -246,43 +299,44 @@ public:
v = v * (1.0/nn);
return nn;
}
/*
void fakeFine(void)
{
int Nk = nbasis;
_Aggregate.subspace.resize(Nk,_FineGrid);
_Aggregate.subspace[0]=1.0;
_Aggregate.subspace[0].checkerboard=_checkerboard;
normalise(_Aggregate.subspace[0]);
subspace.resize(Nk,_FineGrid);
subspace[0]=1.0;
subspace[0].checkerboard=_checkerboard;
normalise(subspace[0]);
PlainHermOp<FineField> Op(_FineOp);
for(int k=1;k<Nk;k++){
_Aggregate.subspace[k].checkerboard=_checkerboard;
Op(_Aggregate.subspace[k-1],_Aggregate.subspace[k]);
normalise(_Aggregate.subspace[k]);
subspace[k].checkerboard=_checkerboard;
Op(subspace[k-1],subspace[k]);
normalise(subspace[k]);
}
}
*/
void testFine(RealD resid)
{
assert(evals_fine.size() == nbasis);
assert(_Aggregate.subspace.size() == nbasis);
assert(subspace.size() == nbasis);
PlainHermOp<FineField> Op(_FineOp);
ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op);
for(int k=0;k<nbasis;k++){
assert(SimpleTester.ReconstructEval(k,resid,_Aggregate.subspace[k],evals_fine[k],1.0)==1);
assert(SimpleTester.ReconstructEval(k,resid,subspace[k],evals_fine[k],1.0)==1);
}
}
void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax)
{
assert(evals_fine.size() == nbasis);
assert(_Aggregate.subspace.size() == nbasis);
assert(subspace.size() == nbasis);
//////////////////////////////////////////////////////////////////////////////////////////////////
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
//////////////////////////////////////////////////////////////////////////////////////////////////
Chebyshev<FineField> ChebySmooth(cheby_smooth);
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,_Aggregate);
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax);
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,subspace);
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
for(int k=0;k<evec_coarse.size();k++){
if ( k < nbasis ) {
@ -302,34 +356,34 @@ public:
PlainHermOp<FineField> Op(_FineOp);
evals_fine.resize(Nm);
_Aggregate.subspace.resize(Nm,_FineGrid);
subspace.resize(Nm,_FineGrid);
ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard;
int Nconv;
IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false);
IRL.calc(evals_fine,subspace,src,Nconv,false);
// Shrink down to number saved
assert(Nstop>=nbasis);
assert(Nconv>=nbasis);
evals_fine.resize(nbasis);
_Aggregate.subspace.resize(nbasis,_FineGrid);
subspace.resize(nbasis,_FineGrid);
}
void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax,
int Nstop, int Nk, int Nm,RealD resid,
RealD MaxIt, RealD betastp, int MinRes)
{
Chebyshev<FineField> Cheby(cheby_op);
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,_Aggregate);
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,_Aggregate);
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,subspace);
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace);
//////////////////////////////////////////////////////////////////////////////////////////////////
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
//////////////////////////////////////////////////////////////////////////////////////////////////
Chebyshev<FineField> ChebySmooth(cheby_smooth);
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax);
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
evals_coarse.resize(Nm);
evec_coarse.resize(Nm,_CoarseGrid);

View File

@ -95,19 +95,34 @@ namespace Grid {
private:
OperatorFunction<Field> & _HermitianRBSolver;
int CBfactorise;
bool subGuess;
public:
/////////////////////////////////////////////////////
// Wrap the usual normal equations Schur trick
/////////////////////////////////////////////////////
SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver) :
SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
_HermitianRBSolver(HermitianRBSolver)
{
CBfactorise=0;
subtractGuess(initSubGuess);
};
void subtractGuess(const bool initSubGuess)
{
subGuess = initSubGuess;
}
bool isSubtractGuess(void)
{
return subGuess;
}
template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){
void operator() (Matrix & _Matrix,const Field &in, Field &out){
ZeroGuesser<Field> guess;
(*this)(_Matrix,in,out,guess);
}
template<class Matrix, class Guesser>
void operator() (Matrix & _Matrix,const Field &in, Field &out, Guesser &guess){
// FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp
@ -129,7 +144,6 @@ namespace Grid {
pickCheckerboard(Odd ,src_o,in);
pickCheckerboard(Even,sol_e,out);
pickCheckerboard(Odd ,sol_o,out);
std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve checkerboards picked" <<std::endl;
/////////////////////////////////////////////////////
@ -146,8 +160,12 @@ namespace Grid {
// Call the red-black solver
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl;
guess(src_o, sol_o);
Mtmp = sol_o;
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver called the Mpc solver" <<std::endl;
// Fionn A2A boolean behavioural control
if (subGuess) sol_o = sol_o-Mtmp;
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
@ -162,11 +180,15 @@ namespace Grid {
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver inserted solution" <<std::endl;
// Verify the unprec residual
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
if ( ! subGuess ) {
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
} else {
std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
}
}
};
template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
@ -179,17 +201,32 @@ namespace Grid {
private:
OperatorFunction<Field> & _HermitianRBSolver;
int CBfactorise;
bool subGuess;
public:
/////////////////////////////////////////////////////
// Wrap the usual normal equations Schur trick
/////////////////////////////////////////////////////
SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver,int cb=0) : _HermitianRBSolver(HermitianRBSolver)
SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver,int cb=0, const bool initSubGuess = false) : _HermitianRBSolver(HermitianRBSolver)
{
CBfactorise=cb;
subtractGuess(initSubGuess);
};
void subtractGuess(const bool initSubGuess)
{
subGuess = initSubGuess;
}
bool isSubtractGuess(void)
{
return subGuess;
}
template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){
void operator() (Matrix & _Matrix,const Field &in, Field &out){
ZeroGuesser<Field> guess;
(*this)(_Matrix,in,out,guess);
}
template<class Matrix, class Guesser>
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
// FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp
@ -225,7 +262,11 @@ namespace Grid {
// Call the red-black solver
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
guess(src_o,sol_o);
Mtmp = sol_o;
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
// Fionn A2A boolean behavioural control
if (subGuess) sol_o = sol_o-Mtmp;
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
@ -238,12 +279,16 @@ namespace Grid {
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
// Verify the unprec residual
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
if ( ! subGuess ) {
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout<<GridLogMessage << "SchurRedBlackDiagMooee solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
std::cout<<GridLogMessage << "SchurRedBlackDiagMooee solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
} else {
std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
}
}
};
@ -256,19 +301,34 @@ namespace Grid {
private:
OperatorFunction<Field> & _HermitianRBSolver;
int CBfactorise;
bool subGuess;
public:
/////////////////////////////////////////////////////
// Wrap the usual normal equations Schur trick
/////////////////////////////////////////////////////
SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver) :
SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
_HermitianRBSolver(HermitianRBSolver)
{
CBfactorise=0;
CBfactorise = 0;
subtractGuess(initSubGuess);
};
void subtractGuess(const bool initSubGuess)
{
subGuess = initSubGuess;
}
bool isSubtractGuess(void)
{
return subGuess;
}
template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){
void operator() (Matrix & _Matrix,const Field &in, Field &out){
ZeroGuesser<Field> guess;
(*this)(_Matrix,in,out,guess);
}
template<class Matrix,class Guesser>
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
// FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp
@ -305,8 +365,12 @@ namespace Grid {
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
// _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
guess(src_o,tmp);
Mtmp = tmp;
_HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
_Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd);
// Fionn A2A boolean behavioural control
if (subGuess) tmp = tmp-Mtmp;
_Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd);
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
@ -319,12 +383,16 @@ namespace Grid {
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
// Verify the unprec residual
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
if ( ! subGuess ) {
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
} else {
std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
}
}
};
///////////////////////////////////////////////////////////////////////////////////////////////////////
@ -335,19 +403,34 @@ namespace Grid {
private:
LinearFunction<Field> & _HermitianRBSolver;
int CBfactorise;
bool subGuess;
public:
/////////////////////////////////////////////////////
// Wrap the usual normal equations Schur trick
/////////////////////////////////////////////////////
SchurRedBlackDiagTwoMixed(LinearFunction<Field> &HermitianRBSolver) :
SchurRedBlackDiagTwoMixed(LinearFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
_HermitianRBSolver(HermitianRBSolver)
{
CBfactorise=0;
subtractGuess(initSubGuess);
};
void subtractGuess(const bool initSubGuess)
{
subGuess = initSubGuess;
}
bool isSubtractGuess(void)
{
return subGuess;
}
template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){
void operator() (Matrix & _Matrix,const Field &in, Field &out){
ZeroGuesser<Field> guess;
(*this)(_Matrix,in,out,guess);
}
template<class Matrix, class Guesser>
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
// FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp
@ -385,7 +468,11 @@ namespace Grid {
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
// _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
// _HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
_HermitianRBSolver(src_o,tmp); assert(tmp.checkerboard==Odd);
guess(src_o,tmp);
Mtmp = tmp;
_HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
// Fionn A2A boolean behavioural control
if (subGuess) tmp = tmp-Mtmp;
_Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd);
///////////////////////////////////////////////////
@ -399,12 +486,16 @@ namespace Grid {
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
// Verify the unprec residual
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
if ( ! subGuess ) {
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
std::cout << GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid " << std::sqrt(nr / ns) << " nr " << nr << " ns " << ns << std::endl;
} else {
std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
}
}
};

View File

@ -277,7 +277,9 @@ public:
uint8_t *cp = (uint8_t *)ptr;
if ( ptr ) {
// One touch per 4k page, static OMP loop to catch same loop order
#ifdef GRID_OMP
#pragma omp parallel for schedule(static)
#endif
for(size_type n=0;n<bytes;n+=4096){
cp[n]=0;
}

View File

@ -44,11 +44,15 @@ void CartesianCommunicator::Init(int *argc, char ***argv)
MPI_Initialized(&flag); // needed to coexist with other libs apparently
if ( !flag ) {
MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
assert (provided == MPI_THREAD_MULTIPLE);
//If only 1 comms thread we require any threading mode other than SINGLE, but for multiple comms threads we need MULTIPLE
if( (nCommThreads == 1 && provided == MPI_THREAD_SINGLE) ||
(nCommThreads > 1 && provided != MPI_THREAD_MULTIPLE) )
assert(0);
}
Grid_quiesce_nodes();
// Never clean up as done once.
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
GlobalSharedMemory::Init(communicator_world);
@ -85,9 +89,17 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &c
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
MPI_Comm optimal_comm;
GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm); // Remap using the shared memory optimising routine
////////////////////////////////////////////////////
// Remap using the shared memory optimising routine
// The remap creates a comm which must be freed
////////////////////////////////////////////////////
GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm);
InitFromMPICommunicator(processors,optimal_comm);
SetCommunicator(optimal_comm);
///////////////////////////////////////////////////
// Free the temp communicator
///////////////////////////////////////////////////
MPI_Comm_free(&optimal_comm);
}
//////////////////////////////////
@ -183,8 +195,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
} else {
srank = 0;
comm_split = parent.communicator;
// std::cout << " Inherited communicator " <<comm_split <<std::endl;
int ierr = MPI_Comm_dup (parent.communicator,&comm_split);
assert(ierr==0);
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
@ -196,6 +208,11 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
// Take the right SHM buffers
//////////////////////////////////////////////////////////////////////////////////////////////////////
SetCommunicator(comm_split);
///////////////////////////////////////////////
// Free the temp communicator
///////////////////////////////////////////////
MPI_Comm_free(&comm_split);
if(0){
std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
@ -210,6 +227,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
{
////////////////////////////////////////////////////
// Creates communicator, and the communicator_halo
////////////////////////////////////////////////////
_ndimension = processors.size();
_processor_coor.resize(_ndimension);

View File

@ -133,6 +133,7 @@ class SharedMemory
public:
SharedMemory() {};
~SharedMemory();
///////////////////////////////////////////////////////////////////////////////////////
// set the buffers & sizes
///////////////////////////////////////////////////////////////////////////////////////

View File

@ -27,6 +27,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */
#include <Grid/GridCore.h>
#include <pwd.h>
namespace Grid {
@ -113,19 +114,150 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
assert(WorldNode!=-1);
_ShmSetup=1;
}
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
// Gray encode support
int BinaryToGray (int binary) {
int gray = (binary>>1)^binary;
return gray;
}
int Log2Size(int TwoToPower,int MAXLOG2)
{
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
////////////////////////////////////////////////////////////////
int log2size = -1;
for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){
if ( (0x1<<i) == WorldShmSize ) {
for(int i=0;i<=MAXLOG2;i++){
if ( (0x1<<i) == TwoToPower ) {
log2size = i;
break;
}
}
return log2size;
}
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
{
#ifdef HYPERCUBE
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
////////////////////////////////////////////////////////////////
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
assert(log2size != -1);
////////////////////////////////////////////////////////////////
// Identify the hypercube coordinate of this node using hostname
////////////////////////////////////////////////////////////////
// n runs 0...7 9...16 18...25 27...34 (8*4) 5 bits
// i runs 0..7 3 bits
// r runs 0..3 2 bits
// 2^10 = 1024 nodes
const int maxhdim = 10;
std::vector<int> HyperCubeCoords(maxhdim,0);
std::vector<int> RootHyperCubeCoords(maxhdim,0);
int R;
int I;
int N;
const int namelen = _POSIX_HOST_NAME_MAX;
char name[namelen];
// Parse ICE-XA hostname to get hypercube location
gethostname(name,namelen);
int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
assert(nscan==3);
int nlo = N%9;
int nhi = N/9;
uint32_t hypercoor = (R<<8)|(I<<5)|(nhi<<3)|nlo ;
uint32_t rootcoor = hypercoor;
//////////////////////////////////////////////////////////////////
// Print debug info
//////////////////////////////////////////////////////////////////
for(int d=0;d<maxhdim;d++){
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
}
std::string hname(name);
std::cout << "hostname "<<hname<<std::endl;
std::cout << "R " << R << " I " << I << " N "<< N
<< " hypercoor 0x"<<std::hex<<hypercoor<<std::dec<<std::endl;
//////////////////////////////////////////////////////////////////
// broadcast node 0's base coordinate for this partition.
//////////////////////////////////////////////////////////////////
MPI_Bcast(&rootcoor, sizeof(rootcoor), MPI_BYTE, 0, WorldComm);
hypercoor=hypercoor-rootcoor;
assert(hypercoor<WorldSize);
assert(hypercoor>=0);
//////////////////////////////////////
// Printing
//////////////////////////////////////
for(int d=0;d<maxhdim;d++){
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
}
////////////////////////////////////////////////////////////////
// Identify subblock of ranks on node spreading across dims
// in a maximally symmetrical way
////////////////////////////////////////////////////////////////
int ndimension = processors.size();
std::vector<int> processor_coor(ndimension);
std::vector<int> WorldDims = processors; std::vector<int> ShmDims (ndimension,1); std::vector<int> NodeDims (ndimension);
std::vector<int> ShmCoor (ndimension); std::vector<int> NodeCoor (ndimension); std::vector<int> WorldCoor(ndimension);
std::vector<int> HyperCoor(ndimension);
int dim = 0;
for(int l2=0;l2<log2size;l2++){
while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
ShmDims[dim]*=2;
dim=(dim+1)%ndimension;
}
////////////////////////////////////////////////////////////////
// Establish torus of processes and nodes with sub-blockings
////////////////////////////////////////////////////////////////
for(int d=0;d<ndimension;d++){
NodeDims[d] = WorldDims[d]/ShmDims[d];
}
////////////////////////////////////////////////////////////////
// Map Hcube according to physical lattice
// must partition. Loop over dims and find out who would join.
////////////////////////////////////////////////////////////////
int hcoor = hypercoor;
for(int d=0;d<ndimension;d++){
int bits = Log2Size(NodeDims[d],MAXLOG2RANKSPERNODE);
int msk = (0x1<<bits)-1;
HyperCoor[d]=hcoor & msk;
HyperCoor[d]=BinaryToGray(HyperCoor[d]); // Space filling curve magic
hcoor = hcoor >> bits;
}
////////////////////////////////////////////////////////////////
// Check processor counts match
////////////////////////////////////////////////////////////////
int Nprocessors=1;
for(int i=0;i<ndimension;i++){
Nprocessors*=processors[i];
}
assert(WorldSize==Nprocessors);
////////////////////////////////////////////////////////////////
// Establish mapping between lexico physics coord and WorldRank
////////////////////////////////////////////////////////////////
int rank;
Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode ,NodeDims);
for(int d=0;d<ndimension;d++) NodeCoor[d]=HyperCoor[d];
Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);
/////////////////////////////////////////////////////////////////
// Build the new communicator
/////////////////////////////////////////////////////////////////
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
assert(ierr==0);
#else
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
////////////////////////////////////////////////////////////////
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
assert(log2size != -1);
////////////////////////////////////////////////////////////////
@ -174,14 +306,77 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
/////////////////////////////////////////////////////////////////
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
assert(ierr==0);
#endif
}
////////////////////////////////////////////////////////////////////////////////////////////
// SHMGET
////////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_MPI3_SHMGET
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// allocate the shared windows for our group
//////////////////////////////////////////////////////////////////////////////////////////////////////////
MPI_Barrier(WorldShmComm);
WorldShmCommBufs.resize(WorldShmSize);
std::vector<int> shmids(WorldShmSize);
if ( WorldShmRank == 0 ) {
for(int r=0;r<WorldShmSize;r++){
size_t size = bytes;
key_t key = IPC_PRIVATE;
int flags = IPC_CREAT | SHM_R | SHM_W;
#ifdef SHM_HUGETLB
if (Hugepages) flags|=SHM_HUGETLB;
#endif
if ((shmids[r]= shmget(key,size, flags)) ==-1) {
int errsv = errno;
printf("Errno %d\n",errsv);
printf("key %d\n",key);
printf("size %lld\n",size);
printf("flags %d\n",flags);
perror("shmget");
exit(1);
}
}
}
MPI_Barrier(WorldShmComm);
MPI_Bcast(&shmids[0],WorldShmSize*sizeof(int),MPI_BYTE,0,WorldShmComm);
MPI_Barrier(WorldShmComm);
for(int r=0;r<WorldShmSize;r++){
WorldShmCommBufs[r] = (uint64_t *)shmat(shmids[r], NULL,0);
if (WorldShmCommBufs[r] == (uint64_t *)-1) {
perror("Shared memory attach failure");
shmctl(shmids[r], IPC_RMID, NULL);
exit(2);
}
}
MPI_Barrier(WorldShmComm);
///////////////////////////////////
// Mark for clean up
///////////////////////////////////
for(int r=0;r<WorldShmSize;r++){
shmctl(shmids[r], IPC_RMID,(struct shmid_ds *)NULL);
}
MPI_Barrier(WorldShmComm);
_ShmAlloc=1;
_ShmAllocBytes = bytes;
}
#endif
////////////////////////////////////////////////////////////////////////////////////////////
// Hugetlbfs mapping intended
////////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_MPI3_SHMMMAP
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -191,7 +386,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
WorldShmCommBufs.resize(WorldShmSize);
////////////////////////////////////////////////////////////////////////////////////////////
// Hugetlbf and others map filesystems as mappable huge pages
// Hugetlbfs and others map filesystems as mappable huge pages
////////////////////////////////////////////////////////////////////////////////////////////
char shm_name [NAME_MAX];
for(int r=0;r<WorldShmSize;r++){
@ -218,6 +413,49 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
assert(((uint64_t)ptr&0x3F)==0);
close(fd);
WorldShmCommBufs[r] =ptr;
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
}
_ShmAlloc=1;
_ShmAllocBytes = bytes;
};
#endif // MMAP
#ifdef GRID_MPI3_SHM_NONE
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// allocate the shared windows for our group
//////////////////////////////////////////////////////////////////////////////////////////////////////////
MPI_Barrier(WorldShmComm);
WorldShmCommBufs.resize(WorldShmSize);
////////////////////////////////////////////////////////////////////////////////////////////
// Hugetlbf and others map filesystems as mappable huge pages
////////////////////////////////////////////////////////////////////////////////////////////
char shm_name [NAME_MAX];
assert(WorldShmSize == 1);
for(int r=0;r<WorldShmSize;r++){
int fd=-1;
int mmap_flag = MAP_SHARED |MAP_ANONYMOUS ;
#ifdef MAP_POPULATE
mmap_flag|=MAP_POPULATE;
#endif
#ifdef MAP_HUGETLB
if ( flags ) mmap_flag |= MAP_HUGETLB;
#endif
void *ptr = (void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);
if ( ptr == (void *)MAP_FAILED ) {
printf("mmap %s failed\n",shm_name);
perror("failed mmap"); assert(0);
}
assert(((uint64_t)ptr&0x3F)==0);
close(fd);
WorldShmCommBufs[r] =ptr;
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
}
_ShmAlloc=1;
_ShmAllocBytes = bytes;
@ -232,6 +470,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
////////////////////////////////////////////////////////////////////////////////////////////
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
MPI_Barrier(WorldShmComm);
@ -243,7 +482,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
size_t size = bytes;
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldNode,r);
struct passwd *pw = getpwuid (getuid());
sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
shm_unlink(shm_name);
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
@ -259,7 +499,11 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#endif
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
if ( ptr == (void * )MAP_FAILED ) { perror("failed mmap"); assert(0); }
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl;
if ( ptr == (void * )MAP_FAILED ) {
perror("failed mmap");
assert(0);
}
assert(((uint64_t)ptr&0x3F)==0);
WorldShmCommBufs[r] =ptr;
@ -274,7 +518,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
size_t size = bytes ;
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldNode,r);
struct passwd *pw = getpwuid (getuid());
sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
int fd=shm_open(shm_name,O_RDWR,0666);
if ( fd<0 ) { perror("failed shm_open"); assert(0); }
@ -292,6 +537,9 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
}
#endif
////////////////////////////////////////////////////////
// Global shared functionality finished
// Now move to per communicator functionality
@ -318,11 +566,12 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
heap_size = GlobalSharedMemory::ShmAllocBytes();
for(int r=0;r<ShmSize;r++){
uint32_t sr = (r==ShmRank) ? GlobalSharedMemory::WorldRank : 0 ;
uint32_t wsr = (r==ShmRank) ? GlobalSharedMemory::WorldShmRank : 0 ;
MPI_Allreduce(MPI_IN_PLACE,&sr,1,MPI_UINT32_T,MPI_SUM,comm);
MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,ShmComm);
ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[sr];
ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[wsr];
// std::cout << "SetCommunicator ShmCommBufs ["<< r<< "] = "<< ShmCommBufs[r]<< " wsr = "<<wsr<<std::endl;
}
ShmBufferFreeAll();
@ -391,5 +640,12 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
return (void *) remote;
}
}
SharedMemory::~SharedMemory()
{
int MPI_is_finalised; MPI_Finalized(&MPI_is_finalised);
if ( !MPI_is_finalised ) {
MPI_Comm_free(&ShmComm);
}
};
}

View File

@ -122,5 +122,7 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
{
return NULL;
}
SharedMemory::~SharedMemory()
{};
}

View File

@ -45,31 +45,33 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimen
int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
int ent = 0;
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
int stride=rhs._grid->_slice_stride[dimension];
if ( cbmask == 0x3 ) {
parallel_for_nest2(int n=0;n<e1;n++){
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*stride;
int bo = n*e2;
buffer[off+bo+b]=rhs._odata[so+o+b];
table[ent++] = std::pair<int,int>(off+bo+b,so+o+b);
}
}
} else {
int bo=0;
std::vector<std::pair<int,int> > table;
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*stride;
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
if ( ocb &cbmask ) {
table.push_back(std::pair<int,int> (bo++,o+b));
table[ent++]=std::pair<int,int> (off+bo++,so+o+b);
}
}
}
parallel_for(int i=0;i<table.size();i++){
buffer[off+table[i].first]=rhs._odata[so+table[i].second];
}
}
parallel_for(int i=0;i<ent;i++){
buffer[table[i].first]=rhs._odata[table[i].second];
}
}
@ -140,31 +142,35 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
int stride=rhs._grid->_slice_stride[dimension];
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
int ent =0;
if ( cbmask ==0x3 ) {
parallel_for_nest2(int n=0;n<e1;n++){
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension];
int bo =n*rhs._grid->_slice_block[dimension];
rhs._odata[so+o+b]=buffer[bo+b];
table[ent++] = std::pair<int,int>(so+o+b,bo+b);
}
}
} else {
std::vector<std::pair<int,int> > table;
int bo=0;
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
if ( ocb & cbmask ) {
table.push_back(std::pair<int,int> (so+o+b,bo++));
table[ent++]=std::pair<int,int> (so+o+b,bo++);
}
}
}
parallel_for(int i=0;i<table.size();i++){
// std::cout << "Rcv"<< table[i].first << " " << table[i].second << " " <<buffer[table[i].second]<<std::endl;
rhs._odata[table[i].first]=buffer[table[i].second];
}
}
parallel_for(int i=0;i<ent;i++){
rhs._odata[table[i].first]=buffer[table[i].second];
}
}
@ -228,29 +234,32 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
int e2=rhs._grid->_slice_block[dimension];
int stride = rhs._grid->_slice_stride[dimension];
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
int ent=0;
if(cbmask == 0x3 ){
parallel_for_nest2(int n=0;n<e1;n++){
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride+b;
//lhs._odata[lo+o]=rhs._odata[ro+o];
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
table[ent++] = std::pair<int,int>(lo+o,ro+o);
}
}
} else {
parallel_for_nest2(int n=0;n<e1;n++){
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride+b;
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
if ( ocb&cbmask ) {
//lhs._odata[lo+o]=rhs._odata[ro+o];
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
table[ent++] = std::pair<int,int>(lo+o,ro+o);
}
}
}
}
parallel_for(int i=0;i<ent;i++){
lhs._odata[table[i].first]=rhs._odata[table[i].second];
}
}
template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
@ -269,16 +278,28 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
int e2=rhs._grid->_slice_block [dimension];
int stride = rhs._grid->_slice_stride[dimension];
parallel_for_nest2(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
int ent=0;
double t_tab,t_perm;
if ( cbmask == 0x3 ) {
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride;
table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
}}
} else {
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride;
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
if ( ocb&cbmask ) {
permute(lhs._odata[lo+o+b],rhs._odata[ro+o+b],permute_type);
}
if ( ocb&cbmask ) table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
}}
}
}}
parallel_for(int i=0;i<ent;i++){
permute(lhs._odata[table[i].first],rhs._odata[table[i].second],permute_type);
}
}
//////////////////////////////////////////////////////
@ -291,6 +312,8 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
double t_local;
if ( sshift[0] == sshift[1] ) {
Cshift_local(ret,rhs,dimension,shift,0x3);
} else {
@ -299,7 +322,7 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
}
}
template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
template<class vobj> void Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
{
GridBase *grid = rhs._grid;
int fd = grid->_fdimensions[dimension];
@ -325,11 +348,7 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
int sx = (x+sshift)%rd;
// FIXME : This must change where we have a
// Rotate slice.
// Document how this works ; why didn't I do this when I first wrote it...
// wrap is whether sshift > rd.
// num is sshift mod rd.
//
@ -365,10 +384,8 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
else Copy_plane(ret,rhs,dimension,x,sx,cbmask);
}
return ret;
}
}
#endif

View File

@ -54,13 +54,13 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
if ( !comm_dim ) {
// std::cout << "Cshift_local" <<std::endl;
//std::cout << "CSHIFT: Cshift_local" <<std::endl;
Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
} else if ( splice_dim ) {
// std::cout << "Cshift_comms_simd" <<std::endl;
//std::cout << "CSHIFT: Cshift_comms_simd call - splice_dim = " << splice_dim << " shift " << shift << " dimension = " << dimension << std::endl;
Cshift_comms_simd(ret,rhs,dimension,shift);
} else {
// std::cout << "Cshift_comms" <<std::endl;
//std::cout << "CSHIFT: Cshift_comms" <<std::endl;
Cshift_comms(ret,rhs,dimension,shift);
}
return ret;
@ -91,9 +91,12 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,const Lattice<vob
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
//std::cout << "Cshift_comms_simd dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
if ( sshift[0] == sshift[1] ) {
//std::cout << "Single pass Cshift_comms" <<std::endl;
Cshift_comms_simd(ret,rhs,dimension,shift,0x3);
} else {
//std::cout << "Two pass Cshift_comms" <<std::endl;
Cshift_comms_simd(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
Cshift_comms_simd(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
}
@ -175,6 +178,10 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
int simd_layout = grid->_simd_layout[dimension];
int comm_dim = grid->_processors[dimension] >1 ;
//std::cout << "Cshift_comms_simd dim "<< dimension << " fd "<<fd<<" rd "<<rd
// << " ld "<<ld<<" pd " << pd<<" simd_layout "<<simd_layout
// << " comm_dim " << comm_dim << " cbmask " << cbmask <<std::endl;
assert(comm_dim==1);
assert(simd_layout==2);
assert(shift>=0);

File diff suppressed because it is too large Load Diff

View File

@ -244,19 +244,11 @@ namespace Grid {
template<class sobj,class vobj> strong_inline
RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
ret.checkerboard = x.checkerboard;
conformable(ret,x);
conformable(x,y);
axpy(ret,a,x,y);
return norm2(ret);
return axpy_norm_fast(ret,a,x,y);
}
template<class sobj,class vobj> strong_inline
RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
ret.checkerboard = x.checkerboard;
conformable(ret,x);
conformable(x,y);
axpby(ret,a,b,x,y);
return norm2(ret); // FIXME implement parallel norm in ss loop
return axpby_norm_fast(ret,a,b,x,y);
}
}

View File

@ -256,9 +256,42 @@ public:
_odata[ss]=r._odata[ss];
}
}
Lattice(Lattice&& r){ // move constructor
_grid = r._grid;
checkerboard = r.checkerboard;
_odata=std::move(r._odata);
}
inline Lattice<vobj> & operator = (Lattice<vobj> && r)
{
_grid = r._grid;
checkerboard = r.checkerboard;
_odata =std::move(r._odata);
return *this;
}
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
_grid = r._grid;
checkerboard = r.checkerboard;
_odata.resize(_grid->oSites());// essential
parallel_for(int ss=0;ss<_grid->oSites();ss++){
_odata[ss]=r._odata[ss];
}
return *this;
}
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
this->checkerboard = r.checkerboard;
conformable(*this,r);
parallel_for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r._odata[ss];
}
return *this;
}
virtual ~Lattice(void) = default;
void reset(GridBase* grid) {
@ -277,15 +310,6 @@ public:
return *this;
}
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
this->checkerboard = r.checkerboard;
conformable(*this,r);
parallel_for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r._odata[ss];
}
return *this;
}
// *=,+=,-= operators inherit behvour from correspond */+/- operation
template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {

View File

@ -179,7 +179,7 @@ namespace Grid {
return ret;
}
#define DECLARE_RELATIONAL(op,functor) \
#define DECLARE_RELATIONAL_EQ(op,functor) \
template<class vsimd,IfSimd<vsimd> = 0>\
inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\
{\
@ -198,11 +198,6 @@ namespace Grid {
typedef typename vsimd::scalar_type scalar;\
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
}\
template<class vsimd>\
inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
{ \
return lhs._internal op rhs._internal; \
} \
template<class vsimd>\
inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs) \
{ \
@ -212,14 +207,21 @@ namespace Grid {
inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
{ \
return lhs op rhs._internal; \
}
} \
#define DECLARE_RELATIONAL(op,functor) \
DECLARE_RELATIONAL_EQ(op,functor) \
template<class vsimd>\
inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
{ \
return lhs._internal op rhs._internal; \
}
DECLARE_RELATIONAL(<,slt);
DECLARE_RELATIONAL(<=,sle);
DECLARE_RELATIONAL(>,sgt);
DECLARE_RELATIONAL(>=,sge);
DECLARE_RELATIONAL(==,seq);
DECLARE_RELATIONAL_EQ(==,seq);
DECLARE_RELATIONAL(!=,sne);
#undef DECLARE_RELATIONAL

View File

@ -52,23 +52,5 @@ namespace Grid {
}
};
// LatticeCoordinate();
// FIXME for debug; deprecate this; made obscelete by
template<class vobj> void lex_sites(Lattice<vobj> &l){
Real *v_ptr = (Real *)&l._odata[0];
size_t o_len = l._grid->oSites();
size_t v_len = sizeof(vobj)/sizeof(vRealF);
size_t vec_len = vRealF::Nsimd();
for(int i=0;i<o_len;i++){
for(int j=0;j<v_len;j++){
for(int vv=0;vv<vec_len;vv+=2){
v_ptr[i*v_len*vec_len+j*vec_len+vv ]= i+vv*500;
v_ptr[i*v_len*vec_len+j*vec_len+vv+1]= i+vv*500;
}
}}
}
}
#endif

View File

@ -33,41 +33,94 @@ namespace Grid {
// Deterministic Reduction operations
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
ComplexD nrm = innerProduct(arg,arg);
auto nrm = innerProduct(arg,arg);
return std::real(nrm);
}
// Double inner product
template<class vobj>
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
{
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_typeD vector_type;
scalar_type nrm;
GridBase *grid = left._grid;
std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
const int pad = 8;
ComplexD inner;
Vector<ComplexD> sumarray(grid->SumArraySize()*pad);
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
int nwork, mywork, myoff;
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
decltype(innerProductD(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation
decltype(innerProductD(left._odata[0],right._odata[0])) vinner=zero; // private to thread; sub summation
for(int ss=myoff;ss<mywork+myoff; ss++){
vnrm = vnrm + innerProductD(left._odata[ss],right._odata[ss]);
vinner = vinner + innerProductD(left._odata[ss],right._odata[ss]);
}
sumarray[thr]=TensorRemove(vnrm) ;
// All threads sum across SIMD; reduce serial work at end
// one write per cacheline with streaming store
ComplexD tmp = Reduce(TensorRemove(vinner)) ;
vstream(sumarray[thr*pad],tmp);
}
vector_type vvnrm; vvnrm=zero; // sum across threads
inner=0.0;
for(int i=0;i<grid->SumArraySize();i++){
vvnrm = vvnrm+sumarray[i];
inner = inner+sumarray[i*pad];
}
nrm = Reduce(vvnrm);// sum across simd
right._grid->GlobalSum(nrm);
return nrm;
right._grid->GlobalSum(inner);
return inner;
}
/////////////////////////
// Fast axpby_norm
// z = a x + b y
// return norm z
/////////////////////////
template<class sobj,class vobj> strong_inline RealD
axpy_norm_fast(Lattice<vobj> &z,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y)
{
sobj one(1.0);
return axpby_norm_fast(z,a,one,x,y);
}
template<class sobj,class vobj> strong_inline RealD
axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
{
const int pad = 8;
z.checkerboard = x.checkerboard;
conformable(z,x);
conformable(x,y);
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_typeD vector_type;
RealD nrm;
GridBase *grid = x._grid;
Vector<RealD> sumarray(grid->SumArraySize()*pad);
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
int nwork, mywork, myoff;
GridThread::GetWork(x._grid->oSites(),thr,mywork,myoff);
// private to thread; sub summation
decltype(innerProductD(z._odata[0],z._odata[0])) vnrm=zero;
for(int ss=myoff;ss<mywork+myoff; ss++){
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
vnrm = vnrm + innerProductD(tmp,tmp);
vstream(z._odata[ss],tmp);
}
vstream(sumarray[thr*pad],real(Reduce(TensorRemove(vnrm)))) ;
}
nrm = 0.0; // sum across threads; linear in thread count but fast
for(int i=0;i<grid->SumArraySize();i++){
nrm = nrm+sumarray[i*pad];
}
z._grid->GlobalSum(nrm);
return nrm;
}
template<class Op,class T1>
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
@ -221,6 +274,115 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
}
}
template<class vobj>
static void mySliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
{
// std::cout << GridLogMessage << "Start mySliceInnerProductVector" << std::endl;
typedef typename vobj::scalar_type scalar_type;
std::vector<scalar_type> lsSum;
localSliceInnerProductVector(result, lhs, rhs, lsSum, orthogdim);
globalSliceInnerProductVector(result, lhs, lsSum, orthogdim);
// std::cout << GridLogMessage << "End mySliceInnerProductVector" << std::endl;
}
template <class vobj>
static void localSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, const Lattice<vobj> &rhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
{
// std::cout << GridLogMessage << "Start prep" << std::endl;
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
GridBase *grid = lhs._grid;
assert(grid!=NULL);
conformable(grid,rhs._grid);
const int Nd = grid->_ndimension;
const int Nsimd = grid->Nsimd();
assert(orthogdim >= 0);
assert(orthogdim < Nd);
int fd=grid->_fdimensions[orthogdim];
int ld=grid->_ldimensions[orthogdim];
int rd=grid->_rdimensions[orthogdim];
// std::cout << GridLogMessage << "Start alloc" << std::endl;
std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first
lsSum.resize(ld,scalar_type(0.0)); // sum across these down to scalars
std::vector<iScalar<scalar_type>> extracted(Nsimd); // splitting the SIMD
// std::cout << GridLogMessage << "End alloc" << std::endl;
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
for(int r=0;r<rd;r++){
lvSum[r]=zero;
}
int e1= grid->_slice_nblock[orthogdim];
int e2= grid->_slice_block [orthogdim];
int stride=grid->_slice_stride[orthogdim];
// std::cout << GridLogMessage << "End prep" << std::endl;
// std::cout << GridLogMessage << "Start parallel inner product, _rd = " << rd << std::endl;
vector_type vv;
parallel_for(int r=0;r<rd;r++)
{
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int ss = so + n * stride + b;
vv = TensorRemove(innerProduct(lhs._odata[ss], rhs._odata[ss]));
lvSum[r] = lvSum[r] + vv;
}
}
}
// std::cout << GridLogMessage << "End parallel inner product" << std::endl;
// Sum across simd lanes in the plane, breaking out orthog dir.
std::vector<int> icoor(Nd);
for(int rt=0;rt<rd;rt++){
iScalar<vector_type> temp;
temp._internal = lvSum[rt];
extract(temp,extracted);
for(int idx=0;idx<Nsimd;idx++){
grid->iCoorFromIindex(icoor,idx);
int ldx =rt+icoor[orthogdim]*rd;
lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
}
}
// std::cout << GridLogMessage << "End sum over simd lanes" << std::endl;
}
template <class vobj>
static void globalSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
{
typedef typename vobj::scalar_type scalar_type;
GridBase *grid = lhs._grid;
int fd = result.size();
int ld = lsSum.size();
// sum over nodes.
std::vector<scalar_type> gsum;
gsum.resize(fd, scalar_type(0.0));
// std::cout << GridLogMessage << "Start of gsum[t] creation:" << std::endl;
for(int t=0;t<fd;t++){
int pt = t/ld; // processor plane
int lt = t%ld;
if ( pt == grid->_processor_coor[orthogdim] ) {
gsum[t]=lsSum[lt];
}
}
// std::cout << GridLogMessage << "End of gsum[t] creation:" << std::endl;
// std::cout << GridLogMessage << "Start of GlobalSumVector:" << std::endl;
grid->GlobalSumVector(&gsum[0], fd);
// std::cout << GridLogMessage << "End of GlobalSumVector:" << std::endl;
result = gsum;
}
template<class vobj>
static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
{

View File

@ -158,10 +158,19 @@ namespace Grid {
// tens of seconds per trajectory so this is clean in all reasonable cases,
// and margin of safety is orders of magnitude.
// We could hack Sitmo to skip in the higher order words of state if necessary
//
// Replace with 2^30 ; avoid problem on large volumes
//
/////////////////////////////////////////////////////////////////////////////////////
// uint64_t skip = site+1; // Old init Skipped then drew. Checked compat with faster init
const int shift = 30;
uint64_t skip = site;
skip = skip<<40;
skip = skip<<shift;
assert((skip >> shift)==site); // check for overflow
eng.discard(skip);
// std::cout << " Engine " <<site << " state " <<eng<<std::endl;
}
@ -242,7 +251,7 @@ namespace Grid {
dist[0].reset();
for(int idx=0;idx<words;idx++){
fillScalar(buf[idx],dist[0],_generators[0]);
fillScalar(buf[idx],dist[0],_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
@ -274,7 +283,7 @@ namespace Grid {
RealF *pointer=(RealF *)&l;
dist[0].reset();
for(int i=0;i<2*vComplexF::Nsimd();i++){
fillScalar(pointer[i],dist[0],_generators[0]);
fillScalar(pointer[i],dist[0],_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
@ -282,7 +291,7 @@ namespace Grid {
RealD *pointer=(RealD *)&l;
dist[0].reset();
for(int i=0;i<2*vComplexD::Nsimd();i++){
fillScalar(pointer[i],dist[0],_generators[0]);
fillScalar(pointer[i],dist[0],_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
@ -290,7 +299,7 @@ namespace Grid {
RealF *pointer=(RealF *)&l;
dist[0].reset();
for(int i=0;i<vRealF::Nsimd();i++){
fillScalar(pointer[i],dist[0],_generators[0]);
fillScalar(pointer[i],dist[0],_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
@ -308,6 +317,19 @@ namespace Grid {
std::seed_seq src(seeds.begin(),seeds.end());
Seed(src,0);
}
void SeedUniqueString(const std::string &s){
std::vector<int> seeds;
std::stringstream sha;
seeds = GridChecksum::sha256_seeds(s);
for(int i=0;i<seeds.size();i++) {
sha << std::hex << seeds[i];
}
std::cout << GridLogMessage << "Intialising serial RNG with unique string '"
<< s << "'" << std::endl;
std::cout << GridLogMessage << "Seed SHA256: " << sha.str() << std::endl;
SeedFixedIntegers(seeds);
}
};
class GridParallelRNG : public GridRNGbase {
@ -368,6 +390,14 @@ namespace Grid {
_time_counter += usecond()- inner_time_counter;
};
void SeedUniqueString(const std::string &s){
std::vector<int> seeds;
seeds = GridChecksum::sha256_seeds(s);
std::cout << GridLogMessage << "Intialising parallel RNG with unique string '"
<< s << "'" << std::endl;
std::cout << GridLogMessage << "Seed SHA256: " << GridChecksum::sha256_string(seeds) << std::endl;
SeedFixedIntegers(seeds);
}
void SeedFixedIntegers(const std::vector<int> &seeds){
// Everyone generates the same seed_seq based on input seeds

View File

@ -485,7 +485,7 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
template<class vobj>
void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
void ExtractSliceLocal(Lattice<vobj> &lowDim, const Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{
typedef typename vobj::scalar_object sobj;
@ -520,7 +520,7 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slic
template<class vobj>
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
void Replicate(const Lattice<vobj> &coarse,Lattice<vobj> & fine)
{
typedef typename vobj::scalar_object sobj;
@ -599,6 +599,51 @@ unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
extract1(in_vobj, out_ptrs, 0);
}
}
template<typename vobj, typename sobj>
typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type
unvectorizeToRevLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
{
typedef typename vobj::vector_type vtype;
GridBase* in_grid = in._grid;
out.resize(in_grid->lSites());
int ndim = in_grid->Nd();
int in_nsimd = vtype::Nsimd();
std::vector<std::vector<int> > in_icoor(in_nsimd);
for(int lane=0; lane < in_nsimd; lane++){
in_icoor[lane].resize(ndim);
in_grid->iCoorFromIindex(in_icoor[lane], lane);
}
parallel_for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index
//Assemble vector of pointers to output elements
std::vector<sobj*> out_ptrs(in_nsimd);
std::vector<int> in_ocoor(ndim);
in_grid->oCoorFromOindex(in_ocoor, in_oidx);
std::vector<int> lcoor(in_grid->Nd());
for(int lane=0; lane < in_nsimd; lane++){
for(int mu=0;mu<ndim;mu++)
lcoor[mu] = in_ocoor[mu] + in_grid->_rdimensions[mu]*in_icoor[lane][mu];
int lex;
Lexicographic::IndexFromCoorReversed(lcoor, lex, in_grid->_ldimensions);
out_ptrs[lane] = &out[lex];
}
//Unpack into those ptrs
const vobj & in_vobj = in._odata[in_oidx];
extract1(in_vobj, out_ptrs, 0);
}
}
//Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order
template<typename vobj, typename sobj>
typename std::enable_if<isSIMDvectorized<vobj>::value
@ -648,10 +693,59 @@ vectorizeFromLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
}
}
template<typename vobj, typename sobj>
typename std::enable_if<isSIMDvectorized<vobj>::value
&& !isSIMDvectorized<sobj>::value, void>::type
vectorizeFromRevLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
{
typedef typename vobj::vector_type vtype;
GridBase* grid = out._grid;
assert(in.size()==grid->lSites());
int ndim = grid->Nd();
int nsimd = vtype::Nsimd();
std::vector<std::vector<int> > icoor(nsimd);
for(int lane=0; lane < nsimd; lane++){
icoor[lane].resize(ndim);
grid->iCoorFromIindex(icoor[lane],lane);
}
parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index
//Assemble vector of pointers to output elements
std::vector<sobj*> ptrs(nsimd);
std::vector<int> ocoor(ndim);
grid->oCoorFromOindex(ocoor, oidx);
std::vector<int> lcoor(grid->Nd());
for(int lane=0; lane < nsimd; lane++){
for(int mu=0;mu<ndim;mu++){
lcoor[mu] = ocoor[mu] + grid->_rdimensions[mu]*icoor[lane][mu];
}
int lex;
Lexicographic::IndexFromCoorReversed(lcoor, lex, grid->_ldimensions);
ptrs[lane] = &in[lex];
}
//pack from those ptrs
vobj vecobj;
merge1(vecobj, ptrs, 0);
out._odata[oidx] = vecobj;
}
}
//Convert a Lattice from one precision to another
template<class VobjOut, class VobjIn>
void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
assert(out._grid->Nd() == in._grid->Nd());
assert(out._grid->FullDimensions() == in._grid->FullDimensions());
out.checkerboard = in.checkerboard;
GridBase *in_grid=in._grid;
GridBase *out_grid = out._grid;

View File

@ -86,7 +86,7 @@ protected:
Colours &Painter;
int active;
int timing_mode;
int topWidth{-1};
int topWidth{-1}, chanWidth{-1};
static int timestamp;
std::string name, topName;
std::string COLOUR;
@ -126,6 +126,7 @@ public:
}
}
void setTopWidth(const int w) {topWidth = w;}
void setChanWidth(const int w) {chanWidth = w;}
friend std::ostream& operator<< (std::ostream& stream, Logger& log){
@ -136,7 +137,12 @@ public:
stream << std::setw(log.topWidth);
}
stream << log.topName << log.background()<< " : ";
stream << log.colour() << std::left << log.name << log.background() << " : ";
stream << log.colour() << std::left;
if (log.chanWidth > 0)
{
stream << std::setw(log.chanWidth);
}
stream << log.name << log.background() << " : ";
if ( log.timestamp ) {
log.StopWatch->Stop();
GridTime now = log.StopWatch->Elapsed();

View File

@ -91,7 +91,7 @@ class BinaryIO {
typedef typename vobj::scalar_object sobj;
GridBase *grid = lat._grid;
int lsites = grid->lSites();
uint64_t lsites = grid->lSites();
std::vector<sobj> scalardata(lsites);
unvectorizeToLexOrdArray(scalardata,lat);
@ -110,11 +110,11 @@ class BinaryIO {
lsites = 1;
}
#pragma omp parallel
PARALLEL_REGION
{
uint32_t nersc_csum_thr = 0;
#pragma omp for
PARALLEL_FOR_LOOP_INTERN
for (uint64_t local_site = 0; local_site < lsites; local_site++)
{
uint32_t *site_buf = (uint32_t *)&fbuf[local_site];
@ -124,7 +124,7 @@ class BinaryIO {
}
}
#pragma omp critical
PARALLEL_CRITICAL
{
nersc_csum += nersc_csum_thr;
}
@ -146,21 +146,23 @@ class BinaryIO {
std::vector<int> local_start =grid->LocalStarts();
std::vector<int> global_vol =grid->FullDimensions();
#pragma omp parallel
PARALLEL_REGION
{
std::vector<int> coor(nd);
uint32_t scidac_csuma_thr=0;
uint32_t scidac_csumb_thr=0;
uint32_t site_crc=0;
#pragma omp for
PARALLEL_FOR_LOOP_INTERN
for(uint64_t local_site=0;local_site<lsites;local_site++){
uint32_t * site_buf = (uint32_t *)&fbuf[local_site];
/*
* Scidac csum is rather more heavyweight
* FIXME -- 128^3 x 256 x 16 will overflow.
*/
int global_site;
Lexicographic::CoorFromIndex(coor,local_site,local_vol);
@ -181,7 +183,7 @@ class BinaryIO {
scidac_csumb_thr ^= site_crc<<gsite31 | site_crc>>(32-gsite31);
}
#pragma omp critical
PARALLEL_CRITICAL
{
scidac_csuma^= scidac_csuma_thr;
scidac_csumb^= scidac_csumb_thr;
@ -261,7 +263,7 @@ class BinaryIO {
GridBase *grid,
std::vector<fobj> &iodata,
std::string file,
Integer offset,
uint64_t& offset,
const std::string &format, int control,
uint32_t &nersc_csum,
uint32_t &scidac_csuma,
@ -370,7 +372,7 @@ class BinaryIO {
std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : "
<< iodata.size() * sizeof(fobj) << " bytes" << std::endl;
std::ifstream fin;
fin.open(file, std::ios::binary | std::ios::in);
fin.open(file, std::ios::binary | std::ios::in);
if (control & BINARYIO_MASTER_APPEND)
{
fin.seekg(-sizeof(fobj), fin.end);
@ -429,14 +431,20 @@ class BinaryIO {
MPI_Abort(MPI_COMM_WORLD, 1); //assert(ierr == 0);
}
std::cout << GridLogDebug << "MPI read I/O set view " << file << std::endl;
std::cout << GridLogDebug << "MPI write I/O set view " << file << std::endl;
ierr = MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL);
assert(ierr == 0);
std::cout << GridLogDebug << "MPI read I/O write all " << file << std::endl;
std::cout << GridLogDebug << "MPI write I/O write all " << file << std::endl;
ierr = MPI_File_write_all(fh, &iodata[0], 1, localArray, &status);
assert(ierr == 0);
MPI_Offset os;
MPI_File_get_position(fh, &os);
MPI_File_get_byte_offset(fh, os, &disp);
offset = disp;
MPI_File_close(&fh);
MPI_Type_free(&fileArray);
MPI_Type_free(&localArray);
@ -446,16 +454,20 @@ class BinaryIO {
} else {
std::cout << GridLogMessage << "IOobject: C++ write I/O " << file << " : "
<< iodata.size() * sizeof(fobj) << " bytes" << std::endl;
<< iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
std::ofstream fout;
fout.exceptions ( std::fstream::failbit | std::fstream::badbit );
try {
fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
if (offset) { // Must already exist and contain data
fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
} else { // Allow create
fout.open(file,std::ios::binary|std::ios::out);
}
} catch (const std::fstream::failure& exc) {
std::cout << GridLogError << "Error in opening the file " << file << " for output" <<std::endl;
std::cout << GridLogError << "Exception description: " << exc.what() << std::endl;
std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl;
// std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl;
#ifdef USE_MPI_IO
MPI_Abort(MPI_COMM_WORLD,1);
#else
@ -489,6 +501,7 @@ class BinaryIO {
exit(1);
#endif
}
offset = fout.tellp();
fout.close();
}
timer.Stop();
@ -523,7 +536,7 @@ class BinaryIO {
static inline void readLatticeObject(Lattice<vobj> &Umu,
std::string file,
munger munge,
Integer offset,
uint64_t offset,
const std::string &format,
uint32_t &nersc_csum,
uint32_t &scidac_csuma,
@ -533,7 +546,7 @@ class BinaryIO {
typedef typename vobj::Realified::scalar_type word; word w=0;
GridBase *grid = Umu._grid;
int lsites = grid->lSites();
uint64_t lsites = grid->lSites();
std::vector<sobj> scalardata(lsites);
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
@ -544,7 +557,7 @@ class BinaryIO {
GridStopWatch timer;
timer.Start();
parallel_for(int x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
parallel_for(uint64_t x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
vectorizeFromLexOrdArray(scalardata,Umu);
grid->Barrier();
@ -560,7 +573,7 @@ class BinaryIO {
static inline void writeLatticeObject(Lattice<vobj> &Umu,
std::string file,
munger munge,
Integer offset,
uint64_t offset,
const std::string &format,
uint32_t &nersc_csum,
uint32_t &scidac_csuma,
@ -569,7 +582,7 @@ class BinaryIO {
typedef typename vobj::scalar_object sobj;
typedef typename vobj::Realified::scalar_type word; word w=0;
GridBase *grid = Umu._grid;
int lsites = grid->lSites();
uint64_t lsites = grid->lSites();
std::vector<sobj> scalardata(lsites);
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
@ -580,7 +593,7 @@ class BinaryIO {
GridStopWatch timer; timer.Start();
unvectorizeToLexOrdArray(scalardata,Umu);
parallel_for(int x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
parallel_for(uint64_t x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
grid->Barrier();
timer.Stop();
@ -597,7 +610,7 @@ class BinaryIO {
static inline void readRNG(GridSerialRNG &serial,
GridParallelRNG &parallel,
std::string file,
Integer offset,
uint64_t offset,
uint32_t &nersc_csum,
uint32_t &scidac_csuma,
uint32_t &scidac_csumb)
@ -610,8 +623,8 @@ class BinaryIO {
std::string format = "IEEE32BIG";
GridBase *grid = parallel._grid;
int gsites = grid->gSites();
int lsites = grid->lSites();
uint64_t gsites = grid->gSites();
uint64_t lsites = grid->lSites();
uint32_t nersc_csum_tmp = 0;
uint32_t scidac_csuma_tmp = 0;
@ -626,7 +639,7 @@ class BinaryIO {
nersc_csum,scidac_csuma,scidac_csumb);
timer.Start();
parallel_for(int lidx=0;lidx<lsites;lidx++){
parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
std::vector<RngStateType> tmp(RngStateCount);
std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin());
parallel.SetState(tmp,lidx);
@ -659,7 +672,7 @@ class BinaryIO {
static inline void writeRNG(GridSerialRNG &serial,
GridParallelRNG &parallel,
std::string file,
Integer offset,
uint64_t offset,
uint32_t &nersc_csum,
uint32_t &scidac_csuma,
uint32_t &scidac_csumb)
@ -670,8 +683,8 @@ class BinaryIO {
typedef std::array<RngStateType,RngStateCount> RNGstate;
GridBase *grid = parallel._grid;
int gsites = grid->gSites();
int lsites = grid->lSites();
uint64_t gsites = grid->gSites();
uint64_t lsites = grid->lSites();
uint32_t nersc_csum_tmp;
uint32_t scidac_csuma_tmp;
@ -684,7 +697,7 @@ class BinaryIO {
timer.Start();
std::vector<RNGstate> iodata(lsites);
parallel_for(int lidx=0;lidx<lsites;lidx++){
parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
std::vector<RngStateType> tmp(RngStateCount);
parallel.GetState(tmp,lidx);
std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin());
@ -693,7 +706,6 @@ class BinaryIO {
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
nersc_csum,scidac_csuma,scidac_csumb);
iodata.resize(1);
{
std::vector<RngStateType> tmp(RngStateCount);

View File

@ -182,6 +182,11 @@ class GridLimeReader : public BinaryIO {
{
filename= _filename;
File = fopen(filename.c_str(), "r");
if (File == nullptr)
{
std::cerr << "cannot open file '" << filename << "'" << std::endl;
abort();
}
LimeR = limeCreateReader(File);
}
/////////////////////////////////////////////
@ -245,10 +250,8 @@ class GridLimeReader : public BinaryIO {
////////////////////////////////////////////
// Read a generic serialisable object
////////////////////////////////////////////
template<class serialisable_object>
void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
void readLimeObject(std::string &xmlstring,std::string record_name)
{
std::string xmlstring;
// should this be a do while; can we miss a first record??
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
@ -262,18 +265,29 @@ class GridLimeReader : public BinaryIO {
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
// std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl;
XmlReader RD(&xmlc[0],"");
read(RD,object_name,object);
xmlstring = std::string(&xmlc[0]);
return;
}
}
assert(0);
}
template<class serialisable_object>
void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
{
std::string xmlstring;
readLimeObject(xmlstring, record_name);
XmlReader RD(xmlstring, true, "");
read(RD,object_name,object);
}
};
class GridLimeWriter : public BinaryIO {
class GridLimeWriter : public BinaryIO
{
public:
///////////////////////////////////////////////////
// FIXME: format for RNG? Now just binary out instead
// FIXME: collective calls or not ?
@ -282,17 +296,24 @@ class GridLimeWriter : public BinaryIO {
FILE *File;
LimeWriter *LimeW;
std::string filename;
bool boss_node;
GridLimeWriter( bool isboss = true) {
boss_node = isboss;
}
void open(const std::string &_filename) {
filename= _filename;
File = fopen(filename.c_str(), "w");
LimeW = limeCreateWriter(File); assert(LimeW != NULL );
if ( boss_node ) {
File = fopen(filename.c_str(), "w");
LimeW = limeCreateWriter(File); assert(LimeW != NULL );
}
}
/////////////////////////////////////////////
// Close the file
/////////////////////////////////////////////
void close(void) {
fclose(File);
if ( boss_node ) {
fclose(File);
}
// limeDestroyWriter(LimeW);
}
///////////////////////////////////////////////////////
@ -300,77 +321,122 @@ class GridLimeWriter : public BinaryIO {
///////////////////////////////////////////////////////
int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize)
{
LimeRecordHeader *h;
h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize);
assert(limeWriteRecordHeader(h, LimeW) >= 0);
limeDestroyHeader(h);
if ( boss_node ) {
LimeRecordHeader *h;
h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize);
assert(limeWriteRecordHeader(h, LimeW) >= 0);
limeDestroyHeader(h);
}
return LIME_SUCCESS;
}
////////////////////////////////////////////
// Write a generic serialisable object
////////////////////////////////////////////
template<class serialisable_object>
void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name)
void writeLimeObject(int MB,int ME,XmlWriter &writer,std::string object_name,std::string record_name)
{
std::string xmlstring;
{
XmlWriter WR("","");
write(WR,object_name,object);
xmlstring = WR.XmlString();
}
// std::cout << "WriteLimeObject" << record_name <<std::endl;
uint64_t nbytes = xmlstring.size();
// std::cout << " xmlstring "<< nbytes<< " " << xmlstring <<std::endl;
int err;
LimeRecordHeader *h = limeCreateHeader(MB, ME,const_cast<char *>(record_name.c_str()), nbytes);
assert(h!= NULL);
if ( boss_node ) {
std::string xmlstring = writer.docString();
err=limeWriteRecordHeader(h, LimeW); assert(err>=0);
err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0);
err=limeWriterCloseRecord(LimeW); assert(err>=0);
limeDestroyHeader(h);
// std::cout << " File offset is now"<<ftello(File) << std::endl;
// std::cout << "WriteLimeObject" << record_name <<std::endl;
uint64_t nbytes = xmlstring.size();
// std::cout << " xmlstring "<< nbytes<< " " << xmlstring <<std::endl;
int err;
LimeRecordHeader *h = limeCreateHeader(MB, ME,const_cast<char *>(record_name.c_str()), nbytes);
assert(h!= NULL);
err=limeWriteRecordHeader(h, LimeW); assert(err>=0);
err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0);
err=limeWriterCloseRecord(LimeW); assert(err>=0);
limeDestroyHeader(h);
}
}
////////////////////////////////////////////
template<class serialisable_object>
void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name, const unsigned int scientificPrec = 0)
{
XmlWriter WR("","");
if (scientificPrec)
{
WR.scientificFormat(true);
WR.setPrecision(scientificPrec);
}
write(WR,object_name,object);
writeLimeObject(MB, ME, WR, object_name, record_name);
}
////////////////////////////////////////////////////
// Write a generic lattice field and csum
////////////////////////////////////////////
// This routine is Collectively called by all nodes
// in communicator used by the field._grid
////////////////////////////////////////////////////
template<class vobj>
void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
{
////////////////////////////////////////////
// Create record header
////////////////////////////////////////////
typedef typename vobj::scalar_object sobj;
int err;
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites;
createLimeRecordHeader(record_name, 0, 0, PayloadSize);
// std::cout << "W sizeof(sobj)" <<sizeof(sobj)<<std::endl;
// std::cout << "W Gsites " <<field._grid->_gsites<<std::endl;
// std::cout << "W Payload expected " <<PayloadSize<<std::endl;
////////////////////////////////////////////////////////////////////
// NB: FILE and iostream are jointly writing disjoint sequences in the
// the same file through different file handles (integer units).
//
// These are both buffered, so why I think this code is right is as follows.
//
// i) write record header to FILE *File, telegraphing the size.
// ii) ftello reads the offset from FILE *File .
// i) write record header to FILE *File, telegraphing the size; flush
// ii) ftello reads the offset from FILE *File .
// iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
// Closes iostream and flushes.
// iv) fseek on FILE * to end of this disjoint section.
// v) Continue writing scidac record.
////////////////////////////////////////////////////////////////////
uint64_t offset = ftello(File);
// std::cout << " Writing to offset "<<offset << std::endl;
GridBase *grid = field._grid;
assert(boss_node == field._grid->IsBoss() );
////////////////////////////////////////////
// Create record header
////////////////////////////////////////////
typedef typename vobj::scalar_object sobj;
int err;
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
uint64_t PayloadSize = sizeof(sobj) * grid->_gsites;
if ( boss_node ) {
createLimeRecordHeader(record_name, 0, 0, PayloadSize);
fflush(File);
}
// std::cout << "W sizeof(sobj)" <<sizeof(sobj)<<std::endl;
// std::cout << "W Gsites " <<field._grid->_gsites<<std::endl;
// std::cout << "W Payload expected " <<PayloadSize<<std::endl;
////////////////////////////////////////////////
// Check all nodes agree on file position
////////////////////////////////////////////////
uint64_t offset1;
if ( boss_node ) {
offset1 = ftello(File);
}
grid->Broadcast(0,(void *)&offset1,sizeof(offset1));
///////////////////////////////////////////
// The above is collective. Write by other means into the binary record
///////////////////////////////////////////
std::string format = getFormatString<vobj>();
BinarySimpleMunger<sobj,sobj> munge;
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
// fseek(File,0,SEEK_END); offset = ftello(File);std::cout << " offset now "<<offset << std::endl;
err=limeWriterCloseRecord(LimeW); assert(err>=0);
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb);
///////////////////////////////////////////
// Wind forward and close the record
///////////////////////////////////////////
if ( boss_node ) {
fseek(File,0,SEEK_END);
uint64_t offset2 = ftello(File); // std::cout << " now at offset "<<offset2 << std::endl;
assert( (offset2-offset1) == PayloadSize);
}
/////////////////////////////////////////////////////////////
// Check MPI-2 I/O did what we expect to file
/////////////////////////////////////////////////////////////
if ( boss_node ) {
err=limeWriterCloseRecord(LimeW); assert(err>=0);
}
////////////////////////////////////////
// Write checksum element, propagaing forward from the BinaryIO
// Always pair a checksum with a binary object, and close message
@ -380,26 +446,32 @@ class GridLimeWriter : public BinaryIO {
std::stringstream streamb; streamb << std::hex << scidac_csumb;
checksum.suma= streama.str();
checksum.sumb= streamb.str();
// std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl;
writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
if ( boss_node ) {
writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
}
}
};
class ScidacWriter : public GridLimeWriter {
public:
template<class SerialisableUserFile>
void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
{
scidacFile _scidacFile(grid);
writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
}
ScidacWriter(bool isboss =true ) : GridLimeWriter(isboss) { };
template<class SerialisableUserFile>
void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
{
scidacFile _scidacFile(grid);
if ( this->boss_node ) {
writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
}
}
////////////////////////////////////////////////
// Write generic lattice field in scidac format
////////////////////////////////////////////////
template <class vobj, class userRecord>
void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord)
void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord,
const unsigned int recordScientificPrec = 0)
{
GridBase * grid = field._grid;
@ -415,9 +487,12 @@ class ScidacWriter : public GridLimeWriter {
//////////////////////////////////////////////
// Fill the Lime file record by record
//////////////////////////////////////////////
writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
if ( this->boss_node ) {
writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML), recordScientificPrec);
writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
}
// Collective call
writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
}
};
@ -484,6 +559,8 @@ class ScidacReader : public GridLimeReader {
class IldgWriter : public ScidacWriter {
public:
IldgWriter(bool isboss) : ScidacWriter(isboss) {};
///////////////////////////////////
// A little helper
@ -568,7 +645,6 @@ class IldgWriter : public ScidacWriter {
writeLimeIldgLFN(header.ildg_lfn); // rec
writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
// limeDestroyWriter(LimeW);
fclose(File);
}
};
@ -644,9 +720,11 @@ class IldgReader : public GridLimeReader {
//////////////////////////////////
// ILDG format record
std::string xmlstring(&xmlc[0]);
if ( !strncmp(limeReaderType(LimeR), ILDG_FORMAT,strlen(ILDG_FORMAT)) ) {
XmlReader RD(&xmlc[0],"");
XmlReader RD(xmlstring, true, "");
read(RD,"ildgFormat",ildgFormat_);
if ( ildgFormat_.precision == 64 ) format = std::string("IEEE64BIG");
@ -661,13 +739,13 @@ class IldgReader : public GridLimeReader {
}
if ( !strncmp(limeReaderType(LimeR), ILDG_DATA_LFN,strlen(ILDG_DATA_LFN)) ) {
FieldMetaData_.ildg_lfn = std::string(&xmlc[0]);
FieldMetaData_.ildg_lfn = xmlstring;
found_ildgLFN = 1;
}
if ( !strncmp(limeReaderType(LimeR), GRID_FORMAT,strlen(ILDG_FORMAT)) ) {
XmlReader RD(&xmlc[0],"");
XmlReader RD(xmlstring, true, "");
read(RD,"FieldMetaData",FieldMetaData_);
format = FieldMetaData_.floating_point;
@ -681,18 +759,17 @@ class IldgReader : public GridLimeReader {
}
if ( !strncmp(limeReaderType(LimeR), SCIDAC_RECORD_XML,strlen(SCIDAC_RECORD_XML)) ) {
std::string xmls(&xmlc[0]);
// is it a USQCD info field
if ( xmls.find(std::string("usqcdInfo")) != std::string::npos ) {
if ( xmlstring.find(std::string("usqcdInfo")) != std::string::npos ) {
// std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl;
XmlReader RD(&xmlc[0],"");
XmlReader RD(xmlstring, true, "");
read(RD,"usqcdInfo",usqcdInfo_);
found_usqcdInfo = 1;
}
}
if ( !strncmp(limeReaderType(LimeR), SCIDAC_CHECKSUM,strlen(SCIDAC_CHECKSUM)) ) {
XmlReader RD(&xmlc[0],"");
XmlReader RD(xmlstring, true, "");
read(RD,"scidacChecksum",scidacChecksum_);
found_scidacChecksum = 1;
}

View File

@ -136,8 +136,9 @@ struct scidacRecord : Serializable {
int, typesize,
int, datacount);
scidacRecord() { version =1.0; }
scidacRecord()
: version(1.0), recordtype(0), colors(0), spins(0), typesize(0), datacount(0)
{}
};
////////////////////////

View File

@ -81,18 +81,16 @@ namespace Grid {
std::string, creation_date,
std::string, archive_date,
std::string, floating_point);
FieldMetaData(void) {
nd=4;
dimension.resize(4);
boundary.resize(4);
scidac_checksuma=0;
scidac_checksumb=0;
checksum=0;
}
// WARNING: non-initialised values might lead to twisted parallel IO
// issues, std::string are fine because they initliase to size 0
// as per C++ standard.
FieldMetaData(void)
: nd(4), dimension(4,0), boundary(4, ""), data_start(0),
link_trace(0.), plaquette(0.), checksum(0),
scidac_checksuma(0), scidac_checksumb(0), sequence_number(0)
{}
};
namespace QCD {
using namespace Grid;

View File

@ -57,7 +57,7 @@ namespace Grid {
// for the header-reader
static inline int readHeader(std::string file,GridBase *grid, FieldMetaData &field)
{
int offset=0;
uint64_t offset=0;
std::map<std::string,std::string> header;
std::string line;
@ -139,7 +139,7 @@ namespace Grid {
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
GridBase *grid = Umu._grid;
int offset = readHeader(file,Umu._grid,header);
uint64_t offset = readHeader(file,Umu._grid,header);
FieldMetaData clone(header);
@ -236,21 +236,25 @@ namespace Grid {
GaugeStatistics(Umu,header);
MachineCharacteristics(header);
int offset;
truncate(file);
uint64_t offset;
// Sod it -- always write 3x3 double
header.floating_point = std::string("IEEE64BIG");
header.data_type = std::string("4D_SU3_GAUGE_3x3");
GaugeSimpleUnmunger<fobj3D,sobj> munge;
offset = writeHeader(header,file);
if ( grid->IsBoss() ) {
truncate(file);
offset = writeHeader(header,file);
}
grid->Broadcast(0,(void *)&offset,sizeof(offset));
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
nersc_csum,scidac_csuma,scidac_csumb);
header.checksum = nersc_csum;
writeHeader(header,file);
if ( grid->IsBoss() ) {
writeHeader(header,file);
}
std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum "
<<std::hex<<header.checksum
@ -278,7 +282,7 @@ namespace Grid {
header.plaquette=0.0;
MachineCharacteristics(header);
int offset;
uint64_t offset;
#ifdef RNG_RANLUX
header.floating_point = std::string("UINT64");
@ -293,12 +297,18 @@ namespace Grid {
header.data_type = std::string("SITMO");
#endif
truncate(file);
offset = writeHeader(header,file);
if ( grid->IsBoss() ) {
truncate(file);
offset = writeHeader(header,file);
}
grid->Broadcast(0,(void *)&offset,sizeof(offset));
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);
header.checksum = nersc_csum;
offset = writeHeader(header,file);
if ( grid->IsBoss() ) {
offset = writeHeader(header,file);
}
std::cout<<GridLogMessage
<<"Written NERSC RNG STATE "<<file<< " checksum "
@ -313,7 +323,7 @@ namespace Grid {
GridBase *grid = parallel._grid;
int offset = readHeader(file,grid,header);
uint64_t offset = readHeader(file,grid,header);
FieldMetaData clone(header);

View File

@ -49,7 +49,8 @@ inline double usecond(void) {
typedef std::chrono::system_clock GridClock;
typedef std::chrono::time_point<GridClock> GridTimePoint;
typedef std::chrono::milliseconds GridTime;
typedef std::chrono::milliseconds GridMillisecs;
typedef std::chrono::microseconds GridTime;
typedef std::chrono::microseconds GridUsecs;
inline std::ostream& operator<< (std::ostream & stream, const std::chrono::milliseconds & time)
@ -57,6 +58,11 @@ inline std::ostream& operator<< (std::ostream & stream, const std::chrono::milli
stream << time.count()<<" ms";
return stream;
}
inline std::ostream& operator<< (std::ostream & stream, const std::chrono::microseconds & time)
{
stream << time.count()<<" usec";
return stream;
}
class GridStopWatch {
private:
@ -96,6 +102,9 @@ public:
assert(running == false);
return (uint64_t) accumulator.count();
}
bool isRunning(void){
return running;
}
};
}

View File

@ -1,7 +1,7 @@
/**
* pugixml parser - version 1.6
* pugixml parser - version 1.9
* --------------------------------------------------------
* Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
* Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
* Report bugs and download new versions at http://pugixml.org/
*
* This library is distributed under the MIT License. See notice at the end
@ -17,6 +17,9 @@
// Uncomment this to enable wchar_t mode
// #define PUGIXML_WCHAR_MODE
// Uncomment this to enable compact mode
// #define PUGIXML_COMPACT
// Uncomment this to disable XPath
// #define PUGIXML_NO_XPATH
@ -46,7 +49,7 @@
#endif
/**
* Copyright (c) 2006-2015 Arseny Kapoulkine
* Copyright (c) 2006-2018 Arseny Kapoulkine
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@ -59,7 +62,7 @@
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,7 @@
/**
* pugixml parser - version 1.6
* pugixml parser - version 1.9
* --------------------------------------------------------
* Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
* Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
* Report bugs and download new versions at http://pugixml.org/
*
* This library is distributed under the MIT License. See notice at the end
@ -13,7 +13,7 @@
#ifndef PUGIXML_VERSION
// Define version macro; evaluates to major * 100 + minor so that it's safe to use in less-than comparisons
# define PUGIXML_VERSION 160
# define PUGIXML_VERSION 190
#endif
// Include user configuration file (this can define various configuration macros)
@ -72,6 +72,44 @@
# endif
#endif
// If the platform is known to have move semantics support, compile move ctor/operator implementation
#ifndef PUGIXML_HAS_MOVE
# if __cplusplus >= 201103
# define PUGIXML_HAS_MOVE
# elif defined(_MSC_VER) && _MSC_VER >= 1600
# define PUGIXML_HAS_MOVE
# endif
#endif
// If C++ is 2011 or higher, add 'noexcept' specifiers
#ifndef PUGIXML_NOEXCEPT
# if __cplusplus >= 201103
# define PUGIXML_NOEXCEPT noexcept
# elif defined(_MSC_VER) && _MSC_VER >= 1900
# define PUGIXML_NOEXCEPT noexcept
# else
# define PUGIXML_NOEXCEPT
# endif
#endif
// Some functions can not be noexcept in compact mode
#ifdef PUGIXML_COMPACT
# define PUGIXML_NOEXCEPT_IF_NOT_COMPACT
#else
# define PUGIXML_NOEXCEPT_IF_NOT_COMPACT PUGIXML_NOEXCEPT
#endif
// If C++ is 2011 or higher, add 'override' qualifiers
#ifndef PUGIXML_OVERRIDE
# if __cplusplus >= 201103
# define PUGIXML_OVERRIDE override
# elif defined(_MSC_VER) && _MSC_VER >= 1700
# define PUGIXML_OVERRIDE override
# else
# define PUGIXML_OVERRIDE
# endif
#endif
// Character interface macros
#ifdef PUGIXML_WCHAR_MODE
# define PUGIXML_TEXT(t) L ## t
@ -133,13 +171,13 @@ namespace pugi
// This flag determines if EOL characters are normalized (converted to #xA) during parsing. This flag is on by default.
const unsigned int parse_eol = 0x0020;
// This flag determines if attribute values are normalized using CDATA normalization rules during parsing. This flag is on by default.
const unsigned int parse_wconv_attribute = 0x0040;
// This flag determines if attribute values are normalized using NMTOKENS normalization rules during parsing. This flag is off by default.
const unsigned int parse_wnorm_attribute = 0x0080;
// This flag determines if document declaration (node_declaration) is added to the DOM tree. This flag is off by default.
const unsigned int parse_declaration = 0x0100;
@ -158,6 +196,11 @@ namespace pugi
// is a valid document. This flag is off by default.
const unsigned int parse_fragment = 0x1000;
// This flag determines if plain character data is be stored in the parent element's value. This significantly changes the structure of
// the document; this flag is only recommended for parsing documents with many PCDATA nodes in memory-constrained environments.
// This flag is off by default.
const unsigned int parse_embed_pcdata = 0x2000;
// The default parsing mode.
// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
@ -184,16 +227,16 @@ namespace pugi
};
// Formatting flags
// Indent the nodes that are written to output stream with as many indentation strings as deep the node is in DOM tree. This flag is on by default.
const unsigned int format_indent = 0x01;
// Write encoding-specific BOM to the output stream. This flag is off by default.
const unsigned int format_write_bom = 0x02;
// Use raw output mode (no indentation and no line breaks are written). This flag is off by default.
const unsigned int format_raw = 0x04;
// Omit default XML declaration even if there is no declaration in the document. This flag is off by default.
const unsigned int format_no_declaration = 0x08;
@ -206,6 +249,9 @@ namespace pugi
// Write every attribute on a new line with appropriate indentation. This flag is off by default.
const unsigned int format_indent_attributes = 0x40;
// Don't output empty element tags, instead writing an explicit start and end tag even if there are no children. This flag is off by default.
const unsigned int format_no_empty_element_tags = 0x80;
// The default set of formatting flags.
// Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none.
const unsigned int format_default = format_indent;
@ -225,7 +271,7 @@ namespace pugi
class xml_node;
class xml_text;
#ifndef PUGIXML_NO_XPATH
class xpath_node;
class xpath_node_set;
@ -268,7 +314,7 @@ namespace pugi
// Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio
xml_writer_file(void* file);
virtual void write(const void* data, size_t size);
virtual void write(const void* data, size_t size) PUGIXML_OVERRIDE;
private:
void* file;
@ -283,7 +329,7 @@ namespace pugi
xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream);
xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream);
virtual void write(const void* data, size_t size);
virtual void write(const void* data, size_t size) PUGIXML_OVERRIDE;
private:
std::basic_ostream<char, std::char_traits<char> >* narrow_stream;
@ -299,13 +345,13 @@ namespace pugi
private:
xml_attribute_struct* _attr;
typedef void (*unspecified_bool_type)(xml_attribute***);
public:
// Default constructor. Constructs an empty attribute.
xml_attribute();
// Constructs attribute from internal pointer
explicit xml_attribute(xml_attribute_struct* attr);
@ -354,6 +400,8 @@ namespace pugi
// Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
bool set_value(int rhs);
bool set_value(unsigned int rhs);
bool set_value(long rhs);
bool set_value(unsigned long rhs);
bool set_value(double rhs);
bool set_value(float rhs);
bool set_value(bool rhs);
@ -367,6 +415,8 @@ namespace pugi
xml_attribute& operator=(const char_t* rhs);
xml_attribute& operator=(int rhs);
xml_attribute& operator=(unsigned int rhs);
xml_attribute& operator=(long rhs);
xml_attribute& operator=(unsigned long rhs);
xml_attribute& operator=(double rhs);
xml_attribute& operator=(float rhs);
xml_attribute& operator=(bool rhs);
@ -417,7 +467,7 @@ namespace pugi
// Borland C++ workaround
bool operator!() const;
// Comparison operators (compares wrapped node pointers)
bool operator==(const xml_node& r) const;
bool operator!=(const xml_node& r) const;
@ -438,7 +488,7 @@ namespace pugi
// Get node value, or "" if node is empty or it has no value
// Note: For <node>text</node> node.value() does not return "text"! Use child_value() or text() methods to access text inside nodes.
const char_t* value() const;
// Get attribute list
xml_attribute first_attribute() const;
xml_attribute last_attribute() const;
@ -450,7 +500,7 @@ namespace pugi
// Get next/previous sibling in the children list of the parent node
xml_node next_sibling() const;
xml_node previous_sibling() const;
// Get parent node
xml_node parent() const;
@ -478,7 +528,7 @@ namespace pugi
// Set node name/value (returns false if node is empty, there is not enough memory, or node can not have name/value)
bool set_name(const char_t* rhs);
bool set_value(const char_t* rhs);
// Add attribute with specified name. Returns added attribute, or empty attribute on errors.
xml_attribute append_attribute(const char_t* name);
xml_attribute prepend_attribute(const char_t* name);
@ -532,11 +582,11 @@ namespace pugi
template <typename Predicate> xml_attribute find_attribute(Predicate pred) const
{
if (!_root) return xml_attribute();
for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute())
if (pred(attrib))
return attrib;
return xml_attribute();
}
@ -544,11 +594,11 @@ namespace pugi
template <typename Predicate> xml_node find_child(Predicate pred) const
{
if (!_root) return xml_node();
for (xml_node node = first_child(); node; node = node.next_sibling())
if (pred(node))
return node;
return xml_node();
}
@ -558,7 +608,7 @@ namespace pugi
if (!_root) return xml_node();
xml_node cur = first_child();
while (cur._root && cur._root != _root)
{
if (pred(cur)) return cur;
@ -590,7 +640,7 @@ namespace pugi
// Recursively traverse subtree with xml_tree_walker
bool traverse(xml_tree_walker& walker);
#ifndef PUGIXML_NO_XPATH
// Select single node by evaluating XPath query. Returns first node from the resulting node set.
xpath_node select_node(const char_t* query, xpath_variable_set* variables = 0) const;
@ -601,11 +651,11 @@ namespace pugi
xpath_node_set select_nodes(const xpath_query& query) const;
// (deprecated: use select_node instead) Select single node by evaluating XPath query.
xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
xpath_node select_single_node(const xpath_query& query) const;
PUGIXML_DEPRECATED xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
PUGIXML_DEPRECATED xpath_node select_single_node(const xpath_query& query) const;
#endif
// Print subtree using a writer object
void print(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
@ -701,6 +751,8 @@ namespace pugi
// Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
bool set(int rhs);
bool set(unsigned int rhs);
bool set(long rhs);
bool set(unsigned long rhs);
bool set(double rhs);
bool set(float rhs);
bool set(bool rhs);
@ -714,6 +766,8 @@ namespace pugi
xml_text& operator=(const char_t* rhs);
xml_text& operator=(int rhs);
xml_text& operator=(unsigned int rhs);
xml_text& operator=(long rhs);
xml_text& operator=(unsigned long rhs);
xml_text& operator=(double rhs);
xml_text& operator=(float rhs);
xml_text& operator=(bool rhs);
@ -867,11 +921,11 @@ namespace pugi
private:
int _depth;
protected:
// Get current traversal depth
int depth() const;
public:
xml_tree_walker();
virtual ~xml_tree_walker();
@ -942,13 +996,14 @@ namespace pugi
char_t* _buffer;
char _memory[192];
// Non-copyable semantics
xml_document(const xml_document&);
const xml_document& operator=(const xml_document&);
xml_document& operator=(const xml_document&);
void create();
void destroy();
void _create();
void _destroy();
void _move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
public:
// Default constructor, makes empty document
@ -957,6 +1012,12 @@ namespace pugi
// Destructor, invalidates all node/attribute handles to this document
~xml_document();
#ifdef PUGIXML_HAS_MOVE
// Move semantics support
xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
xml_document& operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
#endif
// Removes all nodes, leaving the empty document
void reset();
@ -970,7 +1031,7 @@ namespace pugi
#endif
// (deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied.
xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
PUGIXML_DEPRECATED xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
// Load document from zero-terminated string. No encoding conversions are applied.
xml_parse_result load_string(const char_t* contents, unsigned int options = parse_default);
@ -1051,7 +1112,7 @@ namespace pugi
// Non-copyable semantics
xpath_variable(const xpath_variable&);
xpath_variable& operator=(const xpath_variable&);
public:
// Get variable name
const char_t* name() const;
@ -1095,10 +1156,10 @@ namespace pugi
xpath_variable_set(const xpath_variable_set& rhs);
xpath_variable_set& operator=(const xpath_variable_set& rhs);
#if __cplusplus >= 201103
#ifdef PUGIXML_HAS_MOVE
// Move semantics support
xpath_variable_set(xpath_variable_set&& rhs);
xpath_variable_set& operator=(xpath_variable_set&& rhs);
xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT;
xpath_variable_set& operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT;
#endif
// Add a new variable or get the existing one, if the types match
@ -1139,29 +1200,29 @@ namespace pugi
// Destructor
~xpath_query();
#if __cplusplus >= 201103
#ifdef PUGIXML_HAS_MOVE
// Move semantics support
xpath_query(xpath_query&& rhs);
xpath_query& operator=(xpath_query&& rhs);
xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT;
xpath_query& operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT;
#endif
// Get query expression return type
xpath_value_type return_type() const;
// Evaluate expression as boolean value in the specified context; performs type conversion if necessary.
// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
bool evaluate_boolean(const xpath_node& n) const;
// Evaluate expression as double value in the specified context; performs type conversion if necessary.
// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
double evaluate_number(const xpath_node& n) const;
#ifndef PUGIXML_NO_STL
// Evaluate expression as string value in the specified context; performs type conversion if necessary.
// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
string_t evaluate_string(const xpath_node& n) const;
#endif
// Evaluate expression as string value in the specified context; performs type conversion if necessary.
// At most capacity characters are written to the destination buffer, full result size is returned (includes terminating zero).
// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
@ -1188,7 +1249,7 @@ namespace pugi
// Borland C++ workaround
bool operator!() const;
};
#ifndef PUGIXML_NO_EXCEPTIONS
// XPath exception class
class PUGIXML_CLASS xpath_exception: public std::exception
@ -1201,26 +1262,26 @@ namespace pugi
explicit xpath_exception(const xpath_parse_result& result);
// Get error message
virtual const char* what() const throw();
virtual const char* what() const throw() PUGIXML_OVERRIDE;
// Get parse result
const xpath_parse_result& result() const;
};
#endif
// XPath node class (either xml_node or xml_attribute)
class PUGIXML_CLASS xpath_node
{
private:
xml_node _node;
xml_attribute _attribute;
typedef void (*unspecified_bool_type)(xpath_node***);
public:
// Default constructor; constructs empty XPath node
xpath_node();
// Construct XPath node from XML node/attribute
xpath_node(const xml_node& node);
xpath_node(const xml_attribute& attribute, const xml_node& parent);
@ -1228,13 +1289,13 @@ namespace pugi
// Get node/attribute, if any
xml_node node() const;
xml_attribute attribute() const;
// Get parent of contained node/attribute
xml_node parent() const;
// Safe bool conversion operator
operator unspecified_bool_type() const;
// Borland C++ workaround
bool operator!() const;
@ -1260,13 +1321,13 @@ namespace pugi
type_sorted, // Sorted by document order (ascending)
type_sorted_reverse // Sorted by document order (descending)
};
// Constant iterator type
typedef const xpath_node* const_iterator;
// We define non-constant iterator to be the same as constant iterator so that various generic algorithms (i.e. boost foreach) work
typedef const xpath_node* iterator;
// Default constructor. Constructs empty set.
xpath_node_set();
@ -1275,49 +1336,49 @@ namespace pugi
// Destructor
~xpath_node_set();
// Copy constructor/assignment operator
xpath_node_set(const xpath_node_set& ns);
xpath_node_set& operator=(const xpath_node_set& ns);
#if __cplusplus >= 201103
#ifdef PUGIXML_HAS_MOVE
// Move semantics support
xpath_node_set(xpath_node_set&& rhs);
xpath_node_set& operator=(xpath_node_set&& rhs);
xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT;
xpath_node_set& operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT;
#endif
// Get collection type
type_t type() const;
// Get collection size
size_t size() const;
// Indexing operator
const xpath_node& operator[](size_t index) const;
// Collection iterators
const_iterator begin() const;
const_iterator end() const;
// Sort the collection in ascending/descending order by document order
void sort(bool reverse = false);
// Get first node in the collection by document order
xpath_node first() const;
// Check if collection is empty
bool empty() const;
private:
type_t _type;
xpath_node _storage;
xpath_node* _begin;
xpath_node* _end;
void _assign(const_iterator begin, const_iterator end, type_t type);
void _move(xpath_node_set& rhs);
void _move(xpath_node_set& rhs) PUGIXML_NOEXCEPT;
};
#endif
@ -1325,7 +1386,7 @@ namespace pugi
// Convert wide string to UTF8
std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const wchar_t* str);
std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >& str);
// Convert UTF8 to wide string
std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const char* str);
std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const std::basic_string<char, std::char_traits<char>, std::allocator<char> >& str);
@ -1333,13 +1394,13 @@ namespace pugi
// Memory allocation function interface; returns pointer to allocated memory or NULL on failure
typedef void* (*allocation_function)(size_t size);
// Memory deallocation function interface
typedef void (*deallocation_function)(void* ptr);
// Override default memory management functions. All subsequent allocations/deallocations will be performed via supplied functions.
void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate);
// Get current memory management functions
allocation_function PUGIXML_FUNCTION get_memory_allocation_function();
deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function();
@ -1375,7 +1436,7 @@ namespace std
#endif
/**
* Copyright (c) 2006-2015 Arseny Kapoulkine
* Copyright (c) 2006-2018 Arseny Kapoulkine
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@ -1388,7 +1449,7 @@ namespace std
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND

View File

@ -1,6 +1,6 @@
pugixml 1.6 - an XML processing library
pugixml 1.9 - an XML processing library
Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
Report bugs and download new versions at http://pugixml.org/
This is the distribution of pugixml, which is a C++ XML processing library,
@ -28,7 +28,7 @@ The distribution contains the following folders:
This library is distributed under the MIT License:
Copyright (c) 2006-2015 Arseny Kapoulkine
Copyright (c) 2006-2018 Arseny Kapoulkine
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation

View File

@ -90,17 +90,20 @@ namespace QCD {
// That probably makes for GridRedBlack4dCartesian grid.
// s,sp,c,spc,lc
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
template<typename vtype> using iSpinColourSpinColourMatrix = iScalar<iMatrix<iMatrix<iMatrix<iMatrix<vtype, Nc>, Ns>, Nc>, Ns> >;
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >;
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
@ -127,10 +130,28 @@ namespace QCD {
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
typedef iSpinColourMatrix<ComplexF > SpinColourMatrixF;
typedef iSpinColourMatrix<ComplexD > SpinColourMatrixD;
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
typedef iSpinColourMatrix<vComplexF> vSpinColourMatrixF;
typedef iSpinColourMatrix<vComplexD> vSpinColourMatrixD;
// SpinColourSpinColour matrix
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
// SpinColourSpinColour matrix
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
// LorentzColour
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
@ -229,6 +250,9 @@ namespace QCD {
typedef Lattice<vSpinColourMatrixF> LatticeSpinColourMatrixF;
typedef Lattice<vSpinColourMatrixD> LatticeSpinColourMatrixD;
typedef Lattice<vSpinColourSpinColourMatrix> LatticeSpinColourSpinColourMatrix;
typedef Lattice<vSpinColourSpinColourMatrixF> LatticeSpinColourSpinColourMatrixF;
typedef Lattice<vSpinColourSpinColourMatrixD> LatticeSpinColourSpinColourMatrixD;
typedef Lattice<vLorentzColourMatrix> LatticeLorentzColourMatrix;
typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;

Some files were not shown because too many files have changed in this diff Show More