1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-10-24 17:54:47 +01:00

Compare commits

..

731 Commits

Author SHA1 Message Date
de8b2dcca3 Hadrons: faster A2A matrix load 2019-01-11 16:12:49 +00:00
efe000341d Hadrons: contractor fixes 2019-01-11 16:12:16 +00:00
11086c5c25 Hadrons: first stab at MPI contractor 2019-01-10 16:29:57 +00:00
Peter Boyle
91a7fe247b Merge branch 'DanielRichtmann-feature/wilsonmg' into develop 2019-01-02 14:40:31 +00:00
Peter Boyle
8a1be021d3 Merge branch 'feature/wilsonmg' of https://github.com/DanielRichtmann/Grid into DanielRichtmann-feature/wilsonmg 2019-01-02 14:39:59 +00:00
fd66325321 pure QED test and copyright update 2018-12-14 17:39:11 +00:00
c637c0c48c James H.'s code for general size Wilson loops 2018-12-14 17:37:09 +00:00
c4b472176c Photon code fix 2018-12-14 17:36:38 +00:00
856476a890 big cleanup of the Photon class + QED Coulomb gauge 2018-12-13 21:52:38 +00:00
c509bd3fe2 Merge branch 'feature/resilient-io' into develop 2018-12-01 12:57:43 +00:00
49b934310b resilient I/O fix 2018-11-27 20:17:09 +00:00
01e8cf5017 Merge branch 'develop' into feature/resilient-io 2018-11-27 19:09:59 +00:00
12f4499502 HDF5 serialiser fix 2018-11-27 19:09:50 +00:00
05aec72887 Hadrons: application parameter for resilient I/O 2018-11-27 18:46:43 +00:00
136d3802cb binary parallel IO can do read tests and eventually re-write in case of failure 2018-11-27 18:38:24 +00:00
a4c55406ed checksummed HDF5 IO 2018-11-27 17:43:19 +00:00
c7f33ca2a8 Revert "Hadrons: A2A vector write can fail and retry"
This reverts commit 10fc263675.
2018-11-27 17:27:26 +00:00
0e3035c51d Revert "optional non-fatal checksum fail in Lime lattice read (with error codes)"
This reverts commit bccfd4cbb3.
2018-11-27 17:27:20 +00:00
10fc263675 Hadrons: A2A vector write can fail and retry 2018-11-26 19:47:03 +00:00
bccfd4cbb3 optional non-fatal checksum fail in Lime lattice read (with error codes) 2018-11-26 19:45:51 +00:00
0b50d4a328 log time fix 2018-11-23 15:51:27 +00:00
e232257cb6 Hadrons: A2AAslashVector modul cleaning and renaming 2018-11-22 19:43:49 +00:00
09451b5e48 Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-11-22 15:45:24 +00:00
6364aa8acf Merge branch 'feature/contractor' into develop 2018-11-22 15:44:46 +00:00
b9e84ecab7 Hadrons: minor code cleaning 2018-11-22 15:44:30 +00:00
41032fef44 Optional RW mode for Hdf5Reader 2018-11-21 18:36:50 +00:00
d77bc88170 Optional support for faster CRC32C checksum through Intel IPP 2018-11-19 17:21:53 +00:00
494b3c9e57 Hadrons: contractor more IO fix 2018-11-19 16:26:53 +00:00
2ba19a9e07 Hadrons: contractor IO fix 2018-11-19 16:17:51 +00:00
5d7cc29eaf Hadrons: contractor token @traj@ for trajectory number in input file 2018-11-19 16:04:01 +00:00
f22a27d7f9 Hadrons: contractor trajectory loop and file output 2018-11-19 15:45:04 +00:00
Peter Boyle
33a0bbb17b Const correctness 2018-11-19 11:27:57 +00:00
f592ec8baa Hadrons: contractor performance fix 2018-11-16 20:59:49 +00:00
8b007b5c24 Hadrons: remove the use of OpenMP reductions 2018-11-16 20:00:29 +00:00
9bb170576d Merge pull request #177 from guelpers/develop
Hadrons module to electrify a gauge
2018-11-14 16:04:09 +00:00
Vera Guelpers
a7e3977b75 Merge remote-tracking branch 'upstream/develop' into develop 2018-11-13 14:56:23 +00:00
Vera Guelpers
995f20e45d Hadrons: some renamings 2018-11-13 14:54:48 +00:00
Vera Guelpers
d058b4e681 Merge branch 'feature/seqA2A' into develop 2018-11-13 13:27:24 +00:00
8e0d2f3402 Hadrons: support for twisted boundary conditions 2018-11-12 17:16:18 +00:00
2ac57370f1 Hadrons: contractor translation average normalisation 2018-11-12 16:04:35 +00:00
344e832a4e Hadrons: contractor faster transpose and finer timings 2018-11-12 15:59:54 +00:00
cfe281f1a4 Hadrons: diskvectors measure hash performance in debug output 2018-11-12 15:59:11 +00:00
f5422c7334 Hadrons: more contractor instrumentation 2018-11-09 16:23:53 +00:00
68c76a410d Hadrons: more contractor improvements 2018-11-08 19:24:29 +00:00
69b6ba0a73 Hadrons: contractor fixes and improvements 2018-11-08 18:46:28 +00:00
65349b07a7 Hadrons: simpler A2A perf functions 2018-11-08 18:44:44 +00:00
7cd9914f0e Hadrons: automatically resize output in MKL A2A matrix kernels 2018-11-08 17:40:57 +00:00
Peter Boyle
f3f24b3017 Optional Twisted BC's added, in "DoubleStore" for WilsonImpl.
Untested but doesn't affect answers when twists are all zero. The zero is the default behaviour
for ImplParams.
2018-11-08 12:55:25 +00:00
Vera Guelpers
8ef4657805 Merge remote-tracking branch 'upstream/develop' into feature/seqA2A 2018-11-08 09:00:06 +00:00
Vera Guelpers
78c1086f8b Hadrons: sequential Aslash insertion and propagator on A2A vector 2018-11-08 08:58:09 +00:00
Peter Boyle
68c13045d6 Added a test for Felix and Michael to look at 2018-11-07 23:40:15 +00:00
Peter Boyle
e9b6f58fdc Allow shrinking machine in orthog direction for extract slice local 2018-11-07 23:39:18 +00:00
Peter Boyle
839605c45c Verbose reduce 2018-11-07 23:38:46 +00:00
1ff1422e07 Hadrons: contractor lighter output 2018-11-07 20:02:53 +00:00
32376f0437 Hadrons: contractor performances 2018-11-07 19:59:11 +00:00
0c6e581336 Hadrons: first stab at general contraction code, needs serious testing 2018-11-07 19:16:55 +00:00
Vera Guelpers
e0a79a5bbf Hadrons: PR#177: Electrify gauge: Single Precision fix 2018-11-07 15:01:22 +00:00
Vera Guelpers
4c016cc1a4 Merge remote-tracking branch 'upstream/develop' into develop 2018-11-07 14:03:12 +00:00
Peter Boyle
2205b1e63e Add CXX to grid-config 2018-11-07 13:32:46 +00:00
Peter Boyle
6f421c7a6f Block solver in the SchurRedBlack plus timing report cleaner 2018-11-07 12:26:56 +00:00
Peter Boyle
b62b9ac214 Patch to broken assertion 2018-11-06 22:18:17 +00:00
88d9922e4f Hadrons: fast A2A matrix contraction kernels 2018-11-06 19:49:09 +00:00
9734e3ee58 Hadrons: (somewhat) faster build 2018-11-06 19:47:41 +00:00
Peter Boyle
8c3a599148 Block solver test 2018-11-06 16:44:58 +00:00
Azusa Yamaguchi
4a47b11876 Block CG improvements to develop 2018-11-06 12:49:05 +00:00
Vera Guelpers
f1382cf81d Merge remote-tracking branch 'upstream/develop' into develop 2018-11-06 10:29:52 +00:00
Vera Guelpers
85699daef2 Hadrons: Module to electrify a gauge field 2018-11-06 10:27:18 +00:00
1651111d18 Hadrons: final, portable form of the contractor benchmark 2018-11-05 21:29:13 +00:00
1ed4ea344d Merge branch 'develop' into feature/contractor 2018-11-05 11:42:02 +00:00
8f514ae550 Hadrons: Lanczos 32bit IO 2018-11-05 11:41:10 +00:00
4a7415e83c Hadrons: contractor benchmark update 2018-10-23 21:00:54 +01:00
0ffcfea724 Hadrons: contractor benchmark 2018-10-23 17:08:16 +01:00
febe41cc1d Hadrons: improvement on PR #176 2018-10-23 12:48:15 +01:00
62173395b8 Merge pull request #176 from guelpers/develop
Hadrons: full volume noise source for A2A
2018-10-23 12:29:35 +01:00
b48611b80f Merge branch 'develop' into feature/contractor 2018-10-22 18:27:18 +01:00
6b559d68aa Hadrons: eigenpack converter can do test reads 2018-10-22 11:10:18 +01:00
1982cc58dd Hadrons: A2A vectors I/O filename fix 2018-10-21 01:20:05 +01:00
2e2e5ce596 SciDAC I/O print data checksums 2018-10-19 20:36:32 +01:00
7d84dca8e9 Merge branch 'develop' into feature/contractor 2018-10-18 23:46:58 +01:00
2d3916418e Hadrons: more precision fix 2018-10-18 23:45:13 +01:00
21304e2139 Hadrons: fix to allow single-prec build again 2018-10-18 19:58:50 +01:00
7b850eb48b Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-10-18 19:46:25 +01:00
a3ace57e01 Hadrons copyright update 2018-10-18 19:46:11 +01:00
b1c3cbe35e Hadrons: A2A vectors I/O 2018-10-18 19:44:58 +01:00
f31d6bfec2 Hadrons: contractor cleaning and better error check 2018-10-18 17:50:35 +01:00
a7cfa26901 Hadrons: reverse A2A matrix load for better DiskVector cache reuse 2018-10-18 17:50:16 +01:00
f333f3e575 Hadrons: DiskVector save-on-eviction and faster CRC32 for Eigen matrices 2018-10-18 17:48:25 +01:00
2b4e253473 Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-10-17 20:28:20 +01:00
0ba3d469c7 Benchmark IO in single and double precision 2018-10-17 20:27:34 +01:00
f709329d96 Hadrons: first version of a contractor utility 2018-10-17 20:26:48 +01:00
f05b25dae4 Hadrons: A2AMatrix load 2018-10-17 20:26:26 +01:00
3e1d268fa3 Hadrons: DiskVector optimisation 2018-10-17 20:25:32 +01:00
Vera Guelpers
109c74bed8 Hadrons: full volume noise source for A2A 2018-10-16 14:56:12 +01:00
3023287fd9 Hadrons: 3-index RO access to Eigen disk vector 2018-10-16 14:44:14 +01:00
b3d6805638 Merge branch 'feature/contractor' into develop 2018-10-16 11:29:37 +01:00
291bc2a1f0 IO benchmark on a list of directories 2018-10-15 17:25:08 +01:00
2f368c33fc Hadrons: copyright update 2018-10-15 15:51:45 +01:00
9592115341 Hadrons: NPR and gauge fixing linking fix 2018-10-15 15:49:42 +01:00
Peter Boyle
24c07694bc Mixed precision now supported in MADWF 2018-10-14 00:22:52 +01:00
Peter Boyle
f0229025e2 MADWF working across a range of actions 2018-10-13 19:55:03 +01:00
Peter Boyle
6de9a45a09 NPR first cut by Julia Kettle 2018-10-12 11:00:58 +01:00
Peter Boyle
03c3d495a2 First cut (non functional NPR code) developed by Julia Kettle 2018-10-12 10:59:33 +01:00
Peter Boyle
49f25e08e8 PauliVillars based 4D -> 5D reconstruction with Fourier Accelerated PV inverse
by Christoph. Differs from the one by Rudy in BFM since it vectorises the twisted
4D solves in pairs.
2018-10-11 12:35:32 +01:00
efc0c65056 Hadrons: DiskVector Eigen specialisation with binary I/O and sha256 correctness check 2018-10-08 19:02:00 +01:00
936eaac8e1 function to get the sha256 string 2018-10-08 19:00:50 +01:00
fe6a372f75 Hadrons: fixes and cleaning in the scalar SU(N) part 2018-10-08 15:14:08 +01:00
148fc052bd Hadrons: Aslash field, tested 2018-10-05 21:04:10 +01:00
c073341a10 Hadrons: more cleaning 2018-10-05 19:50:41 +01:00
78299daaac Hadrons: code cleaning 2018-10-05 16:47:52 +01:00
866449c804 Hadrons: integration of Peter's A2Autils 2018-10-05 16:42:44 +01:00
d69a52079f Merge remote-tracking branch 'gh/feature/a2a-integration' into feature/aslashfield 2018-10-05 15:39:09 +01:00
9f4f8a14a3 Hadrons: code cleaning 2018-10-05 15:38:01 +01:00
f6593dc881 Hadrons: A2A block performance counter fix 2018-10-05 15:11:01 +01:00
Peter Boyle
b46d31d4b6 MKL enable on Eigen if Grid is configured to use MKL 2018-10-05 11:29:40 +01:00
58567fc650 Hadrons: big update abstracting the block meson field routine, tested & working, performance counters broken and code dirty 2018-10-04 20:01:49 +01:00
Peter Boyle
7c57cac670 Adding A2A utils class for containing kernels. 2018-10-04 18:57:41 +01:00
d0b21bf1ff Merge branch 'feature/eigenpack-convert' into develop 2018-10-04 18:26:45 +01:00
a1825d1f59 Hadrons: final fix for multiprec eigenpacks 2018-10-04 18:25:26 +01:00
5a3e83ff7b Hadrons: new layer in eigenpacks class hierarchy 2018-10-03 14:45:01 +01:00
52569d98d8 Hadrons: multiprec eigenpack I/O fix 2018-10-03 14:24:43 +01:00
b351103c29 Hadrons: eigenpack load module with 32bit I/O 2018-10-02 21:07:56 +01:00
118cca4681 Hadrons: linking fix 2018-10-02 20:08:49 +01:00
44de727cd2 Hadrons: eigenpack support for multiprecision I/O 2018-10-02 19:51:09 +01:00
888ebc3cf9 Hadrons: better name for the EP converter 2018-10-02 15:22:18 +01:00
6c031a1b81 Merge branch 'feature/eigenpack-convert' into develop 2018-10-02 14:57:30 +01:00
02aa4bd762 Hadrons: cleaner eigenpack convert log 2018-10-02 13:43:25 +01:00
9aafa8ee60 Hadrons: eigenpack converter generalised for RB/5d grids 2018-10-02 13:34:17 +01:00
430b98b354 fix previous commit 2018-10-02 13:12:46 +01:00
84189867ef Hadrons: eigenpack converter with RB grids (to be generalised) 2018-10-02 13:05:05 +01:00
4ab8cfbe2a Hadrons: more verbose eigenpack convert 2018-10-02 12:24:45 +01:00
aadd9f4468 Eigenpack converter, to be tested, HadronsXmlRun moved to Utilities directory 2018-10-02 00:02:34 +01:00
8fbb27ce13 Hadrons: less code duplication in eigenpack IO 2018-10-01 20:15:21 +01:00
21bba95909 Hadrons: eigenpack metadata is no ignored anymore when reading 2018-10-01 19:33:45 +01:00
6448fe7121 More flexible XML control in Lime files 2018-10-01 19:32:50 +01:00
2458a11d1d Hadrons: precision cast module 2018-09-29 18:00:08 +01:00
d0ca7c3fe6 Hadrons: big update for getGrid, grids are now created automatically 2018-09-29 17:55:19 +01:00
57f899d79c Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-09-29 15:50:59 +01:00
e881a0c157 Merge commit 'beed527ea37c90fd5e19b82d326eb8adc8eba5ff' into develop 2018-09-29 15:50:21 +01:00
f411657118 JSON update 2018-09-29 15:48:05 +01:00
Peter Boyle
7458c6174b Use operator() for indexing internal indices 2018-09-27 06:42:02 +01:00
Peter Boyle
21b269d0f9 Move the Grid.pdf out of a deep directory 2018-09-27 06:36:25 +01:00
Peter Boyle
083af92ac2 Update from chulwoo ; high level link for Grid.pdf in documentation 2018-09-27 06:30:40 +01:00
Peter Boyle
2c162577b5 HMC documentation 2018-09-25 23:28:17 +01:00
Peter Boyle
b1c4e96382 Updates to actions etc.. 2018-09-24 22:10:30 +01:00
Peter Boyle
a55c6f34f3 Updated docs 2018-09-24 15:44:35 +01:00
Peter Boyle
beed527ea3 Carletons chapter 2018-09-24 15:09:51 +01:00
eaa633cf69 Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-09-21 18:16:22 +01:00
c632455129 Hadrons: meson field IO fix 2018-09-21 18:16:01 +01:00
c012899ed5 Hadrons: big update after templating of get/createGrid 2018-09-21 18:15:33 +01:00
paboyle
8bab544c2f Updated manual pdf 2018-09-20 18:51:11 +01:00
paboyle
76fc06a5dc Updates with todo from Carleton 2018-09-20 18:50:11 +01:00
4af6c7e7aa Hadrons: copyright update 2018-09-14 12:51:48 +01:00
f60fbcfc4d Hadrons: mixed precision CG, to be tested 2018-09-14 12:47:55 +01:00
464c81706e Hadrons: defaults Impls for different precisions 2018-09-14 12:46:43 +01:00
408130b808 Hadrons: header list fix 2018-09-10 17:38:54 +01:00
375edd1370 file forgotten in last commit 2018-09-10 17:37:29 +01:00
6d912f6c67 Hadrons: general guesser factory 2018-09-10 17:36:54 +01:00
6d1d28955e Guesser class is redundant, switching to LinearFunction 2018-09-10 17:35:54 +01:00
920b471761 Hadrons tests update 2018-09-10 15:32:13 +01:00
63c21767ba Hadrons: grids stored with hash of SIMD type (for mixed-precision setups) 2018-09-10 15:31:39 +01:00
7b6b712565 function to convert std::vector to string 2018-09-10 15:17:32 +01:00
35abd05ee9 mute Version.h cache creation 2018-09-10 15:16:59 +01:00
dd36e60f6a compilation fix for hypercube optimal communicator 2018-09-08 18:07:29 +01:00
cb6c548e21 Hadrons: code cleaning 2018-09-07 20:40:55 +01:00
02c4ccf621 Hadrons: diskvector debug message for writes 2018-09-07 20:33:49 +01:00
fd24588212 Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-09-07 20:25:11 +01:00
b800bb3ecb Hadrons: disk vector cache policy to last touch 2018-09-07 20:24:48 +01:00
f8abd0978b Hadrons copyright update 2018-09-07 20:10:07 +01:00
12c7c493bf Hadrons: disk-based container 2018-09-07 20:04:54 +01:00
paboyle
c7c9072313 Documentation 2018-09-06 16:01:42 +01:00
2bf3be5fae Hadrons: copyright and code cleaning 2018-09-04 18:25:10 +01:00
3a40e4fc69 Hadrons: scalar SU(N) 2-pt guard against negative momenta components 2018-09-04 18:24:07 +01:00
2e69e03f6f Hadrons: CosmHol configs IO module 2018-09-04 18:23:28 +01:00
a09f9bb528 Hadrons: code cleaning 2018-09-04 18:22:21 +01:00
f0e341d726 Hadrons: module list generator fix 2018-09-04 18:22:04 +01:00
6f09df0daf Hadrons: A2A matrix IO fix 2018-09-02 01:46:22 +01:00
26cee605b8 Hadrons: copyright update 2018-09-01 21:30:30 +01:00
b3fa18c229 copyright script never removes authorship 2018-09-01 21:29:58 +01:00
2940c9bcfd Hadrons: dedicated IO class for A2A matrices 2018-09-01 21:09:01 +01:00
0bb532f72b more explicit clean git tree message 2018-09-01 20:02:18 +01:00
fada2aa0f7 Hadrons: precision fix 2018-09-01 20:00:12 +01:00
c193e4e675 Aslash expression in Mathematica notebook 2018-09-01 19:59:58 +01:00
3ee682f676 more Version.h fine tuning 2018-09-01 19:58:16 +01:00
d85ec3bac2 build system minor fix 2018-09-01 19:54:21 +01:00
b52d8eb1e3 better Version.h implementation 2018-09-01 19:49:13 +01:00
ee630d2e8b Hadrons: smearing plaquette output 2018-09-01 17:38:32 +01:00
2f0af79869 Hadrons: scalar SU(N) NPR update 2018-09-01 17:36:35 +01:00
1b7fb79ec0 CI fix 2018-08-28 18:26:37 +01:00
2db1a4628c build system minor fix 2018-08-28 18:26:30 +01:00
6aa047d842 Hadrons module template fix 2018-08-28 17:17:00 +01:00
8779c32ae1 Merge branch 'feature/hadrons' into develop 2018-08-28 17:10:33 +01:00
c527dc3358 CI fix 2018-08-28 17:10:08 +01:00
6b42577b6b gitignore update 2018-08-28 16:58:37 +01:00
fb3596f968 Hadrons: precision fixes 2018-08-28 16:58:23 +01:00
f3a0158213 code cleaning 2018-08-28 16:56:07 +01:00
0250aa9347 file committed in error 2018-08-28 16:55:48 +01:00
3df6743396 more build system cleaning and patch for bad include in Eigen 2018-08-28 16:54:57 +01:00
fb7d021b9d Hadrons: moving Hadrons to root directory, build system improvements 2018-08-28 15:00:40 +01:00
5f206df775 Hadrons: meson field cache friendly cache copy 2018-08-15 17:29:44 +01:00
7727e81113 Hadrons: slight improvement on previous commit 2018-08-14 20:18:47 +01:00
c4115544a5 Hadrons: application option to save graph 2018-08-14 20:03:53 +01:00
08c47328ba Hadrons: meson field kernel performance for each block 2018-08-14 17:35:42 +01:00
09001aedca Hadrons: meson fields saved in single precision 2018-08-14 17:19:38 +01:00
2c67304716 Hadrons: meson field code cleaning 2018-08-14 17:00:05 +01:00
dc6d8686de Hadrons: meson field chunked HDF5 IO 2018-08-14 16:40:29 +01:00
cc2780bea3 Hadrons: meson field parallel IO 2018-08-14 14:55:13 +01:00
6e5a2b7922 fix previous commit 2018-08-14 14:07:54 +01:00
f4878d3a13 Hadrons: meson field threaded cache copy 2018-08-14 14:02:37 +01:00
89d2fac92e Hadrons: copyright update 2018-08-14 12:19:14 +01:00
f2d3e41cf2 Hadrons: meson field: HDF5 perf, gamma input and Eigen tensors allocated by Grid 2018-08-13 20:18:33 +01:00
3c27bb36d4 Hadrons: direct timer access 2018-08-13 20:17:45 +01:00
603d59f389 Hadrons: code cleaning 2018-08-13 20:17:24 +01:00
07a0ef3f95 Hadrons: global measurement time profile 2018-08-13 16:44:57 +01:00
503259f9c9 Hadrons: meson field HDF5 IO done and tested 2018-08-12 16:52:12 +01:00
5be6a51044 Hadrons: meson fields code cleaning and momentum phases 2018-08-11 15:13:43 +01:00
ac69f042b1 Hadrons: module RNG uniquely seeded with <run id> + <module name> + <trajectory> 2018-08-10 18:27:00 +01:00
133d5c2e34 Merge branch 'develop' into feature/hadrons 2018-08-10 16:36:40 +01:00
2a94244890 configure: --with-openssl option and LIME is now mandatory 2018-08-10 16:36:11 +01:00
a15a2dfd29 Merge branch 'develop' into feature/hadrons 2018-08-10 16:08:22 +01:00
093bb02633 Hadrons: execute message for time diluted noise 2018-08-10 16:07:48 +01:00
99a85116f8 Hadrons: module and VM instrumentation 2018-08-10 16:07:30 +01:00
paboyle
27cdb79063 Sha used to seed from a unique string 2018-08-10 15:11:01 +01:00
f4cbfd63ff Hadrons: more meson field cleaning, needs IO now 2018-08-09 18:39:58 +01:00
2b794b6aa7 Hadrons: module generating random lattices for testing purposes 2018-08-09 17:16:42 +01:00
d0244a059f Hadrons: cleaning cleaning... 2018-08-09 00:38:17 +01:00
dcdd891d7d Hadrons: precision fix 2018-08-09 00:13:53 +01:00
6d2df9de79 Hadrons: even more cleaning 2018-08-08 23:15:55 +01:00
41d4e37bae Hadrons: more cleaning 2018-08-08 19:04:44 +01:00
ee5c0cc9b6 Hadrons: code cleaning 2018-08-08 18:45:06 +01:00
0a4020eb4d Hadrons: copyright fix 2018-08-07 18:42:52 +01:00
b2de26589b Hadrons: code cleaning and copyright update 2018-08-07 18:40:48 +01:00
0677adb4dd Hadrons: overhaul of A2A for production 2018-08-07 18:27:59 +01:00
231cc95be6 Hadrons: eigenvalues precision fix 2018-08-07 18:27:19 +01:00
639f9cab82 Hadrons: schedule loading fix 2018-08-07 18:26:49 +01:00
4eac4e575e Hadrons: meson fields indentation fix 2018-08-06 12:42:25 +01:00
3f0f92cda6 Hadrons: first cleaning/integration of A2A/meson fields 2018-08-06 12:11:52 +01:00
d2650e89bd Hadrons: VM exception for object type (solves infinite loop in scheduler) 2018-08-06 12:11:00 +01:00
2962123cba Hadrons: diluted noise polish 2018-08-05 01:44:37 +01:00
830168ec37 Hadrons: first try at diluted noise class (tested) 2018-08-04 12:32:58 +01:00
584c921ca0 Eigen support fix (use of Grid as a library was broken) 2018-08-03 21:07:58 +01:00
81347b4d16 gitignore update 2018-08-03 19:58:52 +01:00
2cfa0b0e6b Merge pull request #174 from fionnoh/a2a_basics
A2A basics
2018-08-03 16:32:14 +01:00
fionnoh
fa5dee76b1 Included Peter's A2AMeson field and Eigen changes 2018-08-03 15:15:54 +01:00
fionnoh
8d1679c6b8 Merge branch 'feature/hadrons-a2a' of https://github.com/paboyle/Grid into a2a_basics 2018-08-03 15:12:24 +01:00
Peter Boyle
3791a38f7c Optimised the MesonField a bit more 2018-08-01 08:27:27 +01:00
Peter Boyle
142f7b0c86 Updated the A2A Meson Field module 2018-07-31 15:58:02 +01:00
fionnoh
891ad66eab Included changes to Hadrons RBPrecCG solver needed for subtraction of guess 2018-07-31 11:26:07 +01:00
Peter Boyle
60c43151c5 Merge branch 'feature/hadrons-a2a' of https://github.com/paboyle/Grid into feature/hadrons-a2a 2018-07-31 01:09:02 +01:00
paboyle
e036800261 Eigen fix 2018-07-31 01:08:42 +01:00
Peter Boyle
62900def36 Merge branch 'feature/hadrons-a2a' of https://github.com/paboyle/Grid into feature/hadrons-a2a 2018-07-31 00:36:26 +01:00
paboyle
e3a309a73f Eigen happiness 2018-07-31 00:35:17 +01:00
fionnoh
ad6c1c0c4e The basics of what is needed in Grid and Hadrons for the A2A class and module, with none of the contraction or MF code. 2018-07-30 18:40:50 +01:00
Peter Boyle
00b92a91b5 Optimising 2018-07-28 23:46:22 +01:00
paboyle
65533741f7 7 moms 2018-07-28 16:17:47 +01:00
Peter Boyle
dc0259fbda Merge pull request #173 from fionnoh/feature/hadrons-a2a
Changes to meson field benchmark. Now includes the gammas in the fina…
2018-07-27 23:03:56 +01:00
Peter Boyle
131a6785d4 Merge branch 'feature/hadrons-a2a' into feature/hadrons-a2a 2018-07-27 23:03:42 +01:00
paboyle
44f4f5c8e2 Momentum loop 2018-07-27 23:00:16 +01:00
fionnoh
2679df034f Changes to meson field benchmark. Now includes the gammas in the final part of the naive method, both methods compute
lhs^dag*Gamma*rhs (previously Gamma*lhs^dag*rhs), and checks results.
2018-07-27 18:31:10 +01:00
bf71162b97 Hadrons: backtrace on abort 2018-07-26 19:20:12 +01:00
299e828d83 Merge branch 'develop' into feature/hadrons 2018-07-26 16:49:49 +01:00
ef5452cddf Hadrons: smarter memory profiler 2018-07-26 16:47:45 +01:00
80de748737 Hadrons: new exceptions which can save a integer 2018-07-26 16:47:25 +01:00
paboyle
71e1006ba8 Updated meson field benchmark for dirac structures 2018-07-26 09:09:29 +01:00
00f31ae83f Merge pull request #163 from goracle/unstaged
Add printing of whether there are unstaged changes in the git hash print
2018-07-25 19:00:00 +00:00
cce339deaf Merge pull request #172 from fionnoh/feature/hadrons
feature/hadrons -> feature/hadrons-a2a
2018-07-25 17:20:19 +00:00
fionnoh
24128ff109 Changes needed for MF benchmark to work with comms correctly 2018-07-23 15:51:37 +01:00
fionnoh
34e9d3f0ca Moved the creation and resizing of the v and w high modes from the A2A class to the A2A module and made them an output of the module. This means that they have to be inputs of the contration modules and they will freed from memory if they are no longer needed. 2018-07-22 14:40:31 +01:00
fionnoh
c995788259 Added ImportUnphysicalFermion and included appropriate logic for 5d w vectors in A2A code 2018-07-21 00:08:11 +01:00
fionnoh
94c7198001 Added ZFIMPL to A2AMeson contraction 2018-07-20 23:08:22 +01:00
fionnoh
04d86fe9f3 Removed overly verbose print statement 2018-07-20 21:38:19 +01:00
fionnoh
b78074b6a0 Removed a Dminus from high mode v and removed duplication pf D_oo code 2018-07-20 16:55:24 +01:00
fionnoh
7dfd3cdae8 Inclusion of ExportPhysicalFermionSource that fixes a bug in the low mode w vectors 2018-07-20 15:45:43 +01:00
fionnoh
cecee1ef2c Merge branch 'develop' of github.com:paboyle/Grid into feature/hadrons 2018-07-20 13:37:50 +01:00
fionnoh
355d4b58be Merge branch 'feature/hadrons' of github.com:fionnoh/Grid into feature/hadrons 2018-07-19 16:07:54 +01:00
fionnoh
2c54a536f3 Moved the meson field inner product to its own header file 2018-07-19 15:56:52 +01:00
fionnoh
d868a45120 Cleaned up some stuff that was erroneously included in a previous "trash" commit. Leaving in the mySliceInnerProdct function for now as it speeds up mesonfield creation quite a lot for 24^3 tests 2018-07-16 16:19:59 +01:00
fionnoh
9deae8c962 A2A meson field contraction code 2018-07-16 14:18:45 +01:00
fionnoh
db86cdd7bd Possible trash commit 2018-07-10 13:30:45 +01:00
paboyle
ec9939c1ba Test for faster implementation of meson field inner loop
This should be possible to cache block at outer levels, global sum across nodes not performed
and deferred to caller to block them all into a big all reduce.
Nc=3 and Fermion is hard coded in an ugly way. We might think about benchmarking whether
a product without the conjugate should be made available by Grid.

It is not clear whether the explicit unroll, or the performing of conjugate on left once
was the real source of the speed up.

Gives 70-80 GF/s on my laptop (single) half that double, and 70GB/s to cache.

This is competitive with dslash and a reasonable stopping point for the optimisation. If necessary we can revisit.
2018-07-10 12:38:51 +01:00
fionnoh
f74617c124 Added ZFIMPL to meson field module 2018-07-03 14:04:53 +01:00
fionnoh
8c6a3921ed Merge remote-tracking branch 'upstream/feature/hadrons' into feature/hadrons 2018-07-03 11:35:14 +01:00
a8a15dd9d0 Hadrons: code cleaning 2018-07-02 17:52:39 +01:00
3ce68a751a Hadrons: stout smearing module 2018-07-02 17:52:04 +01:00
fionnoh
daa0977d01 Included a print statement that indicates that the guess is being subtracted from the solve. 2018-06-28 16:34:56 +01:00
fionnoh
a2929f4384 Removed A2A contraction module and replaced it with the beginnings of a meson field module 2018-06-28 16:17:26 +01:00
fionnoh
7fe3974c0a Included eigenPacks and action as references, not inputs, of A2A module. They now now longer need to be parameters in the meson field modules. 2018-06-28 16:14:49 +01:00
fionnoh
f7e86f81a0 Changes A2A class to make use of the new Solver class 2018-06-28 16:14:16 +01:00
fionnoh
fecec803d9 Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/hadrons 2018-06-28 16:13:43 +01:00
fionnoh
8fe9a13cdd Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/hadrons 2018-06-28 16:13:07 +01:00
d2c42e6f42 Hadrons: scaled DWF action 2018-06-26 14:59:33 +01:00
Daniel Richtmann
2881b3e8e5 WilsonMG: Remove unnecessary static assertions 2018-06-26 14:42:30 +02:00
049cc518f4 Hadrons: introduction message 2 2018-06-25 19:08:39 +01:00
2e1c66897f Hadrons: introduction message 2018-06-25 19:08:22 +01:00
adcef36189 Hadrons: Möbius DWF action 2018-06-25 15:58:35 +01:00
fionnoh
2f121c41c9 Commiting reation of meson field code before a merge with the upstream branch feature/hadrons 2018-06-25 12:20:46 +01:00
e0ed7e300f Hadrons: spurious Dminus removed 2018-06-22 16:33:43 +02:00
485207901b Merge branch 'develop' into feature/hadrons 2018-06-22 16:15:32 +02:00
c760f0a4c3 Hadrons: remove make_5D/4D functions and FreeProp fix 2018-06-22 16:12:46 +02:00
c84eeedec3 Hadrons: GaugeProp module for z-Wilson actions 2018-06-22 15:53:22 +02:00
fionnoh
1ac3526f33 Small changes to the A2A header and module 2018-06-22 12:29:42 +01:00
fionnoh
0de090ee74 Temporarily added in the contraction code that produced the working 2-pt function. This is commited for reference only and will be removed in the next push. 2018-06-22 12:28:41 +01:00
91405de3f7 Hadrons: new solver exposing fermion matrix and generic source/solve import/export 2018-06-22 12:14:37 +02:00
fionnoh
8fccda301a Fixed a bug where the guess was always subtracted after the solve and included appropriate weights for the sources in the one case we're looking at now. More work needs to be done to make the 5d/4d source logic less brittle. 2018-06-21 16:36:59 +01:00
fionnoh
7a0abfac89 Restructured the class that computes and returns the A2A vectors. 2018-06-21 16:36:06 +01:00
fionnoh
ae37fda699 A more elegant way to subtract guesses from solve and a bool check before verifying residual 2018-06-20 16:07:40 +01:00
fionnoh
b5fc5e2030 All to all module update that hit a promising milestone. Commiting for a reference for future changes. 2018-06-20 10:59:07 +01:00
Daniel Richtmann
cc5d025ea4 WilsonMG: Adapt staggered GMRES/MR tests to "new" constructor 2018-06-18 16:20:20 +02:00
Daniel Richtmann
ddcb53bce2 Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-06-13 09:50:37 +02:00
Daniel Richtmann
d1c80e1d46 WilsonMG: Correct years in copyright line 2018-06-13 09:44:09 +02:00
Daniel Richtmann
c73cc7d354 WilsonMG: Add tests with MG preconditioner running single precision, outer solver running in double 2018-06-12 16:10:48 +02:00
Daniel Richtmann
49fdc324a0 WilsonMG: Make MG correctness checks abort on failing tests 2018-06-12 16:10:48 +02:00
Daniel Richtmann
f32714a2d1 WilsonMG: Make running MG correctness checks optional via commandline 2018-06-12 16:10:48 +02:00
Daniel Richtmann
73a955be20 WilsonMG: Move tests for Wilson & WilsonClover into separate files 2018-06-12 16:10:48 +02:00
Daniel Richtmann
66b7a0f871 WilsonMG: Move multigrid class to separate file 2018-06-12 16:10:48 +02:00
Daniel Richtmann
2ab9d4bc56 WilsonMG: Fix random behavior in GMRES
From time to time I saw random since the basis vectors were not initialized
properly.
2018-06-12 15:01:31 +02:00
Daniel Richtmann
4f41cd114d WilsonMG: Add a mixed precision version of FGMRES
This version does everything in double prec but accepts a preconditioner working
in single precision.
2018-06-12 15:01:31 +02:00
Daniel Richtmann
11c4f5e32c WilsonMG: Provide command line switch for reading in input xml + move default params to constructor of MultiGridParams 2018-06-12 15:01:31 +02:00
Daniel Richtmann
e9b9550298 WilsonMG: Fix incompatibility with single prec MG in construction of simd layout on coarser grids 2018-06-12 15:01:31 +02:00
Daniel Richtmann
7564fedf68 WilsonMG: Set subspace to zero to avoid random behavior 2018-06-12 15:01:31 +02:00
8db0ef9736 Merge pull request #168 from jch1g10/feature/qed-fvol
Feature/qed fvol
2018-06-08 20:09:06 +02:00
Guido Cossu
95d4b46446 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-06-08 11:30:29 +01:00
paboyle
5dfd216a34 Better thread safety 2018-06-04 21:08:44 +01:00
paboyle
c2e8d0aa88 Solve g++ problem on the lanczos test 2018-06-04 18:34:15 +01:00
James Harrison
0fe5aeffbb Merge branch 'feature/hadrons' into feature/qed-fvol 2018-06-04 16:59:43 +01:00
James Harrison
7fbc469046 Merge branch 'develop' into feature/hadrons 2018-06-04 16:58:30 +01:00
paboyle
bf96a4bdbf Merge branch 'master' into develop 2018-06-04 14:03:11 +01:00
paboyle
84685c9bc3 Overflow fix 2018-06-04 13:42:07 +01:00
fionnoh
a8d4156997 Added a Hadrons module that computes the all-to-all v and w vectors 2018-05-31 17:18:58 +01:00
fionnoh
c18074869b Changes to Hadrons SchurRB solver to allow for a subtract_guess boolean to be passed 2018-05-31 17:17:16 +01:00
fionnoh
f4c6d39238 CHanges made to SchurRB solvers to allow for the subtraction of a guess after solve 2018-05-31 17:16:20 +01:00
200d35b38a Merge branch 'develop' into feature/hadrons 2018-05-28 11:52:47 +02:00
eb52e84d09 Merge branch 'feature/hadrons' of github.com:paboyle/Grid into feature/hadrons 2018-05-28 11:50:27 +02:00
72abc34764 Merge pull request #166 from guelpers/feature/hadrons
Feature/hadrons
2018-05-28 11:49:46 +02:00
e3164d4c7b Hadrons: env function to get volume in double 2018-05-28 11:39:17 +02:00
James Harrison
f5db386c55 Change MODULE_REGISTER_NS -> MODULE_REGISTER in UnitEM, ScalarVP and VPCounterTerms 2018-05-22 16:16:21 +01:00
James Harrison
294ee70a7a Merge branch 'feature/hadrons' into feature/qed-fvol
# Conflicts:
#	extras/Hadrons/modules.inc
#	lib/qcd/action/gauge/Photon.h
2018-05-21 18:02:41 +01:00
Azusa Yamaguchi
013ea4e8d1 Merge branch 'feature/staggered-comms-compute' into develop 2018-05-21 13:11:56 +01:00
Azusa Yamaguchi
7fbbb31a50 Merge branch 'develop' into feature/staggered-comms-compute
Conflicts:
	lib/qcd/action/fermion/ImprovedStaggeredFermion.cc
2018-05-21 13:07:29 +01:00
Azusa Yamaguchi
0e127b1fc7 New file single prec test 2018-05-21 12:57:13 +01:00
Azusa Yamaguchi
68c028b0a6 Comment 2018-05-21 12:54:25 +01:00
255d4992e1 Hadrons: stochastic scalar SU(N) free field fix 2018-05-18 20:49:55 +01:00
a0d399e5ce Hadrons: yet other attempts at EMT NPR 2018-05-18 20:49:26 +01:00
fd3b2e945a Hadrons: don't right result with empty stem 2018-05-18 20:48:24 +01:00
Daniel Richtmann
6c27c72585 WilsonMG: Provide more sensible default values for MG parameters 2018-05-16 17:26:09 +02:00
Daniel Richtmann
9c003d2d72 WilsonMG: Base wilson mg preconditioner entirely on existing infrastructure 2018-05-16 17:26:09 +02:00
Daniel Richtmann
4b8710970c WilsonMG: Switch to Galerkin coarsening in CoarsenedMatrix 2018-05-16 17:26:09 +02:00
Daniel Richtmann
68d686ec38 WilsonMG: Add functionality for applying G5 on coarse grids 2018-05-16 16:17:14 +02:00
Daniel Richtmann
c48b69ca81 WilsonMG: Implement Mdir & Mdiag in CoarsenedMatrix 2018-05-16 16:08:05 +02:00
Daniel Richtmann
df8c208f5c WilsonMG: Revert CoarsenedMatrix.h and Lattice_transfer.h back to state of develop branch 2018-05-16 16:02:54 +02:00
Daniel Richtmann
61812ab7f1 Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-05-15 14:57:18 +02:00
b999984501 Merge branch 'develop' into feature/hadrons 2018-05-15 13:53:57 +01:00
Guido Cossu
7836cc2d74 No checksum output on log for scidac 2018-05-15 10:10:08 +01:00
a61e0df54b Travis fix for Lime 2018-05-14 19:56:12 +01:00
9d835afa35 Attempt at solving the FP exception in the QED code 2018-05-14 19:05:54 +01:00
5e3be47117 Hadrons: scalar SU(N) various fixes 2018-05-14 18:58:39 +01:00
48de706dd5 Merge branch 'develop' into feature/hadrons 2018-05-11 18:06:40 +01:00
f871fb0c6d check file is opened correctly in the Lime reader 2018-05-11 18:06:28 +01:00
93771f3099 Hadrons: scalar SU(N) stochastic free field 2018-05-10 22:29:48 +01:00
8cb205725b Merge branch 'develop' into feature/hadrons 2018-05-09 23:56:35 +01:00
9ad580d82f Hadrons: format fix 2018-05-07 21:38:15 +01:00
899f961d0d Hadrons: eigenvalue metadata saved with 16 significant digits 2018-05-07 21:37:03 +01:00
54d789204f more general implementation of the precision interface for serialisers 2018-05-07 21:17:46 +01:00
25828746f3 XML precision scientific with 16 digits by default 2018-05-07 21:04:31 +01:00
f362c00739 Hadrons: better handling of automatic directory creation 2018-05-07 19:43:40 +01:00
Guido Cossu
25d1cadd3b Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-05-07 18:55:09 +01:00
Guido Cossu
c24d53bbd1 Further debug of RNG I/O 2018-05-07 18:55:05 +01:00
2017e4e3b4 Hadrons: more verbose directory creation error 2018-05-07 18:12:22 +01:00
27a4d4c951 Hadrons: multi-file eigenpack in separate directory 2018-05-07 17:52:54 +01:00
2f92721249 Merge branch 'develop' into feature/hadrons 2018-05-07 17:26:47 +01:00
3c7a4106ed Trap for deadly empty comm thread option 2018-05-07 17:26:39 +01:00
3252059daf Hadrons: multi-file support for eigenpacks 2018-05-07 17:25:36 +01:00
paboyle
6eed167f0c Merge branch 'release/0.8.1' 2018-05-04 17:34:11 +01:00
paboyle
4ad0df6fde Bump volume for Gerardo 2018-05-04 17:33:23 +01:00
661381e881 Merge branch 'develop' into feature/hadrons 2018-05-04 14:52:17 +01:00
Peter Boyle
68a5079f33 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-05-04 14:13:54 +01:00
Peter Boyle
8634e19f1b Update 2018-05-04 14:13:35 +01:00
Azusa Yamaguchi
9ada378e38 Add timing 2018-05-04 10:58:01 +01:00
Vera Guelpers
9d9692d439 Fix double vs float in boundary phases 2018-05-03 16:40:16 +01:00
0659ae4014 Merge branch 'develop' into feature/hadrons 2018-05-03 16:20:22 +01:00
bfbf2f1fa0 no threaded stencil benchmark if OpenMP is not supported 2018-05-03 16:20:01 +01:00
dd6b796a01 Hadrons: scalar SU(N) volume factor fix 2018-05-03 16:19:17 +01:00
Vera Guelpers
52a856b4a8 FreeProp module for Hadrons 2018-05-03 12:33:20 +01:00
Vera Guelpers
04190ee7f3 5D free propagator for DWF and boundary conditions for free propagators 2018-05-03 12:31:36 +01:00
Azusa Yamaguchi
587bfcc0f4 Add Timing 2018-05-03 12:10:31 +01:00
Vera Guelpers
2700992ef5 Merge remote-tracking branch 'upstream/feature/hadrons' into feature/hadrons 2018-05-03 10:01:52 +01:00
Peter Boyle
8c658de179 Compressor speed up (a little); streaming stores 2018-05-02 17:52:16 +01:00
Guido Cossu
ba37d51ee9 Debugging the RNG IO 2018-05-02 15:32:06 +01:00
Azusa Yamaguchi
4f4181c54a Merge branch 'feature/staggered-comms-compute' of https://github.com/paboyle/Grid into feature/staggered-comms-compute 2018-05-02 14:59:13 +01:00
Guido Cossu
4d4ac2517b Adding Scalar field theory example for Scidac format 2018-05-02 14:36:32 +01:00
Guido Cossu
e568c24d1d Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-05-02 14:29:25 +01:00
Guido Cossu
b458326744 Checkpointer module update 2018-05-02 14:29:22 +01:00
Guido Cossu
6e7d5e2243 HMC: added Scidac checkpointer and support for metadata 2018-05-02 14:28:59 +01:00
Azusa Yamaguchi
b35169f1dd MultiShift for Staggered 2018-05-02 14:22:37 +01:00
Azusa Yamaguchi
441ad7498d add Iterative counter 2018-05-02 14:21:30 +01:00
Peter Boyle
6f6c5c549a Split off gparity 2018-05-02 14:11:23 +01:00
Peter Boyle
1584e17b54 Revert to fast versoin 2018-05-02 14:10:55 +01:00
Peter Boyle
12982a4455 Hypercube optimisation 2018-05-02 14:10:21 +01:00
Peter Boyle
172f412102 shmget reintroduce 2018-05-02 14:07:41 +01:00
Peter Boyle
a64497265d TIming 2018-05-02 14:07:28 +01:00
ca639c195f Merge branch 'develop' into feature/hadrons 2018-05-01 14:07:32 +01:00
edc28dcfbf Hadrons: scalar SU(N) 2-pt fix 2018-05-01 14:02:31 +01:00
Peter Boyle
c45f24a1b5 Improvements for tesseract 2018-04-30 21:50:00 +01:00
Dr Peter Boyle
aaf37ee4d7 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-04-27 11:45:13 +01:00
Dr Peter Boyle
1dddd17e3c Benchmark improvements from tesseract 2018-04-27 11:44:46 +01:00
paboyle
661f1d3e8e Merge branch 'release/0.8.0' into develop 2018-04-27 11:22:33 +01:00
paboyle
edcf9b9293 Merge branch 'release/0.8.0' 2018-04-27 11:13:19 +01:00
49b8501fd4 Merge branch 'develop' into feature/hadrons 2018-04-26 17:33:50 +01:00
d47484717e Hadrons: scalar SU(N) result handling improvement 2018-04-26 17:32:37 +01:00
Azusa Yamaguchi
96272f3841 Merge staggered fix linear operator and reduction 2018-04-26 10:33:19 +01:00
Azusa Yamaguchi
5c936d88a0 Merge branch 'feature/staggered-comms-compute' of https://github.com/paboyle/Grid into feature/staggered-comms-compute 2018-04-26 10:18:37 +01:00
Azusa Yamaguchi
1c64ee926e Faster staggered operator with m^2 term trivial used 2018-04-26 10:17:49 +01:00
Azusa Yamaguchi
2cbb72a81c Provide info if EE term is trivial (m^2 factor)
Better timing in staggered 4d case
2018-04-26 10:10:07 +01:00
Azusa Yamaguchi
31d83ee046 Enable special treatment of constEE cases 2018-04-26 10:08:46 +01:00
Azusa Yamaguchi
a9e8758a01 Improvements to staggered tests timings 2018-04-26 10:08:05 +01:00
Azusa Yamaguchi
3e125c5b61 Faster linalg on CG optimised against staggered
Sum overhead is bigger for staggered
2018-04-26 10:07:19 +01:00
Azusa Yamaguchi
eac6ec4b5e Faster reductions, important on single node staggered 2018-04-26 10:03:57 +01:00
Azusa Yamaguchi
213f8db6a2 Microsecond resultion 2018-04-26 10:01:39 +01:00
cc6eb51e3e Hadrons: macro refactoring for library portability 2018-04-25 16:49:14 +01:00
Vera Guelpers
507009089b Merge remote-tracking branch 'upstream/feature/hadrons' into feature/hadrons 2018-04-25 09:36:39 +01:00
b234784c8e Hadrons: scalar SU(N) takes operator pairs now 2018-04-24 19:52:12 +01:00
6ea2a8b7ca Hadrons: scheduler shows starting value 2018-04-24 19:51:47 +01:00
c1d0359aaa Hadrons: scalar SU(N) kinetic term saves trace 2018-04-24 19:51:22 +01:00
047ee4ad0b Hadrons: scalar SU(N) cleanup 2018-04-24 19:50:58 +01:00
a13106da0c Hadrons: scalar SU(N) gradient 2018-04-24 19:50:30 +01:00
75113e6523 Hadrons: Scalar SU(N) variable name update 2018-04-24 19:49:27 +01:00
325c73d051 Hadrons: module template update 2018-04-24 19:48:54 +01:00
b25a59e95e Hadrons: mitigation of GCC/Intel compiler bug not generating defaulted destructors 2018-04-24 17:20:25 +01:00
7c4533797f Hadrons: scalar SU(N) EMT improvement term optional 2018-04-23 22:46:39 +01:00
af84fd65bb Hadrons: missing dependency message improvement 2018-04-23 22:46:17 +01:00
Dan H
1a2613086a Fix print message. 2018-04-23 15:42:12 -04:00
Dan H
4f110c09a5 Add printing of whether there are unstaged changes in the git hash print. 2018-04-23 15:38:23 -04:00
6764362237 Hadrons: automatic directory creation fix 2018-04-23 18:45:39 +01:00
2fa2b0e0b1 Hadrons: Application header does not include all the modules 2018-04-23 17:57:17 +01:00
b61292f735 Hadrons: recursive mkdir function 2018-04-23 17:36:43 +01:00
ce7720e221 Hadrons: copyright update 2018-04-23 17:36:20 +01:00
853a5528dc Hadrons: template modules compilation optimisation 2018-04-23 17:35:01 +01:00
169f405c9c Hadrons: tests repaired 2018-04-23 12:48:34 +01:00
c6125b01ce Hadrons: Error and Warning channels always on 2018-04-23 12:48:17 +01:00
b0b5b34bff Hadrons: custom abort with module trace 2018-04-23 12:48:00 +01:00
1c9722357d Merge branch 'develop' into feature/hadrons
# Conflicts:
#	lib/qcd/action/fermion/FermionOperator.h
2018-04-20 17:15:21 +01:00
334da7f452 Hadrons: can trace which module is throwing an error 2018-04-13 18:45:31 +02:00
4669ecd4ba Hadrons: build improvement 2018-04-13 18:21:18 +02:00
4573b34cac Hadrons: scalar SU(N) 2-pt functions with momentum 2018-04-13 18:21:00 +02:00
17f57e85d1 Merge branch 'develop' into feature/hadrons 2018-04-06 22:53:11 +01:00
17f27b1ebd Hadrons: eigenpack writer fix 2018-04-06 22:52:11 +01:00
a16bbecb8a Hadrons: more feedback 2018-04-06 19:38:20 +01:00
7c9b0dd842 Hadrons: top level name for eigenpack metadata 2018-04-06 19:32:22 +01:00
6b7228b3e6 Hadrons: better metadata for eigenpack 2018-04-06 19:29:53 +01:00
f117552334 post-merge fix 2018-04-06 18:38:46 +01:00
a21a160029 Merge branch 'develop' into feature/hadrons
# Conflicts:
#	lib/serialisation/XmlIO.cc
2018-04-06 18:34:19 +01:00
6b8ffbe735 Hadrons: genetic minimum value type fix 2018-04-06 15:41:31 +01:00
81050535a5 Hadrons: truncate eigenvalues when loading partial eigenpack 2018-04-06 13:48:58 +01:00
7dcf5c90e3 Hadrons: eigenpack must be referred by solver when used 2018-04-06 13:16:28 +01:00
9ce00f26f9 not special characters in std::vector operator<< 2018-04-04 17:44:56 +01:00
85c253ed4a Test_serialisation MPI fix 2018-04-04 17:19:34 +01:00
ccfc0a5a89 Hadrons: better string representation of module parameters 2018-04-04 17:19:22 +01:00
d3f857b1c9 Hadrons: proper metadata for eigenpacks 2018-04-04 16:36:37 +01:00
fb62035aa0 Hadrons: do not create RB coarse grids 2018-04-03 19:49:11 +01:00
0260bc7705 Hadrons: eigen pack writing only for boss node 2018-04-03 18:55:46 +01:00
68e6a58f12 Hadrons: several Lanczos fixes and improvements 2018-04-03 17:42:21 +01:00
Daniel Richtmann
73ced656eb Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-04-03 17:51:11 +02:00
Daniel Richtmann
f69008edf1 WilsonMG: Add functionality to report timings to MG preconditioner 2018-04-03 17:26:49 +02:00
Daniel Richtmann
57a49ed22f WilsonMG: Read in MG parameters from xml in test 2018-04-03 16:03:11 +02:00
Daniel Richtmann
ff6413a764 WilsonMG: Make number of levels chooseable at runtime
I don't like this solution though :(
2018-04-03 15:57:33 +02:00
Daniel Richtmann
2530bfed01 WilsonMG: Move params instance from global scope to test main function 2018-04-03 14:50:48 +02:00
640515e3d8 Merge branch 'develop' into feature/hadrons 2018-03-30 17:43:49 +01:00
97c579f637 Merge branch 'develop' into feature/hadrons 2018-03-30 16:04:44 +01:00
Daniel Richtmann
74f79c5ac7 Revert "Add function to return full type as std::string"
This reverts commit 1cb745c8dc.
2018-03-29 12:03:50 +02:00
Daniel Richtmann
58c30c0cb1 WilsonMG: Add conformability checks in MG preconditioner 2018-03-28 13:24:39 +02:00
Daniel Richtmann
917a92118a WilsonMG: Move operator test to MG testing routine 2018-03-28 12:19:25 +02:00
a4d8512fb8 Revert "Lattice serialisation, just HDF5 for the moment"
This reverts commit 8a0cf0194f.
2018-03-27 17:55:42 +01:00
5ec903044d Serial IO code cleaning for std:: convention 2018-03-27 17:11:50 +01:00
Daniel Richtmann
04f9cf088d WilsonMG: Add more parameters to MultiGridParams struct 2018-03-27 17:13:11 +02:00
Daniel Richtmann
99107038f9 WilsonMG: Rationalize the level counting strategy 2018-03-27 17:06:33 +02:00
8a0cf0194f Lattice serialisation, just HDF5 for the moment 2018-03-26 19:16:16 +01:00
Daniel Richtmann
b78456bdf4 WilsonMG: Get rid of explicit include of GCR header 2018-03-26 15:41:53 +02:00
Daniel Richtmann
08543b6b11 WilsonMG: Provide a switch between V- and K-cycle 2018-03-26 15:37:17 +02:00
Daniel Richtmann
63ba33371f WilsonMG: Some minor refactoring 2018-03-26 15:34:53 +02:00
Daniel Richtmann
683a7d2ddd WilsonMG: Move comment to make clang-format happy 2018-03-26 14:59:40 +02:00
1c680d4b7a Merge branch 'develop' into feature/hadrons 2018-03-26 13:52:44 +01:00
Daniel Richtmann
afdcbf79d1 Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-03-23 21:13:50 +01:00
Daniel Richtmann
3c3ec4e267 WilsonMG: Move tests for Wilson & WilsonClover into the same file 2018-03-23 21:12:27 +01:00
Daniel Richtmann
bbe1d5b49e WilsonMG: Temporarily use GMRES in construction of basis vectors
This can go back to CG once Mdag in CoarsenedMatrix works.
2018-03-23 20:02:27 +01:00
Daniel Richtmann
0f6009a29f WilsonMG: Huge refactor into something that could be considered an algorithm 2018-03-23 19:55:43 +01:00
Daniel Richtmann
1cfed3de7c WilsonMG: Add new logger for MG 2018-03-23 19:55:16 +01:00
Daniel Richtmann
edbc0d49d7 WilsonMG: Get rid of explicit GridTypeMappers in CoarsenedMatrix 2018-03-22 16:38:24 +01:00
e9323460c7 Merge branch 'develop' into feature/hadrons 2018-03-22 10:48:37 +00:00
James Harrison
58c2f60b69 Merge branch 'feature/hadrons' into feature/qed-fvol 2018-03-20 20:19:18 +00:00
James Harrison
bfa3a7b3b0 Merge branch 'feature/hadrons' into feature/qed-fvol
# Conflicts:
#	extras/Hadrons/Modules.hpp
#	extras/Hadrons/Modules/MGauge/StochEm.cc
#	extras/Hadrons/modules.inc
2018-03-20 20:17:59 +00:00
Guido Cossu
f212b0a963 Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/hadrons 2018-03-19 17:57:13 +00:00
Guido Cossu
62702dbcb8 Fixing bug in the Point sink causing NaNs 2018-03-19 17:56:53 +00:00
41d6cab033 Merge branch 'develop' into feature/hadrons 2018-03-19 13:30:21 +00:00
5a31e747c9 Merge commit 'd5ce66f6ab2c44a12def7b6d26df80d6e646b1fb' into feature/hadrons 2018-03-19 13:19:09 +00:00
cbc73a3fd1 Hadrons: CG guesser fix 2018-03-19 13:11:38 +00:00
Daniel Richtmann
ee5cf6c8c5 WilsonMG: Some minor changes to GMRES implementations 2018-03-16 13:10:45 +01:00
d516938707 Hadrons: eigen packs I/O and deflation interface 2018-03-14 14:55:47 +00:00
72344d1418 Hadrons: change default Schur convention to DiagTwo 2018-03-13 17:10:54 +00:00
7ecf6ab38b Merge branch 'develop' into feature/hadrons 2018-03-13 16:11:59 +00:00
2d4d70d3ec Hadrons: LCL fixes 2018-03-13 16:10:36 +00:00
78f8d47528 Hadrons: environment access to derived objects 2018-03-13 16:10:16 +00:00
b85f987b0b Hadrons: error message channel verbose during profiling 2018-03-13 16:09:22 +00:00
f57afe2079 Hadrons: much cleaner eigenpack implementation, to be tested 2018-03-13 13:51:09 +00:00
Vera Guelpers
8462bbfe63 Gamma input for meson contraction with round brackets 2018-03-12 18:02:12 +00:00
229977c955 Hadrons: minor memory fix for ShiftProbe module 2018-03-09 21:56:27 +00:00
e485a07133 Hadrons: garbage collector debug output 2018-03-09 21:56:01 +00:00
70ec2faa98 Hadrons: maximum iteration specified for tests and error if 0 2018-03-09 19:53:55 +00:00
Daniel Richtmann
a66cecc509 WilsonMG: Fix invalid call to MR ctor 2018-03-09 17:34:29 +01:00
Daniel Richtmann
0f6cdf3d4b WilsonMG: Implement missing parts of CoarsenedMatrix 2018-03-09 16:56:16 +01:00
Daniel Richtmann
1e63b73a14 WilsonMG: Some cleanup/formatting 2018-03-09 16:50:19 +01:00
2f849ee252 declaration fix 2018-03-08 23:34:00 +00:00
bb6ed44339 Merge branch 'develop' into feature/hadrons 2018-03-08 23:09:28 +00:00
Azusa Yamaguchi
80302e95a8 MILC Interface 2018-03-08 15:34:03 +00:00
9942723189 Merge branch 'develop' into feature/hadrons
# Conflicts:
#	lib/serialisation/BaseIO.h
2018-03-07 15:22:16 +00:00
Azusa Yamaguchi
b938202081 Overlapped Comm for Wilson DhopInternal 2018-03-07 14:08:43 +00:00
e79ef469ac Merge branch 'develop' into feature/hadrons
# Conflicts:
#	lib/serialisation/BaseIO.h
2018-03-06 19:25:51 +00:00
James Harrison
c793947209 Add overloaded Photon constructors, with default parameters for IR improvements and infinite-volume G(x=0). 2018-03-06 16:27:26 +00:00
3e9ee053a1 Merge branch 'develop' into feature/hadrons 2018-03-05 20:01:38 +00:00
dda6c69d5b Hadrons: scalar SU(N) shift probes 2018-03-05 20:00:29 +00:00
cd51b9af99 Torture yourself with namespace lookup 101 2018-03-05 19:58:13 +00:00
f32555dcc5 Merge branch 'develop' into feature/hadrons 2018-03-03 15:31:52 +00:00
e93c883470 Hadrons: basic GraphViz visualisation 2018-03-03 13:42:36 +00:00
fcac5c0772 Hadrons: scalar SU(N) fixes 2018-03-02 19:20:23 +00:00
90f4000935 Hadrons: scheduler debug less verbose 2018-03-02 19:20:01 +00:00
480708b9a0 Hadrons: safer error handling for HadronsXmlRun 2018-03-02 19:19:37 +00:00
c4baf876d4 Hadrons: graph consistency check 2018-03-02 18:40:18 +00:00
2f4dac3531 Hadrons: legal update 2018-03-02 18:10:58 +00:00
3ec6890850 Merge branch 'feature/hadrons' of github.com:paboyle/Grid into feature/hadrons 2018-03-02 17:56:08 +00:00
018801d973 Hadrons: legal update 2018-03-02 17:56:00 +00:00
1d83521daa Hadrons: scalar SU(N) EMT 2018-03-02 17:55:18 +00:00
fc5670c6a4 Merge pull request #151 from guelpers/feature/hadrons
Feature/hadrons
2018-03-02 17:54:43 +00:00
d9c435e282 Hadrons: Scalar SU(N) transverse projection module 2018-03-02 17:35:12 +00:00
614a0e8277 Hadrons: Scalar SU(N) utility functions 2018-03-02 17:34:23 +00:00
Vera Guelpers
aaf39222c3 update my fork and fixed conflicts 2018-03-02 17:08:08 +00:00
550142bd6a Hadrons: more code cleaning 2018-03-02 14:30:45 +00:00
c0a929aef7 Hadrons: code cleaning 2018-03-02 14:29:54 +00:00
37fe944224 Hadrons: scalar kinetic term 2018-03-02 14:14:11 +00:00
Vera Guelpers
315a42843f changes requested for the pull request 2018-03-02 11:47:38 +00:00
83a101db83 Hadrons: more LCL fixes 2018-03-02 11:05:02 +00:00
c4274e1660 Hadrons: LCL cleaning 2018-03-02 10:18:33 +00:00
ba6db55cb0 Hadrons: reverse last commit 2018-03-01 23:30:58 +00:00
e5ea84d531 Hadrons: LCL: orthogonalise coarse evec 2018-03-01 19:33:11 +00:00
15767a1491 Hadrons: LCL fine convergence test 2018-03-01 18:04:08 +00:00
4d2a32ae7a Hadrons: z-Mobius message fix 2018-03-01 18:03:44 +00:00
5b937e3644 Hadrons: VM memory profiling fix 2018-03-01 17:28:38 +00:00
e418b044f7 Hadrons: code cleaning 2018-03-01 12:57:28 +00:00
b8b05f143f Hadrons: Lanczos more conservative type names 2018-03-01 12:53:16 +00:00
6ec42b4b82 LCL: external storage fix 2018-03-01 12:27:29 +00:00
abb7d4d2f5 Hadrons: z-Mobius action 2018-02-27 19:32:19 +00:00
16ebbfff29 Hadrons: Schur convention globally defined through a macro 2018-02-27 18:45:23 +00:00
4828226095 Hadrons: prettier log 2018-02-27 14:43:51 +00:00
8a049f27b8 Hadrons: Lanczos code improvement 2018-02-27 13:46:59 +00:00
43578a3eb4 Hadrons: copyright update 2018-02-26 19:24:19 +00:00
fdbd42e542 Hadrons: first implementation of local coherence Lanczos 2018-02-26 19:22:43 +00:00
e7e4cee4f3 Merge branch 'develop' into feature/hadrons 2018-02-26 15:05:05 +00:00
James Harrison
ec3954ff5f QedFVol: Add input parameter G(x=0) for infinite-volume photon 2018-02-23 14:53:05 +00:00
Azusa Yamaguchi
0f468e2179 OverlappedComm for Staggered 5D and 4D. 2018-02-22 12:50:09 +00:00
James Harrison
8e61286741 Merge branch 'develop' into feature/qed-fvol 2018-02-20 15:33:35 +00:00
James Harrison
69e4ecc1d2 QedFVol: Fix single precision build error 2018-02-14 17:37:18 +00:00
James Harrison
5f483df16b Merge branch 'develop' into feature/qed-fvol 2018-02-14 16:35:04 +00:00
James Harrison
4680a977c3 QedFVol: set infinite-volume photon propagator to 1 at x=0,
so that momentum-spage photon propagator is non-negative.
Need to check whether this is sufficient for all volumes.
2018-02-14 16:30:09 +00:00
Vera Guelpers
de42456171 updated my fork and conflicts fixed 2018-02-14 13:57:56 +00:00
Vera Guelpers
d55212c998 restructure SeqConservedCurrent for DWF to need less memory 2018-02-14 10:45:18 +00:00
Vera Guelpers
c6e1f64573 Test for QED 2018-02-13 09:30:23 +00:00
James Harrison
724cf02d4a QedFVol: Implement infinite-volume photon 2018-02-12 17:18:10 +00:00
Vera Guelpers
49a0ae73eb Insertion of photon field in seqential conserved current 2018-02-12 09:36:08 +00:00
Daniel Richtmann
6ab60c5b70 Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-02-08 23:59:07 +01:00
Daniel Richtmann
8c692b7ffd WilsonMG: Comment assertion on hermiticity of coarse operator for now
TODO: Think of a way to not break dwf_hdcr by doing that. It's only an assertion
but it still interferes with it.
2018-02-08 23:55:05 +01:00
Daniel Richtmann
2976132bdd Add first version of multigrid for wilson clover analogous to wilson one
Just like the wilson one, this algorithm

• is currently only a 2-level method since I don't have correct implementations
  for Mdir and Mdiag in CoarsenedMatrix yet (needed for further coarsening)
• needs levelization and refactoring into a proper algorithm
2018-02-08 23:52:10 +01:00
Daniel Richtmann
48177f2f2d Add tests for all MR|GMRES solvers with wilson clover action 2018-02-08 23:52:09 +01:00
Daniel Richtmann
c4ce70a821 WilsonMG: Major cleanup 2018-02-08 23:52:08 +01:00
James Harrison
315f1146cd QedFVol: Fix output of VPCounterTerms module. 2018-02-08 20:40:45 +00:00
Daniel Richtmann
a3e009ba54 Add tests for CAGMRES solvers with staggered action 2018-02-08 17:46:28 +01:00
Daniel Richtmann
eb7cf239d9 Print warning messages in CAGMRES solvers
Currently, the implementation of these algorithms doesn't differ from their non
communication-avoiding versions.
2018-02-08 17:43:47 +01:00
Daniel Richtmann
13ae371ef8 Make solver parameters match in all MR|GMRES solver tests 2018-02-08 17:33:10 +01:00
Daniel Richtmann
9f79a87102 Fix bugs in Flexible GMRES solvers
Somehow I got the left and right-preconditioned versions of GMRES mixed up. As
of now this is right-preconditioned version, which is what we want.
2018-02-08 16:00:31 +01:00
Daniel Richtmann
4ded1ceeb0 Make GMRES solvers perform no more than MaxIterations steps
I noticed that it was possible to overrun this number.
2018-02-08 15:29:44 +01:00
James Harrison
9f202782c5 QedFVol: Change format of scalar VP output files, and save diagrams without charge factors for consistency with ChargedProp module. 2018-02-07 20:31:50 +00:00
Daniel Richtmann
8bc12e0ce1 Remove superfluous comments in MR solver 2018-02-07 18:09:09 +01:00
Daniel Richtmann
cc2f00f827 Remove test for MR solver with dwf action as it doesn't converge 2018-02-07 18:09:08 +01:00
Daniel Richtmann
cd61e2e6d6 Increase max iterations in test of MR solver with staggered action 2018-02-07 18:09:07 +01:00
Daniel Richtmann
323ed1a588 Add an overrelaxation parameter to the MR solver 2018-02-07 18:09:06 +01:00
Daniel Richtmann
68c66d2e4b Remove empty line in output of *Residual* solvers 2018-02-07 18:08:56 +01:00
Daniel Richtmann
1671adfd49 WilsonMG: Add some tests for linear operators 2018-02-07 17:15:22 +01:00
James Harrison
594a262dcc QedFVol: Remove redundant file Communicator_mpi.cc 2018-02-07 11:37:01 +00:00
James Harrison
7f8ca54285 Merge branch 'develop' into feature/qed-fvol 2018-02-07 10:11:00 +00:00
James Harrison
c5b23c367e QedFVol: Fix segmentation fault when multiple propagator modules are used. 2018-02-05 11:46:33 +00:00
Vera Guelpers
b6fe03eb26 BugFix: Now the stochatic EM potential weight is generated when calling for the first time 2018-02-02 15:29:38 +00:00
James Harrison
f37ed4958b Implement IR improvement, with coefficients set in input file. 2018-02-02 11:56:51 +00:00
James Harrison
5f85473d6b QedFVol: Move Projection class into Result class 2018-02-01 16:16:13 +00:00
Daniel Richtmann
871649238c WilsonMG: Stricter naming for linear operators 2018-02-01 14:43:08 +01:00
James Harrison
ac3b0ebc58 QedFVol: New structure for ChargedProp output files 2018-02-01 12:31:32 +00:00
Daniel Richtmann
7c86d2085b WilsonMG: Some minor cleanup 2018-02-01 12:24:16 +01:00
Daniel Richtmann
9292be0b69 WilsonMG: Add check for Mdiag + Σ Mdir == M
Need to test my implementations of CoarsenedMatrix::Mdiag &
CoarsenedMatrix::Mdir.
2018-01-31 14:03:30 +01:00
Daniel Richtmann
10141f90c9 WilsonMG: Rename test file 2018-01-30 10:25:09 +01:00
Daniel Richtmann
a414430817 Merge remote-tracking branch 'upstream/develop' into feature/ddalphaamg 2018-01-29 18:32:31 +01:00
Daniel Richtmann
f20728baa9 WilsonMG: Some further steps towards a three level method
Currently this is very "manual" as we are still testing stuff. Will refactor
and make it an algorithm once everything works.

What currently does work:

  - All tests in MultiGridPreconditioner::runChecks for the first coarse grid
  - The tests for the intergrid operators going from the first to the second
    coarse grid
    - (1 - P R) v   == 0
    - (1 - R P) v_c == 0
  - A full solve with VPGCR and a two-level MG preconditioner

What hinders the rest of the tests from passing with a three-level method is the
absence of implementations of CoarsenedMatrix::Mdir and CoarsenedMatrix::Mdiag.
2018-01-29 18:29:49 +01:00
Daniel Richtmann
d2e68c4355 WilsonMG: Perform some minor cleanup 2018-01-29 18:07:10 +01:00
Daniel Richtmann
1cb745c8dc Add function to return full type as std::string
Also works for nested templates. I find it useful for debugging.
Possible usage:

std::cout << "getTypename<AType>() = " << getTypename<Atype>() << std::endl;
std::cout << "getTypename<decltype(AnInstance)>() = " << getTypename<decltype(AnInstance)>() << std::endl;
2018-01-29 17:39:19 +01:00
Daniel Richtmann
faf4278019 Use 2 passes of GS in coarse operator construction 2018-01-29 17:21:42 +01:00
Daniel Richtmann
194e4b94bb Make MG checking function work level-wise 2018-01-29 17:18:20 +01:00
Daniel Richtmann
bfc1411c1f Use more iterations in subspace creation 2018-01-29 17:11:29 +01:00
Daniel Richtmann
161637e573 Turn on orthogonality checking temporarily 2018-01-29 17:10:05 +01:00
James Harrison
4e0cf0cc28 QedFVol: Fix bug in ScalarVP.cc due to double use of temporary object. Still getting mpi3 errors when configured with enable-comms=mpi[-auto]. 2018-01-27 15:15:25 +00:00
James Harrison
cdf550845f QedFVol: Fix bugs in StochEm.cc and ChargedProp.cc (still only works without MPI). 2018-01-26 21:25:20 +00:00
James Harrison
3db7a5387b BROKEN: Adapted scalarVP, UnitEm and VPCounterTerms modules to new Hadrons. Currently getting an assertion error from Communicator_mpi3.cc when I try to run. 2018-01-26 16:33:48 +00:00
James Harrison
90dffc73c8 Merge branch 'feature/hadrons' into feature/qed-fvol
# Conflicts:
#	extras/Hadrons/Modules.hpp
#	extras/Hadrons/Modules/MGauge/StochEm.cc
#	extras/Hadrons/Modules/MScalar/ChargedProp.cc
#	extras/Hadrons/Modules/MScalar/ChargedProp.hpp
#	extras/Hadrons/modules.inc
#	lib/communicator/Communicator_mpi.cc
2018-01-24 16:41:44 +00:00
a1151fc734 Hadrons: MPI-safe serial IO 2018-01-23 17:26:50 +00:00
James Harrison
ab3baeb38f Implement contractions and data output in functions; calculate diagrams S, X and 4C separately; output 2E and 2T instead of sunset_shifted, sunset_unshifted, tadpole_shifted, tadpole_unshifted; add comments. 2018-01-23 17:07:45 +00:00
Vera Guelpers
389731d373 changed SeqConservedSummed.hpp to work with new hadrons interface 2018-01-23 10:11:33 +00:00
Daniel Richtmann
04f92ccddf WilsonMG: Provide a fix for the previous commit; compiles and runs successfully now
I don't like the solution with the temporary very much though ...
2018-01-22 14:56:48 +01:00
Daniel Richtmann
3b2d805398 WilsonMG: Some first steps towards coarse spin dofs; not compiling yet
A failing conversion from the innermost type (Grid::Simd<...>) to a coarse
scalar (triple iScalar) in blockPromote prohibits this commit from working.
2018-01-22 12:45:51 +01:00
Azusa Yamaguchi
97b9c6f03d No option for interior/exterior split of asm kernels since different directions get interleaved 2018-01-22 11:04:19 +00:00
Azusa Yamaguchi
63982819c6 No option to overlap comms and compute for asm implementation since different directions are interleaved
in the kernels, introducing if else structure would be too painful
2018-01-22 11:03:39 +00:00
Vera Guelpers
6fec507bef merged new hadrons interface 2018-01-22 10:09:20 +00:00
James Harrison
219b3bd34f Remove freeVpTensor object 2018-01-19 17:14:11 +00:00
Daniel Richtmann
9dc885d297 Fix a bug in Wilson MG
The calculation of the lattice size of a second coarse level was incorrect.
2018-01-18 17:02:04 +01:00
Daniel Richtmann
a70c1feecc Remove some unnecessary stuff in Wilson MG 2018-01-18 15:48:28 +01:00
Daniel Richtmann
38328100c9 Implement correctness checks for Wilson MG 2018-01-18 15:43:15 +01:00
Daniel Richtmann
9732519c41 Apply clang-format to Wilson MG
I can provide the configuration file I used if people want that.
2018-01-18 15:14:37 +01:00
Daniel Richtmann
fa4eeb28c4 Save current state in Wilson MG test file 2018-01-17 17:56:34 +01:00
Daniel Richtmann
10f7a17ae4 Make timing in VPGCR more detailed 2018-01-11 13:42:18 +01:00
Daniel Richtmann
26f14d7dd7 Adapt output format of non-herm solvers to the one of VPGCR 2018-01-11 13:36:30 +01:00
Azusa Yamaguchi
24162c9ead Staggered overlap comms comput 2018-01-09 13:02:52 +00:00
Daniel Richtmann
73434db636 Merge remote-tracking branch 'upstream/develop' into feature/ddalphaamg 2018-01-09 10:43:33 +01:00
Daniel Richtmann
c6411f8514 Merge remote-tracking branch 'upstream/develop' into feature/ddalphaamg 2018-01-08 10:37:10 +01:00
Daniel Richtmann
6cf635d61c Remove some old code in Wilson MG 2017-12-22 13:20:09 +01:00
Daniel Richtmann
39558cce52 Multiply TVs in Wilson MG with G5 instead of G5R5 2017-12-22 13:07:56 +01:00
Vera Guelpers
935cd1e173 conserved current insertion summed over Lorentzindex 2017-12-22 11:38:45 +00:00
Vera Guelpers
55e39df30f tadpole insertion for DWF 2017-12-22 11:36:31 +00:00
James Harrison
581be32ed2 Implement infrared improvement for v=0 on-shell self-energy 2017-12-14 13:42:41 +00:00
James Harrison
6bc136b1d0 Add module for calculating diagrams required for HVP counter-terms 2017-12-13 17:31:01 +00:00
Daniel Richtmann
df152648d6 Fix error in MR code when compiling for single precision 2017-12-06 18:00:58 +01:00
Daniel Richtmann
4e965c168e Implement analogon to test vector analysis in WMG codebase 2017-11-29 15:05:27 +01:00
Daniel Richtmann
f260af546e Save current state 2017-11-28 15:03:02 +01:00
Daniel Richtmann
649b8c9aca Save current state 2017-11-24 10:46:20 +01:00
Daniel Richtmann
0afa22747d Merge remote-tracking branch 'upstream/develop' into feature/new-solver-algorithms 2017-11-24 10:11:42 +01:00
Daniel Richtmann
fa43206c79 Remove some empty lines 2017-11-10 13:48:38 +01:00
Daniel Richtmann
a367835bf2 Set everything up for the implementation of FCAGMRES
The current implementation is the exact same code as normal FGMRES. This commit
only sets up the "framework" for the implementation of FCAGMRES, i.e., a test
and an include in the algorithms header file.
2017-11-09 17:30:41 +01:00
Daniel Richtmann
d7743591ea Fix some minor formatting errors 2017-11-09 17:28:19 +01:00
Daniel Richtmann
c6cbe533ea Set everything up for the implementation of CAGMRES
The current implementation is the exact same code as normal GMRES. This commit
only sets up the "framework" for the implementation of CAGMRES, i.e., a test and
an include in the algorithms header file.
2017-11-09 17:14:44 +01:00
Daniel Richtmann
8402ab6cf9 Some minor formatting improvements 2017-11-09 12:52:04 +01:00
Daniel Richtmann
c63095345e Remove some superfluous comments 2017-11-09 12:47:20 +01:00
Daniel Richtmann
a7ae46b61e Remove some comments 2017-11-08 16:58:20 +01:00
Daniel Richtmann
cd63052205 Remove everything preconditioner-related in GMRES code 2017-11-08 16:57:40 +01:00
Daniel Richtmann
699d537cd6 Add FGMRES test with staggered fermions 2017-11-08 16:56:42 +01:00
Daniel Richtmann
9031f0ed95 Fix a filename in a file header 2017-11-08 16:42:26 +01:00
Daniel Richtmann
26b3d441bb Check in forgotten FGMRES test with wilson Fermions 2017-11-08 16:39:11 +01:00
Daniel Richtmann
99bc4cde56 Fix an implementation error in FGMRES 2017-11-08 16:38:34 +01:00
Daniel Richtmann
e843d83d9d Make z in FGMRES a single Field 2017-11-08 16:38:16 +01:00
Daniel Richtmann
0f75ea52b7 First version of FGMRES; not working yet 2017-11-08 16:17:18 +01:00
Daniel Richtmann
8107b785cc Rename misunderstood "rsd_sq" to "rsq" in GMRES code 2017-11-08 14:40:03 +01:00
Daniel Richtmann
37b777d801 Add test for GMRES solver with staggered fermions 2017-11-08 14:28:48 +01:00
Daniel Richtmann
7382787856 Some minor changes 2017-11-08 14:23:55 +01:00
Daniel Richtmann
781c611ca0 Perform minor code style fix 2017-11-08 14:22:38 +01:00
Daniel Richtmann
b069090b52 Remove a superfluous comment 2017-11-08 13:58:02 +01:00
Daniel Richtmann
0c1c1d9900 Set precision and formatting only once in MR code 2017-11-08 13:57:06 +01:00
Daniel Richtmann
7f4ed6c2e5 First working version of GMRES + a test for Wilson fermions 2017-11-08 13:56:41 +01:00
Daniel Richtmann
56d32a4afb Rename misunderstood "rsd_sq" to "rsq" in MR code 2017-11-08 13:51:08 +01:00
Daniel Richtmann
b8ee496ed6 Print some info at start of GMRES 2017-11-08 13:23:41 +01:00
James Harrison
0c668bf46a QedFVol: Write to output files from one process only. 2017-11-07 14:46:39 +00:00
Daniel Richtmann
b87416dac4 Fix error with conformable 2017-11-07 15:00:08 +01:00
Daniel Richtmann
176bf37372 Remove some commented stuff 2017-11-07 14:57:36 +01:00
Daniel Richtmann
b3d342ca22 Remove old implementation of GMRES operator 2017-11-07 10:24:49 +01:00
Daniel Richtmann
e1f928398d Save current state 2017-11-07 10:22:41 +01:00
Daniel Richtmann
8c579d2d4a Save current state 2017-11-06 18:09:48 +01:00
James Harrison
840814c776 QedFVol: Patch to fix MPI communicators error 2017-11-06 16:34:55 +00:00
Daniel Richtmann
fc7d07ade0 Correct function signature of body of GMRES outer loop 2017-11-06 17:12:38 +01:00
Daniel Richtmann
b3be9195b4 Save one lattice fermion in GMRES code 2017-11-06 17:12:23 +01:00
Daniel Richtmann
9e3c187a4d Save current state 2017-11-06 17:05:25 +01:00
Daniel Richtmann
8363edfcdb Perform some minor changes to GMRES code 2017-11-06 16:17:44 +01:00
Daniel Richtmann
74af31564f Adapt style of wilson GMRES test to style of wilson MR test 2017-11-06 14:06:45 +01:00
Daniel Richtmann
e0819d395f Merge remote-tracking branch 'upstream/develop' into feature/new-solver-algorithms 2017-11-06 13:09:36 +01:00
James Harrison
95af55128e QedFVol: Redo optimisation of scalar VP (extra memory requirements were not the problem), and undo optimisation of charged propagator (which seemed to be causing HDF5 errors, although I don’t know why). 2017-11-03 18:46:16 +00:00
James Harrison
9f2a57e334 QedFVol: Undo optimisation of scalar VP, to reduce memory requirements 2017-11-03 13:10:11 +00:00
James Harrison
c645d33db5 QedFVol: Redo optimisation of charged propagator, and fix I/O bug 2017-11-03 10:59:26 +00:00
James Harrison
e0f1349524 QedFVol: Undo optimisation of charged propagator 2017-11-03 09:22:41 +00:00
Daniel Richtmann
6f81906b00 Add test for the MR solver with staggered fermions; does not converge atm
TODO: Is this a property of staggered or did I do something wrong?
2017-10-30 16:57:55 +01:00
James Harrison
79b761f923 Merge branch 'develop' into feature/qed-fvol
# Conflicts:
#	lib/communicator/Communicator_base.cc
2017-10-30 15:53:18 +00:00
James Harrison
0d4e31ca58 QedFVol: Calculate phase factors for momentum projections once per configuration only. 2017-10-30 15:46:50 +00:00
Daniel Richtmann
a2d83d4f3d Add test for the MR solver with DW fermions; does not converge atm
TODO: Is this a property of DWF or did I do something wrong?
2017-10-30 16:39:30 +01:00
Daniel Richtmann
89bacb0470 Fix path in MR solver header commentary 2017-10-30 16:16:55 +01:00
James Harrison
b07a354a33 QedFVol: output scalar propagator before FFT in spatial directions. 2017-10-30 14:20:44 +00:00
Daniel Richtmann
19010ff66a Merge remote-tracking branch 'upstream/develop' into feature/new-solver-algorithms 2017-10-30 13:16:46 +01:00
Daniel Richtmann
5a477ed29e Perform minor style correction 2017-10-27 14:46:18 +02:00
Daniel Richtmann
54128d579a Make MR a bit more verbose 2017-10-27 14:45:29 +02:00
Daniel Richtmann
e7b1933e88 Add a test for the MR solver 2017-10-27 14:38:57 +02:00
Daniel Richtmann
1bad64ac6a Some formatting 2017-10-27 14:35:04 +02:00
Daniel Richtmann
15dfa9f663 Change stopping criterion implementation in MR solver + some cleanup 2017-10-27 14:33:25 +02:00
Daniel Richtmann
2185b0d651 Correct author in the file 2017-10-27 14:32:38 +02:00
Daniel Richtmann
f61c0b5d03 Very early version of MR solver 2017-10-27 14:09:02 +02:00
Daniel Richtmann
074db32e54 Fix build of gmres test 2017-10-27 14:08:48 +02:00
Daniel Richtmann
d5f661ba70 Save intermediate state 2017-10-25 10:38:26 +02:00
Daniel Richtmann
1ab8d5cc13 Save two more files 2017-10-24 16:58:05 +02:00
Daniel Richtmann
789e892865 Save current state 2017-10-24 16:58:04 +02:00
Daniel Richtmann
53cfa44d7a Save current state 2017-10-24 16:58:03 +02:00
James Harrison
c433939795 QedFVol: Temporarily remove incomplete implementation of infinite-volume photon 2017-10-20 16:27:58 +01:00
James Harrison
b6a4c31b48 Merge branch 'feature/qed-fvol' of https://github.com/jch1g10/Grid into feature/qed-fvol 2017-10-20 16:25:07 +01:00
James Harrison
98b1439ff9 QedFVol: pass arbitrary input values to photon constructor in UnitEm 2017-10-20 16:24:09 +01:00
James Harrison
564738b1ff Add module for unit EM field 2017-10-17 14:03:57 +01:00
James Harrison
a80e43dbcf Added infinite-volume photon in Photon.h (not checked yet) 2017-10-11 16:44:51 -04:00
James Harrison
b99622d9fb QedFVol: fix problem with JSON wanting gcc 4.9 2017-09-28 13:34:33 -04:00
937c77ead2 Merge branch 'develop' into feature/qed-fvol 2017-09-28 16:25:20 +01:00
95e5a2ade3 Merge pull request #116 from jch1g10/feature/qed-fvol
Feature/qed fvol
2017-09-25 15:08:33 +01:00
James Harrison
91676d1dda Fix “MAP_ANONYMOUS undefined” error on OSX. 2017-09-01 15:48:30 +01:00
James Harrison
ac3611bb19 Merge branch 'develop' of https://github.com/paboyle/Grid into feature/qed-fvol 2017-08-29 11:53:37 +01:00
James Harrison
cc4afb978d Fix bug in non-zero momentum projection 2017-08-24 17:31:44 +01:00
James Harrison
20e92a7009 QedVFol: Allow output of scalar propagator and vacuum polarisation projected to arbitrary lattice momentum, not just zero-momentum. 2017-06-12 18:27:32 +01:00
James Harrison
42f0afcbfa QedFVol: Output all scalar VP diagrams separately 2017-06-09 18:08:40 +01:00
James Harrison
20ac13fdf3 QedFVol: add ChargedProp as an input to ScalarVP module, instead of calculating scalar propagator within ScalarVP. 2017-06-08 17:43:39 +01:00
James Harrison
e38612e6fa QedFVol: Update ScalarVP module for compatibility with new scalar action 2017-06-07 17:42:00 +01:00
James Harrison
c2b2b71c5d Merge branch 'feature/qed-fvol' of https://github.com/paboyle/Grid into feature/qed-fvol
# Conflicts:
#	extras/Hadrons/Modules.hpp
#	extras/Hadrons/modules.inc
2017-06-07 16:59:47 +01:00
James Harrison
009f48a904 QedFVol: Add missing factor of 2 in free vacuum polarisation 2017-06-07 16:34:09 +01:00
James Harrison
5cfc0180aa QedFVol: Output free VP along with charged VP. 2017-05-09 12:46:57 +01:00
James Harrison
914f180fa3 QedFVol: Implement exact O(alpha) vacuum polarisation. 2017-05-09 11:46:25 +01:00
James Harrison
6cb563a40c QedFVol: Access HVP tensor using a vector<vector<ScalarField>> instead of vector<vector<ScalarField*>> 2017-05-05 17:12:41 +01:00
James Harrison
db3837be22 QedFVol: Change “double” to “Real” in ScalarVP.cc 2017-05-03 13:26:49 +01:00
James Harrison
2f0dd83016 Calculate HVP using a single contraction of O(alpha) charged propagators. 2017-05-03 12:53:41 +01:00
James Harrison
3ac27e5596 QedFVol: remove unnecessary copies of free propagator from shifted sources in ScalarVP 2017-04-27 14:17:50 +01:00
James Harrison
bd466a55a8 QedFVol: remove charge dependence in chargedProp function of ScalarVP 2017-04-25 10:04:03 +01:00
James Harrison
c8e6f58e24 Fix typos in ScalarVP 2017-04-13 17:04:37 +01:00
James Harrison
888988ad37 Merge branch 'feature/qed-fvol' of https://github.com/paboyle/Grid into feature/qed-fvol
# Conflicts:
#	lib/qcd/action/fermion/Fermion.h
2017-04-13 15:54:40 +01:00
James Harrison
e4a105a30b Merge branch 'feature/qed-fvol' of https://github.com/paboyle/Grid into feature/qed-fvol 2017-04-10 16:35:01 +01:00
James Harrison
26ebe41fef QedFVol: Implement charged propagator calculation within ScalarVP module 2017-04-10 16:33:54 +01:00
1e496fee74 Merge branch 'develop' into feature/qed-fvol
# Conflicts:
#	lib/qcd/action/fermion/Fermion.h
2017-04-03 19:02:57 +01:00
James Harrison
9f755e0379 Add functions momD1 and momD2 to ScalarVP 2017-03-27 16:49:18 +01:00
James Harrison
4512dbdf58 Rename module ScalarFV to ScalarVP 2017-03-27 15:02:16 +01:00
James Harrison
483fd3cfa1 Add propagator expansion terms as inputs to ScalarFV 2017-03-27 13:24:51 +01:00
James Harrison
85516e9c7c Output all terms of scalar propagator separately 2017-03-24 17:13:55 +00:00
James Harrison
0c006fbfaa Add ScalarFV inputs to ScalarFV.hpp 2017-03-24 11:59:09 +00:00
James Harrison
54c10a42cc Add source and emField inputs to ScalarFV module 2017-03-24 11:42:32 +00:00
James Harrison
ef0fe2bcc1 Added empty ScalarFV module 2017-03-21 11:39:46 +00:00
630 changed files with 42678 additions and 9028 deletions

26
.gitignore vendored
View File

@@ -83,6 +83,7 @@ ltmain.sh
.Trashes .Trashes
ehthumbs.db ehthumbs.db
Thumbs.db Thumbs.db
.dirstamp
# build directory # # build directory #
################### ###################
@@ -97,11 +98,8 @@ build.sh
# Eigen source # # Eigen source #
################ ################
lib/Eigen/* Grid/Eigen
Eigen/*
# FFTW source #
################
lib/fftw/*
# libtool macros # # libtool macros #
################## ##################
@@ -112,21 +110,7 @@ m4/libtool.m4
################ ################
gh-pages/ gh-pages/
# Buck files #
##############
.buck*
buck-out
BUCK
make-bin-BUCK.sh
# generated sources # # generated sources #
##################### #####################
lib/qcd/spin/gamma-gen/*.h Grid/qcd/spin/gamma-gen/*.h
lib/qcd/spin/gamma-gen/*.cc Grid/qcd/spin/gamma-gen/*.cc
lib/version.h
# vs code editor files #
########################
.vscode/
.vscode/settings.json
settings.json

View File

@@ -9,6 +9,11 @@ matrix:
- os: osx - os: osx
osx_image: xcode8.3 osx_image: xcode8.3
compiler: clang compiler: clang
env: PREC=single
- os: osx
osx_image: xcode8.3
compiler: clang
env: PREC=double
before_install: before_install:
- export GRIDDIR=`pwd` - export GRIDDIR=`pwd`
@@ -16,9 +21,11 @@ before_install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc openssl; fi
install: install:
- export CWD=`pwd`
- echo $CWD
- export CC=$CC$VERSION - export CC=$CC$VERSION
- export CXX=$CXX$VERSION - export CXX=$CXX$VERSION
- echo $PATH - echo $PATH
@@ -31,17 +38,24 @@ install:
- which $CXX - which $CXX
- $CXX --version - $CXX --version
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export EXTRACONF='--with-openssl=/usr/local/opt/openssl'; fi
script: script:
- ./bootstrap.sh - ./bootstrap.sh
- mkdir build - mkdir build
- cd build - cd build
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none - mkdir lime
- cd lime
- mkdir build
- cd build
- wget http://usqcd-software.github.io/downloads/c-lime/lime-1.3.2.tar.gz
- tar xf lime-1.3.2.tar.gz
- cd lime-1.3.2
- ./configure --prefix=$CWD/build/lime/install
- make -j4 - make -j4
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals - make install
- echo make clean - cd $CWD/build
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none - ../configure --enable-precision=$PREC --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF}
- make -j4 - make -j4
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
- make check - make check

View File

@@ -48,6 +48,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/serialisation/Serialisation.h> #include <Grid/serialisation/Serialisation.h>
#include <Grid/threads/Threads.h> #include <Grid/threads/Threads.h>
#include <Grid/util/Util.h> #include <Grid/util/Util.h>
#include <Grid/util/Sha.h>
#include <Grid/communicator/Communicator.h> #include <Grid/communicator/Communicator.h>
#include <Grid/cartesian/Cartesian.h> #include <Grid/cartesian/Cartesian.h>
#include <Grid/tensors/Tensors.h> #include <Grid/tensors/Tensors.h>

View File

@@ -1,4 +1,9 @@
#pragma once #pragma once
// Force Eigen to use MKL if Grid has been configured with --enable-mkl
#ifdef USE_MKL
#define EIGEN_USE_MKL_ALL
#endif
#if defined __GNUC__ #if defined __GNUC__
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations" #pragma GCC diagnostic ignored "-Wdeprecated-declarations"

View File

@@ -21,6 +21,32 @@ if BUILD_HDF5
extra_headers+=serialisation/Hdf5Type.h extra_headers+=serialisation/Hdf5Type.h
endif endif
all: version-cache
version-cache:
@if [ `git status --porcelain | grep -v '??' | wc -l` -gt 0 ]; then\
a="uncommited changes";\
else\
a="clean";\
fi;\
echo "`git log -n 1 --format=format:"#define GITHASH \\"%H:%d $$a\\"%n" HEAD`" > vertmp;\
if [ -e version-cache ]; then\
d=`diff vertmp version-cache`;\
if [ "$${d}" != "" ]; then\
mv vertmp version-cache;\
rm -f Version.h;\
fi;\
else\
mv vertmp version-cache;\
rm -f Version.h;\
fi;\
rm -f vertmp
Version.h:
cp version-cache Version.h
.PHONY: version-cache
# #
# Libraries # Libraries
# #
@@ -30,8 +56,8 @@ include Eigen.inc
lib_LIBRARIES = libGrid.a lib_LIBRARIES = libGrid.a
CCFILES += $(extra_sources) CCFILES += $(extra_sources)
HFILES += $(extra_headers) HFILES += $(extra_headers) Config.h Version.h
libGrid_a_SOURCES = $(CCFILES) libGrid_a_SOURCES = $(CCFILES)
libGrid_adir = $(pkgincludedir) libGrid_adir = $(includedir)/Grid
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) $(eigen_unsupp_files)

View File

@@ -48,6 +48,12 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h> #include <Grid/algorithms/iterative/BlockConjugateGradient.h>
#include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h> #include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
#include <Grid/algorithms/iterative/MinimalResidual.h>
#include <Grid/algorithms/iterative/GeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/CoarsenedMatrix.h> #include <Grid/algorithms/CoarsenedMatrix.h>
#include <Grid/algorithms/FFT.h> #include <Grid/algorithms/FFT.h>

View File

@@ -211,6 +211,7 @@ namespace Grid {
for(int b=0;b<nn;b++){ for(int b=0;b<nn;b++){
subspace[b] = zero;
gaussian(RNG,noise); gaussian(RNG,noise);
scale = std::pow(norm2(noise),-0.5); scale = std::pow(norm2(noise),-0.5);
noise=noise*scale; noise=noise*scale;
@@ -296,12 +297,57 @@ namespace Grid {
}; };
RealD Mdag (const CoarseVector &in, CoarseVector &out){ RealD Mdag (const CoarseVector &in, CoarseVector &out){
return M(in,out); // // corresponds to Petrov-Galerkin coarsening
// return M(in,out);
// corresponds to Galerkin coarsening
CoarseVector tmp(Grid());
G5C(tmp, in);
M(tmp, out);
G5C(out, out);
return norm2(out);
}; };
// Defer support for further coarsening for now void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
void Mdiag (const CoarseVector &in, CoarseVector &out){};
void Mdir (const CoarseVector &in, CoarseVector &out,int dir, int disp){}; conformable(_grid,in._grid);
conformable(in._grid,out._grid);
SimpleCompressor<siteVector> compressor;
Stencil.HaloExchange(in,compressor);
auto point = [dir, disp](){
if(dir == 0 and disp == 0)
return 8;
else
return (4 * dir + 1 - disp) / 2;
}();
parallel_for(int ss=0;ss<Grid()->oSites();ss++){
siteVector res = zero;
siteVector nbr;
int ptype;
StencilEntry *SE;
SE=Stencil.GetEntry(ptype,point,ss);
if(SE->_is_local&&SE->_permute) {
permute(nbr,in._odata[SE->_offset],ptype);
} else if(SE->_is_local) {
nbr = in._odata[SE->_offset];
} else {
nbr = Stencil.CommBuf()[SE->_offset];
}
res = res + A[point]._odata[ss]*nbr;
vstream(out._odata[ss],res);
}
};
void Mdiag(const CoarseVector &in, CoarseVector &out){
Mdir(in, out, 0, 0); // use the self coupling (= last) point of the stencil
};
CoarsenedMatrix(GridCartesian &CoarseGrid) : CoarsenedMatrix(GridCartesian &CoarseGrid) :
@@ -417,7 +463,7 @@ namespace Grid {
std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl; std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
#endif #endif
// ForceHermitian(); // ForceHermitian();
AssertHermitian(); // AssertHermitian();
// ForceDiagonal(); // ForceDiagonal();
} }
void ForceDiagonal(void) { void ForceDiagonal(void) {

View File

@@ -51,7 +51,7 @@ namespace Grid {
virtual void Op (const Field &in, Field &out) = 0; // Abstract base virtual void Op (const Field &in, Field &out) = 0; // Abstract base
virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0; virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) = 0;
virtual void HermOp(const Field &in, Field &out)=0; virtual void HermOp(const Field &in, Field &out)=0;
}; };
@@ -309,36 +309,59 @@ namespace Grid {
class SchurStaggeredOperator : public SchurOperatorBase<Field> { class SchurStaggeredOperator : public SchurOperatorBase<Field> {
protected: protected:
Matrix &_Mat; Matrix &_Mat;
Field tmp;
RealD mass;
double tMpc;
double tIP;
double tMeo;
double taxpby_norm;
uint64_t ncall;
public: public:
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){}; void Report(void)
{
std::cout << GridLogMessage << " HermOpAndNorm.Mpc "<< tMpc/ncall<<" usec "<<std::endl;
std::cout << GridLogMessage << " HermOpAndNorm.IP "<< tIP /ncall<<" usec "<<std::endl;
std::cout << GridLogMessage << " Mpc.MeoMoe "<< tMeo/ncall<<" usec "<<std::endl;
std::cout << GridLogMessage << " Mpc.axpby_norm "<< taxpby_norm/ncall<<" usec "<<std::endl;
}
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())
{
assert( _Mat.isTrivialEE() );
mass = _Mat.Mass();
tMpc=0;
tIP =0;
tMeo=0;
taxpby_norm=0;
ncall=0;
}
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
GridLogIterative.TimingMode(1); ncall++;
std::cout << GridLogIterative << " HermOpAndNorm "<<std::endl; tMpc-=usecond();
n2 = Mpc(in,out); n2 = Mpc(in,out);
std::cout << GridLogIterative << " HermOpAndNorm.Mpc "<<std::endl; tMpc+=usecond();
tIP-=usecond();
ComplexD dot= innerProduct(in,out); ComplexD dot= innerProduct(in,out);
std::cout << GridLogIterative << " HermOpAndNorm.innerProduct "<<std::endl; tIP+=usecond();
n1 = real(dot); n1 = real(dot);
} }
virtual void HermOp(const Field &in, Field &out){ virtual void HermOp(const Field &in, Field &out){
std::cout << GridLogIterative << " HermOp "<<std::endl; ncall++;
Mpc(in,out); tMpc-=usecond();
_Mat.Meooe(in,out);
_Mat.Meooe(out,tmp);
tMpc+=usecond();
taxpby_norm-=usecond();
axpby(out,-1.0,mass*mass,tmp,in);
taxpby_norm+=usecond();
} }
virtual RealD Mpc (const Field &in, Field &out) { virtual RealD Mpc (const Field &in, Field &out) {
Field tmp(in._grid); tMeo-=usecond();
Field tmp2(in._grid);
std::cout << GridLogIterative << " HermOp.Mpc "<<std::endl;
_Mat.Mooee(in,out);
_Mat.Mooee(out,tmp);
std::cout << GridLogIterative << " HermOp.MooeeMooee "<<std::endl;
_Mat.Meooe(in,out); _Mat.Meooe(in,out);
_Mat.Meooe(out,tmp2); _Mat.Meooe(out,tmp);
std::cout << GridLogIterative << " HermOp.MeooeMeooe "<<std::endl; tMeo+=usecond();
taxpby_norm-=usecond();
RealD nn=axpy_norm(out,-1.0,tmp2,tmp); RealD nn=axpby_norm(out,-1.0,mass*mass,tmp,in);
std::cout << GridLogIterative << " HermOp.axpy_norm "<<std::endl; taxpby_norm+=usecond();
return nn; return nn;
} }
virtual RealD MpcDag (const Field &in, Field &out){ virtual RealD MpcDag (const Field &in, Field &out){
@@ -357,6 +380,12 @@ namespace Grid {
template<class Field> class OperatorFunction { template<class Field> class OperatorFunction {
public: public:
virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0; virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
virtual void operator() (LinearOperatorBase<Field> &Linop, const std::vector<Field> &in,std::vector<Field> &out) {
assert(in.size()==out.size());
for(int k=0;k<in.size();k++){
(*this)(Linop,in[k],out[k]);
}
};
}; };
template<class Field> class LinearFunction { template<class Field> class LinearFunction {
@@ -398,7 +427,7 @@ namespace Grid {
// Hermitian operator Linear function and operator function // Hermitian operator Linear function and operator function
//////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> template<class Field>
class HermOpOperatorFunction : public OperatorFunction<Field> { class HermOpOperatorFunction : public OperatorFunction<Field> {
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
Linop.HermOp(in,out); Linop.HermOp(in,out);
}; };

View File

@@ -55,6 +55,14 @@ namespace Grid {
template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrixBase<Field> { template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrixBase<Field> {
public: public:
virtual GridBase *RedBlackGrid(void)=0; virtual GridBase *RedBlackGrid(void)=0;
//////////////////////////////////////////////////////////////////////
// Query the even even properties to make algorithmic decisions
//////////////////////////////////////////////////////////////////////
virtual RealD Mass(void) { return 0.0; };
virtual int ConstEE(void) { return 0; }; // Disable assumptions unless overridden
virtual int isTrivialEE(void) { return 0; }; // by a derived class that knows better
// half checkerboard operaions // half checkerboard operaions
virtual void Meooe (const Field &in, Field &out)=0; virtual void Meooe (const Field &in, Field &out)=0;
virtual void Mooee (const Field &in, Field &out)=0; virtual void Mooee (const Field &in, Field &out)=0;

View File

@@ -33,7 +33,7 @@ directory
namespace Grid { namespace Grid {
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS }; enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS, BlockCGVec, BlockCGrQVec };
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// Block conjugate gradient. Dimension zero should be the block direction // Block conjugate gradient. Dimension zero should be the block direction
@@ -42,7 +42,6 @@ template <class Field>
class BlockConjugateGradient : public OperatorFunction<Field> { class BlockConjugateGradient : public OperatorFunction<Field> {
public: public:
typedef typename Field::scalar_type scomplex; typedef typename Field::scalar_type scomplex;
int blockDim ; int blockDim ;
@@ -54,21 +53,15 @@ class BlockConjugateGradient : public OperatorFunction<Field> {
RealD Tolerance; RealD Tolerance;
Integer MaxIterations; Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
Integer PrintInterval; //GridLogMessages or Iterative
BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true)
: Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv) : Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv),PrintInterval(100)
{}; {};
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
// Thin QR factorisation (google it) // Thin QR factorisation (google it)
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
void ThinQRfact (Eigen::MatrixXcd &m_rr,
Eigen::MatrixXcd &C,
Eigen::MatrixXcd &Cinv,
Field & Q,
const Field & R)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
//Dimensions //Dimensions
// R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock // R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock
@@ -85,22 +78,20 @@ void ThinQRfact (Eigen::MatrixXcd &m_rr,
// Cdag C = Rdag R ; passes. // Cdag C = Rdag R ; passes.
// QdagQ = 1 ; passes // QdagQ = 1 ; passes
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
void ThinQRfact (Eigen::MatrixXcd &m_rr,
Eigen::MatrixXcd &C,
Eigen::MatrixXcd &Cinv,
Field & Q,
const Field & R)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
sliceInnerProductMatrix(m_rr,R,R,Orthog); sliceInnerProductMatrix(m_rr,R,R,Orthog);
// Force manifest hermitian to avoid rounding related // Force manifest hermitian to avoid rounding related
m_rr = 0.5*(m_rr+m_rr.adjoint()); m_rr = 0.5*(m_rr+m_rr.adjoint());
#if 0
std::cout << " Calling Cholesky ldlt on m_rr " << m_rr <<std::endl;
Eigen::MatrixXcd L_ldlt = m_rr.ldlt().matrixL();
std::cout << " Called Cholesky ldlt on m_rr " << L_ldlt <<std::endl;
auto D_ldlt = m_rr.ldlt().vectorD();
std::cout << " Called Cholesky ldlt on m_rr " << D_ldlt <<std::endl;
#endif
// std::cout << " Calling Cholesky llt on m_rr " <<std::endl;
Eigen::MatrixXcd L = m_rr.llt().matrixL(); Eigen::MatrixXcd L = m_rr.llt().matrixL();
// std::cout << " Called Cholesky llt on m_rr " << L <<std::endl;
C = L.adjoint(); C = L.adjoint();
Cinv = C.inverse(); Cinv = C.inverse();
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -112,6 +103,25 @@ void ThinQRfact (Eigen::MatrixXcd &m_rr,
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
sliceMulMatrix(Q,Cinv,R,Orthog); sliceMulMatrix(Q,Cinv,R,Orthog);
} }
// see comments above
void ThinQRfact (Eigen::MatrixXcd &m_rr,
Eigen::MatrixXcd &C,
Eigen::MatrixXcd &Cinv,
std::vector<Field> & Q,
const std::vector<Field> & R)
{
InnerProductMatrix(m_rr,R,R);
m_rr = 0.5*(m_rr+m_rr.adjoint());
Eigen::MatrixXcd L = m_rr.llt().matrixL();
C = L.adjoint();
Cinv = C.inverse();
MulMatrix(Q,Cinv,R);
}
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
// Call one of several implementations // Call one of several implementations
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -119,14 +129,20 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{ {
if ( CGtype == BlockCGrQ ) { if ( CGtype == BlockCGrQ ) {
BlockCGrQsolve(Linop,Src,Psi); BlockCGrQsolve(Linop,Src,Psi);
} else if (CGtype == BlockCG ) {
BlockCGsolve(Linop,Src,Psi);
} else if (CGtype == CGmultiRHS ) { } else if (CGtype == CGmultiRHS ) {
CGmultiRHSsolve(Linop,Src,Psi); CGmultiRHSsolve(Linop,Src,Psi);
} else { } else {
assert(0); assert(0);
} }
} }
virtual void operator()(LinearOperatorBase<Field> &Linop, const std::vector<Field> &Src, std::vector<Field> &Psi)
{
if ( CGtype == BlockCGrQVec ) {
BlockCGrQsolveVec(Linop,Src,Psi);
} else {
assert(0);
}
}
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
// BlockCGrQ implementation: // BlockCGrQ implementation:
@@ -139,7 +155,8 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
{ {
int Orthog = blockDim; // First dimension is block dim; this is an assumption int Orthog = blockDim; // First dimension is block dim; this is an assumption
Nblock = B._grid->_fdimensions[Orthog]; Nblock = B._grid->_fdimensions[Orthog];
/* FAKE */
Nblock=8;
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
X.checkerboard = B.checkerboard; X.checkerboard = B.checkerboard;
@@ -202,15 +219,10 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl; std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl;
//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it) //1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
Linop.HermOp(X, AD); Linop.HermOp(X, AD);
tmp = B - AD; tmp = B - AD;
//std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl;
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp); ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
//std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl;
//std::cout << GridLogMessage << " m_rr " << m_rr<<std::endl;
//std::cout << GridLogMessage << " m_C " << m_C<<std::endl;
//std::cout << GridLogMessage << " m_Cinv " << m_Cinv<<std::endl;
D=Q; D=Q;
std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl; std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl;
@@ -232,14 +244,12 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
MatrixTimer.Start(); MatrixTimer.Start();
Linop.HermOp(D, Z); Linop.HermOp(D, Z);
MatrixTimer.Stop(); MatrixTimer.Stop();
//std::cout << GridLogMessage << " norm2 Z " <<norm2(Z)<<std::endl;
//4. M = [D^dag Z]^{-1} //4. M = [D^dag Z]^{-1}
sliceInnerTimer.Start(); sliceInnerTimer.Start();
sliceInnerProductMatrix(m_DZ,D,Z,Orthog); sliceInnerProductMatrix(m_DZ,D,Z,Orthog);
sliceInnerTimer.Stop(); sliceInnerTimer.Stop();
m_M = m_DZ.inverse(); m_M = m_DZ.inverse();
//std::cout << GridLogMessage << " m_DZ " <<m_DZ<<std::endl;
//5. X = X + D MC //5. X = X + D MC
m_tmp = m_M * m_C; m_tmp = m_M * m_C;
@@ -257,6 +267,7 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
//7. D = Q + D S^dag //7. D = Q + D S^dag
m_tmp = m_S.adjoint(); m_tmp = m_S.adjoint();
sliceMaddTimer.Start(); sliceMaddTimer.Start();
sliceMaddMatrix(D,m_tmp,D,Q,Orthog); sliceMaddMatrix(D,m_tmp,D,Q,Orthog);
sliceMaddTimer.Stop(); sliceMaddTimer.Stop();
@@ -317,152 +328,6 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
IterationsToComplete = k; IterationsToComplete = k;
} }
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// Block conjugate gradient; Original O'Leary Dimension zero should be the block direction
//////////////////////////////////////////////////////////////////////////
void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
Nblock = Src._grid->_fdimensions[Orthog];
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
Psi.checkerboard = Src.checkerboard;
conformable(Psi, Src);
Field P(Src);
Field AP(Src);
Field R(Src);
Eigen::MatrixXcd m_pAp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_pAp_inv= Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_rr_inv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_alpha = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_beta = Eigen::MatrixXcd::Zero(Nblock,Nblock);
// Initial residual computation & set up
std::vector<RealD> residuals(Nblock);
std::vector<RealD> ssq(Nblock);
sliceNorm(ssq,Src,Orthog);
RealD sssum=0;
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
sliceNorm(residuals,Src,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
sliceNorm(residuals,Psi,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
// Initial search dir is guess
Linop.HermOp(Psi, AP);
/************************************************************************
* Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980)
************************************************************************
* O'Leary : R = B - A X
* O'Leary : P = M R ; preconditioner M = 1
* O'Leary : alpha = PAP^{-1} RMR
* O'Leary : beta = RMR^{-1}_old RMR_new
* O'Leary : X=X+Palpha
* O'Leary : R_new=R_old-AP alpha
* O'Leary : P=MR_new+P beta
*/
R = Src - AP;
P = R;
sliceInnerProductMatrix(m_rr,R,R,Orthog);
GridStopWatch sliceInnerTimer;
GridStopWatch sliceMaddTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
RealD rrsum=0;
for(int b=0;b<Nblock;b++) rrsum+=real(m_rr(b,b));
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
<<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
MatrixTimer.Start();
Linop.HermOp(P, AP);
MatrixTimer.Stop();
// Alpha
sliceInnerTimer.Start();
sliceInnerProductMatrix(m_pAp,P,AP,Orthog);
sliceInnerTimer.Stop();
m_pAp_inv = m_pAp.inverse();
m_alpha = m_pAp_inv * m_rr ;
// Psi, R update
sliceMaddTimer.Start();
sliceMaddMatrix(Psi,m_alpha, P,Psi,Orthog); // add alpha * P to psi
sliceMaddMatrix(R ,m_alpha,AP, R,Orthog,-1.0);// sub alpha * AP to resid
sliceMaddTimer.Stop();
// Beta
m_rr_inv = m_rr.inverse();
sliceInnerTimer.Start();
sliceInnerProductMatrix(m_rr,R,R,Orthog);
sliceInnerTimer.Stop();
m_beta = m_rr_inv *m_rr;
// Search update
sliceMaddTimer.Start();
sliceMaddMatrix(AP,m_beta,P,R,Orthog);
sliceMaddTimer.Stop();
P= AP;
/*********************
* convergence monitor
*********************
*/
RealD max_resid=0;
RealD rr;
for(int b=0;b<Nblock;b++){
rr = real(m_rr(b,b))/ssq[b];
if ( rr > max_resid ) max_resid = rr;
}
if ( max_resid < Tolerance*Tolerance ) {
SolverTimer.Stop();
std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
Linop.HermOp(Psi, AP);
AP = AP-Src;
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
//////////////////////////////////////////////////////////////////////////
// multiRHS conjugate gradient. Dimension zero should be the block direction // multiRHS conjugate gradient. Dimension zero should be the block direction
// Use this for spread out across nodes // Use this for spread out across nodes
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
@@ -600,6 +465,233 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &
IterationsToComplete = k; IterationsToComplete = k;
} }
void InnerProductMatrix(Eigen::MatrixXcd &m , const std::vector<Field> &X, const std::vector<Field> &Y){
for(int b=0;b<Nblock;b++){
for(int bp=0;bp<Nblock;bp++) {
m(b,bp) = innerProduct(X[b],Y[bp]);
}}
}
void MaddMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X,const std::vector<Field> &Y,RealD scale=1.0){
// Should make this cache friendly with site outermost, parallel_for
// Deal with case AP aliases with either Y or X
std::vector<Field> tmp(Nblock,X[0]);
for(int b=0;b<Nblock;b++){
tmp[b] = Y[b];
for(int bp=0;bp<Nblock;bp++) {
tmp[b] = tmp[b] + (scale*m(bp,b))*X[bp];
}
}
for(int b=0;b<Nblock;b++){
AP[b] = tmp[b];
}
}
void MulMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X){
// Should make this cache friendly with site outermost, parallel_for
for(int b=0;b<Nblock;b++){
AP[b] = zero;
for(int bp=0;bp<Nblock;bp++) {
AP[b] += (m(bp,b))*X[bp];
}
}
}
double normv(const std::vector<Field> &P){
double nn = 0.0;
for(int b=0;b<Nblock;b++) {
nn+=norm2(P[b]);
}
return nn;
}
////////////////////////////////////////////////////////////////////////////
// BlockCGrQvec implementation:
//--------------------------
// X is guess/Solution
// B is RHS
// Solve A X_i = B_i ; i refers to Nblock index
////////////////////////////////////////////////////////////////////////////
void BlockCGrQsolveVec(LinearOperatorBase<Field> &Linop, const std::vector<Field> &B, std::vector<Field> &X)
{
Nblock = B.size();
assert(Nblock == X.size());
std::cout<<GridLogMessage<<" Block Conjugate Gradient Vec rQ : Nblock "<<Nblock<<std::endl;
for(int b=0;b<Nblock;b++){
X[b].checkerboard = B[b].checkerboard;
conformable(X[b], B[b]);
conformable(X[b], X[0]);
}
Field Fake(B[0]);
std::vector<Field> tmp(Nblock,Fake);
std::vector<Field> Q(Nblock,Fake);
std::vector<Field> D(Nblock,Fake);
std::vector<Field> Z(Nblock,Fake);
std::vector<Field> AD(Nblock,Fake);
Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);
// Initial residual computation & set up
std::vector<RealD> residuals(Nblock);
std::vector<RealD> ssq(Nblock);
RealD sssum=0;
for(int b=0;b<Nblock;b++){ ssq[b] = norm2(B[b]);}
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
for(int b=0;b<Nblock;b++){ residuals[b] = norm2(B[b]);}
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
for(int b=0;b<Nblock;b++){ residuals[b] = norm2(X[b]);}
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
/************************************************************************
* Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
************************************************************************
* Dimensions:
*
* X,B==(Nferm x Nblock)
* A==(Nferm x Nferm)
*
* Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
*
* QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
* for k:
* Z = AD
* M = [D^dag Z]^{-1}
* X = X + D MC
* QS = Q - ZM
* D = Q + D S^dag
* C = S C
*/
///////////////////////////////////////
// Initial block: initial search dir is guess
///////////////////////////////////////
std::cout << GridLogMessage<<"BlockCGrQvec algorithm initialisation " <<std::endl;
//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
for(int b=0;b<Nblock;b++) {
Linop.HermOp(X[b], AD[b]);
tmp[b] = B[b] - AD[b];
}
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
for(int b=0;b<Nblock;b++) D[b]=Q[b];
std::cout << GridLogMessage<<"BlockCGrQ vec computed initial residual and QR fact " <<std::endl;
///////////////////////////////////////
// Timers
///////////////////////////////////////
GridStopWatch sliceInnerTimer;
GridStopWatch sliceMaddTimer;
GridStopWatch QRTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
//3. Z = AD
MatrixTimer.Start();
for(int b=0;b<Nblock;b++) Linop.HermOp(D[b], Z[b]);
MatrixTimer.Stop();
//4. M = [D^dag Z]^{-1}
sliceInnerTimer.Start();
InnerProductMatrix(m_DZ,D,Z);
sliceInnerTimer.Stop();
m_M = m_DZ.inverse();
//5. X = X + D MC
m_tmp = m_M * m_C;
sliceMaddTimer.Start();
MaddMatrix(X,m_tmp, D,X);
sliceMaddTimer.Stop();
//6. QS = Q - ZM
sliceMaddTimer.Start();
MaddMatrix(tmp,m_M,Z,Q,-1.0);
sliceMaddTimer.Stop();
QRTimer.Start();
ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
QRTimer.Stop();
//7. D = Q + D S^dag
m_tmp = m_S.adjoint();
sliceMaddTimer.Start();
MaddMatrix(D,m_tmp,D,Q);
sliceMaddTimer.Stop();
//8. C = S C
m_C = m_S*m_C;
/*********************
* convergence monitor
*********************
*/
m_rr = m_C.adjoint() * m_C;
RealD max_resid=0;
RealD rrsum=0;
RealD rr;
for(int b=0;b<Nblock;b++) {
rrsum+=real(m_rr(b,b));
rr = real(m_rr(b,b))/ssq[b];
if ( rr > max_resid ) max_resid = rr;
}
std::cout << GridLogIterative << "\t Block Iteration "<<k<<" ave resid "<< sqrt(rrsum/sssum) << " max "<< sqrt(max_resid) <<std::endl;
if ( max_resid < Tolerance*Tolerance ) {
SolverTimer.Stop();
std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
for(int b=0;b<Nblock;b++) Linop.HermOp(X[b], AD[b]);
for(int b=0;b<Nblock;b++) AD[b] = AD[b]-B[b];
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(normv(AD)/normv(B)) <<std::endl;
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
}; };
} }

View File

@@ -0,0 +1,244 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
template<class Field>
class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<Field> {
public:
bool ErrorOnNoConverge; // Throw an assert when CAGMRES fails to converge,
// defaults to true
RealD Tolerance;
Integer MaxIterations;
Integer RestartLength;
Integer MaxNumberOfRestarts;
Integer IterationCount; // Number of iterations the CAGMRES took to finish,
// filled in upon completion
GridStopWatch MatrixTimer;
GridStopWatch LinalgTimer;
GridStopWatch QrTimer;
GridStopWatch CompSolutionTimer;
Eigen::MatrixXcd H;
std::vector<std::complex<double>> y;
std::vector<std::complex<double>> gamma;
std::vector<std::complex<double>> c;
std::vector<std::complex<double>> s;
CommunicationAvoidingGeneralisedMinimalResidual(RealD tol,
Integer maxit,
Integer restart_length,
bool err_on_no_conv = true)
: Tolerance(tol)
, MaxIterations(maxit)
, RestartLength(restart_length)
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
, ErrorOnNoConverge(err_on_no_conv)
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
, y(RestartLength + 1, 0.)
, gamma(RestartLength + 1, 0.)
, c(RestartLength + 1, 0.)
, s(RestartLength + 1, 0.) {};
void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular GMRES" << std::endl;
psi.checkerboard = src.checkerboard;
conformable(psi, src);
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
RealD cp;
RealD ssq = norm2(src);
RealD rsq = Tolerance * Tolerance * ssq;
Field r(src._grid);
std::cout << std::setprecision(4) << std::scientific;
std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: src " << ssq << std::endl;
MatrixTimer.Reset();
LinalgTimer.Reset();
QrTimer.Reset();
CompSolutionTimer.Reset();
GridStopWatch SolverTimer;
SolverTimer.Start();
IterationCount = 0;
for (int k=0; k<MaxNumberOfRestarts; k++) {
cp = outerLoopBody(LinOp, src, psi, rsq);
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
LinOp.Op(psi,r);
axpy(r,-1.0,src,r);
RealD srcnorm = sqrt(ssq);
RealD resnorm = sqrt(norm2(r));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount
<< " computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual
<< " target " << Tolerance << std::endl;
std::cout << GridLogMessage << "CAGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "CAGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "CAGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "CAGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "CAGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
return;
}
}
std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual did NOT converge" << std::endl;
if (ErrorOnNoConverge)
assert(0);
}
RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
RealD cp = 0;
Field w(src._grid);
Field r(src._grid);
// this should probably be made a class member so that it is only allocated once, not in every restart
std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
MatrixTimer.Start();
LinOp.Op(psi, w);
MatrixTimer.Stop();
LinalgTimer.Start();
r = src - w;
gamma[0] = sqrt(norm2(r));
v[0] = (1. / gamma[0]) * r;
LinalgTimer.Stop();
for (int i=0; i<RestartLength; i++) {
IterationCount++;
arnoldiStep(LinOp, v, w, i);
qrUpdate(i);
cp = std::norm(gamma[i+1]);
std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount
<< " residual " << cp << " target " << rsq << std::endl;
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
computeSolution(v, psi, i);
return cp;
}
}
assert(0); // Never reached
return cp;
}
void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &w, int iter) {
MatrixTimer.Start();
LinOp.Op(v[iter], w);
MatrixTimer.Stop();
LinalgTimer.Start();
for (int i = 0; i <= iter; ++i) {
H(iter, i) = innerProduct(v[i], w);
w = w - H(iter, i) * v[i];
}
H(iter, iter + 1) = sqrt(norm2(w));
v[iter + 1] = (1. / H(iter, iter + 1)) * w;
LinalgTimer.Stop();
}
void qrUpdate(int iter) {
QrTimer.Start();
for (int i = 0; i < iter ; ++i) {
auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
H(iter, i + 1) = tmp;
}
// Compute new Givens Rotation
ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
c[iter] = H(iter, iter) / nu;
s[iter] = H(iter, iter + 1) / nu;
// Apply new Givens rotation
H(iter, iter) = nu;
H(iter, iter + 1) = 0.;
gamma[iter + 1] = -s[iter] * gamma[iter];
gamma[iter] = std::conj(c[iter]) * gamma[iter];
QrTimer.Stop();
}
void computeSolution(std::vector<Field> const &v, Field &psi, int iter) {
CompSolutionTimer.Start();
for (int i = iter; i >= 0; i--) {
y[i] = gamma[i];
for (int k = i + 1; k <= iter; k++)
y[i] = y[i] - H(k, i) * y[k];
y[i] = y[i] / H(i, i);
}
for (int i = 0; i <= iter; i++)
psi = psi + v[i] * y[i];
CompSolutionTimer.Stop();
}
};
}
#endif

View File

@@ -54,6 +54,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) { void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
psi.checkerboard = src.checkerboard; psi.checkerboard = src.checkerboard;
conformable(psi, src); conformable(psi, src);
@@ -70,7 +71,6 @@ class ConjugateGradient : public OperatorFunction<Field> {
Linop.HermOpAndNorm(psi, mmp, d, b); Linop.HermOpAndNorm(psi, mmp, d, b);
r = src - mmp; r = src - mmp;
p = r; p = r;
@@ -96,38 +96,44 @@ class ConjugateGradient : public OperatorFunction<Field> {
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl; << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
GridStopWatch LinalgTimer; GridStopWatch LinalgTimer;
GridStopWatch InnerTimer;
GridStopWatch AxpyNormTimer;
GridStopWatch LinearCombTimer;
GridStopWatch MatrixTimer; GridStopWatch MatrixTimer;
GridStopWatch SolverTimer; GridStopWatch SolverTimer;
SolverTimer.Start(); SolverTimer.Start();
int k; int k;
for (k = 1; k <= MaxIterations; k++) { for (k = 1; k <= MaxIterations*1000; k++) {
c = cp; c = cp;
MatrixTimer.Start(); MatrixTimer.Start();
Linop.HermOpAndNorm(p, mmp, d, qq); Linop.HermOp(p, mmp);
MatrixTimer.Stop(); MatrixTimer.Stop();
LinalgTimer.Start(); LinalgTimer.Start();
// RealD qqck = norm2(mmp);
// ComplexD dck = innerProduct(p,mmp);
InnerTimer.Start();
ComplexD dc = innerProduct(p,mmp);
InnerTimer.Stop();
d = dc.real();
a = c / d; a = c / d;
b_pred = a * (a * qq - d) / c;
AxpyNormTimer.Start();
cp = axpy_norm(r, -a, mmp, r); cp = axpy_norm(r, -a, mmp, r);
AxpyNormTimer.Stop();
b = cp / c; b = cp / c;
// Fuse these loops ; should be really easy LinearCombTimer.Start();
psi = a * p + psi; parallel_for(int ss=0;ss<src._grid->oSites();ss++){
p = p * b + r; vstream(psi[ss], a * p[ss] + psi[ss]);
vstream(p [ss], b * p[ss] + r[ss]);
}
LinearCombTimer.Stop();
LinalgTimer.Stop(); LinalgTimer.Stop();
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
<< " residual " << cp << " target " << rsq << std::endl; << " residual^2 " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;
std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl;
std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl;
// Stopping condition // Stopping condition
if (cp <= rsq) { if (cp <= rsq) {
@@ -148,6 +154,9 @@ class ConjugateGradient : public OperatorFunction<Field> {
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl; std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl; std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl; std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInner " << InnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);

View File

@@ -43,6 +43,7 @@ namespace Grid {
public: public:
RealD Tolerance; RealD Tolerance;
Integer MaxIterations; Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
int verbose; int verbose;
MultiShiftFunction shifts; MultiShiftFunction shifts;
@@ -164,6 +165,15 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
axpby(psi[s],0.,-bs[s]*alpha[s],src,src); axpby(psi[s],0.,-bs[s]*alpha[s],src,src);
} }
///////////////////////////////////////
// Timers
///////////////////////////////////////
GridStopWatch AXPYTimer;
GridStopWatch ShiftTimer;
GridStopWatch QRTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
// Iteration loop // Iteration loop
int k; int k;
@@ -171,7 +181,9 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
for (k=1;k<=MaxIterations;k++){ for (k=1;k<=MaxIterations;k++){
a = c /cp; a = c /cp;
AXPYTimer.Start();
axpy(p,a,p,r); axpy(p,a,p,r);
AXPYTimer.Stop();
// Note to self - direction ps is iterated seperately // Note to self - direction ps is iterated seperately
// for each shift. Does not appear to have any scope // for each shift. Does not appear to have any scope
@@ -180,6 +192,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
// However SAME r is used. Could load "r" and update // However SAME r is used. Could load "r" and update
// ALL ps[s]. 2/3 Bandwidth saving // ALL ps[s]. 2/3 Bandwidth saving
// New Kernel: Load r, vector of coeffs, vector of pointers ps // New Kernel: Load r, vector of coeffs, vector of pointers ps
AXPYTimer.Start();
for(int s=0;s<nshift;s++){ for(int s=0;s<nshift;s++){
if ( ! converged[s] ) { if ( ! converged[s] ) {
if (s==0){ if (s==0){
@@ -190,22 +203,34 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
} }
} }
} }
AXPYTimer.Stop();
cp=c; cp=c;
MatrixTimer.Start();
//Linop.HermOpAndNorm(p,mmp,d,qq); // d is used
// The below is faster on KNL
Linop.HermOp(p,mmp);
d=real(innerProduct(p,mmp));
Linop.HermOpAndNorm(p,mmp,d,qq); MatrixTimer.Stop();
AXPYTimer.Start();
axpy(mmp,mass[0],p,mmp); axpy(mmp,mass[0],p,mmp);
AXPYTimer.Stop();
RealD rn = norm2(p); RealD rn = norm2(p);
d += rn*mass[0]; d += rn*mass[0];
bp=b; bp=b;
b=-cp/d; b=-cp/d;
AXPYTimer.Start();
c=axpy_norm(r,b,mmp,r); c=axpy_norm(r,b,mmp,r);
AXPYTimer.Stop();
// Toggle the recurrence history // Toggle the recurrence history
bs[0] = b; bs[0] = b;
iz = 1-iz; iz = 1-iz;
ShiftTimer.Start();
for(int s=1;s<nshift;s++){ for(int s=1;s<nshift;s++){
if((!converged[s])){ if((!converged[s])){
RealD z0 = z[s][1-iz]; RealD z0 = z[s][1-iz];
@@ -215,6 +240,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
} }
} }
ShiftTimer.Stop();
for(int s=0;s<nshift;s++){ for(int s=0;s<nshift;s++){
int ss = s; int ss = s;
@@ -257,6 +283,9 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
if ( all_converged ){ if ( all_converged ){
SolverTimer.Stop();
std::cout<<GridLogMessage<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl; std::cout<<GridLogMessage<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
std::cout<<GridLogMessage<< "CGMultiShift: Checking solutions"<<std::endl; std::cout<<GridLogMessage<< "CGMultiShift: Checking solutions"<<std::endl;
@@ -269,8 +298,19 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
RealD cn = norm2(src); RealD cn = norm2(src);
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl; std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
} }
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tAXPY " << AXPYTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMarix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tShift " << ShiftTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return; return;
} }
} }
// ugly hack // ugly hack
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl; std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;

View File

@@ -30,22 +30,23 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
namespace Grid { namespace Grid {
struct ZeroGuesser { template<class Field>
class ZeroGuesser: public LinearFunction<Field> {
public: public:
template<class Field> virtual void operator()(const Field &src, Field &guess) { guess = zero; };
void operator()(const Field &src,Field &guess) { guess = Zero(); };
}; };
struct SourceGuesser {
template<class Field>
class SourceGuesser: public LinearFunction<Field> {
public: public:
template<class Field> virtual void operator()(const Field &src, Field &guess) { guess = src; };
void operator()(const Field &src,Field &guess) { guess = src; };
}; };
//////////////////////////////// ////////////////////////////////
// Fine grid deflation // Fine grid deflation
//////////////////////////////// ////////////////////////////////
template<class Field> template<class Field>
struct DeflatedGuesser { class DeflatedGuesser: public LinearFunction<Field> {
private: private:
const std::vector<Field> &evec; const std::vector<Field> &evec;
const std::vector<RealD> &eval; const std::vector<RealD> &eval;
@@ -54,7 +55,7 @@ public:
DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {}; DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};
void operator()(const Field &src,Field &guess) { virtual void operator()(const Field &src,Field &guess) {
guess = zero; guess = zero;
assert(evec.size()==eval.size()); assert(evec.size()==eval.size());
auto N = evec.size(); auto N = evec.size();
@@ -62,11 +63,12 @@ public:
const Field& tmp = evec[i]; const Field& tmp = evec[i];
axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess); axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
} }
guess.checkerboard = src.checkerboard;
} }
}; };
template<class FineField, class CoarseField> template<class FineField, class CoarseField>
class LocalCoherenceDeflatedGuesser { class LocalCoherenceDeflatedGuesser: public LinearFunction<FineField> {
private: private:
const std::vector<FineField> &subspace; const std::vector<FineField> &subspace;
const std::vector<CoarseField> &evec_coarse; const std::vector<CoarseField> &evec_coarse;
@@ -92,6 +94,7 @@ public:
axpy(guess_coarse,TensorRemove(innerProduct(tmp,src_coarse)) / eval_coarse[i],tmp,guess_coarse); axpy(guess_coarse,TensorRemove(innerProduct(tmp,src_coarse)) / eval_coarse[i],tmp,guess_coarse);
} }
blockPromote(guess_coarse,guess,subspace); blockPromote(guess_coarse,guess,subspace);
guess.checkerboard = src.checkerboard;
}; };
}; };

View File

@@ -0,0 +1,256 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_FLEXIBLE_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_FLEXIBLE_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
template<class Field>
class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<Field> {
public:
bool ErrorOnNoConverge; // Throw an assert when FCAGMRES fails to converge,
// defaults to true
RealD Tolerance;
Integer MaxIterations;
Integer RestartLength;
Integer MaxNumberOfRestarts;
Integer IterationCount; // Number of iterations the FCAGMRES took to finish,
// filled in upon completion
GridStopWatch MatrixTimer;
GridStopWatch PrecTimer;
GridStopWatch LinalgTimer;
GridStopWatch QrTimer;
GridStopWatch CompSolutionTimer;
Eigen::MatrixXcd H;
std::vector<std::complex<double>> y;
std::vector<std::complex<double>> gamma;
std::vector<std::complex<double>> c;
std::vector<std::complex<double>> s;
LinearFunction<Field> &Preconditioner;
FlexibleCommunicationAvoidingGeneralisedMinimalResidual(RealD tol,
Integer maxit,
LinearFunction<Field> &Prec,
Integer restart_length,
bool err_on_no_conv = true)
: Tolerance(tol)
, MaxIterations(maxit)
, RestartLength(restart_length)
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
, ErrorOnNoConverge(err_on_no_conv)
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
, y(RestartLength + 1, 0.)
, gamma(RestartLength + 1, 0.)
, c(RestartLength + 1, 0.)
, s(RestartLength + 1, 0.)
, Preconditioner(Prec) {};
void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular FGMRES" << std::endl;
psi.checkerboard = src.checkerboard;
conformable(psi, src);
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
RealD cp;
RealD ssq = norm2(src);
RealD rsq = Tolerance * Tolerance * ssq;
Field r(src._grid);
std::cout << std::setprecision(4) << std::scientific;
std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: src " << ssq << std::endl;
PrecTimer.Reset();
MatrixTimer.Reset();
LinalgTimer.Reset();
QrTimer.Reset();
CompSolutionTimer.Reset();
GridStopWatch SolverTimer;
SolverTimer.Start();
IterationCount = 0;
for (int k=0; k<MaxNumberOfRestarts; k++) {
cp = outerLoopBody(LinOp, src, psi, rsq);
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
LinOp.Op(psi,r);
axpy(r,-1.0,src,r);
RealD srcnorm = sqrt(ssq);
RealD resnorm = sqrt(norm2(r));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount
<< " computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual
<< " target " << Tolerance << std::endl;
std::cout << GridLogMessage << "FCAGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FCAGMRES Time elapsed: Precon " << PrecTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FCAGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FCAGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FCAGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FCAGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
return;
}
}
std::cout << GridLogMessage << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual did NOT converge" << std::endl;
if (ErrorOnNoConverge)
assert(0);
}
RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
RealD cp = 0;
Field w(src._grid);
Field r(src._grid);
// these should probably be made class members so that they are only allocated once, not in every restart
std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
std::vector<Field> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;
MatrixTimer.Start();
LinOp.Op(psi, w);
MatrixTimer.Stop();
LinalgTimer.Start();
r = src - w;
gamma[0] = sqrt(norm2(r));
v[0] = (1. / gamma[0]) * r;
LinalgTimer.Stop();
for (int i=0; i<RestartLength; i++) {
IterationCount++;
arnoldiStep(LinOp, v, z, w, i);
qrUpdate(i);
cp = std::norm(gamma[i+1]);
std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount
<< " residual " << cp << " target " << rsq << std::endl;
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
computeSolution(z, psi, i);
return cp;
}
}
assert(0); // Never reached
return cp;
}
void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) {
PrecTimer.Start();
Preconditioner(v[iter], z[iter]);
PrecTimer.Stop();
MatrixTimer.Start();
LinOp.Op(z[iter], w);
MatrixTimer.Stop();
LinalgTimer.Start();
for (int i = 0; i <= iter; ++i) {
H(iter, i) = innerProduct(v[i], w);
w = w - H(iter, i) * v[i];
}
H(iter, iter + 1) = sqrt(norm2(w));
v[iter + 1] = (1. / H(iter, iter + 1)) * w;
LinalgTimer.Stop();
}
void qrUpdate(int iter) {
QrTimer.Start();
for (int i = 0; i < iter ; ++i) {
auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
H(iter, i + 1) = tmp;
}
// Compute new Givens Rotation
ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
c[iter] = H(iter, iter) / nu;
s[iter] = H(iter, iter + 1) / nu;
// Apply new Givens rotation
H(iter, iter) = nu;
H(iter, iter + 1) = 0.;
gamma[iter + 1] = -s[iter] * gamma[iter];
gamma[iter] = std::conj(c[iter]) * gamma[iter];
QrTimer.Stop();
}
void computeSolution(std::vector<Field> const &z, Field &psi, int iter) {
CompSolutionTimer.Start();
for (int i = iter; i >= 0; i--) {
y[i] = gamma[i];
for (int k = i + 1; k <= iter; k++)
y[i] = y[i] - H(k, i) * y[k];
y[i] = y[i] / H(i, i);
}
for (int i = 0; i <= iter; i++)
psi = psi + z[i] * y[i];
CompSolutionTimer.Stop();
}
};
}
#endif

View File

@@ -0,0 +1,254 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
template<class Field>
class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
public:
bool ErrorOnNoConverge; // Throw an assert when FGMRES fails to converge,
// defaults to true
RealD Tolerance;
Integer MaxIterations;
Integer RestartLength;
Integer MaxNumberOfRestarts;
Integer IterationCount; // Number of iterations the FGMRES took to finish,
// filled in upon completion
GridStopWatch MatrixTimer;
GridStopWatch PrecTimer;
GridStopWatch LinalgTimer;
GridStopWatch QrTimer;
GridStopWatch CompSolutionTimer;
Eigen::MatrixXcd H;
std::vector<std::complex<double>> y;
std::vector<std::complex<double>> gamma;
std::vector<std::complex<double>> c;
std::vector<std::complex<double>> s;
LinearFunction<Field> &Preconditioner;
FlexibleGeneralisedMinimalResidual(RealD tol,
Integer maxit,
LinearFunction<Field> &Prec,
Integer restart_length,
bool err_on_no_conv = true)
: Tolerance(tol)
, MaxIterations(maxit)
, RestartLength(restart_length)
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
, ErrorOnNoConverge(err_on_no_conv)
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
, y(RestartLength + 1, 0.)
, gamma(RestartLength + 1, 0.)
, c(RestartLength + 1, 0.)
, s(RestartLength + 1, 0.)
, Preconditioner(Prec) {};
void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
RealD cp;
RealD ssq = norm2(src);
RealD rsq = Tolerance * Tolerance * ssq;
Field r(src._grid);
std::cout << std::setprecision(4) << std::scientific;
std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: guess " << guess << std::endl;
std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: src " << ssq << std::endl;
PrecTimer.Reset();
MatrixTimer.Reset();
LinalgTimer.Reset();
QrTimer.Reset();
CompSolutionTimer.Reset();
GridStopWatch SolverTimer;
SolverTimer.Start();
IterationCount = 0;
for (int k=0; k<MaxNumberOfRestarts; k++) {
cp = outerLoopBody(LinOp, src, psi, rsq);
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
LinOp.Op(psi,r);
axpy(r,-1.0,src,r);
RealD srcnorm = sqrt(ssq);
RealD resnorm = sqrt(norm2(r));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual: Converged on iteration " << IterationCount
<< " computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual
<< " target " << Tolerance << std::endl;
std::cout << GridLogMessage << "FGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FGMRES Time elapsed: Precon " << PrecTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
return;
}
}
std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual did NOT converge" << std::endl;
if (ErrorOnNoConverge)
assert(0);
}
RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
RealD cp = 0;
Field w(src._grid);
Field r(src._grid);
// these should probably be made class members so that they are only allocated once, not in every restart
std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
std::vector<Field> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;
MatrixTimer.Start();
LinOp.Op(psi, w);
MatrixTimer.Stop();
LinalgTimer.Start();
r = src - w;
gamma[0] = sqrt(norm2(r));
v[0] = (1. / gamma[0]) * r;
LinalgTimer.Stop();
for (int i=0; i<RestartLength; i++) {
IterationCount++;
arnoldiStep(LinOp, v, z, w, i);
qrUpdate(i);
cp = std::norm(gamma[i+1]);
std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: Iteration " << IterationCount
<< " residual " << cp << " target " << rsq << std::endl;
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
computeSolution(z, psi, i);
return cp;
}
}
assert(0); // Never reached
return cp;
}
void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) {
PrecTimer.Start();
Preconditioner(v[iter], z[iter]);
PrecTimer.Stop();
MatrixTimer.Start();
LinOp.Op(z[iter], w);
MatrixTimer.Stop();
LinalgTimer.Start();
for (int i = 0; i <= iter; ++i) {
H(iter, i) = innerProduct(v[i], w);
w = w - H(iter, i) * v[i];
}
H(iter, iter + 1) = sqrt(norm2(w));
v[iter + 1] = (1. / H(iter, iter + 1)) * w;
LinalgTimer.Stop();
}
void qrUpdate(int iter) {
QrTimer.Start();
for (int i = 0; i < iter ; ++i) {
auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
H(iter, i + 1) = tmp;
}
// Compute new Givens Rotation
ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
c[iter] = H(iter, iter) / nu;
s[iter] = H(iter, iter + 1) / nu;
// Apply new Givens rotation
H(iter, iter) = nu;
H(iter, iter + 1) = 0.;
gamma[iter + 1] = -s[iter] * gamma[iter];
gamma[iter] = std::conj(c[iter]) * gamma[iter];
QrTimer.Stop();
}
void computeSolution(std::vector<Field> const &z, Field &psi, int iter) {
CompSolutionTimer.Start();
for (int i = iter; i >= 0; i--) {
y[i] = gamma[i];
for (int k = i + 1; k <= iter; k++)
y[i] = y[i] - H(k, i) * y[k];
y[i] = y[i] / H(i, i);
}
for (int i = 0; i <= iter; i++)
psi = psi + z[i] * y[i];
CompSolutionTimer.Stop();
}
};
}
#endif

View File

@@ -0,0 +1,242 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/GeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
template<class Field>
class GeneralisedMinimalResidual : public OperatorFunction<Field> {
public:
bool ErrorOnNoConverge; // Throw an assert when GMRES fails to converge,
// defaults to true
RealD Tolerance;
Integer MaxIterations;
Integer RestartLength;
Integer MaxNumberOfRestarts;
Integer IterationCount; // Number of iterations the GMRES took to finish,
// filled in upon completion
GridStopWatch MatrixTimer;
GridStopWatch LinalgTimer;
GridStopWatch QrTimer;
GridStopWatch CompSolutionTimer;
Eigen::MatrixXcd H;
std::vector<std::complex<double>> y;
std::vector<std::complex<double>> gamma;
std::vector<std::complex<double>> c;
std::vector<std::complex<double>> s;
GeneralisedMinimalResidual(RealD tol,
Integer maxit,
Integer restart_length,
bool err_on_no_conv = true)
: Tolerance(tol)
, MaxIterations(maxit)
, RestartLength(restart_length)
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
, ErrorOnNoConverge(err_on_no_conv)
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
, y(RestartLength + 1, 0.)
, gamma(RestartLength + 1, 0.)
, c(RestartLength + 1, 0.)
, s(RestartLength + 1, 0.) {};
void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
RealD cp;
RealD ssq = norm2(src);
RealD rsq = Tolerance * Tolerance * ssq;
Field r(src._grid);
std::cout << std::setprecision(4) << std::scientific;
std::cout << GridLogIterative << "GeneralisedMinimalResidual: guess " << guess << std::endl;
std::cout << GridLogIterative << "GeneralisedMinimalResidual: src " << ssq << std::endl;
MatrixTimer.Reset();
LinalgTimer.Reset();
QrTimer.Reset();
CompSolutionTimer.Reset();
GridStopWatch SolverTimer;
SolverTimer.Start();
IterationCount = 0;
for (int k=0; k<MaxNumberOfRestarts; k++) {
cp = outerLoopBody(LinOp, src, psi, rsq);
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
LinOp.Op(psi,r);
axpy(r,-1.0,src,r);
RealD srcnorm = sqrt(ssq);
RealD resnorm = sqrt(norm2(r));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "GeneralisedMinimalResidual: Converged on iteration " << IterationCount
<< " computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual
<< " target " << Tolerance << std::endl;
std::cout << GridLogMessage << "GMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "GMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "GMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "GMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "GMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
return;
}
}
std::cout << GridLogMessage << "GeneralisedMinimalResidual did NOT converge" << std::endl;
if (ErrorOnNoConverge)
assert(0);
}
RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
RealD cp = 0;
Field w(src._grid);
Field r(src._grid);
// this should probably be made a class member so that it is only allocated once, not in every restart
std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
MatrixTimer.Start();
LinOp.Op(psi, w);
MatrixTimer.Stop();
LinalgTimer.Start();
r = src - w;
gamma[0] = sqrt(norm2(r));
v[0] = (1. / gamma[0]) * r;
LinalgTimer.Stop();
for (int i=0; i<RestartLength; i++) {
IterationCount++;
arnoldiStep(LinOp, v, w, i);
qrUpdate(i);
cp = std::norm(gamma[i+1]);
std::cout << GridLogIterative << "GeneralisedMinimalResidual: Iteration " << IterationCount
<< " residual " << cp << " target " << rsq << std::endl;
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
computeSolution(v, psi, i);
return cp;
}
}
assert(0); // Never reached
return cp;
}
void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &w, int iter) {
MatrixTimer.Start();
LinOp.Op(v[iter], w);
MatrixTimer.Stop();
LinalgTimer.Start();
for (int i = 0; i <= iter; ++i) {
H(iter, i) = innerProduct(v[i], w);
w = w - H(iter, i) * v[i];
}
H(iter, iter + 1) = sqrt(norm2(w));
v[iter + 1] = (1. / H(iter, iter + 1)) * w;
LinalgTimer.Stop();
}
void qrUpdate(int iter) {
QrTimer.Start();
for (int i = 0; i < iter ; ++i) {
auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
H(iter, i + 1) = tmp;
}
// Compute new Givens Rotation
ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
c[iter] = H(iter, iter) / nu;
s[iter] = H(iter, iter + 1) / nu;
// Apply new Givens rotation
H(iter, iter) = nu;
H(iter, iter + 1) = 0.;
gamma[iter + 1] = -s[iter] * gamma[iter];
gamma[iter] = std::conj(c[iter]) * gamma[iter];
QrTimer.Stop();
}
void computeSolution(std::vector<Field> const &v, Field &psi, int iter) {
CompSolutionTimer.Start();
for (int i = iter; i >= 0; i--) {
y[i] = gamma[i];
for (int k = i + 1; k <= iter; k++)
y[i] = y[i] - H(k, i) * y[k];
y[i] = y[i] / H(i, i);
}
for (int i = 0; i <= iter; i++)
psi = psi + v[i] * y[i];
CompSolutionTimer.Stop();
}
};
}
#endif

View File

@@ -57,7 +57,8 @@ void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, i
parallel_region parallel_region
{ {
std::vector < vobj > B(Nm); // Thread private
std::vector < vobj , commAllocator<vobj> > B(Nm); // Thread private
parallel_for_internal(int ss=0;ss < grid->oSites();ss++){ parallel_for_internal(int ss=0;ss < grid->oSites();ss++){
for(int j=j0; j<j1; ++j) B[j]=0.; for(int j=j0; j<j1; ++j) B[j]=0.;

View File

@@ -286,8 +286,10 @@ public:
void Orthogonalise(void ) { void Orthogonalise(void ) {
CoarseScalar InnerProd(_CoarseGrid); CoarseScalar InnerProd(_CoarseGrid);
blockOrthogonalise(InnerProd,subspace);std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl; std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
blockOrthogonalise(InnerProd,subspace);std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl; blockOrthogonalise(InnerProd,subspace);
std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
blockOrthogonalise(InnerProd,subspace);
}; };
template<typename T> static RealD normalise(T& v) template<typename T> static RealD normalise(T& v)
@@ -333,7 +335,7 @@ public:
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////
Chebyshev<FineField> ChebySmooth(cheby_smooth); Chebyshev<FineField> ChebySmooth(cheby_smooth);
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,_subspace); ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,subspace);
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax); ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
for(int k=0;k<evec_coarse.size();k++){ for(int k=0;k<evec_coarse.size();k++){
@@ -374,14 +376,14 @@ public:
RealD MaxIt, RealD betastp, int MinRes) RealD MaxIt, RealD betastp, int MinRes)
{ {
Chebyshev<FineField> Cheby(cheby_op); Chebyshev<FineField> Cheby(cheby_op);
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,_subspace); ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,subspace);
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,_subspace); ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace);
////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////
Chebyshev<FineField> ChebySmooth(cheby_smooth); Chebyshev<FineField> ChebySmooth(cheby_smooth);
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_subspace,relax); ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
evals_coarse.resize(Nm); evals_coarse.resize(Nm);
evec_coarse.resize(Nm,_CoarseGrid); evec_coarse.resize(Nm,_CoarseGrid);

View File

@@ -0,0 +1,156 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/MinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_MINIMAL_RESIDUAL_H
#define GRID_MINIMAL_RESIDUAL_H
namespace Grid {
template<class Field> class MinimalResidual : public OperatorFunction<Field> {
public:
bool ErrorOnNoConverge; // throw an assert when the MR fails to converge.
// Defaults true.
RealD Tolerance;
Integer MaxIterations;
RealD overRelaxParam;
Integer IterationsToComplete; // Number of iterations the MR took to finish.
// Filled in upon completion
MinimalResidual(RealD tol, Integer maxit, Real ovrelparam = 1.0, bool err_on_no_conv = true)
: Tolerance(tol), MaxIterations(maxit), overRelaxParam(ovrelparam), ErrorOnNoConverge(err_on_no_conv){};
void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);
Complex a, c;
Real d;
Field Mr(src);
Field r(src);
// Initial residual computation & set up
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
RealD ssq = norm2(src);
RealD rsq = Tolerance * Tolerance * ssq;
Linop.Op(psi, Mr);
r = src - Mr;
RealD cp = norm2(r);
std::cout << std::setprecision(4) << std::scientific;
std::cout << GridLogIterative << "MinimalResidual: guess " << guess << std::endl;
std::cout << GridLogIterative << "MinimalResidual: src " << ssq << std::endl;
std::cout << GridLogIterative << "MinimalResidual: mp " << d << std::endl;
std::cout << GridLogIterative << "MinimalResidual: cp,r " << cp << std::endl;
if (cp <= rsq) {
return;
}
std::cout << GridLogIterative << "MinimalResidual: k=0 residual " << cp << " target " << rsq << std::endl;
GridStopWatch LinalgTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
MatrixTimer.Start();
Linop.Op(r, Mr);
MatrixTimer.Stop();
LinalgTimer.Start();
c = innerProduct(Mr, r);
d = norm2(Mr);
a = c / d;
a = a * overRelaxParam;
psi = psi + r * a;
r = r - Mr * a;
cp = norm2(r);
LinalgTimer.Stop();
std::cout << GridLogIterative << "MinimalResidual: Iteration " << k
<< " residual " << cp << " target " << rsq << std::endl;
std::cout << GridLogDebug << "a = " << a << " c = " << c << " d = " << d << std::endl;
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
Linop.Op(psi, Mr);
r = src - Mr;
RealD srcnorm = sqrt(ssq);
RealD resnorm = sqrt(norm2(r));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "MinimalResidual Converged on iteration " << k
<< " computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual
<< " target " << Tolerance << std::endl;
std::cout << GridLogMessage << "MR Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MR Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MR Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
if (ErrorOnNoConverge)
assert(true_residual / Tolerance < 10000.0);
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "MinimalResidual did NOT converge"
<< std::endl;
if (ErrorOnNoConverge)
assert(0);
IterationsToComplete = k;
}
};
} // namespace Grid
#endif

View File

@@ -0,0 +1,273 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_MIXED_PRECISION_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_MIXED_PRECISION_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
template<class FieldD, class FieldF, typename std::enable_if<getPrecision<FieldD>::value == 2, int>::type = 0, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction<FieldD> {
public:
bool ErrorOnNoConverge; // Throw an assert when MPFGMRES fails to converge,
// defaults to true
RealD Tolerance;
Integer MaxIterations;
Integer RestartLength;
Integer MaxNumberOfRestarts;
Integer IterationCount; // Number of iterations the MPFGMRES took to finish,
// filled in upon completion
GridStopWatch MatrixTimer;
GridStopWatch PrecTimer;
GridStopWatch LinalgTimer;
GridStopWatch QrTimer;
GridStopWatch CompSolutionTimer;
GridStopWatch ChangePrecTimer;
Eigen::MatrixXcd H;
std::vector<std::complex<double>> y;
std::vector<std::complex<double>> gamma;
std::vector<std::complex<double>> c;
std::vector<std::complex<double>> s;
GridBase* SinglePrecGrid;
LinearFunction<FieldF> &Preconditioner;
MixedPrecisionFlexibleGeneralisedMinimalResidual(RealD tol,
Integer maxit,
GridBase * sp_grid,
LinearFunction<FieldF> &Prec,
Integer restart_length,
bool err_on_no_conv = true)
: Tolerance(tol)
, MaxIterations(maxit)
, RestartLength(restart_length)
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
, ErrorOnNoConverge(err_on_no_conv)
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
, y(RestartLength + 1, 0.)
, gamma(RestartLength + 1, 0.)
, c(RestartLength + 1, 0.)
, s(RestartLength + 1, 0.)
, SinglePrecGrid(sp_grid)
, Preconditioner(Prec) {};
void operator()(LinearOperatorBase<FieldD> &LinOp, const FieldD &src, FieldD &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
RealD cp;
RealD ssq = norm2(src);
RealD rsq = Tolerance * Tolerance * ssq;
FieldD r(src._grid);
std::cout << std::setprecision(4) << std::scientific;
std::cout << GridLogIterative << "MPFGMRES: guess " << guess << std::endl;
std::cout << GridLogIterative << "MPFGMRES: src " << ssq << std::endl;
PrecTimer.Reset();
MatrixTimer.Reset();
LinalgTimer.Reset();
QrTimer.Reset();
CompSolutionTimer.Reset();
ChangePrecTimer.Reset();
GridStopWatch SolverTimer;
SolverTimer.Start();
IterationCount = 0;
for (int k=0; k<MaxNumberOfRestarts; k++) {
cp = outerLoopBody(LinOp, src, psi, rsq);
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
LinOp.Op(psi,r);
axpy(r,-1.0,src,r);
RealD srcnorm = sqrt(ssq);
RealD resnorm = sqrt(norm2(r));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "MPFGMRES: Converged on iteration " << IterationCount
<< " computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual
<< " target " << Tolerance << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: Precon " << PrecTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: PrecChange " << ChangePrecTimer.Elapsed() << std::endl;
return;
}
}
std::cout << GridLogMessage << "MPFGMRES did NOT converge" << std::endl;
if (ErrorOnNoConverge)
assert(0);
}
RealD outerLoopBody(LinearOperatorBase<FieldD> &LinOp, const FieldD &src, FieldD &psi, RealD rsq) {
RealD cp = 0;
FieldD w(src._grid);
FieldD r(src._grid);
// these should probably be made class members so that they are only allocated once, not in every restart
std::vector<FieldD> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
std::vector<FieldD> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;
MatrixTimer.Start();
LinOp.Op(psi, w);
MatrixTimer.Stop();
LinalgTimer.Start();
r = src - w;
gamma[0] = sqrt(norm2(r));
v[0] = (1. / gamma[0]) * r;
LinalgTimer.Stop();
for (int i=0; i<RestartLength; i++) {
IterationCount++;
arnoldiStep(LinOp, v, z, w, i);
qrUpdate(i);
cp = std::norm(gamma[i+1]);
std::cout << GridLogIterative << "MPFGMRES: Iteration " << IterationCount
<< " residual " << cp << " target " << rsq << std::endl;
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
computeSolution(z, psi, i);
return cp;
}
}
assert(0); // Never reached
return cp;
}
void arnoldiStep(LinearOperatorBase<FieldD> &LinOp, std::vector<FieldD> &v, std::vector<FieldD> &z, FieldD &w, int iter) {
FieldF v_f(SinglePrecGrid);
FieldF z_f(SinglePrecGrid);
ChangePrecTimer.Start();
precisionChange(v_f, v[iter]);
precisionChange(z_f, z[iter]);
ChangePrecTimer.Stop();
PrecTimer.Start();
Preconditioner(v_f, z_f);
PrecTimer.Stop();
ChangePrecTimer.Start();
precisionChange(z[iter], z_f);
ChangePrecTimer.Stop();
MatrixTimer.Start();
LinOp.Op(z[iter], w);
MatrixTimer.Stop();
LinalgTimer.Start();
for (int i = 0; i <= iter; ++i) {
H(iter, i) = innerProduct(v[i], w);
w = w - H(iter, i) * v[i];
}
H(iter, iter + 1) = sqrt(norm2(w));
v[iter + 1] = (1. / H(iter, iter + 1)) * w;
LinalgTimer.Stop();
}
void qrUpdate(int iter) {
QrTimer.Start();
for (int i = 0; i < iter ; ++i) {
auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
H(iter, i + 1) = tmp;
}
// Compute new Givens Rotation
ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
c[iter] = H(iter, iter) / nu;
s[iter] = H(iter, iter + 1) / nu;
// Apply new Givens rotation
H(iter, iter) = nu;
H(iter, iter + 1) = 0.;
gamma[iter + 1] = -s[iter] * gamma[iter];
gamma[iter] = std::conj(c[iter]) * gamma[iter];
QrTimer.Stop();
}
void computeSolution(std::vector<FieldD> const &z, FieldD &psi, int iter) {
CompSolutionTimer.Start();
for (int i = iter; i >= 0; i--) {
y[i] = gamma[i];
for (int k = i + 1; k <= iter; k++)
y[i] = y[i] - H(k, i) * y[k];
y[i] = y[i] / H(i, i);
}
for (int i = 0; i <= iter; i++)
psi = psi + z[i] * y[i];
CompSolutionTimer.Stop();
}
};
}
#endif

View File

@@ -139,7 +139,10 @@ namespace Grid {
MatTimer.Start(); MatTimer.Start();
Linop.HermOpAndNorm(psi,Az,zAz,zAAz); Linop.HermOpAndNorm(psi,Az,zAz,zAAz);
MatTimer.Stop(); MatTimer.Stop();
LinalgTimer.Start();
r=src-Az; r=src-Az;
LinalgTimer.Stop();
///////////////////// /////////////////////
// p = Prec(r) // p = Prec(r)
@@ -152,8 +155,10 @@ namespace Grid {
Linop.HermOp(z,tmp); Linop.HermOp(z,tmp);
MatTimer.Stop(); MatTimer.Stop();
LinalgTimer.Start();
ttmp=tmp; ttmp=tmp;
tmp=tmp-r; tmp=tmp-r;
LinalgTimer.Stop();
/* /*
std::cout<<GridLogMessage<<r<<std::endl; std::cout<<GridLogMessage<<r<<std::endl;
@@ -166,12 +171,14 @@ namespace Grid {
Linop.HermOpAndNorm(z,Az,zAz,zAAz); Linop.HermOpAndNorm(z,Az,zAz,zAAz);
MatTimer.Stop(); MatTimer.Stop();
LinalgTimer.Start();
//p[0],q[0],qq[0] //p[0],q[0],qq[0]
p[0]= z; p[0]= z;
q[0]= Az; q[0]= Az;
qq[0]= zAAz; qq[0]= zAAz;
cp =norm2(r); cp =norm2(r);
LinalgTimer.Stop();
for(int k=0;k<nstep;k++){ for(int k=0;k<nstep;k++){
@@ -181,12 +188,14 @@ namespace Grid {
int peri_k = k %mmax; int peri_k = k %mmax;
int peri_kp= kp%mmax; int peri_kp= kp%mmax;
LinalgTimer.Start();
rq= real(innerProduct(r,q[peri_k])); // what if rAr not real? rq= real(innerProduct(r,q[peri_k])); // what if rAr not real?
a = rq/qq[peri_k]; a = rq/qq[peri_k];
axpy(psi,a,p[peri_k],psi); axpy(psi,a,p[peri_k],psi);
cp = axpy_norm(r,-a,q[peri_k],r); cp = axpy_norm(r,-a,q[peri_k],r);
LinalgTimer.Stop();
if((k==nstep-1)||(cp<rsq)){ if((k==nstep-1)||(cp<rsq)){
return cp; return cp;
@@ -202,6 +211,8 @@ namespace Grid {
Linop.HermOpAndNorm(z,Az,zAz,zAAz); Linop.HermOpAndNorm(z,Az,zAz,zAAz);
Linop.HermOp(z,tmp); Linop.HermOp(z,tmp);
MatTimer.Stop(); MatTimer.Stop();
LinalgTimer.Start();
tmp=tmp-r; tmp=tmp-r;
std::cout<<GridLogMessage<< " Preconditioner resid " <<sqrt(norm2(tmp)/norm2(r))<<std::endl; std::cout<<GridLogMessage<< " Preconditioner resid " <<sqrt(norm2(tmp)/norm2(r))<<std::endl;
@@ -219,9 +230,9 @@ namespace Grid {
} }
qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm
LinalgTimer.Stop();
} }
assert(0); // never reached assert(0); // never reached
return cp; return cp;
} }

View File

@@ -0,0 +1,473 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/SchurRedBlack.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_SCHUR_RED_BLACK_H
#define GRID_SCHUR_RED_BLACK_H
/*
* Red black Schur decomposition
*
* M = (Mee Meo) = (1 0 ) (Mee 0 ) (1 Mee^{-1} Meo)
* (Moe Moo) (Moe Mee^-1 1 ) (0 Moo-Moe Mee^-1 Meo) (0 1 )
* = L D U
*
* L^-1 = (1 0 )
* (-MoeMee^{-1} 1 )
* L^{dag} = ( 1 Mee^{-dag} Moe^{dag} )
* ( 0 1 )
* L^{-d} = ( 1 -Mee^{-dag} Moe^{dag} )
* ( 0 1 )
*
* U^-1 = (1 -Mee^{-1} Meo)
* (0 1 )
* U^{dag} = ( 1 0)
* (Meo^dag Mee^{-dag} 1)
* U^{-dag} = ( 1 0)
* (-Meo^dag Mee^{-dag} 1)
***********************
* M psi = eta
***********************
*Odd
* i) D_oo psi_o = L^{-1} eta_o
* eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
*
* Wilson:
* (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o
* Stag:
* D_oo psi_o = L^{-1} eta = (eta_o - Moe Mee^{-1} eta_e)
*
* L^-1 eta_o= (1 0 ) (e
* (-MoeMee^{-1} 1 )
*
*Even
* ii) Mee psi_e + Meo psi_o = src_e
*
* => sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
*
*
* TODO: Other options:
*
* a) change checkerboards for Schur e<->o
*
* Left precon by Moo^-1
* b) Doo^{dag} M_oo^-dag Moo^-1 Doo psi_0 = (D_oo)^dag M_oo^-dag Moo^-1 L^{-1} eta_o
* eta_o' = (D_oo)^dag M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e)
*
* Right precon by Moo^-1
* c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1} eta_o
* eta_o' = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
* psi_o = M_oo^-1 phi_o
* TODO: Deflation
*/
namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Use base class to share code
///////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Take a matrix and form a Red Black solver calling a Herm solver
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SchurRedBlackBase {
protected:
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
OperatorFunction<Field> & _HermitianRBSolver;
int CBfactorise;
bool subGuess;
public:
SchurRedBlackBase(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
_HermitianRBSolver(HermitianRBSolver)
{
CBfactorise = 0;
subtractGuess(initSubGuess);
};
void subtractGuess(const bool initSubGuess)
{
subGuess = initSubGuess;
}
bool isSubtractGuess(void)
{
return subGuess;
}
/////////////////////////////////////////////////////////////
// Shared code
/////////////////////////////////////////////////////////////
void operator() (Matrix & _Matrix,const Field &in, Field &out){
ZeroGuesser<Field> guess;
(*this)(_Matrix,in,out,guess);
}
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out)
{
ZeroGuesser<Field> guess;
(*this)(_Matrix,in,out,guess);
}
template<class Guesser>
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)
{
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
int nblock = in.size();
std::vector<Field> src_o(nblock,grid);
std::vector<Field> sol_o(nblock,grid);
std::vector<Field> guess_save;
Field resid(fgrid);
Field tmp(grid);
////////////////////////////////////////////////
// Prepare RedBlack source
////////////////////////////////////////////////
for(int b=0;b<nblock;b++){
RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
}
////////////////////////////////////////////////
// Make the guesses
////////////////////////////////////////////////
if ( subGuess ) guess_save.resize(nblock,grid);
for(int b=0;b<nblock;b++){
guess(src_o[b],sol_o[b]);
if ( subGuess ) {
guess_save[b] = sol_o[b];
}
}
//////////////////////////////////////////////////////////////
// Call the block solver
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlackBase calling the solver for "<<nblock<<" RHS" <<std::endl;
RedBlackSolve(_Matrix,src_o,sol_o);
////////////////////////////////////////////////
// A2A boolean behavioural control & reconstruct other checkerboard
////////////////////////////////////////////////
for(int b=0;b<nblock;b++) {
if (subGuess) sol_o[b] = sol_o[b] - guess_save[b];
///////// Needs even source //////////////
pickCheckerboard(Even,tmp,in[b]);
RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]);
/////////////////////////////////////////////////
// Check unprec residual if possible
/////////////////////////////////////////////////
if ( ! subGuess ) {
_Matrix.M(out[b],resid);
resid = resid-in[b];
RealD ns = norm2(in[b]);
RealD nr = norm2(resid);
std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl;
} else {
std::cout<<GridLogMessage<< "SchurRedBlackBase Guess subtracted after solve["<<b<<"] " << std::endl;
}
}
}
template<class Guesser>
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
// FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
Field resid(fgrid);
Field src_o(grid);
Field src_e(grid);
Field sol_o(grid);
////////////////////////////////////////////////
// RedBlack source
////////////////////////////////////////////////
RedBlackSource(_Matrix,in,src_e,src_o);
////////////////////////////////
// Construct the guess
////////////////////////////////
Field tmp(grid);
guess(src_o,sol_o);
Field guess_save(grid);
guess_save = sol_o;
//////////////////////////////////////////////////////////////
// Call the red-black solver
//////////////////////////////////////////////////////////////
RedBlackSolve(_Matrix,src_o,sol_o);
////////////////////////////////////////////////
// Fionn A2A boolean behavioural control
////////////////////////////////////////////////
if (subGuess) sol_o= sol_o-guess_save;
///////////////////////////////////////////////////
// RedBlack solution needs the even source
///////////////////////////////////////////////////
RedBlackSolution(_Matrix,sol_o,src_e,out);
// Verify the unprec residual
if ( ! subGuess ) {
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout<<GridLogMessage << "SchurRedBlackBase solver true unprec resid "<< std::sqrt(nr/ns) << std::endl;
} else {
std::cout << GridLogMessage << "SchurRedBlackBase Guess subtracted after solve." << std::endl;
}
}
/////////////////////////////////////////////////////////////
// Override in derived. Not virtual as template methods
/////////////////////////////////////////////////////////////
virtual void RedBlackSource (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) =0;
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol) =0;
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o) =0;
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)=0;
};
template<class Field> class SchurRedBlackStaggeredSolve : public SchurRedBlackBase<Field> {
public:
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
: SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess)
{
}
//////////////////////////////////////////////////////
// Override RedBlack specialisation
//////////////////////////////////////////////////////
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
{
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
Field tmp(grid);
Field Mtmp(grid);
pickCheckerboard(Even,src_e,src);
pickCheckerboard(Odd ,src_o,src);
/////////////////////////////////////////////////////
// src_o = (source_o - Moe MeeInv source_e)
/////////////////////////////////////////////////////
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
_Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm.
}
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e_c,Field &sol)
{
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
Field tmp(grid);
Field sol_e(grid);
Field src_e(grid);
src_e = src_e_c; // Const correctness
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
///////////////////////////////////////////////////
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(sol,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(sol,sol_o); assert( sol_o.checkerboard ==Odd );
}
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
{
SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
};
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
{
SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
}
};
template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Site diagonal has Mooee on it.
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SchurRedBlackDiagMooeeSolve : public SchurRedBlackBase<Field> {
public:
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
: SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess) {};
//////////////////////////////////////////////////////
// Override RedBlack specialisation
//////////////////////////////////////////////////////
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
{
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
Field tmp(grid);
Field Mtmp(grid);
pickCheckerboard(Even,src_e,src);
pickCheckerboard(Odd ,src_o,src);
/////////////////////////////////////////////////////
// src_o = Mdag * (source_o - Moe MeeInv source_e)
/////////////////////////////////////////////////////
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
// get the right MpcDag
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
}
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
{
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
Field tmp(grid);
Field sol_e(grid);
Field src_e_i(grid);
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
///////////////////////////////////////////////////
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
src_e_i = src_e-tmp; assert( src_e_i.checkerboard ==Even);
_Matrix.MooeeInv(src_e_i,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(sol,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(sol,sol_o); assert( sol_o.checkerboard ==Odd );
}
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
{
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
};
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
{
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
}
};
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Site diagonal is identity, right preconditioned by Mee^inv
// ( 1 - Meo Moo^inv Moe Mee^inv ) phi =( 1 - Meo Moo^inv Moe Mee^inv ) Mee psi = = eta = eta
//=> psi = MeeInv phi
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SchurRedBlackDiagTwoSolve : public SchurRedBlackBase<Field> {
public:
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
/////////////////////////////////////////////////////
// Wrap the usual normal equations Schur trick
/////////////////////////////////////////////////////
SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
: SchurRedBlackBase<Field>(HermitianRBSolver,initSubGuess) {};
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
{
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
Field tmp(grid);
Field Mtmp(grid);
pickCheckerboard(Even,src_e,src);
pickCheckerboard(Odd ,src_o,src);
/////////////////////////////////////////////////////
// src_o = Mdag * (source_o - Moe MeeInv source_e)
/////////////////////////////////////////////////////
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
// get the right MpcDag
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
}
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
{
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
Field sol_o_i(grid);
Field tmp(grid);
Field sol_e(grid);
////////////////////////////////////////////////
// MooeeInv due to pecond
////////////////////////////////////////////////
_Matrix.MooeeInv(sol_o,tmp);
sol_o_i = tmp;
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
///////////////////////////////////////////////////
_Matrix.Meooe(sol_o_i,tmp); assert( tmp.checkerboard ==Even);
tmp = src_e-tmp; assert( src_e.checkerboard ==Even);
_Matrix.MooeeInv(tmp,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(sol,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(sol,sol_o_i); assert( sol_o_i.checkerboard ==Odd );
};
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
{
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
};
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
{
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
}
};
}
#endif

View File

@@ -50,8 +50,6 @@ void CartesianCommunicator::Init(int *argc, char ***argv)
assert(0); assert(0);
} }
Grid_quiesce_nodes();
// Never clean up as done once. // Never clean up as done once.
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
@@ -124,10 +122,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
// split the communicator // split the communicator
////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////
// int Nparent = parent._processors ; // int Nparent = parent._processors ;
// std::cout << " splitting from communicator "<<parent.communicator <<std::endl;
int Nparent; int Nparent;
MPI_Comm_size(parent.communicator,&Nparent); MPI_Comm_size(parent.communicator,&Nparent);
// std::cout << " Parent size "<<Nparent <<std::endl;
int childsize=1; int childsize=1;
for(int d=0;d<processors.size();d++) { for(int d=0;d<processors.size();d++) {
@@ -136,8 +132,6 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
int Nchild = Nparent/childsize; int Nchild = Nparent/childsize;
assert (childsize * Nchild == Nparent); assert (childsize * Nchild == Nparent);
// std::cout << " child size "<<childsize <<std::endl;
std::vector<int> ccoor(_ndimension); // coor within subcommunicator std::vector<int> ccoor(_ndimension); // coor within subcommunicator
std::vector<int> scoor(_ndimension); // coor of split within parent std::vector<int> scoor(_ndimension); // coor of split within parent
std::vector<int> ssize(_ndimension); // coor of split within parent std::vector<int> ssize(_ndimension); // coor of split within parent

View File

@@ -114,19 +114,150 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
assert(WorldNode!=-1); assert(WorldNode!=-1);
_ShmSetup=1; _ShmSetup=1;
} }
// Gray encode support
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm) int BinaryToGray (int binary) {
int gray = (binary>>1)^binary;
return gray;
}
int Log2Size(int TwoToPower,int MAXLOG2)
{ {
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
////////////////////////////////////////////////////////////////
int log2size = -1; int log2size = -1;
for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){ for(int i=0;i<=MAXLOG2;i++){
if ( (0x1<<i) == WorldShmSize ) { if ( (0x1<<i) == TwoToPower ) {
log2size = i; log2size = i;
break; break;
} }
} }
return log2size;
}
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
{
#ifdef HYPERCUBE
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
////////////////////////////////////////////////////////////////
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
assert(log2size != -1);
////////////////////////////////////////////////////////////////
// Identify the hypercube coordinate of this node using hostname
////////////////////////////////////////////////////////////////
// n runs 0...7 9...16 18...25 27...34 (8*4) 5 bits
// i runs 0..7 3 bits
// r runs 0..3 2 bits
// 2^10 = 1024 nodes
const int maxhdim = 10;
std::vector<int> HyperCubeCoords(maxhdim,0);
std::vector<int> RootHyperCubeCoords(maxhdim,0);
int R;
int I;
int N;
const int namelen = _POSIX_HOST_NAME_MAX;
char name[namelen];
// Parse ICE-XA hostname to get hypercube location
gethostname(name,namelen);
int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
assert(nscan==3);
int nlo = N%9;
int nhi = N/9;
uint32_t hypercoor = (R<<8)|(I<<5)|(nhi<<3)|nlo ;
uint32_t rootcoor = hypercoor;
//////////////////////////////////////////////////////////////////
// Print debug info
//////////////////////////////////////////////////////////////////
for(int d=0;d<maxhdim;d++){
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
}
std::string hname(name);
std::cout << "hostname "<<hname<<std::endl;
std::cout << "R " << R << " I " << I << " N "<< N
<< " hypercoor 0x"<<std::hex<<hypercoor<<std::dec<<std::endl;
//////////////////////////////////////////////////////////////////
// broadcast node 0's base coordinate for this partition.
//////////////////////////////////////////////////////////////////
MPI_Bcast(&rootcoor, sizeof(rootcoor), MPI_BYTE, 0, WorldComm);
hypercoor=hypercoor-rootcoor;
assert(hypercoor<WorldSize);
assert(hypercoor>=0);
//////////////////////////////////////
// Printing
//////////////////////////////////////
for(int d=0;d<maxhdim;d++){
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
}
////////////////////////////////////////////////////////////////
// Identify subblock of ranks on node spreading across dims
// in a maximally symmetrical way
////////////////////////////////////////////////////////////////
int ndimension = processors.size();
std::vector<int> processor_coor(ndimension);
std::vector<int> WorldDims = processors; std::vector<int> ShmDims (ndimension,1); std::vector<int> NodeDims (ndimension);
std::vector<int> ShmCoor (ndimension); std::vector<int> NodeCoor (ndimension); std::vector<int> WorldCoor(ndimension);
std::vector<int> HyperCoor(ndimension);
int dim = 0;
for(int l2=0;l2<log2size;l2++){
while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
ShmDims[dim]*=2;
dim=(dim+1)%ndimension;
}
////////////////////////////////////////////////////////////////
// Establish torus of processes and nodes with sub-blockings
////////////////////////////////////////////////////////////////
for(int d=0;d<ndimension;d++){
NodeDims[d] = WorldDims[d]/ShmDims[d];
}
////////////////////////////////////////////////////////////////
// Map Hcube according to physical lattice
// must partition. Loop over dims and find out who would join.
////////////////////////////////////////////////////////////////
int hcoor = hypercoor;
for(int d=0;d<ndimension;d++){
int bits = Log2Size(NodeDims[d],MAXLOG2RANKSPERNODE);
int msk = (0x1<<bits)-1;
HyperCoor[d]=hcoor & msk;
HyperCoor[d]=BinaryToGray(HyperCoor[d]); // Space filling curve magic
hcoor = hcoor >> bits;
}
////////////////////////////////////////////////////////////////
// Check processor counts match
////////////////////////////////////////////////////////////////
int Nprocessors=1;
for(int i=0;i<ndimension;i++){
Nprocessors*=processors[i];
}
assert(WorldSize==Nprocessors);
////////////////////////////////////////////////////////////////
// Establish mapping between lexico physics coord and WorldRank
////////////////////////////////////////////////////////////////
int rank;
Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode ,NodeDims);
for(int d=0;d<ndimension;d++) NodeCoor[d]=HyperCoor[d];
Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);
/////////////////////////////////////////////////////////////////
// Build the new communicator
/////////////////////////////////////////////////////////////////
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
assert(ierr==0);
#else
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
////////////////////////////////////////////////////////////////
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
assert(log2size != -1); assert(log2size != -1);
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
@@ -175,7 +306,69 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
///////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm); int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
assert(ierr==0); assert(ierr==0);
#endif
} }
////////////////////////////////////////////////////////////////////////////////////////////
// SHMGET
////////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_MPI3_SHMGET
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// allocate the shared windows for our group
//////////////////////////////////////////////////////////////////////////////////////////////////////////
MPI_Barrier(WorldShmComm);
WorldShmCommBufs.resize(WorldShmSize);
std::vector<int> shmids(WorldShmSize);
if ( WorldShmRank == 0 ) {
for(int r=0;r<WorldShmSize;r++){
size_t size = bytes;
key_t key = IPC_PRIVATE;
int flags = IPC_CREAT | SHM_R | SHM_W;
#ifdef SHM_HUGETLB
if (Hugepages) flags|=SHM_HUGETLB;
#endif
if ((shmids[r]= shmget(key,size, flags)) ==-1) {
int errsv = errno;
printf("Errno %d\n",errsv);
printf("key %d\n",key);
printf("size %lld\n",size);
printf("flags %d\n",flags);
perror("shmget");
exit(1);
}
}
}
MPI_Barrier(WorldShmComm);
MPI_Bcast(&shmids[0],WorldShmSize*sizeof(int),MPI_BYTE,0,WorldShmComm);
MPI_Barrier(WorldShmComm);
for(int r=0;r<WorldShmSize;r++){
WorldShmCommBufs[r] = (uint64_t *)shmat(shmids[r], NULL,0);
if (WorldShmCommBufs[r] == (uint64_t *)-1) {
perror("Shared memory attach failure");
shmctl(shmids[r], IPC_RMID, NULL);
exit(2);
}
}
MPI_Barrier(WorldShmComm);
///////////////////////////////////
// Mark for clean up
///////////////////////////////////
for(int r=0;r<WorldShmSize;r++){
shmctl(shmids[r], IPC_RMID,(struct shmid_ds *)NULL);
}
MPI_Barrier(WorldShmComm);
_ShmAlloc=1;
_ShmAllocBytes = bytes;
}
#endif
//////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////
// Hugetlbfs mapping intended // Hugetlbfs mapping intended
@@ -183,7 +376,7 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
#ifdef GRID_MPI3_SHMMMAP #ifdef GRID_MPI3_SHMMMAP
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{ {
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<<std::endl; std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
assert(_ShmSetup==1); assert(_ShmSetup==1);
assert(_ShmAlloc==0); assert(_ShmAlloc==0);
////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -193,7 +386,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
WorldShmCommBufs.resize(WorldShmSize); WorldShmCommBufs.resize(WorldShmSize);
//////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////
// Hugetlbf and others map filesystems as mappable huge pages // Hugetlbfs and others map filesystems as mappable huge pages
//////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////
char shm_name [NAME_MAX]; char shm_name [NAME_MAX];
for(int r=0;r<WorldShmSize;r++){ for(int r=0;r<WorldShmSize;r++){
@@ -220,7 +413,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
assert(((uint64_t)ptr&0x3F)==0); assert(((uint64_t)ptr&0x3F)==0);
close(fd); close(fd);
WorldShmCommBufs[r] =ptr; WorldShmCommBufs[r] =ptr;
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl; // std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
} }
_ShmAlloc=1; _ShmAlloc=1;
_ShmAllocBytes = bytes; _ShmAllocBytes = bytes;
@@ -262,7 +455,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
assert(((uint64_t)ptr&0x3F)==0); assert(((uint64_t)ptr&0x3F)==0);
close(fd); close(fd);
WorldShmCommBufs[r] =ptr; WorldShmCommBufs[r] =ptr;
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl; // std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
} }
_ShmAlloc=1; _ShmAlloc=1;
_ShmAllocBytes = bytes; _ShmAllocBytes = bytes;
@@ -306,7 +499,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#endif #endif
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0); void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl; // std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl;
if ( ptr == (void * )MAP_FAILED ) { if ( ptr == (void * )MAP_FAILED ) {
perror("failed mmap"); perror("failed mmap");
assert(0); assert(0);
@@ -344,6 +537,9 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
} }
#endif #endif
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
// Global shared functionality finished // Global shared functionality finished
// Now move to per communicator functionality // Now move to per communicator functionality

File diff suppressed because it is too large Load Diff

View File

@@ -244,19 +244,11 @@ namespace Grid {
template<class sobj,class vobj> strong_inline template<class sobj,class vobj> strong_inline
RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){ RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
ret.checkerboard = x.checkerboard; return axpy_norm_fast(ret,a,x,y);
conformable(ret,x);
conformable(x,y);
axpy(ret,a,x,y);
return norm2(ret);
} }
template<class sobj,class vobj> strong_inline template<class sobj,class vobj> strong_inline
RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){ RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
ret.checkerboard = x.checkerboard; return axpby_norm_fast(ret,a,b,x,y);
conformable(ret,x);
conformable(x,y);
axpby(ret,a,b,x,y);
return norm2(ret); // FIXME implement parallel norm in ss loop
} }
} }

View File

@@ -33,7 +33,7 @@ namespace Grid {
// Deterministic Reduction operations // Deterministic Reduction operations
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){ template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
ComplexD nrm = innerProduct(arg,arg); auto nrm = innerProduct(arg,arg);
return std::real(nrm); return std::real(nrm);
} }
@@ -43,32 +43,85 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
{ {
typedef typename vobj::scalar_type scalar_type; typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_typeD vector_type; typedef typename vobj::vector_typeD vector_type;
scalar_type nrm;
GridBase *grid = left._grid; GridBase *grid = left._grid;
const int pad = 8;
std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize()); ComplexD inner;
Vector<ComplexD> sumarray(grid->SumArraySize()*pad);
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){ parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
int nwork, mywork, myoff; int nwork, mywork, myoff;
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff); GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
decltype(innerProductD(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation decltype(innerProductD(left._odata[0],right._odata[0])) vinner=zero; // private to thread; sub summation
for(int ss=myoff;ss<mywork+myoff; ss++){ for(int ss=myoff;ss<mywork+myoff; ss++){
vnrm = vnrm + innerProductD(left._odata[ss],right._odata[ss]); vinner = vinner + innerProductD(left._odata[ss],right._odata[ss]);
} }
sumarray[thr]=TensorRemove(vnrm) ; // All threads sum across SIMD; reduce serial work at end
// one write per cacheline with streaming store
ComplexD tmp = Reduce(TensorRemove(vinner)) ;
vstream(sumarray[thr*pad],tmp);
} }
vector_type vvnrm; vvnrm=zero; // sum across threads inner=0.0;
for(int i=0;i<grid->SumArraySize();i++){ for(int i=0;i<grid->SumArraySize();i++){
vvnrm = vvnrm+sumarray[i]; inner = inner+sumarray[i*pad];
} }
nrm = Reduce(vvnrm);// sum across simd right._grid->GlobalSum(inner);
right._grid->GlobalSum(nrm); return inner;
}
/////////////////////////
// Fast axpby_norm
// z = a x + b y
// return norm z
/////////////////////////
template<class sobj,class vobj> strong_inline RealD
axpy_norm_fast(Lattice<vobj> &z,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y)
{
sobj one(1.0);
return axpby_norm_fast(z,a,one,x,y);
}
template<class sobj,class vobj> strong_inline RealD
axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
{
const int pad = 8;
z.checkerboard = x.checkerboard;
conformable(z,x);
conformable(x,y);
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_typeD vector_type;
RealD nrm;
GridBase *grid = x._grid;
Vector<RealD> sumarray(grid->SumArraySize()*pad);
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
int nwork, mywork, myoff;
GridThread::GetWork(x._grid->oSites(),thr,mywork,myoff);
// private to thread; sub summation
decltype(innerProductD(z._odata[0],z._odata[0])) vnrm=zero;
for(int ss=myoff;ss<mywork+myoff; ss++){
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
vnrm = vnrm + innerProductD(tmp,tmp);
vstream(z._odata[ss],tmp);
}
vstream(sumarray[thr*pad],real(Reduce(TensorRemove(vnrm)))) ;
}
nrm = 0.0; // sum across threads; linear in thread count but fast
for(int i=0;i<grid->SumArraySize();i++){
nrm = nrm+sumarray[i*pad];
}
z._grid->GlobalSum(nrm);
return nrm; return nrm;
} }
template<class Op,class T1> template<class Op,class T1>
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr) inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
@@ -221,6 +274,115 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
} }
} }
template<class vobj>
static void mySliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
{
// std::cout << GridLogMessage << "Start mySliceInnerProductVector" << std::endl;
typedef typename vobj::scalar_type scalar_type;
std::vector<scalar_type> lsSum;
localSliceInnerProductVector(result, lhs, rhs, lsSum, orthogdim);
globalSliceInnerProductVector(result, lhs, lsSum, orthogdim);
// std::cout << GridLogMessage << "End mySliceInnerProductVector" << std::endl;
}
template <class vobj>
static void localSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, const Lattice<vobj> &rhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
{
// std::cout << GridLogMessage << "Start prep" << std::endl;
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
GridBase *grid = lhs._grid;
assert(grid!=NULL);
conformable(grid,rhs._grid);
const int Nd = grid->_ndimension;
const int Nsimd = grid->Nsimd();
assert(orthogdim >= 0);
assert(orthogdim < Nd);
int fd=grid->_fdimensions[orthogdim];
int ld=grid->_ldimensions[orthogdim];
int rd=grid->_rdimensions[orthogdim];
// std::cout << GridLogMessage << "Start alloc" << std::endl;
std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first
lsSum.resize(ld,scalar_type(0.0)); // sum across these down to scalars
std::vector<iScalar<scalar_type>> extracted(Nsimd); // splitting the SIMD
// std::cout << GridLogMessage << "End alloc" << std::endl;
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
for(int r=0;r<rd;r++){
lvSum[r]=zero;
}
int e1= grid->_slice_nblock[orthogdim];
int e2= grid->_slice_block [orthogdim];
int stride=grid->_slice_stride[orthogdim];
// std::cout << GridLogMessage << "End prep" << std::endl;
// std::cout << GridLogMessage << "Start parallel inner product, _rd = " << rd << std::endl;
vector_type vv;
parallel_for(int r=0;r<rd;r++)
{
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int ss = so + n * stride + b;
vv = TensorRemove(innerProduct(lhs._odata[ss], rhs._odata[ss]));
lvSum[r] = lvSum[r] + vv;
}
}
}
// std::cout << GridLogMessage << "End parallel inner product" << std::endl;
// Sum across simd lanes in the plane, breaking out orthog dir.
std::vector<int> icoor(Nd);
for(int rt=0;rt<rd;rt++){
iScalar<vector_type> temp;
temp._internal = lvSum[rt];
extract(temp,extracted);
for(int idx=0;idx<Nsimd;idx++){
grid->iCoorFromIindex(icoor,idx);
int ldx =rt+icoor[orthogdim]*rd;
lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
}
}
// std::cout << GridLogMessage << "End sum over simd lanes" << std::endl;
}
template <class vobj>
static void globalSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
{
typedef typename vobj::scalar_type scalar_type;
GridBase *grid = lhs._grid;
int fd = result.size();
int ld = lsSum.size();
// sum over nodes.
std::vector<scalar_type> gsum;
gsum.resize(fd, scalar_type(0.0));
// std::cout << GridLogMessage << "Start of gsum[t] creation:" << std::endl;
for(int t=0;t<fd;t++){
int pt = t/ld; // processor plane
int lt = t%ld;
if ( pt == grid->_processor_coor[orthogdim] ) {
gsum[t]=lsSum[lt];
}
}
// std::cout << GridLogMessage << "End of gsum[t] creation:" << std::endl;
// std::cout << GridLogMessage << "Start of GlobalSumVector:" << std::endl;
grid->GlobalSumVector(&gsum[0], fd);
// std::cout << GridLogMessage << "End of GlobalSumVector:" << std::endl;
result = gsum;
}
template<class vobj> template<class vobj>
static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim) static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
{ {

View File

@@ -158,10 +158,19 @@ namespace Grid {
// tens of seconds per trajectory so this is clean in all reasonable cases, // tens of seconds per trajectory so this is clean in all reasonable cases,
// and margin of safety is orders of magnitude. // and margin of safety is orders of magnitude.
// We could hack Sitmo to skip in the higher order words of state if necessary // We could hack Sitmo to skip in the higher order words of state if necessary
//
// Replace with 2^30 ; avoid problem on large volumes
//
///////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////
// uint64_t skip = site+1; // Old init Skipped then drew. Checked compat with faster init // uint64_t skip = site+1; // Old init Skipped then drew. Checked compat with faster init
const int shift = 30;
uint64_t skip = site; uint64_t skip = site;
skip = skip<<40;
skip = skip<<shift;
assert((skip >> shift)==site); // check for overflow
eng.discard(skip); eng.discard(skip);
// std::cout << " Engine " <<site << " state " <<eng<<std::endl; // std::cout << " Engine " <<site << " state " <<eng<<std::endl;
} }
@@ -242,7 +251,7 @@ namespace Grid {
dist[0].reset(); dist[0].reset();
for(int idx=0;idx<words;idx++){ for(int idx=0;idx<words;idx++){
fillScalar(buf[idx],dist[0],_generators[0]); fillScalar(buf[idx],dist[0],_generators[0]);
} }
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
@@ -274,7 +283,7 @@ namespace Grid {
RealF *pointer=(RealF *)&l; RealF *pointer=(RealF *)&l;
dist[0].reset(); dist[0].reset();
for(int i=0;i<2*vComplexF::Nsimd();i++){ for(int i=0;i<2*vComplexF::Nsimd();i++){
fillScalar(pointer[i],dist[0],_generators[0]); fillScalar(pointer[i],dist[0],_generators[0]);
} }
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
} }
@@ -282,7 +291,7 @@ namespace Grid {
RealD *pointer=(RealD *)&l; RealD *pointer=(RealD *)&l;
dist[0].reset(); dist[0].reset();
for(int i=0;i<2*vComplexD::Nsimd();i++){ for(int i=0;i<2*vComplexD::Nsimd();i++){
fillScalar(pointer[i],dist[0],_generators[0]); fillScalar(pointer[i],dist[0],_generators[0]);
} }
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
} }
@@ -290,7 +299,7 @@ namespace Grid {
RealF *pointer=(RealF *)&l; RealF *pointer=(RealF *)&l;
dist[0].reset(); dist[0].reset();
for(int i=0;i<vRealF::Nsimd();i++){ for(int i=0;i<vRealF::Nsimd();i++){
fillScalar(pointer[i],dist[0],_generators[0]); fillScalar(pointer[i],dist[0],_generators[0]);
} }
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
} }
@@ -308,6 +317,19 @@ namespace Grid {
std::seed_seq src(seeds.begin(),seeds.end()); std::seed_seq src(seeds.begin(),seeds.end());
Seed(src,0); Seed(src,0);
} }
void SeedUniqueString(const std::string &s){
std::vector<int> seeds;
std::stringstream sha;
seeds = GridChecksum::sha256_seeds(s);
for(int i=0;i<seeds.size();i++) {
sha << std::hex << seeds[i];
}
std::cout << GridLogMessage << "Intialising serial RNG with unique string '"
<< s << "'" << std::endl;
std::cout << GridLogMessage << "Seed SHA256: " << sha.str() << std::endl;
SeedFixedIntegers(seeds);
}
}; };
class GridParallelRNG : public GridRNGbase { class GridParallelRNG : public GridRNGbase {
@@ -368,6 +390,14 @@ namespace Grid {
_time_counter += usecond()- inner_time_counter; _time_counter += usecond()- inner_time_counter;
}; };
void SeedUniqueString(const std::string &s){
std::vector<int> seeds;
seeds = GridChecksum::sha256_seeds(s);
std::cout << GridLogMessage << "Intialising parallel RNG with unique string '"
<< s << "'" << std::endl;
std::cout << GridLogMessage << "Seed SHA256: " << GridChecksum::sha256_string(seeds) << std::endl;
SeedFixedIntegers(seeds);
}
void SeedFixedIntegers(const std::vector<int> &seeds){ void SeedFixedIntegers(const std::vector<int> &seeds){
// Everyone generates the same seed_seq based on input seeds // Everyone generates the same seed_seq based on input seeds

View File

@@ -464,8 +464,10 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
assert(orthog>=0); assert(orthog>=0);
for(int d=0;d<nh;d++){ for(int d=0;d<nh;d++){
assert(lg->_processors[d] == hg->_processors[d]); if ( d!=orthog ) {
assert(lg->_ldimensions[d] == hg->_ldimensions[d]); assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
}
} }
// the above should guarantee that the operations are local // the above should guarantee that the operations are local
@@ -485,7 +487,7 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
template<class vobj> template<class vobj>
void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog) void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{ {
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
@@ -499,8 +501,10 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slic
assert(orthog>=0); assert(orthog>=0);
for(int d=0;d<nh;d++){ for(int d=0;d<nh;d++){
assert(lg->_processors[d] == hg->_processors[d]); if ( d!=orthog ) {
assert(lg->_ldimensions[d] == hg->_ldimensions[d]); assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
}
} }
// the above should guarantee that the operations are local // the above should guarantee that the operations are local

View File

@@ -59,6 +59,7 @@ void GridLogTimestamp(int on){
} }
Colours GridLogColours(0); Colours GridLogColours(0);
GridLogger GridLogMG (1, "MG" , GridLogColours, "NORMAL");
GridLogger GridLogIRL (1, "IRL" , GridLogColours, "NORMAL"); GridLogger GridLogIRL (1, "IRL" , GridLogColours, "NORMAL");
GridLogger GridLogSolver (1, "Solver", GridLogColours, "NORMAL"); GridLogger GridLogSolver (1, "Solver", GridLogColours, "NORMAL");
GridLogger GridLogError (1, "Error" , GridLogColours, "RED"); GridLogger GridLogError (1, "Error" , GridLogColours, "RED");

View File

@@ -86,7 +86,7 @@ protected:
Colours &Painter; Colours &Painter;
int active; int active;
int timing_mode; int timing_mode;
int topWidth{-1}; int topWidth{-1}, chanWidth{-1};
static int timestamp; static int timestamp;
std::string name, topName; std::string name, topName;
std::string COLOUR; std::string COLOUR;
@@ -126,6 +126,7 @@ public:
} }
} }
void setTopWidth(const int w) {topWidth = w;} void setTopWidth(const int w) {topWidth = w;}
void setChanWidth(const int w) {chanWidth = w;}
friend std::ostream& operator<< (std::ostream& stream, Logger& log){ friend std::ostream& operator<< (std::ostream& stream, Logger& log){
@@ -136,13 +137,20 @@ public:
stream << std::setw(log.topWidth); stream << std::setw(log.topWidth);
} }
stream << log.topName << log.background()<< " : "; stream << log.topName << log.background()<< " : ";
stream << log.colour() << std::left << log.name << log.background() << " : "; stream << log.colour() << std::left;
if (log.chanWidth > 0)
{
stream << std::setw(log.chanWidth);
}
stream << log.name << log.background() << " : ";
if ( log.timestamp ) { if ( log.timestamp ) {
log.StopWatch->Stop(); log.StopWatch->Stop();
GridTime now = log.StopWatch->Elapsed(); GridTime now = log.StopWatch->Elapsed();
if ( log.timing_mode==1 ) log.StopWatch->Reset(); if ( log.timing_mode==1 ) log.StopWatch->Reset();
log.StopWatch->Start(); log.StopWatch->Start();
stream << log.evidence()<< std::setw(6)<<now << log.background() << " : " ; stream << log.evidence()
<< now << log.background() << " : " ;
} }
stream << log.colour(); stream << log.colour();
return stream; return stream;
@@ -161,6 +169,7 @@ public:
void GridLogConfigure(std::vector<std::string> &logstreams); void GridLogConfigure(std::vector<std::string> &logstreams);
extern GridLogger GridLogMG;
extern GridLogger GridLogIRL; extern GridLogger GridLogIRL;
extern GridLogger GridLogSolver; extern GridLogger GridLogSolver;
extern GridLogger GridLogError; extern GridLogger GridLogError;

View File

@@ -0,0 +1,3 @@
#include <Grid/GridCore.h>
int Grid::BinaryIO::latticeWriteMaxRetry = -1;

View File

@@ -81,6 +81,7 @@ inline void removeWhitespace(std::string &key)
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
class BinaryIO { class BinaryIO {
public: public:
static int latticeWriteMaxRetry;
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
// more byte manipulation helpers // more byte manipulation helpers
@@ -263,7 +264,7 @@ PARALLEL_CRITICAL
GridBase *grid, GridBase *grid,
std::vector<fobj> &iodata, std::vector<fobj> &iodata,
std::string file, std::string file,
uint64_t offset, uint64_t& offset,
const std::string &format, int control, const std::string &format, int control,
uint32_t &nersc_csum, uint32_t &nersc_csum,
uint32_t &scidac_csuma, uint32_t &scidac_csuma,
@@ -370,7 +371,7 @@ PARALLEL_CRITICAL
#endif #endif
} else { } else {
std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : " std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : "
<< iodata.size() * sizeof(fobj) << " bytes" << std::endl; << iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
std::ifstream fin; std::ifstream fin;
fin.open(file, std::ios::binary | std::ios::in); fin.open(file, std::ios::binary | std::ios::in);
if (control & BINARYIO_MASTER_APPEND) if (control & BINARYIO_MASTER_APPEND)
@@ -431,14 +432,20 @@ PARALLEL_CRITICAL
MPI_Abort(MPI_COMM_WORLD, 1); //assert(ierr == 0); MPI_Abort(MPI_COMM_WORLD, 1); //assert(ierr == 0);
} }
std::cout << GridLogDebug << "MPI read I/O set view " << file << std::endl; std::cout << GridLogDebug << "MPI write I/O set view " << file << std::endl;
ierr = MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); ierr = MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL);
assert(ierr == 0); assert(ierr == 0);
std::cout << GridLogDebug << "MPI read I/O write all " << file << std::endl; std::cout << GridLogDebug << "MPI write I/O write all " << file << std::endl;
ierr = MPI_File_write_all(fh, &iodata[0], 1, localArray, &status); ierr = MPI_File_write_all(fh, &iodata[0], 1, localArray, &status);
assert(ierr == 0); assert(ierr == 0);
MPI_Offset os;
MPI_File_get_position(fh, &os);
MPI_File_get_byte_offset(fh, os, &disp);
offset = disp;
MPI_File_close(&fh); MPI_File_close(&fh);
MPI_Type_free(&fileArray); MPI_Type_free(&fileArray);
MPI_Type_free(&localArray); MPI_Type_free(&localArray);
@@ -448,7 +455,7 @@ PARALLEL_CRITICAL
} else { } else {
std::cout << GridLogMessage << "IOobject: C++ write I/O " << file << " : " std::cout << GridLogMessage << "IOobject: C++ write I/O " << file << " : "
<< iodata.size() * sizeof(fobj) << " bytes" << std::endl; << iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
std::ofstream fout; std::ofstream fout;
fout.exceptions ( std::fstream::failbit | std::fstream::badbit ); fout.exceptions ( std::fstream::failbit | std::fstream::badbit );
@@ -495,6 +502,7 @@ PARALLEL_CRITICAL
exit(1); exit(1);
#endif #endif
} }
offset = fout.tellp();
fout.close(); fout.close();
} }
timer.Stop(); timer.Stop();
@@ -575,7 +583,9 @@ PARALLEL_CRITICAL
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
typedef typename vobj::Realified::scalar_type word; word w=0; typedef typename vobj::Realified::scalar_type word; word w=0;
GridBase *grid = Umu._grid; GridBase *grid = Umu._grid;
uint64_t lsites = grid->lSites(); uint64_t lsites = grid->lSites(), offsetCopy = offset;
int attemptsLeft = std::max(0, BinaryIO::latticeWriteMaxRetry);
bool checkWrite = (BinaryIO::latticeWriteMaxRetry >= 0);
std::vector<sobj> scalardata(lsites); std::vector<sobj> scalardata(lsites);
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
@@ -590,9 +600,35 @@ PARALLEL_CRITICAL
grid->Barrier(); grid->Barrier();
timer.Stop(); timer.Stop();
while (attemptsLeft >= 0)
{
grid->Barrier();
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
nersc_csum,scidac_csuma,scidac_csumb);
if (checkWrite)
{
std::vector<fobj> ckiodata(lsites);
uint32_t cknersc_csum, ckscidac_csuma, ckscidac_csumb;
uint64_t ckoffset = offsetCopy;
std::cout << GridLogMessage << "writeLatticeObject: read back object" << std::endl;
grid->Barrier();
IOobject(w,grid,ckiodata,file,ckoffset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
cknersc_csum,ckscidac_csuma,ckscidac_csumb);
if ((cknersc_csum != nersc_csum) or (ckscidac_csuma != scidac_csuma) or (ckscidac_csumb != scidac_csumb))
{
std::cout << GridLogMessage << "writeLatticeObject: read test checksum failure, re-writing (" << attemptsLeft << " attempt(s) remaining)" << std::endl;
offset = offsetCopy;
}
else
{
std::cout << GridLogMessage << "writeLatticeObject: read test checksum correct" << std::endl;
break;
}
}
attemptsLeft--;
}
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
nersc_csum,scidac_csuma,scidac_csumb);
std::cout<<GridLogMessage<<"writeLatticeObject: unvectorize overhead "<<timer.Elapsed() <<std::endl; std::cout<<GridLogMessage<<"writeLatticeObject: unvectorize overhead "<<timer.Elapsed() <<std::endl;
} }
@@ -699,7 +735,6 @@ PARALLEL_CRITICAL
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC, IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
nersc_csum,scidac_csuma,scidac_csumb); nersc_csum,scidac_csuma,scidac_csumb);
iodata.resize(1); iodata.resize(1);
{ {
std::vector<RngStateType> tmp(RngStateCount); std::vector<RngStateType> tmp(RngStateCount);
@@ -719,5 +754,6 @@ PARALLEL_CRITICAL
std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl; std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl;
} }
}; };
} }
#endif #endif

View File

@@ -182,6 +182,11 @@ class GridLimeReader : public BinaryIO {
{ {
filename= _filename; filename= _filename;
File = fopen(filename.c_str(), "r"); File = fopen(filename.c_str(), "r");
if (File == nullptr)
{
std::cerr << "cannot open file '" << filename << "'" << std::endl;
abort();
}
LimeR = limeCreateReader(File); LimeR = limeCreateReader(File);
} }
///////////////////////////////////////////// /////////////////////////////////////////////
@@ -228,7 +233,8 @@ class GridLimeReader : public BinaryIO {
// std::cout << " ReadLatticeObject from offset "<<offset << std::endl; // std::cout << " ReadLatticeObject from offset "<<offset << std::endl;
BinarySimpleMunger<sobj,sobj> munge; BinarySimpleMunger<sobj,sobj> munge;
BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
std::cout << GridLogMessage << "SciDAC checksum A " << std::hex << scidac_csuma << std::dec << std::endl;
std::cout << GridLogMessage << "SciDAC checksum B " << std::hex << scidac_csumb << std::dec << std::endl;
///////////////////////////////////////////// /////////////////////////////////////////////
// Insist checksum is next record // Insist checksum is next record
///////////////////////////////////////////// /////////////////////////////////////////////
@@ -245,8 +251,7 @@ class GridLimeReader : public BinaryIO {
//////////////////////////////////////////// ////////////////////////////////////////////
// Read a generic serialisable object // Read a generic serialisable object
//////////////////////////////////////////// ////////////////////////////////////////////
template<class serialisable_object> void readLimeObject(std::string &xmlstring,std::string record_name)
void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
{ {
// should this be a do while; can we miss a first record?? // should this be a do while; can we miss a first record??
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
@@ -261,15 +266,23 @@ class GridLimeReader : public BinaryIO {
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR); limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
// std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl; // std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl;
std::string xmlstring(&xmlc[0]); xmlstring = std::string(&xmlc[0]);
XmlReader RD(xmlstring, true, "");
read(RD,object_name,object);
return; return;
} }
} }
assert(0); assert(0);
} }
template<class serialisable_object>
void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
{
std::string xmlstring;
readLimeObject(xmlstring, record_name);
XmlReader RD(xmlstring, true, "");
read(RD,object_name,object);
}
}; };
class GridLimeWriter : public BinaryIO class GridLimeWriter : public BinaryIO
@@ -320,16 +333,11 @@ class GridLimeWriter : public BinaryIO
//////////////////////////////////////////// ////////////////////////////////////////////
// Write a generic serialisable object // Write a generic serialisable object
//////////////////////////////////////////// ////////////////////////////////////////////
template<class serialisable_object> void writeLimeObject(int MB,int ME,XmlWriter &writer,std::string object_name,std::string record_name)
void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name)
{ {
if ( boss_node ) { if ( boss_node ) {
std::string xmlstring; std::string xmlstring = writer.docString();
{
XmlWriter WR("","");
write(WR,object_name,object);
xmlstring = WR.XmlString();
}
// std::cout << "WriteLimeObject" << record_name <<std::endl; // std::cout << "WriteLimeObject" << record_name <<std::endl;
uint64_t nbytes = xmlstring.size(); uint64_t nbytes = xmlstring.size();
// std::cout << " xmlstring "<< nbytes<< " " << xmlstring <<std::endl; // std::cout << " xmlstring "<< nbytes<< " " << xmlstring <<std::endl;
@@ -343,6 +351,20 @@ class GridLimeWriter : public BinaryIO
limeDestroyHeader(h); limeDestroyHeader(h);
} }
} }
template<class serialisable_object>
void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name, const unsigned int scientificPrec = 0)
{
XmlWriter WR("","");
if (scientificPrec)
{
WR.scientificFormat(true);
WR.setPrecision(scientificPrec);
}
write(WR,object_name,object);
writeLimeObject(MB, ME, WR, object_name, record_name);
}
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
// Write a generic lattice field and csum // Write a generic lattice field and csum
// This routine is Collectively called by all nodes // This routine is Collectively called by all nodes
@@ -449,7 +471,8 @@ class ScidacWriter : public GridLimeWriter {
// Write generic lattice field in scidac format // Write generic lattice field in scidac format
//////////////////////////////////////////////// ////////////////////////////////////////////////
template <class vobj, class userRecord> template <class vobj, class userRecord>
void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord) void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord,
const unsigned int recordScientificPrec = 0)
{ {
GridBase * grid = field._grid; GridBase * grid = field._grid;
@@ -467,7 +490,7 @@ class ScidacWriter : public GridLimeWriter {
////////////////////////////////////////////// //////////////////////////////////////////////
if ( this->boss_node ) { if ( this->boss_node ) {
writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML)); writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML), recordScientificPrec);
writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML)); writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
} }
// Collective call // Collective call

View File

@@ -49,14 +49,38 @@ inline double usecond(void) {
typedef std::chrono::system_clock GridClock; typedef std::chrono::system_clock GridClock;
typedef std::chrono::time_point<GridClock> GridTimePoint; typedef std::chrono::time_point<GridClock> GridTimePoint;
typedef std::chrono::milliseconds GridTime;
typedef std::chrono::microseconds GridUsecs;
inline std::ostream& operator<< (std::ostream & stream, const std::chrono::milliseconds & time) typedef std::chrono::seconds GridSecs;
typedef std::chrono::milliseconds GridMillisecs;
typedef std::chrono::microseconds GridUsecs;
typedef std::chrono::microseconds GridTime;
inline std::ostream& operator<< (std::ostream & stream, const GridSecs & time)
{ {
stream << time.count()<<" ms"; stream << time.count()<<" s";
return stream; return stream;
} }
inline std::ostream& operator<< (std::ostream & stream, const GridMillisecs & now)
{
GridSecs second(1);
auto secs = now/second ;
auto subseconds = now%second ;
auto fill = stream.fill();
stream << secs<<"."<<std::setw(3)<<std::setfill('0')<<subseconds.count()<<" s";
stream.fill(fill);
return stream;
}
inline std::ostream& operator<< (std::ostream & stream, const GridUsecs & now)
{
GridSecs second(1);
auto seconds = now/second ;
auto subseconds = now%second ;
auto fill = stream.fill();
stream << seconds<<"."<<std::setw(6)<<std::setfill('0')<<subseconds.count()<<" s";
stream.fill(fill);
return stream;
}
class GridStopWatch { class GridStopWatch {
private: private:
@@ -96,6 +120,9 @@ public:
assert(running == false); assert(running == false);
return (uint64_t) accumulator.count(); return (uint64_t) accumulator.count();
} }
bool isRunning(void){
return running;
}
}; };
} }

Some files were not shown because too many files have changed in this diff Show More