1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-10-24 01:34:47 +01:00

Compare commits

..

1957 Commits

Author SHA1 Message Date
de8b2dcca3 Hadrons: faster A2A matrix load 2019-01-11 16:12:49 +00:00
efe000341d Hadrons: contractor fixes 2019-01-11 16:12:16 +00:00
11086c5c25 Hadrons: first stab at MPI contractor 2019-01-10 16:29:57 +00:00
Peter Boyle
91a7fe247b Merge branch 'DanielRichtmann-feature/wilsonmg' into develop 2019-01-02 14:40:31 +00:00
Peter Boyle
8a1be021d3 Merge branch 'feature/wilsonmg' of https://github.com/DanielRichtmann/Grid into DanielRichtmann-feature/wilsonmg 2019-01-02 14:39:59 +00:00
fd66325321 pure QED test and copyright update 2018-12-14 17:39:11 +00:00
c637c0c48c James H.'s code for general size Wilson loops 2018-12-14 17:37:09 +00:00
c4b472176c Photon code fix 2018-12-14 17:36:38 +00:00
856476a890 big cleanup of the Photon class + QED Coulomb gauge 2018-12-13 21:52:38 +00:00
c509bd3fe2 Merge branch 'feature/resilient-io' into develop 2018-12-01 12:57:43 +00:00
49b934310b resilient I/O fix 2018-11-27 20:17:09 +00:00
01e8cf5017 Merge branch 'develop' into feature/resilient-io 2018-11-27 19:09:59 +00:00
12f4499502 HDF5 serialiser fix 2018-11-27 19:09:50 +00:00
05aec72887 Hadrons: application parameter for resilient I/O 2018-11-27 18:46:43 +00:00
136d3802cb binary parallel IO can do read tests and eventually re-write in case of failure 2018-11-27 18:38:24 +00:00
a4c55406ed checksummed HDF5 IO 2018-11-27 17:43:19 +00:00
c7f33ca2a8 Revert "Hadrons: A2A vector write can fail and retry"
This reverts commit 10fc263675.
2018-11-27 17:27:26 +00:00
0e3035c51d Revert "optional non-fatal checksum fail in Lime lattice read (with error codes)"
This reverts commit bccfd4cbb3.
2018-11-27 17:27:20 +00:00
10fc263675 Hadrons: A2A vector write can fail and retry 2018-11-26 19:47:03 +00:00
bccfd4cbb3 optional non-fatal checksum fail in Lime lattice read (with error codes) 2018-11-26 19:45:51 +00:00
0b50d4a328 log time fix 2018-11-23 15:51:27 +00:00
e232257cb6 Hadrons: A2AAslashVector modul cleaning and renaming 2018-11-22 19:43:49 +00:00
09451b5e48 Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-11-22 15:45:24 +00:00
6364aa8acf Merge branch 'feature/contractor' into develop 2018-11-22 15:44:46 +00:00
b9e84ecab7 Hadrons: minor code cleaning 2018-11-22 15:44:30 +00:00
41032fef44 Optional RW mode for Hdf5Reader 2018-11-21 18:36:50 +00:00
d77bc88170 Optional support for faster CRC32C checksum through Intel IPP 2018-11-19 17:21:53 +00:00
494b3c9e57 Hadrons: contractor more IO fix 2018-11-19 16:26:53 +00:00
2ba19a9e07 Hadrons: contractor IO fix 2018-11-19 16:17:51 +00:00
5d7cc29eaf Hadrons: contractor token @traj@ for trajectory number in input file 2018-11-19 16:04:01 +00:00
f22a27d7f9 Hadrons: contractor trajectory loop and file output 2018-11-19 15:45:04 +00:00
Peter Boyle
33a0bbb17b Const correctness 2018-11-19 11:27:57 +00:00
f592ec8baa Hadrons: contractor performance fix 2018-11-16 20:59:49 +00:00
8b007b5c24 Hadrons: remove the use of OpenMP reductions 2018-11-16 20:00:29 +00:00
9bb170576d Merge pull request #177 from guelpers/develop
Hadrons module to electrify a gauge
2018-11-14 16:04:09 +00:00
Vera Guelpers
a7e3977b75 Merge remote-tracking branch 'upstream/develop' into develop 2018-11-13 14:56:23 +00:00
Vera Guelpers
995f20e45d Hadrons: some renamings 2018-11-13 14:54:48 +00:00
Vera Guelpers
d058b4e681 Merge branch 'feature/seqA2A' into develop 2018-11-13 13:27:24 +00:00
8e0d2f3402 Hadrons: support for twisted boundary conditions 2018-11-12 17:16:18 +00:00
2ac57370f1 Hadrons: contractor translation average normalisation 2018-11-12 16:04:35 +00:00
344e832a4e Hadrons: contractor faster transpose and finer timings 2018-11-12 15:59:54 +00:00
cfe281f1a4 Hadrons: diskvectors measure hash performance in debug output 2018-11-12 15:59:11 +00:00
f5422c7334 Hadrons: more contractor instrumentation 2018-11-09 16:23:53 +00:00
68c76a410d Hadrons: more contractor improvements 2018-11-08 19:24:29 +00:00
69b6ba0a73 Hadrons: contractor fixes and improvements 2018-11-08 18:46:28 +00:00
65349b07a7 Hadrons: simpler A2A perf functions 2018-11-08 18:44:44 +00:00
7cd9914f0e Hadrons: automatically resize output in MKL A2A matrix kernels 2018-11-08 17:40:57 +00:00
Peter Boyle
f3f24b3017 Optional Twisted BC's added, in "DoubleStore" for WilsonImpl.
Untested but doesn't affect answers when twists are all zero. The zero is the default behaviour
for ImplParams.
2018-11-08 12:55:25 +00:00
Vera Guelpers
8ef4657805 Merge remote-tracking branch 'upstream/develop' into feature/seqA2A 2018-11-08 09:00:06 +00:00
Vera Guelpers
78c1086f8b Hadrons: sequential Aslash insertion and propagator on A2A vector 2018-11-08 08:58:09 +00:00
Peter Boyle
68c13045d6 Added a test for Felix and Michael to look at 2018-11-07 23:40:15 +00:00
Peter Boyle
e9b6f58fdc Allow shrinking machine in orthog direction for extract slice local 2018-11-07 23:39:18 +00:00
Peter Boyle
839605c45c Verbose reduce 2018-11-07 23:38:46 +00:00
1ff1422e07 Hadrons: contractor lighter output 2018-11-07 20:02:53 +00:00
32376f0437 Hadrons: contractor performances 2018-11-07 19:59:11 +00:00
0c6e581336 Hadrons: first stab at general contraction code, needs serious testing 2018-11-07 19:16:55 +00:00
Vera Guelpers
e0a79a5bbf Hadrons: PR#177: Electrify gauge: Single Precision fix 2018-11-07 15:01:22 +00:00
Vera Guelpers
4c016cc1a4 Merge remote-tracking branch 'upstream/develop' into develop 2018-11-07 14:03:12 +00:00
Peter Boyle
2205b1e63e Add CXX to grid-config 2018-11-07 13:32:46 +00:00
Peter Boyle
6f421c7a6f Block solver in the SchurRedBlack plus timing report cleaner 2018-11-07 12:26:56 +00:00
Peter Boyle
b62b9ac214 Patch to broken assertion 2018-11-06 22:18:17 +00:00
88d9922e4f Hadrons: fast A2A matrix contraction kernels 2018-11-06 19:49:09 +00:00
9734e3ee58 Hadrons: (somewhat) faster build 2018-11-06 19:47:41 +00:00
Peter Boyle
8c3a599148 Block solver test 2018-11-06 16:44:58 +00:00
Azusa Yamaguchi
4a47b11876 Block CG improvements to develop 2018-11-06 12:49:05 +00:00
Vera Guelpers
f1382cf81d Merge remote-tracking branch 'upstream/develop' into develop 2018-11-06 10:29:52 +00:00
Vera Guelpers
85699daef2 Hadrons: Module to electrify a gauge field 2018-11-06 10:27:18 +00:00
1651111d18 Hadrons: final, portable form of the contractor benchmark 2018-11-05 21:29:13 +00:00
1ed4ea344d Merge branch 'develop' into feature/contractor 2018-11-05 11:42:02 +00:00
8f514ae550 Hadrons: Lanczos 32bit IO 2018-11-05 11:41:10 +00:00
4a7415e83c Hadrons: contractor benchmark update 2018-10-23 21:00:54 +01:00
0ffcfea724 Hadrons: contractor benchmark 2018-10-23 17:08:16 +01:00
febe41cc1d Hadrons: improvement on PR #176 2018-10-23 12:48:15 +01:00
62173395b8 Merge pull request #176 from guelpers/develop
Hadrons: full volume noise source for A2A
2018-10-23 12:29:35 +01:00
b48611b80f Merge branch 'develop' into feature/contractor 2018-10-22 18:27:18 +01:00
6b559d68aa Hadrons: eigenpack converter can do test reads 2018-10-22 11:10:18 +01:00
1982cc58dd Hadrons: A2A vectors I/O filename fix 2018-10-21 01:20:05 +01:00
2e2e5ce596 SciDAC I/O print data checksums 2018-10-19 20:36:32 +01:00
7d84dca8e9 Merge branch 'develop' into feature/contractor 2018-10-18 23:46:58 +01:00
2d3916418e Hadrons: more precision fix 2018-10-18 23:45:13 +01:00
21304e2139 Hadrons: fix to allow single-prec build again 2018-10-18 19:58:50 +01:00
7b850eb48b Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-10-18 19:46:25 +01:00
a3ace57e01 Hadrons copyright update 2018-10-18 19:46:11 +01:00
b1c3cbe35e Hadrons: A2A vectors I/O 2018-10-18 19:44:58 +01:00
f31d6bfec2 Hadrons: contractor cleaning and better error check 2018-10-18 17:50:35 +01:00
a7cfa26901 Hadrons: reverse A2A matrix load for better DiskVector cache reuse 2018-10-18 17:50:16 +01:00
f333f3e575 Hadrons: DiskVector save-on-eviction and faster CRC32 for Eigen matrices 2018-10-18 17:48:25 +01:00
2b4e253473 Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-10-17 20:28:20 +01:00
0ba3d469c7 Benchmark IO in single and double precision 2018-10-17 20:27:34 +01:00
f709329d96 Hadrons: first version of a contractor utility 2018-10-17 20:26:48 +01:00
f05b25dae4 Hadrons: A2AMatrix load 2018-10-17 20:26:26 +01:00
3e1d268fa3 Hadrons: DiskVector optimisation 2018-10-17 20:25:32 +01:00
Vera Guelpers
109c74bed8 Hadrons: full volume noise source for A2A 2018-10-16 14:56:12 +01:00
3023287fd9 Hadrons: 3-index RO access to Eigen disk vector 2018-10-16 14:44:14 +01:00
b3d6805638 Merge branch 'feature/contractor' into develop 2018-10-16 11:29:37 +01:00
291bc2a1f0 IO benchmark on a list of directories 2018-10-15 17:25:08 +01:00
2f368c33fc Hadrons: copyright update 2018-10-15 15:51:45 +01:00
9592115341 Hadrons: NPR and gauge fixing linking fix 2018-10-15 15:49:42 +01:00
Peter Boyle
24c07694bc Mixed precision now supported in MADWF 2018-10-14 00:22:52 +01:00
Peter Boyle
f0229025e2 MADWF working across a range of actions 2018-10-13 19:55:03 +01:00
Peter Boyle
6de9a45a09 NPR first cut by Julia Kettle 2018-10-12 11:00:58 +01:00
Peter Boyle
03c3d495a2 First cut (non functional NPR code) developed by Julia Kettle 2018-10-12 10:59:33 +01:00
Peter Boyle
49f25e08e8 PauliVillars based 4D -> 5D reconstruction with Fourier Accelerated PV inverse
by Christoph. Differs from the one by Rudy in BFM since it vectorises the twisted
4D solves in pairs.
2018-10-11 12:35:32 +01:00
efc0c65056 Hadrons: DiskVector Eigen specialisation with binary I/O and sha256 correctness check 2018-10-08 19:02:00 +01:00
936eaac8e1 function to get the sha256 string 2018-10-08 19:00:50 +01:00
fe6a372f75 Hadrons: fixes and cleaning in the scalar SU(N) part 2018-10-08 15:14:08 +01:00
148fc052bd Hadrons: Aslash field, tested 2018-10-05 21:04:10 +01:00
c073341a10 Hadrons: more cleaning 2018-10-05 19:50:41 +01:00
78299daaac Hadrons: code cleaning 2018-10-05 16:47:52 +01:00
866449c804 Hadrons: integration of Peter's A2Autils 2018-10-05 16:42:44 +01:00
d69a52079f Merge remote-tracking branch 'gh/feature/a2a-integration' into feature/aslashfield 2018-10-05 15:39:09 +01:00
9f4f8a14a3 Hadrons: code cleaning 2018-10-05 15:38:01 +01:00
f6593dc881 Hadrons: A2A block performance counter fix 2018-10-05 15:11:01 +01:00
Peter Boyle
b46d31d4b6 MKL enable on Eigen if Grid is configured to use MKL 2018-10-05 11:29:40 +01:00
58567fc650 Hadrons: big update abstracting the block meson field routine, tested & working, performance counters broken and code dirty 2018-10-04 20:01:49 +01:00
Peter Boyle
7c57cac670 Adding A2A utils class for containing kernels. 2018-10-04 18:57:41 +01:00
d0b21bf1ff Merge branch 'feature/eigenpack-convert' into develop 2018-10-04 18:26:45 +01:00
a1825d1f59 Hadrons: final fix for multiprec eigenpacks 2018-10-04 18:25:26 +01:00
5a3e83ff7b Hadrons: new layer in eigenpacks class hierarchy 2018-10-03 14:45:01 +01:00
52569d98d8 Hadrons: multiprec eigenpack I/O fix 2018-10-03 14:24:43 +01:00
b351103c29 Hadrons: eigenpack load module with 32bit I/O 2018-10-02 21:07:56 +01:00
118cca4681 Hadrons: linking fix 2018-10-02 20:08:49 +01:00
44de727cd2 Hadrons: eigenpack support for multiprecision I/O 2018-10-02 19:51:09 +01:00
888ebc3cf9 Hadrons: better name for the EP converter 2018-10-02 15:22:18 +01:00
6c031a1b81 Merge branch 'feature/eigenpack-convert' into develop 2018-10-02 14:57:30 +01:00
02aa4bd762 Hadrons: cleaner eigenpack convert log 2018-10-02 13:43:25 +01:00
9aafa8ee60 Hadrons: eigenpack converter generalised for RB/5d grids 2018-10-02 13:34:17 +01:00
430b98b354 fix previous commit 2018-10-02 13:12:46 +01:00
84189867ef Hadrons: eigenpack converter with RB grids (to be generalised) 2018-10-02 13:05:05 +01:00
4ab8cfbe2a Hadrons: more verbose eigenpack convert 2018-10-02 12:24:45 +01:00
aadd9f4468 Eigenpack converter, to be tested, HadronsXmlRun moved to Utilities directory 2018-10-02 00:02:34 +01:00
8fbb27ce13 Hadrons: less code duplication in eigenpack IO 2018-10-01 20:15:21 +01:00
21bba95909 Hadrons: eigenpack metadata is no ignored anymore when reading 2018-10-01 19:33:45 +01:00
6448fe7121 More flexible XML control in Lime files 2018-10-01 19:32:50 +01:00
2458a11d1d Hadrons: precision cast module 2018-09-29 18:00:08 +01:00
d0ca7c3fe6 Hadrons: big update for getGrid, grids are now created automatically 2018-09-29 17:55:19 +01:00
57f899d79c Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-09-29 15:50:59 +01:00
e881a0c157 Merge commit 'beed527ea37c90fd5e19b82d326eb8adc8eba5ff' into develop 2018-09-29 15:50:21 +01:00
f411657118 JSON update 2018-09-29 15:48:05 +01:00
Peter Boyle
7458c6174b Use operator() for indexing internal indices 2018-09-27 06:42:02 +01:00
Peter Boyle
21b269d0f9 Move the Grid.pdf out of a deep directory 2018-09-27 06:36:25 +01:00
Peter Boyle
083af92ac2 Update from chulwoo ; high level link for Grid.pdf in documentation 2018-09-27 06:30:40 +01:00
Peter Boyle
2c162577b5 HMC documentation 2018-09-25 23:28:17 +01:00
Peter Boyle
b1c4e96382 Updates to actions etc.. 2018-09-24 22:10:30 +01:00
Peter Boyle
a55c6f34f3 Updated docs 2018-09-24 15:44:35 +01:00
Peter Boyle
beed527ea3 Carletons chapter 2018-09-24 15:09:51 +01:00
eaa633cf69 Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-09-21 18:16:22 +01:00
c632455129 Hadrons: meson field IO fix 2018-09-21 18:16:01 +01:00
c012899ed5 Hadrons: big update after templating of get/createGrid 2018-09-21 18:15:33 +01:00
paboyle
8bab544c2f Updated manual pdf 2018-09-20 18:51:11 +01:00
paboyle
76fc06a5dc Updates with todo from Carleton 2018-09-20 18:50:11 +01:00
4af6c7e7aa Hadrons: copyright update 2018-09-14 12:51:48 +01:00
f60fbcfc4d Hadrons: mixed precision CG, to be tested 2018-09-14 12:47:55 +01:00
464c81706e Hadrons: defaults Impls for different precisions 2018-09-14 12:46:43 +01:00
408130b808 Hadrons: header list fix 2018-09-10 17:38:54 +01:00
375edd1370 file forgotten in last commit 2018-09-10 17:37:29 +01:00
6d912f6c67 Hadrons: general guesser factory 2018-09-10 17:36:54 +01:00
6d1d28955e Guesser class is redundant, switching to LinearFunction 2018-09-10 17:35:54 +01:00
920b471761 Hadrons tests update 2018-09-10 15:32:13 +01:00
63c21767ba Hadrons: grids stored with hash of SIMD type (for mixed-precision setups) 2018-09-10 15:31:39 +01:00
7b6b712565 function to convert std::vector to string 2018-09-10 15:17:32 +01:00
35abd05ee9 mute Version.h cache creation 2018-09-10 15:16:59 +01:00
dd36e60f6a compilation fix for hypercube optimal communicator 2018-09-08 18:07:29 +01:00
cb6c548e21 Hadrons: code cleaning 2018-09-07 20:40:55 +01:00
02c4ccf621 Hadrons: diskvector debug message for writes 2018-09-07 20:33:49 +01:00
fd24588212 Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-09-07 20:25:11 +01:00
b800bb3ecb Hadrons: disk vector cache policy to last touch 2018-09-07 20:24:48 +01:00
f8abd0978b Hadrons copyright update 2018-09-07 20:10:07 +01:00
12c7c493bf Hadrons: disk-based container 2018-09-07 20:04:54 +01:00
paboyle
c7c9072313 Documentation 2018-09-06 16:01:42 +01:00
2bf3be5fae Hadrons: copyright and code cleaning 2018-09-04 18:25:10 +01:00
3a40e4fc69 Hadrons: scalar SU(N) 2-pt guard against negative momenta components 2018-09-04 18:24:07 +01:00
2e69e03f6f Hadrons: CosmHol configs IO module 2018-09-04 18:23:28 +01:00
a09f9bb528 Hadrons: code cleaning 2018-09-04 18:22:21 +01:00
f0e341d726 Hadrons: module list generator fix 2018-09-04 18:22:04 +01:00
6f09df0daf Hadrons: A2A matrix IO fix 2018-09-02 01:46:22 +01:00
26cee605b8 Hadrons: copyright update 2018-09-01 21:30:30 +01:00
b3fa18c229 copyright script never removes authorship 2018-09-01 21:29:58 +01:00
2940c9bcfd Hadrons: dedicated IO class for A2A matrices 2018-09-01 21:09:01 +01:00
0bb532f72b more explicit clean git tree message 2018-09-01 20:02:18 +01:00
fada2aa0f7 Hadrons: precision fix 2018-09-01 20:00:12 +01:00
c193e4e675 Aslash expression in Mathematica notebook 2018-09-01 19:59:58 +01:00
3ee682f676 more Version.h fine tuning 2018-09-01 19:58:16 +01:00
d85ec3bac2 build system minor fix 2018-09-01 19:54:21 +01:00
b52d8eb1e3 better Version.h implementation 2018-09-01 19:49:13 +01:00
ee630d2e8b Hadrons: smearing plaquette output 2018-09-01 17:38:32 +01:00
2f0af79869 Hadrons: scalar SU(N) NPR update 2018-09-01 17:36:35 +01:00
1b7fb79ec0 CI fix 2018-08-28 18:26:37 +01:00
2db1a4628c build system minor fix 2018-08-28 18:26:30 +01:00
6aa047d842 Hadrons module template fix 2018-08-28 17:17:00 +01:00
8779c32ae1 Merge branch 'feature/hadrons' into develop 2018-08-28 17:10:33 +01:00
c527dc3358 CI fix 2018-08-28 17:10:08 +01:00
6b42577b6b gitignore update 2018-08-28 16:58:37 +01:00
fb3596f968 Hadrons: precision fixes 2018-08-28 16:58:23 +01:00
f3a0158213 code cleaning 2018-08-28 16:56:07 +01:00
0250aa9347 file committed in error 2018-08-28 16:55:48 +01:00
3df6743396 more build system cleaning and patch for bad include in Eigen 2018-08-28 16:54:57 +01:00
fb7d021b9d Hadrons: moving Hadrons to root directory, build system improvements 2018-08-28 15:00:40 +01:00
5f206df775 Hadrons: meson field cache friendly cache copy 2018-08-15 17:29:44 +01:00
7727e81113 Hadrons: slight improvement on previous commit 2018-08-14 20:18:47 +01:00
c4115544a5 Hadrons: application option to save graph 2018-08-14 20:03:53 +01:00
08c47328ba Hadrons: meson field kernel performance for each block 2018-08-14 17:35:42 +01:00
09001aedca Hadrons: meson fields saved in single precision 2018-08-14 17:19:38 +01:00
2c67304716 Hadrons: meson field code cleaning 2018-08-14 17:00:05 +01:00
dc6d8686de Hadrons: meson field chunked HDF5 IO 2018-08-14 16:40:29 +01:00
cc2780bea3 Hadrons: meson field parallel IO 2018-08-14 14:55:13 +01:00
6e5a2b7922 fix previous commit 2018-08-14 14:07:54 +01:00
f4878d3a13 Hadrons: meson field threaded cache copy 2018-08-14 14:02:37 +01:00
89d2fac92e Hadrons: copyright update 2018-08-14 12:19:14 +01:00
f2d3e41cf2 Hadrons: meson field: HDF5 perf, gamma input and Eigen tensors allocated by Grid 2018-08-13 20:18:33 +01:00
3c27bb36d4 Hadrons: direct timer access 2018-08-13 20:17:45 +01:00
603d59f389 Hadrons: code cleaning 2018-08-13 20:17:24 +01:00
07a0ef3f95 Hadrons: global measurement time profile 2018-08-13 16:44:57 +01:00
503259f9c9 Hadrons: meson field HDF5 IO done and tested 2018-08-12 16:52:12 +01:00
5be6a51044 Hadrons: meson fields code cleaning and momentum phases 2018-08-11 15:13:43 +01:00
ac69f042b1 Hadrons: module RNG uniquely seeded with <run id> + <module name> + <trajectory> 2018-08-10 18:27:00 +01:00
133d5c2e34 Merge branch 'develop' into feature/hadrons 2018-08-10 16:36:40 +01:00
2a94244890 configure: --with-openssl option and LIME is now mandatory 2018-08-10 16:36:11 +01:00
a15a2dfd29 Merge branch 'develop' into feature/hadrons 2018-08-10 16:08:22 +01:00
093bb02633 Hadrons: execute message for time diluted noise 2018-08-10 16:07:48 +01:00
99a85116f8 Hadrons: module and VM instrumentation 2018-08-10 16:07:30 +01:00
paboyle
27cdb79063 Sha used to seed from a unique string 2018-08-10 15:11:01 +01:00
f4cbfd63ff Hadrons: more meson field cleaning, needs IO now 2018-08-09 18:39:58 +01:00
2b794b6aa7 Hadrons: module generating random lattices for testing purposes 2018-08-09 17:16:42 +01:00
d0244a059f Hadrons: cleaning cleaning... 2018-08-09 00:38:17 +01:00
dcdd891d7d Hadrons: precision fix 2018-08-09 00:13:53 +01:00
6d2df9de79 Hadrons: even more cleaning 2018-08-08 23:15:55 +01:00
41d4e37bae Hadrons: more cleaning 2018-08-08 19:04:44 +01:00
ee5c0cc9b6 Hadrons: code cleaning 2018-08-08 18:45:06 +01:00
0a4020eb4d Hadrons: copyright fix 2018-08-07 18:42:52 +01:00
b2de26589b Hadrons: code cleaning and copyright update 2018-08-07 18:40:48 +01:00
0677adb4dd Hadrons: overhaul of A2A for production 2018-08-07 18:27:59 +01:00
231cc95be6 Hadrons: eigenvalues precision fix 2018-08-07 18:27:19 +01:00
639f9cab82 Hadrons: schedule loading fix 2018-08-07 18:26:49 +01:00
4eac4e575e Hadrons: meson fields indentation fix 2018-08-06 12:42:25 +01:00
3f0f92cda6 Hadrons: first cleaning/integration of A2A/meson fields 2018-08-06 12:11:52 +01:00
d2650e89bd Hadrons: VM exception for object type (solves infinite loop in scheduler) 2018-08-06 12:11:00 +01:00
2962123cba Hadrons: diluted noise polish 2018-08-05 01:44:37 +01:00
830168ec37 Hadrons: first try at diluted noise class (tested) 2018-08-04 12:32:58 +01:00
584c921ca0 Eigen support fix (use of Grid as a library was broken) 2018-08-03 21:07:58 +01:00
81347b4d16 gitignore update 2018-08-03 19:58:52 +01:00
2cfa0b0e6b Merge pull request #174 from fionnoh/a2a_basics
A2A basics
2018-08-03 16:32:14 +01:00
fionnoh
fa5dee76b1 Included Peter's A2AMeson field and Eigen changes 2018-08-03 15:15:54 +01:00
fionnoh
8d1679c6b8 Merge branch 'feature/hadrons-a2a' of https://github.com/paboyle/Grid into a2a_basics 2018-08-03 15:12:24 +01:00
Peter Boyle
3791a38f7c Optimised the MesonField a bit more 2018-08-01 08:27:27 +01:00
Peter Boyle
142f7b0c86 Updated the A2A Meson Field module 2018-07-31 15:58:02 +01:00
fionnoh
891ad66eab Included changes to Hadrons RBPrecCG solver needed for subtraction of guess 2018-07-31 11:26:07 +01:00
Peter Boyle
60c43151c5 Merge branch 'feature/hadrons-a2a' of https://github.com/paboyle/Grid into feature/hadrons-a2a 2018-07-31 01:09:02 +01:00
paboyle
e036800261 Eigen fix 2018-07-31 01:08:42 +01:00
Peter Boyle
62900def36 Merge branch 'feature/hadrons-a2a' of https://github.com/paboyle/Grid into feature/hadrons-a2a 2018-07-31 00:36:26 +01:00
paboyle
e3a309a73f Eigen happiness 2018-07-31 00:35:17 +01:00
fionnoh
ad6c1c0c4e The basics of what is needed in Grid and Hadrons for the A2A class and module, with none of the contraction or MF code. 2018-07-30 18:40:50 +01:00
Peter Boyle
00b92a91b5 Optimising 2018-07-28 23:46:22 +01:00
paboyle
65533741f7 7 moms 2018-07-28 16:17:47 +01:00
Peter Boyle
dc0259fbda Merge pull request #173 from fionnoh/feature/hadrons-a2a
Changes to meson field benchmark. Now includes the gammas in the fina…
2018-07-27 23:03:56 +01:00
Peter Boyle
131a6785d4 Merge branch 'feature/hadrons-a2a' into feature/hadrons-a2a 2018-07-27 23:03:42 +01:00
paboyle
44f4f5c8e2 Momentum loop 2018-07-27 23:00:16 +01:00
fionnoh
2679df034f Changes to meson field benchmark. Now includes the gammas in the final part of the naive method, both methods compute
lhs^dag*Gamma*rhs (previously Gamma*lhs^dag*rhs), and checks results.
2018-07-27 18:31:10 +01:00
bf71162b97 Hadrons: backtrace on abort 2018-07-26 19:20:12 +01:00
299e828d83 Merge branch 'develop' into feature/hadrons 2018-07-26 16:49:49 +01:00
ef5452cddf Hadrons: smarter memory profiler 2018-07-26 16:47:45 +01:00
80de748737 Hadrons: new exceptions which can save a integer 2018-07-26 16:47:25 +01:00
paboyle
71e1006ba8 Updated meson field benchmark for dirac structures 2018-07-26 09:09:29 +01:00
00f31ae83f Merge pull request #163 from goracle/unstaged
Add printing of whether there are unstaged changes in the git hash print
2018-07-25 19:00:00 +00:00
cce339deaf Merge pull request #172 from fionnoh/feature/hadrons
feature/hadrons -> feature/hadrons-a2a
2018-07-25 17:20:19 +00:00
fionnoh
24128ff109 Changes needed for MF benchmark to work with comms correctly 2018-07-23 15:51:37 +01:00
fionnoh
34e9d3f0ca Moved the creation and resizing of the v and w high modes from the A2A class to the A2A module and made them an output of the module. This means that they have to be inputs of the contration modules and they will freed from memory if they are no longer needed. 2018-07-22 14:40:31 +01:00
fionnoh
c995788259 Added ImportUnphysicalFermion and included appropriate logic for 5d w vectors in A2A code 2018-07-21 00:08:11 +01:00
fionnoh
94c7198001 Added ZFIMPL to A2AMeson contraction 2018-07-20 23:08:22 +01:00
fionnoh
04d86fe9f3 Removed overly verbose print statement 2018-07-20 21:38:19 +01:00
fionnoh
b78074b6a0 Removed a Dminus from high mode v and removed duplication pf D_oo code 2018-07-20 16:55:24 +01:00
fionnoh
7dfd3cdae8 Inclusion of ExportPhysicalFermionSource that fixes a bug in the low mode w vectors 2018-07-20 15:45:43 +01:00
fionnoh
cecee1ef2c Merge branch 'develop' of github.com:paboyle/Grid into feature/hadrons 2018-07-20 13:37:50 +01:00
fionnoh
355d4b58be Merge branch 'feature/hadrons' of github.com:fionnoh/Grid into feature/hadrons 2018-07-19 16:07:54 +01:00
fionnoh
2c54a536f3 Moved the meson field inner product to its own header file 2018-07-19 15:56:52 +01:00
fionnoh
d868a45120 Cleaned up some stuff that was erroneously included in a previous "trash" commit. Leaving in the mySliceInnerProdct function for now as it speeds up mesonfield creation quite a lot for 24^3 tests 2018-07-16 16:19:59 +01:00
fionnoh
9deae8c962 A2A meson field contraction code 2018-07-16 14:18:45 +01:00
fionnoh
db86cdd7bd Possible trash commit 2018-07-10 13:30:45 +01:00
paboyle
ec9939c1ba Test for faster implementation of meson field inner loop
This should be possible to cache block at outer levels, global sum across nodes not performed
and deferred to caller to block them all into a big all reduce.
Nc=3 and Fermion is hard coded in an ugly way. We might think about benchmarking whether
a product without the conjugate should be made available by Grid.

It is not clear whether the explicit unroll, or the performing of conjugate on left once
was the real source of the speed up.

Gives 70-80 GF/s on my laptop (single) half that double, and 70GB/s to cache.

This is competitive with dslash and a reasonable stopping point for the optimisation. If necessary we can revisit.
2018-07-10 12:38:51 +01:00
fionnoh
f74617c124 Added ZFIMPL to meson field module 2018-07-03 14:04:53 +01:00
fionnoh
8c6a3921ed Merge remote-tracking branch 'upstream/feature/hadrons' into feature/hadrons 2018-07-03 11:35:14 +01:00
a8a15dd9d0 Hadrons: code cleaning 2018-07-02 17:52:39 +01:00
3ce68a751a Hadrons: stout smearing module 2018-07-02 17:52:04 +01:00
fionnoh
daa0977d01 Included a print statement that indicates that the guess is being subtracted from the solve. 2018-06-28 16:34:56 +01:00
fionnoh
a2929f4384 Removed A2A contraction module and replaced it with the beginnings of a meson field module 2018-06-28 16:17:26 +01:00
fionnoh
7fe3974c0a Included eigenPacks and action as references, not inputs, of A2A module. They now now longer need to be parameters in the meson field modules. 2018-06-28 16:14:49 +01:00
fionnoh
f7e86f81a0 Changes A2A class to make use of the new Solver class 2018-06-28 16:14:16 +01:00
fionnoh
fecec803d9 Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/hadrons 2018-06-28 16:13:43 +01:00
fionnoh
8fe9a13cdd Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/hadrons 2018-06-28 16:13:07 +01:00
d2c42e6f42 Hadrons: scaled DWF action 2018-06-26 14:59:33 +01:00
Daniel Richtmann
2881b3e8e5 WilsonMG: Remove unnecessary static assertions 2018-06-26 14:42:30 +02:00
049cc518f4 Hadrons: introduction message 2 2018-06-25 19:08:39 +01:00
2e1c66897f Hadrons: introduction message 2018-06-25 19:08:22 +01:00
adcef36189 Hadrons: Möbius DWF action 2018-06-25 15:58:35 +01:00
fionnoh
2f121c41c9 Commiting reation of meson field code before a merge with the upstream branch feature/hadrons 2018-06-25 12:20:46 +01:00
e0ed7e300f Hadrons: spurious Dminus removed 2018-06-22 16:33:43 +02:00
485207901b Merge branch 'develop' into feature/hadrons 2018-06-22 16:15:32 +02:00
c760f0a4c3 Hadrons: remove make_5D/4D functions and FreeProp fix 2018-06-22 16:12:46 +02:00
c84eeedec3 Hadrons: GaugeProp module for z-Wilson actions 2018-06-22 15:53:22 +02:00
fionnoh
1ac3526f33 Small changes to the A2A header and module 2018-06-22 12:29:42 +01:00
fionnoh
0de090ee74 Temporarily added in the contraction code that produced the working 2-pt function. This is commited for reference only and will be removed in the next push. 2018-06-22 12:28:41 +01:00
91405de3f7 Hadrons: new solver exposing fermion matrix and generic source/solve import/export 2018-06-22 12:14:37 +02:00
fionnoh
8fccda301a Fixed a bug where the guess was always subtracted after the solve and included appropriate weights for the sources in the one case we're looking at now. More work needs to be done to make the 5d/4d source logic less brittle. 2018-06-21 16:36:59 +01:00
fionnoh
7a0abfac89 Restructured the class that computes and returns the A2A vectors. 2018-06-21 16:36:06 +01:00
fionnoh
ae37fda699 A more elegant way to subtract guesses from solve and a bool check before verifying residual 2018-06-20 16:07:40 +01:00
fionnoh
b5fc5e2030 All to all module update that hit a promising milestone. Commiting for a reference for future changes. 2018-06-20 10:59:07 +01:00
Daniel Richtmann
cc5d025ea4 WilsonMG: Adapt staggered GMRES/MR tests to "new" constructor 2018-06-18 16:20:20 +02:00
Daniel Richtmann
ddcb53bce2 Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-06-13 09:50:37 +02:00
Daniel Richtmann
d1c80e1d46 WilsonMG: Correct years in copyright line 2018-06-13 09:44:09 +02:00
Daniel Richtmann
c73cc7d354 WilsonMG: Add tests with MG preconditioner running single precision, outer solver running in double 2018-06-12 16:10:48 +02:00
Daniel Richtmann
49fdc324a0 WilsonMG: Make MG correctness checks abort on failing tests 2018-06-12 16:10:48 +02:00
Daniel Richtmann
f32714a2d1 WilsonMG: Make running MG correctness checks optional via commandline 2018-06-12 16:10:48 +02:00
Daniel Richtmann
73a955be20 WilsonMG: Move tests for Wilson & WilsonClover into separate files 2018-06-12 16:10:48 +02:00
Daniel Richtmann
66b7a0f871 WilsonMG: Move multigrid class to separate file 2018-06-12 16:10:48 +02:00
Daniel Richtmann
2ab9d4bc56 WilsonMG: Fix random behavior in GMRES
From time to time I saw random since the basis vectors were not initialized
properly.
2018-06-12 15:01:31 +02:00
Daniel Richtmann
4f41cd114d WilsonMG: Add a mixed precision version of FGMRES
This version does everything in double prec but accepts a preconditioner working
in single precision.
2018-06-12 15:01:31 +02:00
Daniel Richtmann
11c4f5e32c WilsonMG: Provide command line switch for reading in input xml + move default params to constructor of MultiGridParams 2018-06-12 15:01:31 +02:00
Daniel Richtmann
e9b9550298 WilsonMG: Fix incompatibility with single prec MG in construction of simd layout on coarser grids 2018-06-12 15:01:31 +02:00
Daniel Richtmann
7564fedf68 WilsonMG: Set subspace to zero to avoid random behavior 2018-06-12 15:01:31 +02:00
8db0ef9736 Merge pull request #168 from jch1g10/feature/qed-fvol
Feature/qed fvol
2018-06-08 20:09:06 +02:00
Guido Cossu
95d4b46446 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-06-08 11:30:29 +01:00
paboyle
5dfd216a34 Better thread safety 2018-06-04 21:08:44 +01:00
paboyle
c2e8d0aa88 Solve g++ problem on the lanczos test 2018-06-04 18:34:15 +01:00
James Harrison
0fe5aeffbb Merge branch 'feature/hadrons' into feature/qed-fvol 2018-06-04 16:59:43 +01:00
James Harrison
7fbc469046 Merge branch 'develop' into feature/hadrons 2018-06-04 16:58:30 +01:00
paboyle
bf96a4bdbf Merge branch 'master' into develop 2018-06-04 14:03:11 +01:00
paboyle
84685c9bc3 Overflow fix 2018-06-04 13:42:07 +01:00
fionnoh
a8d4156997 Added a Hadrons module that computes the all-to-all v and w vectors 2018-05-31 17:18:58 +01:00
fionnoh
c18074869b Changes to Hadrons SchurRB solver to allow for a subtract_guess boolean to be passed 2018-05-31 17:17:16 +01:00
fionnoh
f4c6d39238 CHanges made to SchurRB solvers to allow for the subtraction of a guess after solve 2018-05-31 17:16:20 +01:00
200d35b38a Merge branch 'develop' into feature/hadrons 2018-05-28 11:52:47 +02:00
eb52e84d09 Merge branch 'feature/hadrons' of github.com:paboyle/Grid into feature/hadrons 2018-05-28 11:50:27 +02:00
72abc34764 Merge pull request #166 from guelpers/feature/hadrons
Feature/hadrons
2018-05-28 11:49:46 +02:00
e3164d4c7b Hadrons: env function to get volume in double 2018-05-28 11:39:17 +02:00
James Harrison
f5db386c55 Change MODULE_REGISTER_NS -> MODULE_REGISTER in UnitEM, ScalarVP and VPCounterTerms 2018-05-22 16:16:21 +01:00
James Harrison
294ee70a7a Merge branch 'feature/hadrons' into feature/qed-fvol
# Conflicts:
#	extras/Hadrons/modules.inc
#	lib/qcd/action/gauge/Photon.h
2018-05-21 18:02:41 +01:00
Azusa Yamaguchi
013ea4e8d1 Merge branch 'feature/staggered-comms-compute' into develop 2018-05-21 13:11:56 +01:00
Azusa Yamaguchi
7fbbb31a50 Merge branch 'develop' into feature/staggered-comms-compute
Conflicts:
	lib/qcd/action/fermion/ImprovedStaggeredFermion.cc
2018-05-21 13:07:29 +01:00
Azusa Yamaguchi
0e127b1fc7 New file single prec test 2018-05-21 12:57:13 +01:00
Azusa Yamaguchi
68c028b0a6 Comment 2018-05-21 12:54:25 +01:00
255d4992e1 Hadrons: stochastic scalar SU(N) free field fix 2018-05-18 20:49:55 +01:00
a0d399e5ce Hadrons: yet other attempts at EMT NPR 2018-05-18 20:49:26 +01:00
fd3b2e945a Hadrons: don't right result with empty stem 2018-05-18 20:48:24 +01:00
Daniel Richtmann
6c27c72585 WilsonMG: Provide more sensible default values for MG parameters 2018-05-16 17:26:09 +02:00
Daniel Richtmann
9c003d2d72 WilsonMG: Base wilson mg preconditioner entirely on existing infrastructure 2018-05-16 17:26:09 +02:00
Daniel Richtmann
4b8710970c WilsonMG: Switch to Galerkin coarsening in CoarsenedMatrix 2018-05-16 17:26:09 +02:00
Daniel Richtmann
68d686ec38 WilsonMG: Add functionality for applying G5 on coarse grids 2018-05-16 16:17:14 +02:00
Daniel Richtmann
c48b69ca81 WilsonMG: Implement Mdir & Mdiag in CoarsenedMatrix 2018-05-16 16:08:05 +02:00
Daniel Richtmann
df8c208f5c WilsonMG: Revert CoarsenedMatrix.h and Lattice_transfer.h back to state of develop branch 2018-05-16 16:02:54 +02:00
Daniel Richtmann
61812ab7f1 Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-05-15 14:57:18 +02:00
b999984501 Merge branch 'develop' into feature/hadrons 2018-05-15 13:53:57 +01:00
Guido Cossu
7836cc2d74 No checksum output on log for scidac 2018-05-15 10:10:08 +01:00
a61e0df54b Travis fix for Lime 2018-05-14 19:56:12 +01:00
9d835afa35 Attempt at solving the FP exception in the QED code 2018-05-14 19:05:54 +01:00
5e3be47117 Hadrons: scalar SU(N) various fixes 2018-05-14 18:58:39 +01:00
48de706dd5 Merge branch 'develop' into feature/hadrons 2018-05-11 18:06:40 +01:00
f871fb0c6d check file is opened correctly in the Lime reader 2018-05-11 18:06:28 +01:00
93771f3099 Hadrons: scalar SU(N) stochastic free field 2018-05-10 22:29:48 +01:00
8cb205725b Merge branch 'develop' into feature/hadrons 2018-05-09 23:56:35 +01:00
9ad580d82f Hadrons: format fix 2018-05-07 21:38:15 +01:00
899f961d0d Hadrons: eigenvalue metadata saved with 16 significant digits 2018-05-07 21:37:03 +01:00
54d789204f more general implementation of the precision interface for serialisers 2018-05-07 21:17:46 +01:00
25828746f3 XML precision scientific with 16 digits by default 2018-05-07 21:04:31 +01:00
f362c00739 Hadrons: better handling of automatic directory creation 2018-05-07 19:43:40 +01:00
Guido Cossu
25d1cadd3b Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-05-07 18:55:09 +01:00
Guido Cossu
c24d53bbd1 Further debug of RNG I/O 2018-05-07 18:55:05 +01:00
2017e4e3b4 Hadrons: more verbose directory creation error 2018-05-07 18:12:22 +01:00
27a4d4c951 Hadrons: multi-file eigenpack in separate directory 2018-05-07 17:52:54 +01:00
2f92721249 Merge branch 'develop' into feature/hadrons 2018-05-07 17:26:47 +01:00
3c7a4106ed Trap for deadly empty comm thread option 2018-05-07 17:26:39 +01:00
3252059daf Hadrons: multi-file support for eigenpacks 2018-05-07 17:25:36 +01:00
paboyle
6eed167f0c Merge branch 'release/0.8.1' 2018-05-04 17:34:11 +01:00
paboyle
4ad0df6fde Bump volume for Gerardo 2018-05-04 17:33:23 +01:00
661381e881 Merge branch 'develop' into feature/hadrons 2018-05-04 14:52:17 +01:00
Peter Boyle
68a5079f33 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-05-04 14:13:54 +01:00
Peter Boyle
8634e19f1b Update 2018-05-04 14:13:35 +01:00
Azusa Yamaguchi
9ada378e38 Add timing 2018-05-04 10:58:01 +01:00
Vera Guelpers
9d9692d439 Fix double vs float in boundary phases 2018-05-03 16:40:16 +01:00
0659ae4014 Merge branch 'develop' into feature/hadrons 2018-05-03 16:20:22 +01:00
bfbf2f1fa0 no threaded stencil benchmark if OpenMP is not supported 2018-05-03 16:20:01 +01:00
dd6b796a01 Hadrons: scalar SU(N) volume factor fix 2018-05-03 16:19:17 +01:00
Vera Guelpers
52a856b4a8 FreeProp module for Hadrons 2018-05-03 12:33:20 +01:00
Vera Guelpers
04190ee7f3 5D free propagator for DWF and boundary conditions for free propagators 2018-05-03 12:31:36 +01:00
Azusa Yamaguchi
587bfcc0f4 Add Timing 2018-05-03 12:10:31 +01:00
Vera Guelpers
2700992ef5 Merge remote-tracking branch 'upstream/feature/hadrons' into feature/hadrons 2018-05-03 10:01:52 +01:00
Peter Boyle
8c658de179 Compressor speed up (a little); streaming stores 2018-05-02 17:52:16 +01:00
Guido Cossu
ba37d51ee9 Debugging the RNG IO 2018-05-02 15:32:06 +01:00
Azusa Yamaguchi
4f4181c54a Merge branch 'feature/staggered-comms-compute' of https://github.com/paboyle/Grid into feature/staggered-comms-compute 2018-05-02 14:59:13 +01:00
Guido Cossu
4d4ac2517b Adding Scalar field theory example for Scidac format 2018-05-02 14:36:32 +01:00
Guido Cossu
e568c24d1d Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-05-02 14:29:25 +01:00
Guido Cossu
b458326744 Checkpointer module update 2018-05-02 14:29:22 +01:00
Guido Cossu
6e7d5e2243 HMC: added Scidac checkpointer and support for metadata 2018-05-02 14:28:59 +01:00
Azusa Yamaguchi
b35169f1dd MultiShift for Staggered 2018-05-02 14:22:37 +01:00
Azusa Yamaguchi
441ad7498d add Iterative counter 2018-05-02 14:21:30 +01:00
Peter Boyle
6f6c5c549a Split off gparity 2018-05-02 14:11:23 +01:00
Peter Boyle
1584e17b54 Revert to fast versoin 2018-05-02 14:10:55 +01:00
Peter Boyle
12982a4455 Hypercube optimisation 2018-05-02 14:10:21 +01:00
Peter Boyle
172f412102 shmget reintroduce 2018-05-02 14:07:41 +01:00
Peter Boyle
a64497265d TIming 2018-05-02 14:07:28 +01:00
ca639c195f Merge branch 'develop' into feature/hadrons 2018-05-01 14:07:32 +01:00
edc28dcfbf Hadrons: scalar SU(N) 2-pt fix 2018-05-01 14:02:31 +01:00
Peter Boyle
c45f24a1b5 Improvements for tesseract 2018-04-30 21:50:00 +01:00
Dr Peter Boyle
aaf37ee4d7 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-04-27 11:45:13 +01:00
Dr Peter Boyle
1dddd17e3c Benchmark improvements from tesseract 2018-04-27 11:44:46 +01:00
paboyle
661f1d3e8e Merge branch 'release/0.8.0' into develop 2018-04-27 11:22:33 +01:00
paboyle
edcf9b9293 Merge branch 'release/0.8.0' 2018-04-27 11:13:19 +01:00
paboyle
fe6860b4dd Update with LIME library guard 2018-04-27 08:57:34 +01:00
paboyle
d6406b13e1 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-04-27 07:52:56 +01:00
paboyle
e369d7306d Rename 2018-04-27 07:51:44 +01:00
paboyle
9f8d63e104 Roll over version 2018-04-27 07:51:12 +01:00
paboyle
9b0240d101 Hot start test 2018-04-27 07:50:51 +01:00
paboyle
b27f0e5a53 Control over IO 2018-04-27 07:50:15 +01:00
paboyle
75e4483407 Stronger convergence test 2018-04-27 07:49:57 +01:00
Guido Cossu
0734e9ddd4 Debugging Scatter_plane_simple 2018-04-27 14:39:01 +09:00
paboyle
809b1cdd58 Bug fix for MPI running ; introduced last night 2018-04-27 05:19:10 +01:00
paboyle
1be8089604 Clean compile 2018-04-26 23:42:45 +01:00
paboyle
3e0eff6468 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-04-26 23:00:46 +01:00
paboyle
7ecc47ac89 Quenched test compile 2018-04-26 23:00:28 +01:00
paboyle
e9f1ac09de static 2018-04-26 23:00:08 +01:00
Peter Boyle
fa0d8feff4 Performance of CovariantCshift now non-embarrassing. 2018-04-26 17:56:27 +01:00
49b8501fd4 Merge branch 'develop' into feature/hadrons 2018-04-26 17:33:50 +01:00
d47484717e Hadrons: scalar SU(N) result handling improvement 2018-04-26 17:32:37 +01:00
Peter Boyle
05b44aef6b Merge branch 'develop' of https://github.com/paboyle/Grid into develop
Conflicts:
	benchmarks/Benchmark_su3.cc
2018-04-26 15:38:49 +01:00
Peter Boyle
03e9832efa Use macros for bare openmp 2018-04-26 14:50:02 +01:00
Peter Boyle
28a375d35d Force static 2018-04-26 14:49:42 +01:00
Peter Boyle
3b06381745 Guard bare openmp statemetn with ifdef 2018-04-26 14:48:57 +01:00
Peter Boyle
91a0a3f820 Improvement 2018-04-26 14:48:35 +01:00
Peter Boyle
8f44c799a6 Saving the benchmarking tests for Cshift 2018-04-26 14:48:03 +01:00
Azusa Yamaguchi
96272f3841 Merge staggered fix linear operator and reduction 2018-04-26 10:33:19 +01:00
Azusa Yamaguchi
5c936d88a0 Merge branch 'feature/staggered-comms-compute' of https://github.com/paboyle/Grid into feature/staggered-comms-compute 2018-04-26 10:18:37 +01:00
Azusa Yamaguchi
1c64ee926e Faster staggered operator with m^2 term trivial used 2018-04-26 10:17:49 +01:00
Azusa Yamaguchi
2cbb72a81c Provide info if EE term is trivial (m^2 factor)
Better timing in staggered 4d case
2018-04-26 10:10:07 +01:00
Azusa Yamaguchi
31d83ee046 Enable special treatment of constEE cases 2018-04-26 10:08:46 +01:00
Azusa Yamaguchi
a9e8758a01 Improvements to staggered tests timings 2018-04-26 10:08:05 +01:00
Azusa Yamaguchi
3e125c5b61 Faster linalg on CG optimised against staggered
Sum overhead is bigger for staggered
2018-04-26 10:07:19 +01:00
Azusa Yamaguchi
eac6ec4b5e Faster reductions, important on single node staggered 2018-04-26 10:03:57 +01:00
Azusa Yamaguchi
213f8db6a2 Microsecond resultion 2018-04-26 10:01:39 +01:00
Guido Cossu
6358f35b7e Debug of previous commit 2018-04-26 14:18:11 +09:00
Guido Cossu
43f5a0df50 More timers in the integrator 2018-04-26 12:01:56 +09:00
Guido Cossu
c897878776 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-04-26 11:31:57 +09:00
cc6eb51e3e Hadrons: macro refactoring for library portability 2018-04-25 16:49:14 +01:00
Vera Guelpers
507009089b Merge remote-tracking branch 'upstream/feature/hadrons' into feature/hadrons 2018-04-25 09:36:39 +01:00
paboyle
2baf193031 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-04-25 00:14:03 +01:00
paboyle
362ba0443a Cshift updates 2018-04-25 00:12:11 +01:00
paboyle
276a2353df Move constructor 2018-04-25 00:11:07 +01:00
b234784c8e Hadrons: scalar SU(N) takes operator pairs now 2018-04-24 19:52:12 +01:00
6ea2a8b7ca Hadrons: scheduler shows starting value 2018-04-24 19:51:47 +01:00
c1d0359aaa Hadrons: scalar SU(N) kinetic term saves trace 2018-04-24 19:51:22 +01:00
047ee4ad0b Hadrons: scalar SU(N) cleanup 2018-04-24 19:50:58 +01:00
a13106da0c Hadrons: scalar SU(N) gradient 2018-04-24 19:50:30 +01:00
75113e6523 Hadrons: Scalar SU(N) variable name update 2018-04-24 19:49:27 +01:00
325c73d051 Hadrons: module template update 2018-04-24 19:48:54 +01:00
b25a59e95e Hadrons: mitigation of GCC/Intel compiler bug not generating defaulted destructors 2018-04-24 17:20:25 +01:00
Guido Cossu
c5b9147b53 Correction of a minor bug in the su3 benchmark 2018-04-24 08:03:57 -07:00
Guido Cossu
64ac815fd9 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-04-24 17:27:38 +09:00
Guido Cossu
a1be533329 Corrected Flop count in Benchmark su3 and expanded the Wilson flow output 2018-04-24 01:19:53 -07:00
7c4533797f Hadrons: scalar SU(N) EMT improvement term optional 2018-04-23 22:46:39 +01:00
af84fd65bb Hadrons: missing dependency message improvement 2018-04-23 22:46:17 +01:00
Dan H
1a2613086a Fix print message. 2018-04-23 15:42:12 -04:00
Dan H
4f110c09a5 Add printing of whether there are unstaged changes in the git hash print. 2018-04-23 15:38:23 -04:00
6764362237 Hadrons: automatic directory creation fix 2018-04-23 18:45:39 +01:00
2fa2b0e0b1 Hadrons: Application header does not include all the modules 2018-04-23 17:57:17 +01:00
b61292f735 Hadrons: recursive mkdir function 2018-04-23 17:36:43 +01:00
ce7720e221 Hadrons: copyright update 2018-04-23 17:36:20 +01:00
853a5528dc Hadrons: template modules compilation optimisation 2018-04-23 17:35:01 +01:00
169f405c9c Hadrons: tests repaired 2018-04-23 12:48:34 +01:00
c6125b01ce Hadrons: Error and Warning channels always on 2018-04-23 12:48:17 +01:00
b0b5b34bff Hadrons: custom abort with module trace 2018-04-23 12:48:00 +01:00
1c9722357d Merge branch 'develop' into feature/hadrons
# Conflicts:
#	lib/qcd/action/fermion/FermionOperator.h
2018-04-20 17:15:21 +01:00
141da3ae71 function to get tensor dimensions 2018-04-20 17:13:34 +01:00
94edf9cf8b HDF5: direct access to group for custom operations 2018-04-20 17:13:21 +01:00
c11a3ca0a7 vectorise/unvectorise in reverse order 2018-04-20 17:13:04 +01:00
paboyle
870b1a85ae Think I have the physical prop interface to CF and PF overlap right, but need a strong check/regression.
Only support Hw overlap, not Ht for now. Ht needs a new Dminus implemented.
2018-04-18 14:17:49 +01:00
paboyle
b5510427f9 physical fermion interface, cshift benchmark in SU3. 2018-04-18 01:43:29 +01:00
Guido Cossu
26ed65c8f8 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-04-17 12:03:32 +01:00
paboyle
f7f043d8cf Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-04-17 10:57:18 +01:00
paboyle
ddcaa6ad29 Master does header on Nersc 2018-04-17 10:48:33 +01:00
334da7f452 Hadrons: can trace which module is throwing an error 2018-04-13 18:45:31 +02:00
4669ecd4ba Hadrons: build improvement 2018-04-13 18:21:18 +02:00
4573b34cac Hadrons: scalar SU(N) 2-pt functions with momentum 2018-04-13 18:21:00 +02:00
17f57e85d1 Merge branch 'develop' into feature/hadrons 2018-04-06 22:53:11 +01:00
c8d4d184ee XML push fragment fix 2018-04-06 22:53:01 +01:00
17f27b1ebd Hadrons: eigenpack writer fix 2018-04-06 22:52:11 +01:00
a16bbecb8a Hadrons: more feedback 2018-04-06 19:38:20 +01:00
7c9b0dd842 Hadrons: top level name for eigenpack metadata 2018-04-06 19:32:22 +01:00
6b7228b3e6 Hadrons: better metadata for eigenpack 2018-04-06 19:29:53 +01:00
f117552334 post-merge fix 2018-04-06 18:38:46 +01:00
a21a160029 Merge branch 'develop' into feature/hadrons
# Conflicts:
#	lib/serialisation/XmlIO.cc
2018-04-06 18:34:19 +01:00
1569a374a9 XML interface polish, XML fragments can be pushed into a writer 2018-04-06 18:32:14 +01:00
eddf023b8a pugixml 1.9 update 2018-04-06 16:17:22 +01:00
6b8ffbe735 Hadrons: genetic minimum value type fix 2018-04-06 15:41:31 +01:00
81050535a5 Hadrons: truncate eigenvalues when loading partial eigenpack 2018-04-06 13:48:58 +01:00
7dcf5c90e3 Hadrons: eigenpack must be referred by solver when used 2018-04-06 13:16:28 +01:00
9ce00f26f9 not special characters in std::vector operator<< 2018-04-04 17:44:56 +01:00
85c253ed4a Test_serialisation MPI fix 2018-04-04 17:19:34 +01:00
ccfc0a5a89 Hadrons: better string representation of module parameters 2018-04-04 17:19:22 +01:00
d3f857b1c9 Hadrons: proper metadata for eigenpacks 2018-04-04 16:36:37 +01:00
fb62035aa0 Hadrons: do not create RB coarse grids 2018-04-03 19:49:11 +01:00
0260bc7705 Hadrons: eigen pack writing only for boss node 2018-04-03 18:55:46 +01:00
68e6a58f12 Hadrons: several Lanczos fixes and improvements 2018-04-03 17:42:21 +01:00
Daniel Richtmann
73ced656eb Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-04-03 17:51:11 +02:00
Daniel Richtmann
f69008edf1 WilsonMG: Add functionality to report timings to MG preconditioner 2018-04-03 17:26:49 +02:00
Daniel Richtmann
57a49ed22f WilsonMG: Read in MG parameters from xml in test 2018-04-03 16:03:11 +02:00
Daniel Richtmann
ff6413a764 WilsonMG: Make number of levels chooseable at runtime
I don't like this solution though :(
2018-04-03 15:57:33 +02:00
Daniel Richtmann
2530bfed01 WilsonMG: Move params instance from global scope to test main function 2018-04-03 14:50:48 +02:00
640515e3d8 Merge branch 'develop' into feature/hadrons 2018-03-30 17:43:49 +01:00
paboyle
f089bf5629 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-03-30 16:17:26 +01:00
paboyle
276f113f28 IO uses master boss node for metadata. 2018-03-30 16:17:05 +01:00
97c579f637 Merge branch 'develop' into feature/hadrons 2018-03-30 16:04:44 +01:00
a13c109111 deterministic initialisation of field metadata 2018-03-30 16:03:01 +01:00
paboyle
ab6afd18ac Still compile if no LIME 2018-03-30 13:39:20 +01:00
paboyle
5bde64d48b Barrier required in parallel when we use ftell 2018-03-30 12:41:30 +01:00
paboyle
2f5add4d5f Creation of file 2018-03-30 12:30:58 +01:00
c5a885dcd6 I/O benchmark 2018-03-29 19:57:41 +01:00
Daniel Richtmann
74f79c5ac7 Revert "Add function to return full type as std::string"
This reverts commit 1cb745c8dc.
2018-03-29 12:03:50 +02:00
Daniel Richtmann
58c30c0cb1 WilsonMG: Add conformability checks in MG preconditioner 2018-03-28 13:24:39 +02:00
Daniel Richtmann
917a92118a WilsonMG: Move operator test to MG testing routine 2018-03-28 12:19:25 +02:00
a4d8512fb8 Revert "Lattice serialisation, just HDF5 for the moment"
This reverts commit 8a0cf0194f.
2018-03-27 17:55:42 +01:00
5ec903044d Serial IO code cleaning for std:: convention 2018-03-27 17:11:50 +01:00
Daniel Richtmann
04f9cf088d WilsonMG: Add more parameters to MultiGridParams struct 2018-03-27 17:13:11 +02:00
Daniel Richtmann
99107038f9 WilsonMG: Rationalize the level counting strategy 2018-03-27 17:06:33 +02:00
8a0cf0194f Lattice serialisation, just HDF5 for the moment 2018-03-26 19:16:16 +01:00
Daniel Richtmann
b78456bdf4 WilsonMG: Get rid of explicit include of GCR header 2018-03-26 15:41:53 +02:00
Daniel Richtmann
08543b6b11 WilsonMG: Provide a switch between V- and K-cycle 2018-03-26 15:37:17 +02:00
Daniel Richtmann
63ba33371f WilsonMG: Some minor refactoring 2018-03-26 15:34:53 +02:00
Daniel Richtmann
683a7d2ddd WilsonMG: Move comment to make clang-format happy 2018-03-26 14:59:40 +02:00
1c680d4b7a Merge branch 'develop' into feature/hadrons 2018-03-26 13:52:44 +01:00
Daniel Richtmann
afdcbf79d1 Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-03-23 21:13:50 +01:00
Daniel Richtmann
3c3ec4e267 WilsonMG: Move tests for Wilson & WilsonClover into the same file 2018-03-23 21:12:27 +01:00
Daniel Richtmann
bbe1d5b49e WilsonMG: Temporarily use GMRES in construction of basis vectors
This can go back to CG once Mdag in CoarsenedMatrix works.
2018-03-23 20:02:27 +01:00
Daniel Richtmann
0f6009a29f WilsonMG: Huge refactor into something that could be considered an algorithm 2018-03-23 19:55:43 +01:00
Daniel Richtmann
1cfed3de7c WilsonMG: Add new logger for MG 2018-03-23 19:55:16 +01:00
Guido Cossu
c9c073eee4 Changes in messages in test dwf mixedprec 2018-03-23 11:27:56 +00:00
Guido Cossu
f290b2e908 Fix to pass CI tests 2018-03-23 11:14:23 +00:00
Guido Cossu
5f8225461b Fencing mixedcg test propagator write. LIME is still optional in Grid 2018-03-23 10:37:58 +00:00
Daniel Richtmann
edbc0d49d7 WilsonMG: Get rid of explicit GridTypeMappers in CoarsenedMatrix 2018-03-22 16:38:24 +01:00
e9323460c7 Merge branch 'develop' into feature/hadrons 2018-03-22 10:48:37 +00:00
20e186a1e0 Merge pull request #158 from goracle/dev-pull
Make compilation faster by moving print of git hash.
2018-03-22 10:45:17 +00:00
Peter Boyle
6ef4af989b Merge pull request #159 from goracle/dev-precsafe
Add dimension check to precisionChange.
2018-03-22 10:41:53 +00:00
Dan H
ccde8b817f Add dimension check to precisionChange. 2018-03-21 20:58:04 -04:00
Dan H
68168bf72d Revert "Add dimension match check to precisionChange."
This reverts commit 8f601d9b39.
2018-03-21 20:51:38 -04:00
Dan H
e93d0feaa7 Merge branch 'dev-pull' of github.com:goracle/Grid into dev-pull 2018-03-21 20:39:30 -04:00
Dan H
8f601d9b39 Add dimension match check to precisionChange. 2018-03-21 20:38:19 -04:00
paboyle
5436308e4a Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-03-21 14:26:29 +00:00
paboyle
07fe7d0cbe Save file in current dir; print checksums 2018-03-21 14:26:04 +00:00
Guido Cossu
60b57706c4 Small bug fix in the shm file names 2018-03-21 13:57:30 +00:00
James Harrison
58c2f60b69 Merge branch 'feature/hadrons' into feature/qed-fvol 2018-03-20 20:19:18 +00:00
James Harrison
bfa3a7b3b0 Merge branch 'feature/hadrons' into feature/qed-fvol
# Conflicts:
#	extras/Hadrons/Modules.hpp
#	extras/Hadrons/Modules/MGauge/StochEm.cc
#	extras/Hadrons/modules.inc
2018-03-20 20:17:59 +00:00
paboyle
954e38bebe Put a username in the path 2018-03-20 18:16:15 +00:00
paboyle
b1a38bde7a Extra test for Gparity with plaquette action 2018-03-20 18:01:32 +00:00
Guido Cossu
2581875edc Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-03-19 18:00:08 +00:00
Guido Cossu
f212b0a963 Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/hadrons 2018-03-19 17:57:13 +00:00
Guido Cossu
62702dbcb8 Fixing bug in the Point sink causing NaNs 2018-03-19 17:56:53 +00:00
41d6cab033 Merge branch 'develop' into feature/hadrons 2018-03-19 13:30:21 +00:00
5a31e747c9 Merge commit 'd5ce66f6ab2c44a12def7b6d26df80d6e646b1fb' into feature/hadrons 2018-03-19 13:19:09 +00:00
cbc73a3fd1 Hadrons: CG guesser fix 2018-03-19 13:11:38 +00:00
Peter Boyle
6c6d43eb4e Drop RB on coarse space ; that was a mistake 2018-03-17 09:35:01 +00:00
Peter Boyle
e1dcfd3553 typo fix 2018-03-16 23:10:47 +00:00
Peter Boyle
888838473a 4GB clean the offsets in parallel IO for multifile records 2018-03-16 21:54:56 +00:00
Peter Boyle
01568b0e62 Add a new SHM option 2018-03-16 21:54:28 +00:00
Peter Boyle
d5ce66f6ab Extra SHM option 2018-03-16 21:37:03 +00:00
Guido Cossu
d86936a3de Eliminating deprecated lex_sites 2018-03-16 12:26:39 +00:00
Daniel Richtmann
ee5cf6c8c5 WilsonMG: Some minor changes to GMRES implementations 2018-03-16 13:10:45 +01:00
d516938707 Hadrons: eigen packs I/O and deflation interface 2018-03-14 14:55:47 +00:00
72344d1418 Hadrons: change default Schur convention to DiagTwo 2018-03-13 17:10:54 +00:00
7ecf6ab38b Merge branch 'develop' into feature/hadrons 2018-03-13 16:11:59 +00:00
2d4d70d3ec Hadrons: LCL fixes 2018-03-13 16:10:36 +00:00
78f8d47528 Hadrons: environment access to derived objects 2018-03-13 16:10:16 +00:00
b85f987b0b Hadrons: error message channel verbose during profiling 2018-03-13 16:09:22 +00:00
f57afe2079 Hadrons: much cleaner eigenpack implementation, to be tested 2018-03-13 13:51:09 +00:00
Dan H
0fb84fa34b Make compilation faster by moving print of git hash. 2018-03-12 17:03:48 -04:00
Vera Guelpers
8462bbfe63 Gamma input for meson contraction with round brackets 2018-03-12 18:02:12 +00:00
229977c955 Hadrons: minor memory fix for ShiftProbe module 2018-03-09 21:56:27 +00:00
e485a07133 Hadrons: garbage collector debug output 2018-03-09 21:56:01 +00:00
paboyle
0880747edb Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-03-09 20:44:42 +00:00
paboyle
b801e1fcd6 fclose should be called through a call to close() 2018-03-09 20:44:10 +00:00
70ec2faa98 Hadrons: maximum iteration specified for tests and error if 0 2018-03-09 19:53:55 +00:00
Daniel Richtmann
a66cecc509 WilsonMG: Fix invalid call to MR ctor 2018-03-09 17:34:29 +01:00
Daniel Richtmann
0f6cdf3d4b WilsonMG: Implement missing parts of CoarsenedMatrix 2018-03-09 16:56:16 +01:00
Daniel Richtmann
1e63b73a14 WilsonMG: Some cleanup/formatting 2018-03-09 16:50:19 +01:00
2f849ee252 declaration fix 2018-03-08 23:34:00 +00:00
bb6ed44339 Merge branch 'develop' into feature/hadrons 2018-03-08 23:09:28 +00:00
360cface33 Grid tensor serialisation fully implemented and tested 2018-03-08 19:12:03 +00:00
Azusa Yamaguchi
80302e95a8 MILC Interface 2018-03-08 15:34:03 +00:00
caf2f6b274 Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-03-08 09:52:25 +00:00
c49be8988b Grid tensor serialisation 2018-03-08 09:51:22 +00:00
971c2379bd std::vector to tensor conversion + test units 2018-03-08 09:50:39 +00:00
Guido Cossu
94b0d66e4c Merge pull request #157 from goracle/dev-pull
Add print of the current git hash on Grid init.
2018-03-08 16:09:28 +09:00
Dan H
5e8af396fd Add print of the current git hash on Grid init. 2018-03-07 13:11:51 -05:00
9942723189 Merge branch 'develop' into feature/hadrons
# Conflicts:
#	lib/serialisation/BaseIO.h
2018-03-07 15:22:16 +00:00
a7d19dbb64 Merge branch 'develop' of github.com:paboyle/Grid into develop
# Conflicts:
#	lib/serialisation/BaseIO.h
2018-03-07 15:13:54 +00:00
90dbe03e17 Conversion of Grid tensors to std::vector made more elegant, also pair syntax changed to (x y) to avoid issues with JSON/XML 2018-03-07 15:12:32 +00:00
8b14096990 Conversion of Grid tensors to std::vector made more elegant, also pair syntax changed to (x y) to avoid issues with JSON/XML 2018-03-07 15:12:18 +00:00
Azusa Yamaguchi
b938202081 Overlapped Comm for Wilson DhopInternal 2018-03-07 14:08:43 +00:00
e79ef469ac Merge branch 'develop' into feature/hadrons
# Conflicts:
#	lib/serialisation/BaseIO.h
2018-03-06 19:25:51 +00:00
485c5db0fe conversion of Grid tensors to nested std::vector in preparation for tensor serialisation 2018-03-06 19:22:03 +00:00
James Harrison
c793947209 Add overloaded Photon constructors, with default parameters for IR improvements and infinite-volume G(x=0). 2018-03-06 16:27:26 +00:00
3e9ee053a1 Merge branch 'develop' into feature/hadrons 2018-03-05 20:01:38 +00:00
dda6c69d5b Hadrons: scalar SU(N) shift probes 2018-03-05 20:00:29 +00:00
cd51b9af99 Torture yourself with namespace lookup 101 2018-03-05 19:58:13 +00:00
paboyle
c399c2b44d Guido broke the charge conjugate plaquette action with premature optimisation.
This sector of the code does not matter for anything other than Guido's quenched HMC
studies, and any plaq specific optimisations should be retained in a private branch
instead of destroying the code simplicity.
2018-03-05 12:55:41 +00:00
paboyle
af7de7a294 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-03-05 12:22:41 +00:00
paboyle
1dc86efd26 Finalize protection 2018-03-05 12:22:18 +00:00
f32555dcc5 Merge branch 'develop' into feature/hadrons 2018-03-03 15:31:52 +00:00
30391cb2eb Merge pull request #155 from fionnoh/develop
Some changes needed for deflation interface
2018-03-03 13:43:59 +00:00
e93c883470 Hadrons: basic GraphViz visualisation 2018-03-03 13:42:36 +00:00
Fionn O hOgain
2e88408f5c Some changes needed for deflation interface 2018-03-02 22:27:41 +00:00
fcac5c0772 Hadrons: scalar SU(N) fixes 2018-03-02 19:20:23 +00:00
90f4000935 Hadrons: scheduler debug less verbose 2018-03-02 19:20:01 +00:00
480708b9a0 Hadrons: safer error handling for HadronsXmlRun 2018-03-02 19:19:37 +00:00
c4baf876d4 Hadrons: graph consistency check 2018-03-02 18:40:18 +00:00
2f4dac3531 Hadrons: legal update 2018-03-02 18:10:58 +00:00
3ec6890850 Merge branch 'feature/hadrons' of github.com:paboyle/Grid into feature/hadrons 2018-03-02 17:56:08 +00:00
018801d973 Hadrons: legal update 2018-03-02 17:56:00 +00:00
1d83521daa Hadrons: scalar SU(N) EMT 2018-03-02 17:55:18 +00:00
fc5670c6a4 Merge pull request #151 from guelpers/feature/hadrons
Feature/hadrons
2018-03-02 17:54:43 +00:00
d9c435e282 Hadrons: Scalar SU(N) transverse projection module 2018-03-02 17:35:12 +00:00
614a0e8277 Hadrons: Scalar SU(N) utility functions 2018-03-02 17:34:23 +00:00
Vera Guelpers
aaf39222c3 update my fork and fixed conflicts 2018-03-02 17:08:08 +00:00
550142bd6a Hadrons: more code cleaning 2018-03-02 14:30:45 +00:00
c0a929aef7 Hadrons: code cleaning 2018-03-02 14:29:54 +00:00
37fe944224 Hadrons: scalar kinetic term 2018-03-02 14:14:11 +00:00
Vera Guelpers
315a42843f changes requested for the pull request 2018-03-02 11:47:38 +00:00
83a101db83 Hadrons: more LCL fixes 2018-03-02 11:05:02 +00:00
c4274e1660 Hadrons: LCL cleaning 2018-03-02 10:18:33 +00:00
ba6db55cb0 Hadrons: reverse last commit 2018-03-01 23:30:58 +00:00
e5ea84d531 Hadrons: LCL: orthogonalise coarse evec 2018-03-01 19:33:11 +00:00
15767a1491 Hadrons: LCL fine convergence test 2018-03-01 18:04:08 +00:00
4d2a32ae7a Hadrons: z-Mobius message fix 2018-03-01 18:03:44 +00:00
5b937e3644 Hadrons: VM memory profiling fix 2018-03-01 17:28:38 +00:00
e418b044f7 Hadrons: code cleaning 2018-03-01 12:57:28 +00:00
b8b05f143f Hadrons: Lanczos more conservative type names 2018-03-01 12:53:16 +00:00
6ec42b4b82 LCL: external storage fix 2018-03-01 12:27:29 +00:00
abb7d4d2f5 Hadrons: z-Mobius action 2018-02-27 19:32:19 +00:00
16ebbfff29 Hadrons: Schur convention globally defined through a macro 2018-02-27 18:45:23 +00:00
4828226095 Hadrons: prettier log 2018-02-27 14:43:51 +00:00
8a049f27b8 Hadrons: Lanczos code improvement 2018-02-27 13:46:59 +00:00
43578a3eb4 Hadrons: copyright update 2018-02-26 19:24:19 +00:00
fdbd42e542 Hadrons: first implementation of local coherence Lanczos 2018-02-26 19:22:43 +00:00
e7e4cee4f3 Merge branch 'develop' into feature/hadrons 2018-02-26 15:05:05 +00:00
James Harrison
ec3954ff5f QedFVol: Add input parameter G(x=0) for infinite-volume photon 2018-02-23 14:53:05 +00:00
Azusa Yamaguchi
0f468e2179 OverlappedComm for Staggered 5D and 4D. 2018-02-22 12:50:09 +00:00
James Harrison
8e61286741 Merge branch 'develop' into feature/qed-fvol 2018-02-20 15:33:35 +00:00
paboyle
4790e99817 Extra communicator free that I had missed.
Hard to audit them all as this is complex
2018-02-20 15:12:31 +00:00
paboyle
2dd63aa7a4 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-02-20 14:29:26 +00:00
paboyle
559a501140 Deflation interface for solvers 2018-02-20 14:29:08 +00:00
paboyle
945684c470 updates for deflation in the RB solver 2018-02-20 14:28:38 +00:00
Christopher Kelly
e30a80a234 Relaxed constraints on MPI thread mode when not using multiple comms threads 2018-02-15 17:13:36 +00:00
James Harrison
69e4ecc1d2 QedFVol: Fix single precision build error 2018-02-14 17:37:18 +00:00
James Harrison
5f483df16b Merge branch 'develop' into feature/qed-fvol 2018-02-14 16:35:04 +00:00
James Harrison
4680a977c3 QedFVol: set infinite-volume photon propagator to 1 at x=0,
so that momentum-spage photon propagator is non-negative.
Need to check whether this is sufficient for all volumes.
2018-02-14 16:30:09 +00:00
Vera Guelpers
de42456171 updated my fork and conflicts fixed 2018-02-14 13:57:56 +00:00
Vera Guelpers
d55212c998 restructure SeqConservedCurrent for DWF to need less memory 2018-02-14 10:45:18 +00:00
paboyle
c96483e3bd Whitespace only change 2018-02-13 11:39:07 +00:00
Vera Guelpers
c6e1f64573 Test for QED 2018-02-13 09:30:23 +00:00
paboyle
ae31a6a760 Move deflate to right class 2018-02-13 02:11:37 +00:00
paboyle
dd8f2a64fe INterface to suit hadrons on Lanczos 2018-02-13 02:08:49 +00:00
James Harrison
724cf02d4a QedFVol: Implement infinite-volume photon 2018-02-12 17:18:10 +00:00
paboyle
7b8b2731e7 Conj error for complex coeffs 2018-02-12 16:06:31 +00:00
paboyle
237a8ec918 Communicator leak fixed (I think) 2018-02-12 13:27:20 +00:00
Vera Guelpers
49a0ae73eb Insertion of photon field in seqential conserved current 2018-02-12 09:36:08 +00:00
Daniel Richtmann
6ab60c5b70 Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-02-08 23:59:07 +01:00
Daniel Richtmann
8c692b7ffd WilsonMG: Comment assertion on hermiticity of coarse operator for now
TODO: Think of a way to not break dwf_hdcr by doing that. It's only an assertion
but it still interferes with it.
2018-02-08 23:55:05 +01:00
Daniel Richtmann
2976132bdd Add first version of multigrid for wilson clover analogous to wilson one
Just like the wilson one, this algorithm

• is currently only a 2-level method since I don't have correct implementations
  for Mdir and Mdiag in CoarsenedMatrix yet (needed for further coarsening)
• needs levelization and refactoring into a proper algorithm
2018-02-08 23:52:10 +01:00
Daniel Richtmann
48177f2f2d Add tests for all MR|GMRES solvers with wilson clover action 2018-02-08 23:52:09 +01:00
Daniel Richtmann
c4ce70a821 WilsonMG: Major cleanup 2018-02-08 23:52:08 +01:00
James Harrison
315f1146cd QedFVol: Fix output of VPCounterTerms module. 2018-02-08 20:40:45 +00:00
Daniel Richtmann
a3e009ba54 Add tests for CAGMRES solvers with staggered action 2018-02-08 17:46:28 +01:00
Daniel Richtmann
eb7cf239d9 Print warning messages in CAGMRES solvers
Currently, the implementation of these algorithms doesn't differ from their non
communication-avoiding versions.
2018-02-08 17:43:47 +01:00
Daniel Richtmann
13ae371ef8 Make solver parameters match in all MR|GMRES solver tests 2018-02-08 17:33:10 +01:00
Daniel Richtmann
9f79a87102 Fix bugs in Flexible GMRES solvers
Somehow I got the left and right-preconditioned versions of GMRES mixed up. As
of now this is right-preconditioned version, which is what we want.
2018-02-08 16:00:31 +01:00
Daniel Richtmann
4ded1ceeb0 Make GMRES solvers perform no more than MaxIterations steps
I noticed that it was possible to overrun this number.
2018-02-08 15:29:44 +01:00
James Harrison
9f202782c5 QedFVol: Change format of scalar VP output files, and save diagrams without charge factors for consistency with ChargedProp module. 2018-02-07 20:31:50 +00:00
Daniel Richtmann
8bc12e0ce1 Remove superfluous comments in MR solver 2018-02-07 18:09:09 +01:00
Daniel Richtmann
cc2f00f827 Remove test for MR solver with dwf action as it doesn't converge 2018-02-07 18:09:08 +01:00
Daniel Richtmann
cd61e2e6d6 Increase max iterations in test of MR solver with staggered action 2018-02-07 18:09:07 +01:00
Daniel Richtmann
323ed1a588 Add an overrelaxation parameter to the MR solver 2018-02-07 18:09:06 +01:00
Daniel Richtmann
68c66d2e4b Remove empty line in output of *Residual* solvers 2018-02-07 18:08:56 +01:00
Daniel Richtmann
1671adfd49 WilsonMG: Add some tests for linear operators 2018-02-07 17:15:22 +01:00
James Harrison
594a262dcc QedFVol: Remove redundant file Communicator_mpi.cc 2018-02-07 11:37:01 +00:00
James Harrison
7f8ca54285 Merge branch 'develop' into feature/qed-fvol 2018-02-07 10:11:00 +00:00
James Harrison
c5b23c367e QedFVol: Fix segmentation fault when multiple propagator modules are used. 2018-02-05 11:46:33 +00:00
Vera Guelpers
b6fe03eb26 BugFix: Now the stochatic EM potential weight is generated when calling for the first time 2018-02-02 15:29:38 +00:00
James Harrison
f37ed4958b Implement IR improvement, with coefficients set in input file. 2018-02-02 11:56:51 +00:00
Peter Boyle
896f3a8002 Fix to MPI for Hokusai system 2018-02-01 18:51:51 +00:00
James Harrison
5f85473d6b QedFVol: Move Projection class into Result class 2018-02-01 16:16:13 +00:00
Daniel Richtmann
871649238c WilsonMG: Stricter naming for linear operators 2018-02-01 14:43:08 +01:00
James Harrison
ac3b0ebc58 QedFVol: New structure for ChargedProp output files 2018-02-01 12:31:32 +00:00
Daniel Richtmann
7c86d2085b WilsonMG: Some minor cleanup 2018-02-01 12:24:16 +01:00
Daniel Richtmann
9292be0b69 WilsonMG: Add check for Mdiag + Σ Mdir == M
Need to test my implementations of CoarsenedMatrix::Mdiag &
CoarsenedMatrix::Mdir.
2018-01-31 14:03:30 +01:00
Guido Cossu
f0fcdf75b5 Update README.md 2018-01-30 12:44:20 +01:00
Guido Cossu
53bffb83d4 Updating README with new SKL target 2018-01-30 12:42:36 +01:00
Daniel Richtmann
10141f90c9 WilsonMG: Rename test file 2018-01-30 10:25:09 +01:00
Guido Cossu
cd44e851f1 Fixing compilation error in FundtoHirep 2018-01-30 06:04:30 +01:00
Daniel Richtmann
a414430817 Merge remote-tracking branch 'upstream/develop' into feature/ddalphaamg 2018-01-29 18:32:31 +01:00
Daniel Richtmann
f20728baa9 WilsonMG: Some further steps towards a three level method
Currently this is very "manual" as we are still testing stuff. Will refactor
and make it an algorithm once everything works.

What currently does work:

  - All tests in MultiGridPreconditioner::runChecks for the first coarse grid
  - The tests for the intergrid operators going from the first to the second
    coarse grid
    - (1 - P R) v   == 0
    - (1 - R P) v_c == 0
  - A full solve with VPGCR and a two-level MG preconditioner

What hinders the rest of the tests from passing with a three-level method is the
absence of implementations of CoarsenedMatrix::Mdir and CoarsenedMatrix::Mdiag.
2018-01-29 18:29:49 +01:00
Daniel Richtmann
d2e68c4355 WilsonMG: Perform some minor cleanup 2018-01-29 18:07:10 +01:00
Daniel Richtmann
1cb745c8dc Add function to return full type as std::string
Also works for nested templates. I find it useful for debugging.
Possible usage:

std::cout << "getTypename<AType>() = " << getTypename<Atype>() << std::endl;
std::cout << "getTypename<decltype(AnInstance)>() = " << getTypename<decltype(AnInstance)>() << std::endl;
2018-01-29 17:39:19 +01:00
Daniel Richtmann
faf4278019 Use 2 passes of GS in coarse operator construction 2018-01-29 17:21:42 +01:00
Daniel Richtmann
194e4b94bb Make MG checking function work level-wise 2018-01-29 17:18:20 +01:00
Daniel Richtmann
bfc1411c1f Use more iterations in subspace creation 2018-01-29 17:11:29 +01:00
Daniel Richtmann
161637e573 Turn on orthogonality checking temporarily 2018-01-29 17:10:05 +01:00
Guido Cossu
fb24e3a7d2 Adding utilities for perf profiling 2018-01-29 11:11:45 +01:00
Guido Cossu
655a69259a Added support for GCC compilation for Skylake AVX512 2018-01-28 17:02:46 +01:00
James Harrison
4e0cf0cc28 QedFVol: Fix bug in ScalarVP.cc due to double use of temporary object. Still getting mpi3 errors when configured with enable-comms=mpi[-auto]. 2018-01-27 15:15:25 +00:00
Guido Cossu
507c4e9efc Correcting an missing semicolumn in avx512 2018-01-27 10:59:55 +01:00
James Harrison
cdf550845f QedFVol: Fix bugs in StochEm.cc and ChargedProp.cc (still only works without MPI). 2018-01-26 21:25:20 +00:00
James Harrison
3db7a5387b BROKEN: Adapted scalarVP, UnitEm and VPCounterTerms modules to new Hadrons. Currently getting an assertion error from Communicator_mpi3.cc when I try to run. 2018-01-26 16:33:48 +00:00
Guido Cossu
f8a5194c70 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-01-25 13:46:37 +01:00
Guido Cossu
cff3bae155 Adding support for general Nc in the benchmark outputs 2018-01-25 13:46:31 +01:00
James Harrison
90dffc73c8 Merge branch 'feature/hadrons' into feature/qed-fvol
# Conflicts:
#	extras/Hadrons/Modules.hpp
#	extras/Hadrons/Modules/MGauge/StochEm.cc
#	extras/Hadrons/Modules/MScalar/ChargedProp.cc
#	extras/Hadrons/Modules/MScalar/ChargedProp.hpp
#	extras/Hadrons/modules.inc
#	lib/communicator/Communicator_mpi.cc
2018-01-24 16:41:44 +00:00
a1151fc734 Hadrons: MPI-safe serial IO 2018-01-23 17:26:50 +00:00
James Harrison
ab3baeb38f Implement contractions and data output in functions; calculate diagrams S, X and 4C separately; output 2E and 2T instead of sunset_shifted, sunset_unshifted, tadpole_shifted, tadpole_unshifted; add comments. 2018-01-23 17:07:45 +00:00
Vera Guelpers
389731d373 changed SeqConservedSummed.hpp to work with new hadrons interface 2018-01-23 10:11:33 +00:00
6e3ce7423e Hadrons: don't display module list at startup (too long) 2018-01-22 20:04:05 +00:00
15f15a7cfd Merge branch 'develop' into feature/hadrons
# Conflicts:
#	extras/Hadrons/Modules.hpp
#	extras/Hadrons/modules.inc
2018-01-22 20:03:36 +00:00
0e5f626226 Hadrons: module for scalar operator divergence 2018-01-22 19:38:19 +00:00
Daniel Richtmann
04f92ccddf WilsonMG: Provide a fix for the previous commit; compiles and runs successfully now
I don't like the solution with the temporary very much though ...
2018-01-22 14:56:48 +01:00
Daniel Richtmann
3b2d805398 WilsonMG: Some first steps towards coarse spin dofs; not compiling yet
A failing conversion from the innermost type (Grid::Simd<...>) to a coarse
scalar (triple iScalar) in blockPromote prohibits this commit from working.
2018-01-22 12:45:51 +01:00
Azusa Yamaguchi
97b9c6f03d No option for interior/exterior split of asm kernels since different directions get interleaved 2018-01-22 11:04:19 +00:00
Azusa Yamaguchi
63982819c6 No option to overlap comms and compute for asm implementation since different directions are interleaved
in the kernels, introducing if else structure would be too painful
2018-01-22 11:03:39 +00:00
Vera Guelpers
6fec507bef merged new hadrons interface 2018-01-22 10:09:20 +00:00
James Harrison
219b3bd34f Remove freeVpTensor object 2018-01-19 17:14:11 +00:00
Daniel Richtmann
9dc885d297 Fix a bug in Wilson MG
The calculation of the lattice size of a second coarse level was incorrect.
2018-01-18 17:02:04 +01:00
Daniel Richtmann
a70c1feecc Remove some unnecessary stuff in Wilson MG 2018-01-18 15:48:28 +01:00
Daniel Richtmann
38328100c9 Implement correctness checks for Wilson MG 2018-01-18 15:43:15 +01:00
Daniel Richtmann
9732519c41 Apply clang-format to Wilson MG
I can provide the configuration file I used if people want that.
2018-01-18 15:14:37 +01:00
Daniel Richtmann
fa4eeb28c4 Save current state in Wilson MG test file 2018-01-17 17:56:34 +01:00
Guido Cossu
b00d2d2c39 Correction of Representations compilation and small compilation error for Intel 17 2018-01-17 13:46:12 +00:00
Guido Cossu
f1b3e21830 Merge branch 'feature/clover' into develop 2018-01-17 10:07:42 +00:00
Guido Cossu
b7f8c5b823 Modify test to merge with the new Lanczos interface 2018-01-12 14:38:27 +00:00
Guido Cossu
3923683e9b Updating the feature/clover branch with the newest Hadron package 2018-01-12 13:35:51 +00:00
Guido Cossu
e199fda9dc Merge pull request #136 from pretidav/feature/clover
Feature/clover
2018-01-12 11:57:08 +00:00
7bb405e790 Merge branch 'develop' into feature/hadrons
# Conflicts:
#	lib/communicator/Communicator_mpi3_leader.cc
#	lib/communicator/Communicator_shmem.cc
2018-01-11 18:50:15 +00:00
Daniel Richtmann
10f7a17ae4 Make timing in VPGCR more detailed 2018-01-11 13:42:18 +01:00
Daniel Richtmann
26f14d7dd7 Adapt output format of non-herm solvers to the one of VPGCR 2018-01-11 13:36:30 +01:00
ec16eacc6a Hadrons: scalar SU(N) 2-pt function 2018-01-10 22:12:21 +00:00
pretidav
cf858deb16 Lanczos with 2 reps fixed (tobe tested) 2018-01-10 18:43:02 +01:00
David Preti
a3affac963 SU3 restored + output filename for mesons and baryons fixed. 2018-01-10 14:56:54 +01:00
d9d1f43ba2 Hadrons: code cleaning 2018-01-10 11:31:24 +00:00
b7cd721308 Hadrons: scalar SU(N) tr(mag^n) 2018-01-10 11:25:59 +00:00
29f026c375 Hadrons: scalar SU(N) tr(phi^n) 1-pt function 2018-01-10 11:01:03 +00:00
58c7a13d54 Hadrons: result file macro with trajectory number 2018-01-10 10:59:58 +00:00
Azusa Yamaguchi
24162c9ead Staggered overlap comms comput 2018-01-09 13:02:52 +00:00
Daniel Richtmann
73434db636 Merge remote-tracking branch 'upstream/develop' into feature/ddalphaamg 2018-01-09 10:43:33 +01:00
paboyle
e564d11687 Allow resize of the shared memory buffers 2018-01-08 15:20:26 +00:00
paboyle
0b2162f375 Clean up 2018-01-08 14:06:53 +00:00
paboyle
5610570182 Synthetic test of lanczos 2018-01-08 11:36:39 +00:00
paboyle
44f65526e0 Simplify communicators 2018-01-08 11:35:43 +00:00
paboyle
43e48542ab Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-01-08 11:34:45 +00:00
paboyle
0b85f1bfc8 Simplify the communicator proliferation: mpi and none. 2018-01-08 11:33:47 +00:00
paboyle
9947cfbf14 Simplify number of communicator cases 2018-01-08 11:33:01 +00:00
paboyle
357badce5e Simplify communicator case proliferation 2018-01-08 11:32:16 +00:00
paboyle
0091eec23a Simplify communicator cases 2018-01-08 11:31:32 +00:00
paboyle
9e9c2962df Simplify comms layer proliferation 2018-01-08 11:30:22 +00:00
paboyle
bda97212a9 Simplify proliferation of comms layers 2018-01-08 11:29:20 +00:00
paboyle
b91282ad46 Simplify comms layer proliferation 2018-01-08 11:28:52 +00:00
paboyle
0a68470f9a Simplify comms layers 2018-01-08 11:28:30 +00:00
paboyle
6ecf280723 Simplify comms layer proliferation 2018-01-08 11:28:04 +00:00
paboyle
7eeab7f995 Simplify comms layers 2018-01-08 11:27:43 +00:00
paboyle
9b32d51cd1 Simplify comms layer proliferatoin 2018-01-08 11:27:14 +00:00
paboyle
7b3ed160aa Rationalise MPI options 2018-01-08 11:26:48 +00:00
paboyle
1a0163f45c Updated to do list 2018-01-08 11:26:11 +00:00
Daniel Richtmann
c6411f8514 Merge remote-tracking branch 'upstream/develop' into feature/ddalphaamg 2018-01-08 10:37:10 +01:00
David Preti
9028e278e4 Trying to fix a bug with SU4 mesons (still under investigation) 2018-01-06 15:57:38 +01:00
dd62f2f371 Hadrons: log message fix 2017-12-29 16:58:44 +01:00
0d612039ed Hadrons: prettier Grid logging (non-intrusive) 2017-12-29 16:58:23 +01:00
e8ac75055c Hadrons: binary configuration loader 2017-12-27 14:24:29 +01:00
8b30c5956c Hadrons: copyright update 2017-12-26 14:16:47 +01:00
185da83454 Hadrons: new MIO module namespace, NERSC loader moved there 2017-12-26 14:05:17 +01:00
6718fa8c4f Merge branch 'feature/scalar_adjointFT' into feature/hadrons 2017-12-26 12:59:33 +01:00
pretidav
4ce63af7d5 Working on Hadrons with Hirep. (QCD is set for SU4) 2017-12-22 19:02:07 +01:00
Daniel Richtmann
6cf635d61c Remove some old code in Wilson MG 2017-12-22 13:20:09 +01:00
Daniel Richtmann
39558cce52 Multiply TVs in Wilson MG with G5 instead of G5R5 2017-12-22 13:07:56 +01:00
Vera Guelpers
935cd1e173 conserved current insertion summed over Lorentzindex 2017-12-22 11:38:45 +00:00
Vera Guelpers
55e39df30f tadpole insertion for DWF 2017-12-22 11:36:31 +00:00
67c3fa0f5f Hadrons: all modules are now ported, more tests need to be done 2017-12-21 11:39:07 +00:00
65d4f17976 Hadrons: no errors when trying to recreate a cache 2017-12-19 20:28:32 +00:00
e2fe97277b Hadrons: getReference use is rare, empty by default 2017-12-19 20:28:04 +00:00
Guido Cossu
84f9c37ed4 Merge branch 'feature/scalar_adjointFT' of https://github.com/paboyle/Grid into feature/scalar_adjointFT 2017-12-19 15:43:55 +00:00
bcf6f3890c Hadrons: more fixes after test 2017-12-14 21:14:10 +00:00
591a38c487 Hadrons: VM fixes 2017-12-14 19:42:16 +00:00
James Harrison
581be32ed2 Implement infrared improvement for v=0 on-shell self-energy 2017-12-14 13:42:41 +00:00
842754bea9 Hadrons: most modules ported to the new interface, compiles but untested 2017-12-13 19:41:41 +00:00
James Harrison
6bc136b1d0 Add module for calculating diagrams required for HVP counter-terms 2017-12-13 17:31:01 +00:00
0887566134 Hadrons: scheduler back! 2017-12-13 16:36:15 +00:00
61fc50d616 Hadrons: better organisation of the VM 2017-12-13 13:44:23 +00:00
a9c8d7dad0 Hadrons: code cleaning 2017-12-13 12:13:40 +00:00
259d504ef0 Hadrons: first full implementation of the module memory profiler 2017-12-12 19:32:58 +00:00
f3a77f4b7f Merge branch 'feature/hadrons' into feature/hadrons-new-memory-model 2017-12-12 14:05:23 +00:00
26d7b829a0 Hadrons: error managed through expections 2017-12-12 14:04:28 +00:00
64161a8743 Hadrons: much simpler reference dependency 2017-12-12 13:08:01 +00:00
2401360784 Merge pull request #138 from guelpers/feature/hadrons
bug fix in sequential insertion of conserved vector current
2017-12-11 18:53:41 +01:00
Vera Guelpers
2cfb50cbe5 bug fix in sequential insertion of conserved vector current 2017-12-08 11:13:39 +00:00
f9aa39e1c4 global memory debug through command line flag 2017-12-07 14:40:58 +01:00
Daniel Richtmann
df152648d6 Fix error in MR code when compiling for single precision 2017-12-06 18:00:58 +01:00
0fbf445edd Hadrons: object creation that get properly captured by the memory profiler 2017-12-06 16:51:48 +01:00
e78794688a memory profiler improvement 2017-12-06 16:50:25 +01:00
9e31307963 Merge branch 'feature/hadrons' into feature/hadrons-new-memory-model 2017-12-06 16:49:32 +01:00
29e2eddea8 Merge branch 'develop' into feature/hadrons-new-memory-model 2017-12-06 16:49:21 +01:00
0a038ea15a Merge branch 'develop' into feature/hadrons 2017-12-06 16:49:10 +01:00
62eb1f0e59 FermionOperator virtual destructor needed for polymorphism 2017-12-06 16:48:17 +01:00
5422251959 Hadrons: execution part moved in a new virtual machine class 2017-12-05 15:31:59 +01:00
paboyle
9579c9c327 Threading improvement 2017-12-05 14:12:22 +00:00
paboyle
3729c7a7a6 Clean up of test 2017-12-05 13:07:31 +00:00
paboyle
c24d4c8d0e Improved parallel RNG init 2017-12-05 13:01:10 +00:00
paboyle
a14038051f Improved AllToAll asserts 2017-12-05 11:43:25 +00:00
paboyle
3e560b9462 Faster RNG init 2017-12-05 11:42:05 +00:00
paboyle
d93c6760ec Faster code for split unsplit 2017-12-05 11:39:26 +00:00
paboyle
ae3b7713a9 Cold start doesnt need RNG 2017-12-05 11:36:31 +00:00
cbd8fbe771 Merge branch 'feature/hadrons' into feature/hadrons-new-memory-model 2017-12-03 19:48:56 +01:00
d391f05cb7 Merge branch 'develop' into feature/hadrons 2017-12-03 19:48:46 +01:00
3127b52c90 bootstrap script does not destroy Eigen is working offline 2017-12-03 19:48:34 +01:00
01f00385a4 Hadrons: genetic pair selection based on exponential probability 2017-12-03 19:47:40 +01:00
59aae5f5ec Hadrons: garbage collector clean temporaries 2017-12-03 19:47:11 +01:00
624246409c Hadrons: module setup/execute protected to forbid user to bypass execution control 2017-12-03 19:46:18 +01:00
2a9ebddad5 Hadrons: scheduler offline, minimal code working again 2017-12-03 19:45:15 +01:00
ff7afe6e17 Merge branch 'feature/hadrons' into feature/hadrons-new-memory-model 2017-12-01 19:45:44 +00:00
33cb509d4b Merge branch 'develop' into feature/hadrons 2017-12-01 19:45:32 +00:00
456c78c233 Merge branch 'develop' into feature/hadrons-new-memory-model 2017-12-01 19:45:12 +00:00
2fd4989029 Merge branch 'develop' of github.com:paboyle/Grid into develop 2017-12-01 19:44:31 +00:00
2427a21428 minor serial IO fixes, XML now issues warning when trying to read absent nodes, these becomes 2017-12-01 19:44:07 +00:00
514993ed17 Hadrons: progress on the interface, genetic algorithm freezing 2017-12-01 19:38:23 +00:00
Daniel Richtmann
4e965c168e Implement analogon to test vector analysis in WMG codebase 2017-11-29 15:05:27 +01:00
Daniel Richtmann
f260af546e Save current state 2017-11-28 15:03:02 +01:00
paboyle
28ceacec45 Split/Unsplit working 2017-11-27 15:13:29 +00:00
paboyle
e6a3e375cf Debug 2017-11-27 15:10:22 +00:00
paboyle
4987edbd44 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-11-27 12:34:56 +00:00
paboyle
ad140bb6e7 Clean on multinode target after split 1 1 2 4 -> 1 1 2 2 2017-11-27 12:34:25 +00:00
paboyle
1f04e56038 Believe split/unsplit works, but need to make pretty 2017-11-27 12:33:08 +00:00
paboyle
4bfc8c85c3 Clean up verbose communicator create 2017-11-27 12:32:37 +00:00
azusayamaguchi
e55397bc13 Staggerd cg 2017-11-24 14:18:30 +00:00
Daniel Richtmann
649b8c9aca Save current state 2017-11-24 10:46:20 +01:00
Daniel Richtmann
0afa22747d Merge remote-tracking branch 'upstream/develop' into feature/new-solver-algorithms 2017-11-24 10:11:42 +01:00
a3fe874a5b Hadrons: everything is broken, repairing while implementing the new memory model 2017-11-22 23:27:19 +00:00
f403ab0133 gitignore update 2017-11-22 17:13:09 +00:00
paboyle
94b8fb5686 Debug in progress 2017-11-19 01:39:04 +00:00
Guido Cossu
1f1d77b01a Performance metrics for the Scalar Action force term 2017-11-14 10:01:48 +00:00
pretidav
6a15e2e8ef Added WilsonTwoIndexAntiSymmImpl instantiation in WilsonKernelsHand.cc (shoud not be necessary) 2017-11-12 14:16:19 +01:00
074d17429f Merge branch 'develop' into feature/scalar_adjointFT
# Conflicts:
#	lib/communicator/Communicator_mpi3.cc
2017-11-11 18:09:55 +00:00
Daniel Richtmann
fa43206c79 Remove some empty lines 2017-11-10 13:48:38 +01:00
Peter Boyle
25f73018f4 Merge pull request #135 from fionnoh/develop
Declaring virtual functions as pure virtual functions.
2017-11-09 23:19:08 +00:00
fionnoh
1d7ccc6b2c Declaring virtual functions as pure virtual functions. 2017-11-09 19:46:57 +00:00
Daniel Richtmann
a367835bf2 Set everything up for the implementation of FCAGMRES
The current implementation is the exact same code as normal FGMRES. This commit
only sets up the "framework" for the implementation of FCAGMRES, i.e., a test
and an include in the algorithms header file.
2017-11-09 17:30:41 +01:00
Daniel Richtmann
d7743591ea Fix some minor formatting errors 2017-11-09 17:28:19 +01:00
Daniel Richtmann
c6cbe533ea Set everything up for the implementation of CAGMRES
The current implementation is the exact same code as normal GMRES. This commit
only sets up the "framework" for the implementation of CAGMRES, i.e., a test and
an include in the algorithms header file.
2017-11-09 17:14:44 +01:00
Daniel Richtmann
8402ab6cf9 Some minor formatting improvements 2017-11-09 12:52:04 +01:00
Daniel Richtmann
c63095345e Remove some superfluous comments 2017-11-09 12:47:20 +01:00
pretidav
59d9ccf70c restored WilsonKernelsHand.cc and added Qtop to production codes 2017-11-08 22:02:32 +01:00
Daniel Richtmann
a7ae46b61e Remove some comments 2017-11-08 16:58:20 +01:00
Daniel Richtmann
cd63052205 Remove everything preconditioner-related in GMRES code 2017-11-08 16:57:40 +01:00
Daniel Richtmann
699d537cd6 Add FGMRES test with staggered fermions 2017-11-08 16:56:42 +01:00
Daniel Richtmann
9031f0ed95 Fix a filename in a file header 2017-11-08 16:42:26 +01:00
Daniel Richtmann
26b3d441bb Check in forgotten FGMRES test with wilson Fermions 2017-11-08 16:39:11 +01:00
Daniel Richtmann
99bc4cde56 Fix an implementation error in FGMRES 2017-11-08 16:38:34 +01:00
Daniel Richtmann
e843d83d9d Make z in FGMRES a single Field 2017-11-08 16:38:16 +01:00
Daniel Richtmann
0f75ea52b7 First version of FGMRES; not working yet 2017-11-08 16:17:18 +01:00
Daniel Richtmann
8107b785cc Rename misunderstood "rsd_sq" to "rsq" in GMRES code 2017-11-08 14:40:03 +01:00
Daniel Richtmann
37b777d801 Add test for GMRES solver with staggered fermions 2017-11-08 14:28:48 +01:00
Daniel Richtmann
7382787856 Some minor changes 2017-11-08 14:23:55 +01:00
Daniel Richtmann
781c611ca0 Perform minor code style fix 2017-11-08 14:22:38 +01:00
Daniel Richtmann
b069090b52 Remove a superfluous comment 2017-11-08 13:58:02 +01:00
Daniel Richtmann
0c1c1d9900 Set precision and formatting only once in MR code 2017-11-08 13:57:06 +01:00
Daniel Richtmann
7f4ed6c2e5 First working version of GMRES + a test for Wilson fermions 2017-11-08 13:56:41 +01:00
Daniel Richtmann
56d32a4afb Rename misunderstood "rsd_sq" to "rsq" in MR code 2017-11-08 13:51:08 +01:00
Daniel Richtmann
b8ee496ed6 Print some info at start of GMRES 2017-11-08 13:23:41 +01:00
Azusa Yamaguchi
1860b1698c Fixed the bag on MPI_T at Cam 2017-11-08 09:03:01 +00:00
Azusa Yamaguchi
9b8d1cc3da Staggered Schur decomposed matrix norm changed to not be the Schur anymore :(
Carleton wanted this for multimass / multishift
2017-11-07 14:48:45 +00:00
James Harrison
0c668bf46a QedFVol: Write to output files from one process only. 2017-11-07 14:46:39 +00:00
Guido Cossu
149c3f9e9c Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-11-07 14:01:13 +00:00
Daniel Richtmann
b87416dac4 Fix error with conformable 2017-11-07 15:00:08 +01:00
Daniel Richtmann
176bf37372 Remove some commented stuff 2017-11-07 14:57:36 +01:00
Guido Cossu
c519aab19d Fixing the MPI memory leak in the communicators 2017-11-07 13:55:37 +00:00
Daniel Richtmann
b3d342ca22 Remove old implementation of GMRES operator 2017-11-07 10:24:49 +01:00
Daniel Richtmann
e1f928398d Save current state 2017-11-07 10:22:41 +01:00
paboyle
69929f20bb Destructor fix. Split Grid and MPI3 will not yet work without more effort from me. 2017-11-06 23:45:00 +00:00
Daniel Richtmann
8c579d2d4a Save current state 2017-11-06 18:09:48 +01:00
James Harrison
840814c776 QedFVol: Patch to fix MPI communicators error 2017-11-06 16:34:55 +00:00
Daniel Richtmann
fc7d07ade0 Correct function signature of body of GMRES outer loop 2017-11-06 17:12:38 +01:00
Daniel Richtmann
b3be9195b4 Save one lattice fermion in GMRES code 2017-11-06 17:12:23 +01:00
Daniel Richtmann
9e3c187a4d Save current state 2017-11-06 17:05:25 +01:00
Daniel Richtmann
8363edfcdb Perform some minor changes to GMRES code 2017-11-06 16:17:44 +01:00
Daniel Richtmann
74af31564f Adapt style of wilson GMRES test to style of wilson MR test 2017-11-06 14:06:45 +01:00
Daniel Richtmann
e0819d395f Merge remote-tracking branch 'upstream/develop' into feature/new-solver-algorithms 2017-11-06 13:09:36 +01:00
pretidav
a493429218 added Production tests for MixedRep, Adj, 2S, 2AS. Still missing QObs. The HMC is not printing correctly all the actions and forces. 2017-11-04 18:16:54 +01:00
pretidav
915f610da0 clover 2indexSymm hmc production test created. clover 2indexAsymm and clover mixed to be filled. 2017-11-04 01:17:06 +01:00
pretidav
c79606a5dc Test production code wilson clover. Still missing QObs measurement on-the-fly. 2017-11-03 22:46:32 +01:00
James Harrison
95af55128e QedFVol: Redo optimisation of scalar VP (extra memory requirements were not the problem), and undo optimisation of charged propagator (which seemed to be causing HDF5 errors, although I don’t know why). 2017-11-03 18:46:16 +00:00
James Harrison
9f2a57e334 QedFVol: Undo optimisation of scalar VP, to reduce memory requirements 2017-11-03 13:10:11 +00:00
James Harrison
c645d33db5 QedFVol: Redo optimisation of charged propagator, and fix I/O bug 2017-11-03 10:59:26 +00:00
James Harrison
e0f1349524 QedFVol: Undo optimisation of charged propagator 2017-11-03 09:22:41 +00:00
paboyle
360efd0088 Improved treatment of reverse asked for by chris.
Truncate the basis.
Power method renormalises
2017-11-02 22:05:31 +00:00
pretidav
7b42ac9982 added polyakov loop observable to the hmc 2017-11-02 21:58:16 +01:00
paboyle
c5c647e35e Merge branch 'feature/lanczos-reorg' into develop 2017-11-02 15:23:11 +00:00
a4e5fd1000 Merge branch 'feature/hadrons' into feature/hadrons-new-memory-model 2017-11-01 19:24:51 +00:00
682e7d7839 Merge branch 'develop' into feature/hadrons 2017-11-01 19:24:38 +00:00
Guido Cossu
8e057721a9 Anisotropic Clover term written and tested 2017-11-01 12:50:54 +00:00
Guido Cossu
fa5e4add47 Added support for anisotropy to the WilsonFermion class 2017-10-31 18:20:38 +00:00
Daniel Richtmann
6f81906b00 Add test for the MR solver with staggered fermions; does not converge atm
TODO: Is this a property of staggered or did I do something wrong?
2017-10-30 16:57:55 +01:00
James Harrison
79b761f923 Merge branch 'develop' into feature/qed-fvol
# Conflicts:
#	lib/communicator/Communicator_base.cc
2017-10-30 15:53:18 +00:00
James Harrison
0d4e31ca58 QedFVol: Calculate phase factors for momentum projections once per configuration only. 2017-10-30 15:46:50 +00:00
Daniel Richtmann
a2d83d4f3d Add test for the MR solver with DW fermions; does not converge atm
TODO: Is this a property of DWF or did I do something wrong?
2017-10-30 16:39:30 +01:00
Daniel Richtmann
89bacb0470 Fix path in MR solver header commentary 2017-10-30 16:16:55 +01:00
James Harrison
b07a354a33 QedFVol: output scalar propagator before FFT in spatial directions. 2017-10-30 14:20:44 +00:00
Daniel Richtmann
19010ff66a Merge remote-tracking branch 'upstream/develop' into feature/new-solver-algorithms 2017-10-30 13:16:46 +01:00
paboyle
27ea2afe86 No compile on comms == none fix 2017-10-30 01:14:11 +00:00
paboyle
78e8704eac Shaking out 2017-10-30 00:25:31 +00:00
paboyle
67131d82f2 Get subrank info from communicator constructor 2017-10-30 00:24:11 +00:00
paboyle
615a9448b9 Extended sub comm supported 2017-10-30 00:23:34 +00:00
paboyle
00164f5ce5 : 2017-10-30 00:22:52 +00:00
paboyle
a7f72eb994 SHaking out 2017-10-30 00:22:06 +00:00
paboyle
501fa1614a Communicator updates for split grid 2017-10-30 00:16:12 +00:00
paboyle
5bf42e1e15 Update 2017-10-30 00:05:21 +00:00
paboyle
fe4d9b003c More digits 2017-10-30 00:04:47 +00:00
paboyle
4a699b4da3 New rank can be found out 2017-10-30 00:04:14 +00:00
paboyle
689323f4ee Reverse dim ordering lexico support 2017-10-30 00:03:15 +00:00
Guido Cossu
749189fd72 Full clover force correct 2017-10-29 12:03:08 +00:00
Guido Cossu
f941c4ee18 Clover term force ok 2017-10-29 11:43:33 +00:00
paboyle
84b441800f Merge branch 'develop' into feature/lanczos-reorg 2017-10-27 14:21:38 +01:00
paboyle
1ef424b139 Split grid Y2K bug fix attempt 2017-10-27 14:20:35 +01:00
Daniel Richtmann
5a477ed29e Perform minor style correction 2017-10-27 14:46:18 +02:00
Daniel Richtmann
54128d579a Make MR a bit more verbose 2017-10-27 14:45:29 +02:00
Daniel Richtmann
e7b1933e88 Add a test for the MR solver 2017-10-27 14:38:57 +02:00
Daniel Richtmann
1bad64ac6a Some formatting 2017-10-27 14:35:04 +02:00
Daniel Richtmann
15dfa9f663 Change stopping criterion implementation in MR solver + some cleanup 2017-10-27 14:33:25 +02:00
Daniel Richtmann
2185b0d651 Correct author in the file 2017-10-27 14:32:38 +02:00
Daniel Richtmann
f61c0b5d03 Very early version of MR solver 2017-10-27 14:09:02 +02:00
Daniel Richtmann
074db32e54 Fix build of gmres test 2017-10-27 14:08:48 +02:00
paboyle
aa66f41c69 Bug fix in the coarse restore...
Think this is nearly there
2017-10-27 10:29:34 +01:00
paboyle
f96c800d25 Passes reload of coarse basis 2017-10-27 09:43:22 +01:00
paboyle
32a52d7583 Move the local coherence lanczos into algorithms.
Keep the I/O in the tester. Other people can copy this method to write other I/O formats.
2017-10-27 09:04:31 +01:00
paboyle
fa04b6d3c2 Finished ? Verifying coarse evec restore 2017-10-27 08:18:29 +01:00
paboyle
7fab183c0e Better read test 2017-10-27 08:17:49 +01:00
paboyle
9ec9850bdb 64bit ftello update 2017-10-26 23:34:31 +01:00
paboyle
0c4ddaea0b Cleaning up 2017-10-26 23:31:46 +01:00
paboyle
00ebc150ad Mistake in string parse; interface is ambiguous and must fix. Is char * a file, or a XML buffer ? 2017-10-26 23:30:37 +01:00
paboyle
0f3e9ae57d Gsites error. Only appeared (so far) in I/O code for even odd fields 2017-10-26 23:29:59 +01:00
Azusa Yamaguchi
034de160bf Staggered updates : Schur fixed and added a unit test for Test_staggered_cg_schur.cc giving stronger check 2017-10-26 20:58:46 +01:00
Guido Cossu
76bcf6cd8c Deleting vscode settings file 2017-10-26 18:45:41 +01:00
Guido Cossu
91b8bf0613 Debugging force term 2017-10-26 18:23:55 +01:00
paboyle
14507fd6e4 Final? candidate for push back on the lanczos reorg feature 2017-10-26 16:25:01 +01:00
paboyle
2db05ac214 Test for split/unsplit in isolation 2017-10-26 07:48:03 +01:00
paboyle
31f99574fa Moving these out of algorithms 2017-10-26 07:47:42 +01:00
paboyle
a34c8a2961 Update to IRL; getting close to the structure I would like. 2017-10-26 07:45:56 +01:00
paboyle
ccd20df827 Better IRL interface 2017-10-26 01:59:59 +01:00
paboyle
e9be293444 Better messaging 2017-10-26 01:59:30 +01:00
paboyle
d577211cc3 Relax stoppign condition 2017-10-25 23:57:54 +01:00
paboyle
f4336e480a Faster converge time 2017-10-25 23:53:44 +01:00
paboyle
e4d461cb03 Messagign 2017-10-25 23:53:19 +01:00
paboyle
3d63b4894e Use existing functionality where possible 2017-10-25 23:52:47 +01:00
paboyle
08583afaff Red black friendly coarsening 2017-10-25 23:51:18 +01:00
paboyle
b395a312af Better error messaging 2017-10-25 23:50:37 +01:00
paboyle
66295b99aa Bit less verbose SciDAC IO 2017-10-25 23:50:05 +01:00
paboyle
b8654be0ef 64 bit safe offsets 2017-10-25 23:49:23 +01:00
paboyle
a479325349 Rewrite of local coherence lanczos 2017-10-25 23:48:47 +01:00
paboyle
f6c3f6bf2d XML serialisation of parms and initialise from parms object 2017-10-25 23:47:59 +01:00
paboyle
d83868fdbb Identity linear op added -- useful in circumstances where a linear op may or may not be needed.
Supply a trivial one if not needed
2017-10-25 23:47:10 +01:00
paboyle
303e0b927d Improvements for coarse grid compressed lanczos 2017-10-25 23:46:33 +01:00
paboyle
28ba8a0f48 Force spacing more nicely 2017-10-25 23:45:57 +01:00
Azusa Yamaguchi
f9e28577f3 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-10-25 21:07:56 +01:00
Guido Cossu
e0cae833da Merge branch 'develop' into feature/scalar_adjointFT 2017-10-25 10:49:50 +01:00
Guido Cossu
8a3aae98f6 Solving minor bug in compilation 2017-10-25 10:34:49 +01:00
Guido Cossu
8309f2364b Solving again the MPI comm bug with FFTs 2017-10-25 10:24:14 +01:00
Daniel Richtmann
d5f661ba70 Save intermediate state 2017-10-25 10:38:26 +02:00
Azusa Yamaguchi
cac1750078 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-10-24 23:30:36 +01:00
Guido Cossu
e17cd35151 Merge branch 'develop' into feature/scalar_adjointFT 2017-10-24 17:31:22 +01:00
Guido Cossu
ccdec7a7ab Merge branch 'develop' into feature/clover 2017-10-24 16:51:14 +01:00
Guido Cossu
93642d813d Merging 2017-10-24 16:48:05 +01:00
Daniel Richtmann
1ab8d5cc13 Save two more files 2017-10-24 16:58:05 +02:00
Daniel Richtmann
789e892865 Save current state 2017-10-24 16:58:04 +02:00
Daniel Richtmann
53cfa44d7a Save current state 2017-10-24 16:58:03 +02:00
Guido Cossu
0bc381f982 Merge pull request #133 from pretidav/feature/clover
Feature/clover
2017-10-24 15:15:21 +01:00
Guido Cossu
2986aa76f8 Restoring Perfcounts 2017-10-24 13:32:02 +01:00
Guido Cossu
657779374b Adding vscode to gitignore 2017-10-24 13:27:17 +01:00
Guido Cossu
ec8cd11c1f Cleanup and prepare for pull request 2017-10-24 13:21:17 +01:00
Guido Cossu
cbda4f66e0 Debug of the field strength 2017-10-24 10:20:13 +01:00
Guido Cossu
6579dd30ff More debug test 2017-10-23 18:47:00 +01:00
Guido Cossu
031c94e02e Debugging process for the clover term 2017-10-23 18:27:34 +01:00
Guido Cossu
6391b2a1d0 Added test for Wilson and Clover fermions 2017-10-23 14:42:35 +01:00
Guido Cossu
2e50b55ae4 Changes in the Makefile to compile against Chroma on Linux 2017-10-23 13:32:26 +01:00
James Harrison
c433939795 QedFVol: Temporarily remove incomplete implementation of infinite-volume photon 2017-10-20 16:27:58 +01:00
James Harrison
b6a4c31b48 Merge branch 'feature/qed-fvol' of https://github.com/jch1g10/Grid into feature/qed-fvol 2017-10-20 16:25:07 +01:00
James Harrison
98b1439ff9 QedFVol: pass arbitrary input values to photon constructor in UnitEm 2017-10-20 16:24:09 +01:00
Guido Cossu
27936900e6 Putting the FG verbosity in the Integrator level 2017-10-18 13:08:09 +01:00
James Harrison
564738b1ff Add module for unit EM field 2017-10-17 14:03:57 +01:00
Guido Cossu
cd3e810d25 Merge branch 'develop' into feature/scalar_adjointFT 2017-10-17 11:31:14 +01:00
pretidav
317ddfedee updated test clover + first attempt derivative clove term (still missing spin part) 2017-10-16 02:47:33 +02:00
paboyle
e325929851 ALl codes compile against the new Lanczos call signature 2017-10-13 14:02:43 +01:00
paboyle
47af3565f4 Logging improvement; reunified the Lanczos codes 2017-10-13 13:23:07 +01:00
paboyle
4b4d187935 Reunified the Lanczos implementations 2017-10-13 13:22:44 +01:00
paboyle
9aff354ab5 Final version prior to reunification 2017-10-13 13:22:26 +01:00
paboyle
cb9ff20249 Approx tests and lanczos improvement 2017-10-13 11:30:50 +01:00
James Harrison
a80e43dbcf Added infinite-volume photon in Photon.h (not checked yet) 2017-10-11 16:44:51 -04:00
paboyle
9fe6ac71ea Starting reorg of Blocked lanczos 2017-10-11 10:12:07 +01:00
5c392a6ecc Merge commit 'bf58557fb1ec710c766e19c9a8809b0a352de239' into feature/scalar_adjointFT 2017-10-10 17:14:56 +01:00
Azusa Yamaguchi
f1fa00b71b Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-10-10 14:26:44 +01:00
paboyle
bf58557fb1 Block compressed Lanczos 2017-10-10 14:15:11 +01:00
paboyle
10cb37f504 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-10-10 14:09:44 +01:00
Azusa Yamaguchi
1374c943d4 Correct Schur operator called 2017-10-10 13:59:50 +01:00
paboyle
a1d80282ec cb factorise 2017-10-10 13:49:31 +01:00
paboyle
4eb8bbbebe Christop mods 2017-10-10 13:48:51 +01:00
paboyle
d1c6288c5f Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-10-10 13:38:40 +01:00
Azusa Yamaguchi
dd949bc428 Merge branch 'feature/staggering' into develop 2017-10-10 13:02:51 +01:00
Azusa Yamaguchi
bb7378cfc3 Schur for staggered 2017-10-10 12:02:18 +01:00
Azusa Yamaguchi
f0e084a88c Schur staggered 2017-10-10 10:00:43 +01:00
paboyle
153672d8ec Split CG testing 2017-10-09 23:20:58 +01:00
paboyle
08ca338875 Split grid communication 2017-10-09 23:19:45 +01:00
paboyle
f7cbf82c04 Better stdout/err debug 2017-10-09 23:18:48 +01:00
paboyle
07009c569a Comms splitting improvements 2017-10-09 23:16:51 +01:00
Guido Cossu
15d690e9b9 Adding the cartesian communicator destructor 2017-10-09 09:59:58 +01:00
63b2bc1936 Merge branch 'develop' into feature/hadrons
# Conflicts:
#	lib/qcd/action/fermion/FermionOperatorImpl.h
2017-10-05 14:16:23 +01:00
David Preti
d810e8c8fb first attempt to write C terms in clover derivative. Some shifts to be fixed 2017-10-05 10:13:53 +02:00
Azusa Yamaguchi
09f4cdb11e Merge branch 'develop' of https://github.com/paboyle/Grid into feature/staggering 2017-10-04 10:51:16 +01:00
Azusa Yamaguchi
1e54882f71 Stagger 2017-10-04 10:51:06 +01:00
Guido Cossu
27caff92c6 Merge branch 'feature/scalar_adjointFT' of https://github.com/paboyle/Grid into feature/scalar_adjointFT 2017-10-04 09:44:27 +01:00
d38cee73bf Scalar: easier Fourier acceleration parametrisation through -D flags 2017-10-03 17:29:34 +01:00
8784f2a88d post-merge fix 2017-10-03 14:38:10 +01:00
c497864b5d Merge commit 'd54807b8c0cd1a7658ff8563bb00d1137b987e3e' into feature/scalar_adjointFT
# Conflicts:
#	lib/communicator/Communicator_base.h
#	lib/communicator/Communicator_mpi.cc
#	lib/communicator/Communicator_mpit.cc
2017-10-03 14:27:54 +01:00
05c1c88440 Scalar: more action generalisation 2017-10-03 14:26:20 +01:00
paboyle
d54807b8c0 MPIT works with split grid now 2017-10-02 23:14:56 +01:00
Guido Cossu
f6ba2b95ce Merge branch 'develop' into feature/scalar_adjointFT 2017-10-02 15:19:20 +01:00
paboyle
5625b47c7d Merge branch 'feature/dwf-multirhs' into develop 2017-10-02 12:42:32 +01:00
paboyle
1edcf902b7 Macos ANON 2017-10-02 12:41:02 +01:00
paboyle
e5c19e1fd7 RB constructor change 2017-10-02 12:25:52 +01:00
paboyle
a11d0a33d1 Merge branch 'feature/dwf-multirhs' of https://github.com/paboyle/Grid into feature/dwf-multirhs 2017-10-02 11:42:07 +01:00
paboyle
4f8b6f26b4 Merge branch 'develop' into feature/dwf-multirhs 2017-10-02 11:41:49 +01:00
paboyle
073525c5b3 Small patch from cori 2017-10-02 03:38:21 -07:00
Azusa Yamaguchi
eb6153080a Merge branch 'develop' of https://github.com/paboyle/Grid into feature/staggering 2017-10-02 08:56:33 +01:00
Guido Cossu
f7072d1ac2 Solving an annoying compilation error in json 2017-10-02 07:13:40 +01:00
a021933002 Scalar: SU(N) action change to t'Hooft scaling 2017-09-29 16:09:34 +01:00
James Harrison
b99622d9fb QedFVol: fix problem with JSON wanting gcc 4.9 2017-09-28 13:34:33 -04:00
937c77ead2 Merge branch 'develop' into feature/qed-fvol 2017-09-28 16:25:20 +01:00
95e5a2ade3 Merge pull request #116 from jch1g10/feature/qed-fvol
Feature/qed fvol
2017-09-25 15:08:33 +01:00
David Preti
56478d63a5 clover + test (valence) 2017-09-24 19:32:15 +02:00
df21668f2c memory profiler update 2017-09-22 14:21:18 +01:00
Guido Cossu
482368e9de Merge branch 'develop' into feature/scalar_adjointFT 2017-09-21 13:44:08 +01:00
paboyle
fddeb29d6b Bug fix with spreadout FFT 2017-09-21 11:10:08 +01:00
paboyle
a9ec5cf564 Christoph bug report integrate 2017-09-21 10:32:41 +01:00
Peter Boyle
946a8671b9 Merge pull request #129 from djm2131/feature/eofa
Add support for DWF with the exact one flavor algorithm
2017-09-21 10:15:21 +01:00
Azusa Yamaguchi
a6eeea777b Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-09-21 10:12:41 +01:00
Peter Boyle
771a1b8e79 Merge pull request #128 from paboyle/feature/CG-reliable-update
Feature/cg reliable update
2017-09-21 10:12:03 +01:00
Peter Boyle
bfb68e6f02 Merge pull request #130 from giltirn/gparity-handunroll
Gparity handunroll
2017-09-21 10:11:00 +01:00
Azusa Yamaguchi
77f7737ccc Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-09-19 14:28:01 +01:00
Guido Cossu
9a827d0242 Fixing a compilation error 2017-09-18 14:55:51 +01:00
Guido Cossu
999c623590 Solving a memory leak in Communicator_mpi 2017-09-18 14:39:04 +01:00
paboyle
18c335198a Merge branch 'hotfix/dirac-ITT-fix1' into develop 2017-09-16 18:19:02 +01:00
paboyle
f9df685cde Merge branch 'hotfix/dirac-ITT-fix1' 2017-09-16 18:18:48 +01:00
paboyle
17c5b0f152 Patching comparison point 2017-09-16 18:18:07 +01:00
paboyle
5918769f97 Subtle Naik term bug updated in Stencil; less on logical && with a function call on right 2017-09-16 12:51:26 +01:00
Guido Cossu
b542d349b8 Minor cosmetic changes 2017-09-15 11:48:36 +01:00
Guido Cossu
91eaace19d Added support for FFT accelerated updates 2017-09-15 11:33:45 +01:00
Guido Cossu
bbaf1ada91 Merge branch 'feature/json-fix' into develop 2017-09-08 16:02:08 +01:00
Guido Cossu
1950ac9294 Fixed the Intel compiler problem with the JSON classes 2017-09-08 15:18:59 +01:00
Guido Cossu
13fa70ac1a Merge branch 'develop' into feature/json-fix 2017-09-08 13:42:20 +01:00
Guido Cossu
7cb2b11f26 Fixing Intel compiler error for the JSON parser 2017-09-08 13:41:53 +01:00
Guido Cossu
1184ed29ae Merge pull request #124 from nmeyer-ur/feature/arm-neon
Added integer reduce functionality
2017-09-08 10:54:35 +02:00
paboyle
203c7bf6fa Merge branch 'hotfix/dirac-ITT-fix' into develop 2017-09-05 15:08:51 +01:00
paboyle
c709883f3f Merge branch 'hotfix/dirac-ITT-fix' 2017-09-05 15:08:16 +01:00
paboyle
aed5de4d50 Patching macos compile 2017-09-05 15:07:07 +01:00
paboyle
ba27cc6571 Mac os happiness 2017-09-05 15:00:16 +01:00
paboyle
d856327250 Merge branch 'release/dirac-ITT' into develop 2017-09-05 14:56:12 +01:00
paboyle
d75369cb56 Merge branch 'release/dirac-ITT' 2017-09-05 14:55:54 +01:00
Peter Boyle
bf973d0d56 SHM complete 2017-09-05 14:30:29 +01:00
Peter Boyle
837bf8a5be Updating to control the SHM allocation scheme under configure time options 2017-09-05 12:51:02 +01:00
Peter Boyle
c05b2199f6 Improvements to huge memory 2017-09-04 10:41:21 -04:00
Azusa Yamaguchi
a5fe07c077 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-09-04 14:10:15 +01:00
Azusa Yamaguchi
b83b2b1415 Stability improvement to BCG. Force m_rr hermitian beyond rounding. 2017-09-04 14:09:47 +01:00
James Harrison
91676d1dda Fix “MAP_ANONYMOUS undefined” error on OSX. 2017-09-01 15:48:30 +01:00
Peter Boyle
b331be9101 Better reporting 2017-08-31 11:32:57 +01:00
Peter Boyle
49c20a9fa8 Patch to reporting 2017-08-31 11:32:21 +01:00
paboyle
7359df3501 Full reporting for benchmark; save robustness factor 2017-08-31 10:42:35 +01:00
Christopher Kelly
59bd1fe21b Fix for 'perm' and 'local' not being set for hand-unrolled external-site Dslash, which caused incorrect behavior of G-parity kernel 2017-08-29 13:07:37 -07:00
a56e3b40c4 Merge branch 'develop' into feature/hadrons 2017-08-29 11:03:53 -06:00
Nils Meyer
4e907fef2c Merge remote-tracking branch 'grid/develop' into feature/arm-neon 2017-08-29 17:47:36 +02:00
Christopher Kelly
67888b657f Merge branch 'gparity-handunroll' of https://github.com/giltirn/Grid into gparity-handunroll 2017-08-29 09:52:05 -04:00
Christopher Kelly
74af885d4e Removed some no-longer-needed associated with G-parity hand unrolled kernel 2017-08-29 09:50:37 -04:00
James Harrison
ac3611bb19 Merge branch 'develop' of https://github.com/paboyle/Grid into feature/qed-fvol 2017-08-29 11:53:37 +01:00
Christopher Kelly
d36d2fb40d Added ability to override default Ls in Benchmark_dwf 2017-08-28 06:53:56 -07:00
Peter Boyle
5b9267e88d Cleaner comms benchmark treatment for one node runs 2017-08-27 18:24:48 -04:00
paboyle
15fd4003ef Improving presentation of results 2017-08-27 13:46:02 +01:00
paboyle
4b4c2a715b fcntl.h needed 2017-08-26 11:38:04 +01:00
paboyle
54a5e6c1d0 Check if we get huge pages on linux. Larry Meadows piece of magic. 2017-08-25 22:36:08 +01:00
paboyle
73aeca7dea Merge branch 'feature/multi-communicator' into develop 2017-08-25 21:55:09 +01:00
paboyle
ad89abb018 Fix 2017-08-25 20:43:37 +01:00
paboyle
80c5bce5bb Merge branch 'develop' into feature/multi-communicator 2017-08-25 20:21:26 +01:00
paboyle
f68b5de9c8 No compile fix on Clang 2017-08-25 19:35:21 +01:00
Peter Boyle
d0f3d525d5 Optimal block size for KNL 2017-08-25 19:33:54 +01:00
Christopher Kelly
f365a83fae In G-parity unrolled kernel, replaced calls to permute and exchange with run-time-evaluated permute type with explicit calls to appropriate underlying functions 2017-08-25 14:24:11 -04:00
Peter Boyle
3a58217405 Updated 2017-08-25 14:29:53 +01:00
Peter Boyle
c289699d9a updated from cambridge mpi3 shakeout 2017-08-25 11:41:01 +01:00
Peter Boyle
c3b1263e75 Benchmark prep 2017-08-25 09:25:54 +01:00
Christopher Kelly
34a9aeb331 Reduced number of if-statement evaluations in G-parity unrolled kernel 2017-08-24 13:53:50 -07:00
5846566728 Merge branch 'develop' into feature/hadrons 2017-08-24 18:20:52 +01:00
102ea9ae66 CI update 2017-08-24 18:17:09 +01:00
James Harrison
cc4afb978d Fix bug in non-zero momentum projection 2017-08-24 17:31:44 +01:00
21b02760c3 Merge branch 'develop' into feature/hadrons 2017-08-24 17:05:45 +01:00
Peter Boyle
2bcb704af2 Merge pull request #121 from Lanny91/feature/hadrons
Feature/hadrons
2017-08-24 12:59:08 +01:00
paboyle
5fa386ddc9 FFT test compile fixed 2017-08-24 10:17:52 +01:00
Christopher Kelly
edabb3577f Imported Benchmark_gparity 2017-08-23 16:54:06 -04:00
Christopher Kelly
ce5df177ee Removed superfluous implementation of G-parity twist for hand-unrolled kernel from GparityWilsonImpl 2017-08-23 15:05:22 -04:00
Christopher Kelly
a0bb8e5b46 Added hand-unrolled kernel implementations of all the other dslash precision / comms precision combinations with G-parity 2017-08-23 14:44:40 -04:00
Christopher Kelly
46f88e6d72 G-parity hand-unrolled intrinsics twist now uses one less permute and one less temporary 2017-08-23 13:21:10 -04:00
David Murphy
dd8f1ea189 Vectorized Mobius EOFA Dperp + shift operation 2017-08-23 13:17:26 -04:00
Christopher Kelly
b61835c1a5 Added inplace version of intrinsic G-parity twist to hand-unrolled kernel 2017-08-23 12:33:48 -04:00
Azusa Yamaguchi
d9cd4f0273 Staggered multinode block cg debugged. Missing global sum.
Code stalls and resumes on KNL at cambridge. Curious.

CG iterations 23ms each, then 3200 ms pauses. Mean bandwidth reports
as 200MB/s. Comms dominant in the report. However, the time behaviour suggests it
is *bursty*.... Could be swap to disk?
2017-08-23 15:07:18 +01:00
David Murphy
459f70e8d4 Check-in of working Mobius EOFA class and tests 2017-08-22 22:38:30 -04:00
Christopher Kelly
061e48fd73 Replaced slow unpack-repack in G-parity BC twist with intrinsics version 2017-08-22 18:12:12 -04:00
Christopher Kelly
ab50145001 Implemented first, unoptimized version of hand-unrolled G-parity kernels
Improved Test_gparity
2017-08-22 17:12:25 -04:00
paboyle
b49bec0cec MAP_HUGETLB portability fix 2017-08-20 03:08:54 +01:00
paboyle
ae56e556c6 finalise issue on new OPA revert 2017-08-20 02:53:12 +01:00
paboyle
1cdf999668 Moving multicommunicator into mpi3 also for threading 2017-08-20 02:39:10 +01:00
paboyle
11062fb686 Comms none fail fix 2017-08-20 01:37:07 +01:00
paboyle
383ca7d392 Switch off comms for now until feature/multi-communicator is merged 2017-08-20 01:27:48 +01:00
paboyle
a446d95c33 Trying to pass TeamCity and Travis 2017-08-20 01:10:50 +01:00
paboyle
be66e7dd95 Merge branch 'develop' into feature/multi-communicator 2017-08-19 23:12:38 +01:00
paboyle
6d0d064a6c Update TODO 2017-08-19 23:11:30 +01:00
paboyle
bfef525ed2 New benchmark prep 2017-08-19 23:10:12 +01:00
Peter Boyle
0b0cf62193 Fix mpi 3 interface change 2017-08-19 13:18:50 -04:00
Peter Boyle
7d88198387 Merge branch 'develop' into feature/multi-communicator 2017-08-19 13:03:35 -04:00
Peter Boyle
2f619482b8 Enable blocking stencil send 2017-08-19 12:53:59 -04:00
Peter Boyle
d6472eda8d Use mmap 2017-08-19 12:53:18 -04:00
Peter Boyle
9e658de238 Use Vector 2017-08-19 12:52:44 -04:00
Peter Boyle
bcefdd7c4e Align both allocator calls to 2MB 2017-08-19 12:49:02 -04:00
David Murphy
9d45fca8bc Implement MobiusEOFAFermioncache.cc 2017-08-17 23:45:36 -04:00
David Murphy
ac9e6b63c0 More re-import of Mobius EOFA 2017-08-17 19:28:53 -04:00
David Murphy
e140b3f802 Beginning to re-import Mobius EOFA 2017-08-16 23:36:23 -04:00
David Murphy
d9d3d30cc7 Minor clean-up 2017-08-16 20:57:51 -04:00
David Murphy
47a12ec7b5 Implement EOFA pseudofermion force and Shamir tests for G-parity and non G-parity cases 2017-08-16 19:50:08 -04:00
David Murphy
ec1e2f7a40 Add (mostly implemented) ExactOneFlavourRatio pseudofermion class and tests of Shamir heatbath and action 2017-08-16 12:38:59 -04:00
David Murphy
41f73ec083 Add ChronoForecast class for forecasting solutions across poles in the EOFA heatbath 2017-08-16 12:37:38 -04:00
Guido Cossu
fd367d8bfd Debugging the PointerCache 2017-08-16 09:42:57 +01:00
David Murphy
6d0786ff9d Typo fixes and check-in of G-parity action test for DWF 2017-08-15 22:47:00 -04:00
David Murphy
b7f93aeb4d Change CayleyFermion5D::SetCoefficientsInternal to virtual to allow overriding in derived EOFA classes 2017-08-15 14:18:51 -04:00
David Murphy
202a7fe900 Re-import DWF and abstract base EOFA fermion classes and tests 2017-08-15 13:36:08 -04:00
Guido Cossu
8d168ded4a Correction of the dagger version of the Clover 2017-08-15 10:50:44 +01:00
Guido Cossu
8a3fe60a27 Added more asserts at grid creation time 2017-08-08 11:36:20 +01:00
Guido Cossu
44051aecd1 Checking for integer divisions in cartesian full 2017-08-08 10:31:12 +01:00
Guido Cossu
06e6f8de00 Check that the reduced dim is an integer 2017-08-08 10:22:12 +01:00
Guido Cossu
dbe4d7850c Make a test file compatible with all architectures 2017-08-06 10:49:45 +01:00
Guido Cossu
4fe182e5a7 Added high level HMC support for overriding default SIMD lane decomposition 2017-08-06 10:46:19 +01:00
Guido Cossu
75ee6cfc86 Debugging the Clover term 2017-08-04 16:08:07 +01:00
Guido Cossu
fde71c3c52 Merge branch 'develop' into feature/clover 2017-08-04 12:19:57 +01:00
Guido Cossu
175f393f9d Binary IO error checking 2017-08-04 12:14:10 +01:00
Christopher Kelly
7d867a8134 Merge branch 'develop' into feature/CG-reliable-update 2017-08-02 09:48:04 -04:00
Christopher Kelly
9939b267d2 Added switching to fallback linear operator in reliable update CG, and added recalculation of b parameter on update. 2017-07-31 13:39:44 -04:00
Lanny91
323e9c439a Hadrons: Legal banner fixes 2017-07-31 12:26:34 +01:00
Lanny91
28396f1048 Merge branch 'feature/rare_kaon' of https://github.com/Lanny91/Grid into feature/hadrons 2017-07-31 12:19:54 +01:00
Lanny91
67b34e5789 Modified conserved current 5th dimension loop for compatibility with 5D vectorisation. 2017-07-31 11:35:01 +01:00
Peter Boyle
14d53e1c9e Threaded MPI calls patches 2017-07-29 13:08:10 -04:00
Guido Cossu
8bd869da37 Correcting a bug in the IO routines 2017-07-27 15:12:50 +01:00
Guido Cossu
c7036f6717 Adding checks for libm and libstdc++ 2017-07-27 11:15:09 +01:00
Guido Cossu
c0485d799d Explicit parameter declaration in the WilsonGauge test 2017-07-26 16:26:04 +01:00
Guido Cossu
7abc5613bd Added smearing to the topological charge observable 2017-07-26 16:21:17 +01:00
Guido Cossu
237cfd11ab Solving the spurious O2 flags 2017-07-26 12:08:51 +01:00
Guido Cossu
a4b7dddb67 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-07-26 12:07:38 +01:00
Guido Cossu
5696781862 Debug error in Tensor mult 2017-07-26 12:07:34 +01:00
Christopher Kelly
8f4b3049cd Merge branch 'feature/CG-reliable-update' into ckelly_develop 2017-07-25 11:55:26 -04:00
Christopher Kelly
2a6e673a91 Merge branch 'develop' into feature/CG-reliable-update 2017-07-25 11:54:43 -04:00
Christopher Kelly
9b6cde173f Merge branch 'feature/CG-reliable-update' into ckelly_develop 2017-07-25 11:51:08 -04:00
Christopher Kelly
9f280b82c4 Added mixed-precision CG with reliable updates 2017-07-25 11:30:41 -04:00
c3f0889eda Merge pull request #123 from giltirn/develop
Fix for 'using namespace' in lib/qcd/utils/GaugeFix.h
2017-07-25 11:32:02 -03:00
Nils Meyer
7a53dc3715 Added integer reduce functionality 2017-07-24 11:12:59 +02:00
Christopher Kelly
0f214ad427 Moved FourierAcceleratedGaugeFixer into Grid::QCD namespace and removed 'using namespace' directives from header 2017-07-21 11:13:51 -04:00
Peter Boyle
fe4912880d Update README.md 2017-07-17 09:53:07 +01:00
Lanny91
875e1a841f Hadrons: updated Quark -> MFermion/GaugeProp module name in test. 2017-07-16 13:47:00 +01:00
Lanny91
0366288b1c Hadrons: added tests for 3pt contractions. 2017-07-16 13:45:55 +01:00
Lanny91
6293d438cd Hadrons: sink smearing compatibility for 3pt contraction modules. 2017-07-16 13:43:25 +01:00
Lanny91
852ade029a Hadrons: Added module to sink a propagator 2017-07-16 13:41:47 +01:00
Peter Boyle
f038c6babe Update README.md 2017-07-14 22:59:16 +01:00
Peter Boyle
169f4b2711 Update README.md 2017-07-14 22:56:06 +01:00
Peter Boyle
2d8aff36fe Update README.md 2017-07-14 22:52:16 +01:00
Guido Cossu
9fa07eecde Merge branch 'develop' into feature/json-fix 2017-07-12 15:47:22 +01:00
azusayamaguchi
659d7d1a40 For test/solver
Fixed
2017-07-12 15:01:48 +01:00
Guido Cossu
f64fb7bd77 Fix gcc error on JSON compilation 2017-07-12 14:55:42 +01:00
Guido Cossu
2a35449b91 Merge branch 'develop' into feature/json-fix 2017-07-12 14:47:00 +01:00
Guido Cossu
184af5bd05 Added support for std::pair in the JSON serialiser 2017-07-12 14:44:53 +01:00
Guido Cossu
097c9637ee Fixed the JSON parsing error 2017-07-11 14:31:57 +01:00
azusayamaguchi
dc6f078246 fixed the header file for mpi3 2017-07-11 14:15:08 +01:00
Peter Boyle
8a4714a4a6 Update README.md 2017-07-09 00:11:54 +01:00
Peter Boyle
40e119c61c NUMA improvements worth preserving from AMD EPYC tests 2017-07-08 22:27:11 -04:00
Guido Cossu
d9593c4b81 Merge branch 'develop' into feature/json-fix 2017-07-07 14:17:50 +01:00
paboyle
ac740f73ce Works on Cori 2017-07-02 16:47:58 -07:00
paboyle
75dc7794b9 Working on Cori 2017-07-02 16:47:42 -07:00
paboyle
dee68fc728 IO working multiple nodes again. Strategy of all nodes writing metadata is unsafe.
Only one rank should do this. must identify this rank. Means pass communicator to the
Objects.
2017-07-02 23:33:48 +01:00
paboyle
a2d3643634 Merge branch 'feature/dwf-multirhs' of https://github.com/paboyle/Grid into feature/dwf-multirhs 2017-07-02 14:59:22 -07:00
paboyle
57002924bc NERSC shakeout of this 2017-07-02 14:58:30 -07:00
Peter Boyle
7b0237b081 Update README.md 2017-07-01 10:24:41 +01:00
Peter Boyle
b68ad0cc0b Update README.md 2017-07-01 10:20:07 +01:00
Peter Boyle
37263fd9b1 Update README.md 2017-07-01 10:06:24 +01:00
Peter Boyle
3d09e3e9e0 Update README.md 2017-07-01 10:05:46 +01:00
Peter Boyle
1354b46338 Update README.md 2017-07-01 10:04:32 +01:00
Peter Boyle
251a97fe1b Update README.md 2017-07-01 09:55:36 +01:00
Peter Boyle
e18929eaa0 Update README.md 2017-07-01 09:53:15 +01:00
Peter Boyle
f3b0a92e71 Update README.md 2017-07-01 09:48:00 +01:00
Peter Boyle
a0be3f7330 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-06-30 10:53:50 +01:00
Peter Boyle
b5a6e4f1fd Best option for Xeon cache blocking set 2017-06-30 10:53:22 +01:00
Peter Boyle
7a788db3dc Guard first touch 2017-06-30 10:49:08 +01:00
Peter Boyle
f20eceb6cd First touch once per page in a threaded loop 2017-06-30 10:48:27 +01:00
Peter Boyle
38325ebbc6 Interleave code path; not enabled 2017-06-30 10:23:51 +01:00
Peter Boyle
b73bd151bb Switch off counters by default 2017-06-30 10:16:35 +01:00
Peter Boyle
694b305cab Update to reporting 2017-06-30 10:16:13 +01:00
Peter Boyle
2d3737a133 O3, KNL 2017-06-30 10:15:59 +01:00
Peter Boyle
ac1f1838bc KNL only 2017-06-30 10:15:32 +01:00
Guido Cossu
09d09d0fe5 Update README.md 2017-06-29 11:48:11 +01:00
Guido Cossu
bf630a6821 README file update 2017-06-29 11:42:25 +01:00
Guido Cossu
8859a151cc Small corrections to the NEON port 2017-06-29 11:30:29 +01:00
Guido Cossu
688a39cfd9 Merge pull request #114 from nmeyer-ur/feature/arm-neon
ARM neon intrinsics support
Guido: checked and approved
2017-06-29 09:57:17 +01:00
paboyle
6f5a5cd9b3 Improved threaded comms benchmark 2017-06-28 23:27:02 +01:00
Nils Meyer
0933aeefd4 corrected Grid_neon.h 2017-06-28 20:22:22 +02:00
Peter Boyle
322f61acee Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-06-28 15:30:35 +01:00
Peter Boyle
08e04b9676 Better benchmarks 2017-06-28 15:30:06 +01:00
feaa2ac947 Merge branch 'feature/scalar-hmc-update' into develop 2017-06-28 12:46:18 +01:00
07de925127 minor scalar action fixes 2017-06-28 12:45:44 +01:00
Nils Meyer
a9c816a268 moved file to correct folder 2017-06-27 21:39:15 +02:00
Nils Meyer
e43a8b6b8a removed comments 2017-06-27 20:58:48 +02:00
Nils Meyer
bf729766dd removed collision with QPX implementation 2017-06-27 20:32:24 +02:00
Guido Cossu
dafb351d38 Merge pull request #120 from paboyle/feature/scalar-hmc-update
Scalar HMC update. 
I agree with the changes.
2017-06-27 16:23:14 +01:00
0b707b861c Merge branch 'develop' into feature/scalar-hmc-update 2017-06-27 14:40:05 +01:00
15e87a4607 HDF5 IO fix 2017-06-27 14:39:27 +01:00
7d7220cbd7 scalar: lambda/4! convention 2017-06-27 14:38:45 +01:00
Lanny91
7d2d5e8d3d Merge branch 'develop' of https://github.com/paboyle/Grid into feature/hadrons 2017-06-26 15:19:46 +01:00
paboyle
54e94360ad Experimental: Multiple communicators to see if we can avoid thread locks in --enable-comms=mpit 2017-06-24 23:10:24 +01:00
0af740dc15 minor scalar HMC code improvement 2017-06-24 23:04:05 +01:00
d2e8372df3 SU(N) algebra fix (was not working) 2017-06-24 23:03:39 +01:00
paboyle
869b99ec1e Threaded calls to multiple communicators 2017-06-24 10:55:54 +01:00
paboyle
4a29ab0d0a Merge branch 'feature/dwf-multirhs' of https://github.com/paboyle/Grid into feature/dwf-multirhs 2017-06-23 23:10:43 +01:00
paboyle
0165bcb58e Added an update to TODO list 2017-06-23 23:10:24 +01:00
Lanny91
deca1ecc50 Merge branch 'develop' of https://github.com/paboyle/Grid into feature/rare_kaon 2017-06-23 19:35:19 +02:00
4372d04ad4 Merge pull request #118 from Lanny91/hotfix/bgq
Hotfix/bgq
2017-06-23 16:59:27 +01:00
paboyle
349d75e483 Precision fix 2017-06-23 02:57:59 -07:00
Lanny91
56abbdf4c2 AVX512 integer reduce fix (for non-intel compiler) 2017-06-23 11:09:14 +02:00
Lanny91
af71c63f4c AVX2 fix 2017-06-23 11:03:12 +02:00
paboyle
e51475703a Ticking off lots on the TODO list 2017-06-23 09:42:21 +01:00
paboyle
1feddf4ba6 const fixes 2017-06-22 19:32:41 +01:00
paboyle
600d7ddc2e Proof of concept : Multi RHS solver, running independent solves on different ranks 2017-06-22 18:54:34 +01:00
paboyle
e504260f3d Able to run a test job splitting into multiple MPI subdomains. 2017-06-22 18:53:11 +01:00
Lanny91
0440d4ce66 Merge branch 'develop' of https://github.com/paboyle/Grid into hotfix/bgq 2017-06-22 17:09:42 +02:00
Lanny91
08b0e472aa Fixed hadrons tests after merge 2017-06-22 16:34:33 +02:00
Lanny91
c11d69787e Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/rare_kaon
# Conflicts:
#	extras/Hadrons/Modules.hpp
#	extras/Hadrons/Modules/MFermion/GaugeProp.hpp
#	extras/Hadrons/modules.inc
#	tests/hadrons/Test_hadrons.hpp
#	tests/hadrons/Test_hadrons_meson_3pt.cc
2017-06-22 16:26:31 +02:00
Lanny91
dc6b2d30d2 Documentation fix 2017-06-22 16:09:45 +02:00
Lanny91
7a3bd5c66c Hadrons: new conserved current contraction test (for regression testing) 2017-06-22 16:06:15 +02:00
Lanny91
18211eb5b1 Hadrons: Fixed test to use new implementation of meson module. 2017-06-22 16:03:59 +02:00
Lanny91
863bb2ad10 Moving overly-specialised code out of Grid 2017-06-22 16:02:15 +02:00
paboyle
5e4bea8f20 Benchmark DWF works 2017-06-22 08:38:54 +01:00
paboyle
6ebf9f15b7 Splitting communicators first cut 2017-06-22 08:14:34 +01:00
paboyle
1d7aa673a4 Include BlockCG by default 2017-06-21 21:08:53 +01:00
paboyle
b9104f3072 Block CG 2017-06-21 21:08:03 +01:00
b22eab8c8b Merge commit 'a7d56523abee6c9030fdd9303c79954897b1086f' into feature/hadrons 2017-06-21 18:32:48 +01:00
paboyle
a7d56523ab Merge branch 'feature/lanczos-simplify' into develop 2017-06-21 14:03:20 +01:00
paboyle
9e56c65730 Updated TODO list 2017-06-21 14:02:58 +01:00
paboyle
ef4f2b8c41 todo update 2017-06-21 09:22:20 +01:00
paboyle
e8b95bd35b Clean up finished. Could shrink Lanczos to around 400 lines at a push 2017-06-21 02:50:09 +01:00
paboyle
7e35286860 Simplified lanczos, added Eigen diagonalisation.
Curious if we can deprecate dependencly on BLAS.
Will see when we get 48^3 running on our BG/Q port
2017-06-21 02:26:03 +01:00
paboyle
0486ff8e79 Improved the lancos 2017-06-20 18:46:01 +01:00
1e8a2e1621 various compatibility fixes after merge 2017-06-20 17:24:55 +01:00
7587df831a Merge branch 'develop' into feature/hadrons
# Conflicts:
#	lib/qcd/action/scalar/ScalarImpl.h
2017-06-20 15:50:39 +01:00
Azusa Yamaguchi
e9cc21900f Block solver complete for staggered. Now stable on mass 0.003 and
gives 8x (!) speed up on Haswell laptop vs. standard CG for 8 RHS solves.

166 iterations vs. 537 iterations so algorithmic gain + 2x in flop rate gain.

Better than a slap in the face with a wet kipper.
2017-06-20 12:37:41 +01:00
Azusa Yamaguchi
0a8faac271 Fix make tests compile 2017-06-19 22:54:18 +01:00
Azusa Yamaguchi
abc4de0fd2 No compile make tests fix 2017-06-19 22:03:03 +01:00
b672717096 Test_serialiation update for JSON 2017-06-19 14:38:39 +01:00
284ee194b1 JSON update 2017-06-19 14:38:15 +01:00
Azusa Yamaguchi
cfe3cd76d1 Block solver improvements 2017-06-19 14:04:21 +01:00
Azusa Yamaguchi
3fa5e3109f Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-06-19 14:01:44 +01:00
paboyle
8b7049f737 Improved detectino of usqcdInfo for plaq/linktr 2017-06-19 08:46:07 +01:00
paboyle
c85024683e Merge branch 'feature/parallelio' into develop 2017-06-19 01:39:48 +01:00
paboyle
1300b0b04b Update to enable multiple records per file more consistent with SciDAC.
open, close, write records...
2017-06-19 01:01:48 +01:00
paboyle
e6d984b484 ILDG tests 2017-06-18 00:13:22 +01:00
paboyle
1d18d95d4f Class name return 2017-06-18 00:13:03 +01:00
paboyle
ae39ec85a3 ComplexField defined 2017-06-18 00:12:48 +01:00
paboyle
b96daf53a0 Query tensor structures 2017-06-18 00:12:15 +01:00
paboyle
46879e1658 Complex defined in Impl even for gauge. 2017-06-18 00:11:45 +01:00
paboyle
ae4de94798 SciDAC I/O support 2017-06-18 00:11:23 +01:00
paboyle
0ab555b4f5 SciDAC I/O and ILDG improvements 2017-06-18 00:11:02 +01:00
paboyle
8e9be9f84f Updates for SciDAC IO 2017-06-18 00:10:42 +01:00
paboyle
d572170170 Update for SciDAC 2017-06-18 00:10:20 +01:00
81b18f843a Merge branch 'feature/scalar_adjointFT' into feature/hadrons
# Conflicts:
#	lib/qcd/action/scalar/ScalarImpl.h
2017-06-16 17:59:55 +01:00
Lanny91
1bd311ba9c Faster sequential conserved current implementation, now compatible with 5D vectorisation & G-parity. 2017-06-16 16:43:15 +01:00
Lanny91
41af8c12d7 Code cleaning for conserved current contractions. Will now be easier to implement mobius conserved current. 2017-06-16 16:38:59 +01:00
Lanny91
a833f88c32 Added missing SIMD integer reduction implementation for AVX, AVX-512, SSE4, IMCI 2017-06-16 15:58:47 +01:00
Lanny91
07b2c1b253 Placeholder precision change functions to allow Grid to compile with QPX (warning: no actual functionality) 2017-06-16 15:04:26 +01:00
Lanny91
735cbdb983 QPX Integer reduction (+ integer reduction test) 2017-06-14 10:55:10 +01:00
Lanny91
2ad54c5a02 QPX exchange support 2017-06-14 10:53:39 +01:00
paboyle
12ccc73cf5 Serialisation no compile fix 2017-06-14 05:19:17 +01:00
Nils Meyer
3d04dc33c6 ARM neon intrinsics support 2017-06-13 13:26:59 +02:00
paboyle
e7564f8330 Starting a test for reading an ILDG file. 2017-06-13 12:22:50 +01:00
paboyle
91199a8ea0 openmpi is not const safe 2017-06-13 12:21:29 +01:00
paboyle
0494feec98 Libz dependency 2017-06-13 12:00:23 +01:00
paboyle
a16b1e134e gcc 4.9 fix 2017-06-13 10:48:43 +01:00
James Harrison
20e92a7009 QedVFol: Allow output of scalar propagator and vacuum polarisation projected to arbitrary lattice momentum, not just zero-momentum. 2017-06-12 18:27:32 +01:00
Lanny91
5633a2db20 Faster implementation of conserved current site contraction. Added 5D vectorised support, but not G-parity. 2017-06-12 10:41:02 +01:00
Lanny91
2d433ba307 Changed header include guards to match new convention 2017-06-12 10:32:14 +01:00
paboyle
769ad578f5 Odd new error on G++ 49 on travis 2017-06-12 00:41:21 +01:00
paboyle
eaac0044b5 Compile fixes 2017-06-12 00:20:49 +01:00
paboyle
56042f002c New files 2017-06-11 23:19:20 +01:00
paboyle
3bfd1f13e6 I/O improvements 2017-06-11 23:14:10 +01:00
James Harrison
42f0afcbfa QedFVol: Output all scalar VP diagrams separately 2017-06-09 18:08:40 +01:00
Azusa Yamaguchi
70ab598c96 Move gfix into utils 2017-06-08 22:22:23 +01:00
Azusa Yamaguchi
1d0ca65e28 Move Gfix into utils 2017-06-08 22:21:50 +01:00
Azusa Yamaguchi
2bc4d0a20e Move code into utils 2017-06-08 22:21:25 +01:00
James Harrison
20ac13fdf3 QedFVol: add ChargedProp as an input to ScalarVP module, instead of calculating scalar propagator within ScalarVP. 2017-06-08 17:43:39 +01:00
2490816297 Hadrons: rare kaon program removed 2017-06-07 20:11:02 -05:00
5f55bca378 Hadrons: Quark module renamed MFermion::GaugeProp 2017-06-07 20:10:48 -05:00
James Harrison
e38612e6fa QedFVol: Update ScalarVP module for compatibility with new scalar action 2017-06-07 17:42:00 +01:00
James Harrison
c2b2b71c5d Merge branch 'feature/qed-fvol' of https://github.com/paboyle/Grid into feature/qed-fvol
# Conflicts:
#	extras/Hadrons/Modules.hpp
#	extras/Hadrons/modules.inc
2017-06-07 16:59:47 +01:00
James Harrison
009f48a904 QedFVol: Add missing factor of 2 in free vacuum polarisation 2017-06-07 16:34:09 +01:00
Lanny91
b8e45ae490 Fixed remaining fermion type aliases after merge. 2017-06-07 16:26:22 +01:00
Lanny91
b35fc4e7f9 Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/rare_kaon
# Conflicts:
#	extras/Hadrons/Global.hpp
#	tests/hadrons/Test_hadrons_rarekaon.cc
2017-06-07 14:38:51 +01:00
Lanny91
60f11bfd72 Removed redundant test module 2017-06-07 12:34:47 +01:00
f6aa82b7f2 Merge branch 'develop' into feature/hadrons 2017-06-06 11:46:33 -05:00
22749699a3 Fixes after merge and point sink module 2017-06-06 11:45:30 -05:00
Lanny91
8d442b502d Sequential current fix for spacial indices. 2017-06-06 17:06:40 +01:00
Lanny91
e5c8b7369e Boundary condition option in quark actions for hadrons tests. 2017-06-06 14:19:10 +01:00
0503c028be Merge branch 'feature/qed-fvol' into feature/hadrons (non-trivial conflicts on scalar Impl)
# Conflicts:
#	configure.ac
#	lib/qcd/action/scalar/Scalar.h
2017-06-05 16:37:47 -05:00
Lanny91
c504b4dbad Code cleaning 2017-06-05 15:56:43 +01:00
Lanny91
622a21bec6 Improvements to sequential conserved current test and small bugfix. 2017-06-05 15:55:32 +01:00
Lanny91
eec79e0a1e Ward Identity test improvements and conserved current bug fixes 2017-06-05 11:55:41 +01:00
paboyle
092dcd4e04 MPI I/O only if MPI compiled 2017-06-02 22:50:25 +01:00
Guido Cossu
4a8c4ccfba Test wilson flow, added maxTau for adaptive flow 2017-06-02 17:02:29 +01:00
Guido Cossu
9b44189d5a Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-06-02 16:56:00 +01:00
Guido Cossu
7da4856e8e Wilson flow with adaptive steps 2017-06-02 16:55:53 +01:00
Guido Cossu
aaf1e33a77 Adding adaptive integration in the WilsonFlow 2017-06-02 16:32:35 +01:00
paboyle
094c3d091a Improved and RNG's now survive checkpoint 2017-06-02 00:38:58 +01:00
Peter Boyle
4b98e524a0 Roll over to MPI version of I/O 2017-06-01 17:38:18 -04:00
Peter Boyle
1a1f6d55f9 Roll over to MPI IO for parallel IO 2017-06-01 17:37:26 -04:00
Peter Boyle
21421656ab Big changes improving the code to use MPI IO 2017-06-01 17:36:53 -04:00
Peter Boyle
6f687a67cd As local vols increase, use 64 bits for safety 2017-06-01 17:36:18 -04:00
paboyle
b30754e762 Merge branch 'feature/parallelio' of https://github.com/paboyle/Grid into feature/parallelio 2017-05-30 23:41:28 +01:00
paboyle
1e429a0d57 Added MPI version 2017-05-30 23:41:07 +01:00
paboyle
d38a4de36c Beginning move to MPI IO 2017-05-30 23:40:39 +01:00
paboyle
ef1b7db374 Diff comparison check 2017-05-30 23:40:11 +01:00
paboyle
53a9aeb965 Cosmetic only 2017-05-30 23:39:53 +01:00
paboyle
e30fa9f4b8 RankCount; need to clean up ambigious ProcessCount 2017-05-30 23:39:16 +01:00
paboyle
58e8d0a10d reverse direction lexico mapping 2017-05-30 23:38:30 +01:00
paboyle
62cf9cf638 Cleaner code 2017-05-30 23:38:02 +01:00
paboyle
0fb458879d Precision safe compile 2017-05-30 23:37:02 +01:00
Peter Boyle
725c513d94 Better MPI3 benchmarking 2017-05-29 16:47:32 -04:00
d8648307ff Merge branch 'develop' into feature/hadrons 2017-05-29 12:58:08 +01:00
064315c00b Hadrons: mesons gamma list fix 2017-05-29 12:57:33 +01:00
Guido Cossu
7c6cc85df6 Updating WilsonFlow test 2017-05-27 18:03:49 +01:00
Guido Cossu
a6691ef87c Merge pull request #110 from Lanny91/feature/hadrons
Hadrons: Fermion boundary conditions can now be set in measurement code.
2017-05-26 16:43:22 +01:00
Lanny91
23135aa58a Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/rare_kaon 2017-05-26 16:00:50 +01:00
Lanny91
8e0ced627a Hadrons: Fermion boundary conditions can now be set in measurement code. 2017-05-26 15:59:15 +01:00
Guido Cossu
0de314870d Faster derivative for WilsonGauge 2017-05-26 14:31:49 +01:00
Guido Cossu
ffb91e53d2 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-26 12:46:02 +01:00
Guido Cossu
f4e8bf2858 Fixing the topological charge. Wilson Flow tested, ok 2017-05-26 12:45:59 +01:00
a74c34315c Bootstrap script fix 2017-05-25 14:27:49 +01:00
paboyle
69470ccc10 Update to do list 2017-05-25 13:41:26 +01:00
paboyle
b8b5934193 Attempts to speed up the parallel IO 2017-05-25 13:32:24 +01:00
Guido Cossu
75856f2945 Compilation fix in the Tensor_exp 2017-05-25 12:44:56 +01:00
Guido Cossu
3c112a7a25 Small correction to the general exp definition 2017-05-25 12:09:00 +01:00
Guido Cossu
ab3596d4d3 Using Cayley-Hamilton form for the exponential of SU(3) matrices 2017-05-25 12:07:47 +01:00
paboyle
a8c10b1933 Use a global-X x Local-Y chunksize for parallel binary I/O.
Gives O(32 x 8 x 18*8*8) chunk size on configuration I/O.

At 150KB should be getting close to packet sizes and 4MB filesystem
block sizes that are reasonably (!?) performant. We shall see once I move
this off my laptop and over to BNL and time it.
2017-05-25 11:43:33 +01:00
Guido Cossu
15e801af3f Fixing a compilation error for generic SIMD 2017-05-19 16:39:36 +01:00
Guido Cossu
0ffc235741 Adding more statistics to the Benchmark_comms. Min and max 2017-05-19 10:55:04 +01:00
Guido Cossu
8e19c99c7d Adding more statistical info in the Benchmark_comms 2017-05-18 19:07:35 +01:00
Guido Cossu
a0bc0ad06f Reverting change in Bechmark_comms. Keeping 300 iterations 2017-05-18 17:48:11 +01:00
Guido Cossu
a8fb2835ca Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-18 14:45:00 +01:00
Guido Cossu
bc862ce3ab Fixing an allocation issue in Benchmark_comms 2017-05-18 14:44:56 +01:00
Lanny91
08b314fd0f Hadrons: conserved current test fixes. Axial current tests now also optional. 2017-05-18 13:16:14 +01:00
22f4feee7b Merge branch 'develop' into feature/scalar_adjointFT 2017-05-17 13:27:13 +02:00
3f858d6755 Scalar: phi^2 observable 2017-05-17 13:25:14 +02:00
paboyle
3267683e22 Union workaround for g++ 2017-05-17 11:26:18 +01:00
Azusa Yamaguchi
f46a67ffb3 No compile issue on clang on mac fixed.
Compiler version was clang++-3.9 under mpicxx
2017-05-17 10:51:01 +01:00
paboyle
f7b8383ef5 Half precisoin comms mixed prec test 2017-05-16 14:52:51 +01:00
Guido Cossu
10f2872aae Faster exponentiation for lattice fields 2017-05-15 15:51:16 +01:00
Lanny91
34332fe393 Improvement to sequential conserved current insertion tests 2017-05-12 16:30:43 +01:00
Lanny91
c2010f21ab Added sequential propagator test for gamma matrix insertion 2017-05-12 16:23:01 +01:00
Lanny91
98f610ce53 Reduced code duplication in hadron tests 2017-05-12 16:15:26 +01:00
Lanny91
d44cc204d1 Added test module for sequential gamma matrix insertion 2017-05-12 14:58:17 +01:00
35fa3d1dfd Merge branch 'master' into feature/scalar_adjointFT 2017-05-12 10:41:39 +01:00
paboyle
cd73897b8d Merge branch 'release/v0.7.0' into develop 2017-05-12 01:16:02 +01:00
paboyle
c4435e6beb Merge branch 'release/v0.7.0' 2017-05-12 01:15:59 +01:00
paboyle
7a8f6af5f8 Drop verbose compiler predefine check 2017-05-11 12:48:40 +01:00
paboyle
49a5d9bac7 Clang major, minor trailing underscore 2017-05-11 12:25:02 +01:00
paboyle
2b3fdd4a58 Print CXX predefines 2017-05-11 12:05:50 +01:00
paboyle
34502ec471 4.8 dropped as buggy. 2017-05-11 11:43:39 +01:00
paboyle
8a43e88b4f Compiler check early in build 2017-05-11 11:43:06 +01:00
d1ece74137 HMC scalar test: magnetisation measurement 2017-05-11 11:40:44 +01:00
paboyle
238df20370 Still working on the compiler compat checks 2017-05-11 11:30:14 +01:00
paboyle
97a32a6145 Add 4.8 test 2017-05-11 11:24:21 +01:00
paboyle
655492a443 Compiler detection 2017-05-11 11:21:11 +01:00
paboyle
1cab06f6bd Compat checks for compilers 2017-05-11 10:20:24 +01:00
43c817cc67 Scalar action: const fix 2017-05-11 00:07:17 +01:00
paboyle
f8024c262b Update Eigen 2017-05-10 13:30:09 +01:00
Guido Cossu
4cc5f01f4a Small change in the readme about the intel compiler 2017-05-09 15:38:59 +01:00
James Harrison
5cfc0180aa QedFVol: Output free VP along with charged VP. 2017-05-09 12:46:57 +01:00
James Harrison
914f180fa3 QedFVol: Implement exact O(alpha) vacuum polarisation. 2017-05-09 11:46:25 +01:00
Guido Cossu
9c12c37aaf Confirming the fix on the complex boundary conditions 2017-05-09 08:41:29 +01:00
Guido Cossu
806eaa0530 Adding back the IO tests in the list 2017-05-08 22:26:44 +01:00
Guido Cossu
01d0e54594 Merge branch 'release/v0.7.0' into develop 2017-05-08 22:02:51 +01:00
Guido Cossu
5aafa335fe Fixing JSON error for complex numbers 2017-05-08 21:56:44 +01:00
Guido Cossu
8ba0494485 Fixing JSON for complex numbers 2017-05-08 21:41:39 +01:00
Peter Boyle
d99d98d9fd Merge branch 'release/v0.7.0' of https://github.com/paboyle/Grid into release/v0.7.0 2017-05-08 15:08:20 -04:00
Peter Boyle
95a017a4ae Relax force constraints to pass in single precision. 2017-05-08 15:06:41 -04:00
paboyle
92f92379e6 Adding olivers test version 2017-05-08 18:42:19 +01:00
paboyle
529e78d43f Restart the v0.7.0 release 2017-05-08 18:20:04 +01:00
paboyle
4ec746d262 Merge branch 'release/v0.7.0' into develop 2017-05-06 18:43:03 +01:00
paboyle
51bf1501fc Merge branch 'release/v0.7.0' 2017-05-06 18:42:50 +01:00
paboyle
66d819c054 More info on gcc bug 2017-05-06 18:42:11 +01:00
paboyle
3f3686f869 formattign 2017-05-06 18:41:27 +01:00
paboyle
26bb829f8c Formatting 2017-05-06 18:40:55 +01:00
paboyle
67cb04fc66 README update 2017-05-06 18:39:54 +01:00
paboyle
a40bd68aed Version update 2017-05-06 17:00:14 +01:00
paboyle
36495e0fd2 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-06 16:39:27 +01:00
paboyle
93f6c15772 Warning squash 2017-05-06 16:38:58 +01:00
Peter Boyle
cb93eeff21 Update README 2017-05-06 16:28:12 +01:00
paboyle
c7cc7e6101 Fix 2017-05-06 16:10:09 +01:00
paboyle
c349aa6511 DEFINE warning elimination 2017-05-06 16:08:35 +01:00
paboyle
3bae0a2d5c Drop a gcc warning 2017-05-06 15:51:42 +01:00
paboyle
c1c7566089 GCC bug work around in 5.0 through 6.2 inclusive. 2017-05-06 15:20:25 +01:00
paboyle
2439999ec8 Warning elimination; drop to -O2 on G++ bad versions 2017-05-06 14:44:49 +01:00
paboyle
1d96f662e3 Fixed 4d fermion gparity force. Put strong tests on make check force tests 2017-05-06 00:46:31 +01:00
paboyle
41d1889941 trusty ubuntu 2017-05-05 21:25:35 +01:00
paboyle
0c3981e0c3 Trying to force recent automake 2017-05-05 21:15:22 +01:00
paboyle
c727bd4609 Trying to work around automake version 2017-05-05 21:00:00 +01:00
paboyle
db23749b67 Adding travis to make check 2017-05-05 20:42:08 +01:00
paboyle
751f2b9703 Better check and benchmark driving 2017-05-05 19:54:38 +01:00
Guido Cossu
741bc836f6 Exposing support for Ncolours and Ndimensions and JSON input file for the ScalarAction 2017-05-05 17:36:43 +01:00
James Harrison
6cb563a40c QedFVol: Access HVP tensor using a vector<vector<ScalarField>> instead of vector<vector<ScalarField*>> 2017-05-05 17:12:41 +01:00
paboyle
697c0603ce SITMO I/O for NERSC working now bit repro 2017-05-05 16:54:44 +01:00
paboyle
14bedebb11 Source pointed to 2017-05-05 16:17:27 +01:00
Guido Cossu
8546d01a4c Merge branch 'develop' into feature/scalar_adjointFT 2017-05-05 15:47:33 +01:00
paboyle
47b5c07ffb Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-05 14:27:02 +01:00
Guido Cossu
da86a2bf54 Merge branch 'feature/hmc_generalise' into develop 2017-05-05 14:23:02 +01:00
paboyle
c1cb60a0b3 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-05 14:22:37 +01:00
Guido Cossu
5ed5b4bfbf Merge branch 'develop' into feature/hmc_generalise 2017-05-05 14:22:33 +01:00
Guido Cossu
de84aacdfd Fixing a configure error for the smearing tests 2017-05-05 13:59:10 +01:00
paboyle
2888003765 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-05 13:02:24 +01:00
paboyle
da06bf5b95 Zmobius force test added 2017-05-05 12:52:45 +01:00
Guido Cossu
20999c1370 Merge branch 'develop' into feature/hmc_generalise 2017-05-05 12:47:17 +01:00
Lanny91
77e0af9c2e Compilation fix after merge - conserved current code not yet operational for vectorised 5D or Gparity Impl. 2017-05-05 12:27:50 +01:00
paboyle
33f0ed1a33 No compile fix 2017-05-05 11:04:30 +01:00
paboyle
50be56433b Delete old and defunct tests 2017-05-04 23:41:16 +01:00
paboyle
43924007db Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-04 19:53:41 +01:00
paboyle
78ef10e60f Mobius force improvement 2017-05-04 19:53:21 +01:00
Lanny91
ca1077c560 Merge branch 'develop' of https://github.com/paboyle/Grid into feature/rare_kaon
# Conflicts:
#	lib/qcd/action/fermion/WilsonFermion5D.cc
#	tests/hadrons/Test_hadrons_rarekaon.cc
2017-05-04 16:22:33 +01:00
679ae98b14 Merge branch 'feature/better-external-library' into develop 2017-05-04 15:42:12 +01:00
paboyle
90f6bc16bb No compile clang fix 2017-05-04 12:15:06 +01:00
Peter Boyle
9b5b639546 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-03 20:51:40 -04:00
Peter Boyle
945767c6d8 More info 2017-05-03 20:26:35 -04:00
Peter Boyle
422cdf4979 Some checks 2017-05-03 18:37:38 -04:00
Peter Boyle
38db174f3b Print statement 2017-05-03 18:25:26 -04:00
Peter Boyle
92e364a35f Better reporting in benchmark for MPI3 2017-05-03 15:43:36 -04:00
James Harrison
db3837be22 QedFVol: Change “double” to “Real” in ScalarVP.cc 2017-05-03 13:26:49 +01:00
James Harrison
2f0dd83016 Calculate HVP using a single contraction of O(alpha) charged propagators. 2017-05-03 12:53:41 +01:00
58299b8ba2 Git info separated from version in git-config 2017-05-02 20:04:41 +01:00
124bf4d829 git ref in config summary 2017-05-02 19:41:01 +01:00
e8e56b3414 Config summary saved in git-config 2017-05-02 19:40:47 +01:00
89c430136d grid-config program 2017-05-02 19:13:13 +01:00
ea9aef7baa New header for standard headers (was an issue with Remez.h and external compilation) 2017-05-02 18:26:11 +01:00
c9e9e8061d Merge branch 'feature/hadrons' into develop 2017-05-02 18:23:47 +01:00
Guido Cossu
453cf2a1c6 Moving the topological charge outside the HMC related routines 2017-05-02 14:40:12 +01:00
Guido Cossu
de7bbfa5f9 Adding ParameterFile option for the HMC 2017-05-02 12:16:16 +01:00
dda8d77c87 Merge branch 'feature/hadrons' into feature/rare_kaon 2017-05-01 17:50:57 +01:00
aa29f4346a Hadrons: weird bus error with recent macOS clang 2017-05-01 17:49:08 +01:00
Guido Cossu
86116dbed6 Adding boundary condition switch (compile time) for the Mobius HMC example 2017-05-01 16:33:11 +01:00
Guido Cossu
7bd31e3f7c Adding external file support in the Mobius example (JSON) 2017-05-01 16:30:24 +01:00
Guido Cossu
74f451715f Fix for Mac compilation on the size_t uint64_t types 2017-05-01 15:12:07 +01:00
Guido Cossu
655be8ed76 Adding tests for the mobius operator 2017-05-01 14:42:16 +01:00
Guido Cossu
4063238943 Adding HMC test file example for Mobius + smearing 2017-05-01 13:44:00 +01:00
Guido Cossu
3344788fa1 Merge branch 'develop' into feature/hmc_generalise 2017-05-01 12:13:56 +01:00
Guido Cossu
62a64d9108 EO support, wip 2017-05-01 11:06:21 +01:00
Lanny91
49331a3e72 Minor improvements to Ward Identity checks 2017-04-28 16:50:17 +01:00
Lanny91
51d84ec057 Bugfixes in Wilson 5D sequential conserved current insertion 2017-04-28 16:49:14 +01:00
Lanny91
db14fb30df Hadrons: overhaul of conserved current test 2017-04-28 16:48:00 +01:00
Lanny91
b9356d3866 Added more complete test of sequential insertion of conserved current. 2017-04-28 16:46:40 +01:00
Guido Cossu
99a73f4287 Correcting the M and Mdag in the clover term 2017-04-28 15:51:05 +01:00
Lanny91
f302eea91e SitePropagator redefined to be a scalar object in TYPE_ALIASES. 2017-04-28 15:27:49 +01:00
Guido Cossu
5553b8d2b8 Clover term compiles, not tested 2017-04-28 15:23:34 +01:00
Lanny91
a6ccbbe108 Conserved current sequential source now registered properly and fixed module inputs. 2017-04-28 10:43:47 +01:00
James Harrison
3ac27e5596 QedFVol: remove unnecessary copies of free propagator from shifted sources in ScalarVP 2017-04-27 14:17:50 +01:00
Peter Boyle
99220f6531 Fixes and better timing 2017-04-26 17:24:11 -04:00
Lanny91
d2003f24f4 Corrected incorrect usage of ExtractSlice for conserved current code. 2017-04-26 17:25:28 +01:00
Lanny91
6299dd35f5 Hadrons: Added test of conserved current code. Tests Ward identities for conserved vector and partially conserved axial currents. 2017-04-26 12:41:39 +01:00
Lanny91
a39daecb62 Removed make_5D const declaration to avoid compilation error 2017-04-26 12:39:07 +01:00
Lanny91
159770e21b Legal Banners added 2017-04-26 09:32:57 +01:00
paboyle
2a6d093749 move the sudo: required to match locatoin on Guido's branch 2017-04-26 09:15:34 +01:00
paboyle
c947947fad sudo required suggested by guido 2017-04-26 08:45:36 +01:00
paboyle
f555b50547 Merge branch 'feature/half-prec-comms' into develop 2017-04-26 08:43:40 +01:00
paboyle
738c1a11c2 longer nloop 2017-04-26 08:43:20 +01:00
Peter Boyle
f8797e1e3e bug fix. works now and great face performance 2017-04-26 03:14:02 -04:00
Peter Boyle
fd1eb7de13 Clean implementation of the exterior faces listing only those points on the boudary 2017-04-26 02:34:52 -04:00
Peter Boyle
2ce898efa3 Pretty code 2017-04-26 02:34:25 -04:00
Lanny91
dc5a6404ea Hadrons: modules for testing conserved current contractions and sequential insertion. 2017-04-25 22:08:33 +01:00
Lanny91
44260643f6 First conserved current implementation for Wilson fermions only. Not implemented for Gparity or 5D-vectorised Wilson fermions. 2017-04-25 18:00:24 +01:00
Lanny91
1425afc72f Rare Kaon test fix 2017-04-25 17:26:56 +01:00
James Harrison
bd466a55a8 QedFVol: remove charge dependence in chargedProp function of ScalarVP 2017-04-25 10:04:03 +01:00
paboyle
ab66bac4e6 Think I'm getting on top of the reduced cost exterior precomputed list of links 2017-04-25 08:50:26 +01:00
paboyle
56277a11c8 Build a list of whats on the surface 2017-04-24 17:06:15 +01:00
Guido Cossu
752048f410 Merge branch 'develop' into feature/clover 2017-04-24 14:41:20 +01:00
paboyle
916e9e1d3e Merge branch 'feature/half-prec-comms' of https://github.com/paboyle/Grid into feature/half-prec-comms 2017-04-24 10:39:19 +01:00
Peter Boyle
5b55867a7a Slightly cheaper Ext assembly 2017-04-24 05:36:11 -04:00
Peter Boyle
3accb1ef89 Debugged assemply split phase with interior suppression 2017-04-23 19:30:19 -04:00
Peter Boyle
e3d0e31525 Debugged assemply split phase with interior suppression 2017-04-23 19:29:27 -04:00
Peter Boyle
5812eb8a8c Partially fixed. But the comms-overlap does not work yet. 2017-04-22 18:50:25 -04:00
paboyle
4dd3763294 Use OMP as much as possible 2017-04-22 20:35:20 +01:00
paboyle
c429ace748 Cleaner OpenMP use 2017-04-22 20:28:42 +01:00
paboyle
ac58565d0a Dangerous rewrite of the assembly. If I make a mistake the debug will be painful. 2017-04-22 19:31:04 +01:00
paboyle
3703b718aa Mark up a table if a given site only receives from itself; including MPI3 splitting info. 2017-04-22 19:28:37 +01:00
paboyle
b722889234 Try a better load balancing loop 2017-04-22 19:27:41 +01:00
paboyle
abba44a837 Hand unrolled for overlapped comms 2017-04-22 17:45:17 +01:00
paboyle
f301be94ce Fixed 2017-04-22 17:42:31 +01:00
Peter Boyle
1d1b225497 Hand unrolled Nc=3 kernels support split phase compute (on-node, off-node). 2017-04-22 09:05:28 -04:00
Peter Boyle
53a785a3dd Fixing the KNL compile 2017-04-22 08:11:51 -04:00
paboyle
736bf3c866 Major rework of stencil. Half precision and MPI3 now working. 2017-04-22 11:33:50 +01:00
paboyle
b9bbe5d188 L1p config bg/q 2017-04-22 11:33:09 +01:00
paboyle
3844bcf800 If no f16c instructions supported must use software half precision conversion.
This will also become useful on BG/Q, so will move out from SSE4 into a general area.
Lifted the Eigen half precision from web. Looks sensible, but not extensively regressed
against the intrinsics implementation yet.
2017-04-20 15:30:52 +01:00
paboyle
e1a2319d01 Simple compressor moved out of cshift into stencil 2017-04-20 13:18:15 +01:00
paboyle
180c732b4c Move compressors out of Cshift.
Slice iterators would help
2017-04-20 13:17:55 +01:00
paboyle
957a706d0b Useful script 2017-04-20 13:17:44 +01:00
paboyle
d2312e9874 Drop compressor entirely from Cshift to only Stencil. 2017-04-20 13:16:55 +01:00
paboyle
fc4ab9ccd5 Working half precision comms 2017-04-20 11:20:26 +01:00
paboyle
4a340aa5ca Massive compressor rework to support reduced precision comms 2017-04-20 09:28:27 +01:00
paboyle
3b7de792d5 Type comparison in the traits work 2017-04-18 13:28:04 +01:00
paboyle
557c3fa109 Pretty change 2017-04-18 13:27:38 +01:00
paboyle
ec18e9f7f6 Merge branch 'develop' into feature/half-prec-comms 2017-04-18 11:39:39 +01:00
paboyle
a839d5bc55 Updated todo list 2017-04-18 11:22:17 +01:00
paboyle
de41b84c5c Merge branch 'feature/normHP' into develop 2017-04-18 10:57:21 +01:00
paboyle
8e161152e4 MultiRHS solver improvements with slice operations moved into lattice and sped up.
Block solver requires a lot of performance work.
2017-04-18 10:51:55 +01:00
paboyle
3141ebac10 MultiRHS working, starting to optimise. Block doesn't and I thought it already was; puzzled. 2017-04-17 10:50:19 +01:00
paboyle
7ede696126 Non compile of tests fixed 2017-04-16 23:40:00 +01:00
paboyle
bf516c3b81 higher precision reduction variables in norm and inner product 2017-04-15 12:27:28 +01:00
paboyle
441a52ee5d First cut at higher precision reduction 2017-04-15 10:57:21 +01:00
paboyle
a8db024c92 Cleaning up the dense matrix and lanczos sector 2017-04-15 08:54:11 +01:00
paboyle
a9c22d5f43 Verbose removal 2017-04-14 14:38:49 +01:00
paboyle
3ca41458a3 Fix to no USE_FP16 case 2017-04-14 14:20:54 +01:00
paboyle
9e2d29c644 USE_FP16 macro 2017-04-14 14:17:14 +01:00
Guido Cossu
b694996302 adding comments 2017-04-14 13:30:14 +01:00
Peter Boyle
951be75292 Half precision conversion working on AVX512 now too 2017-04-13 17:35:11 +01:00
James Harrison
c8e6f58e24 Fix typos in ScalarVP 2017-04-13 17:04:37 +01:00
Peter Boyle
b9113ed310 Patches for knl 2017-04-13 12:02:12 -04:00
James Harrison
888988ad37 Merge branch 'feature/qed-fvol' of https://github.com/paboyle/Grid into feature/qed-fvol
# Conflicts:
#	lib/qcd/action/fermion/Fermion.h
2017-04-13 15:54:40 +01:00
1407418755 Old qed-fvol program build disabled 2017-04-13 15:32:30 +01:00
a6a0da873f Merge branch 'feature/hadrons' into feature/qed-fvol 2017-04-13 15:31:06 +01:00
paboyle
42fb49d3fd Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-04-13 14:12:47 +01:00
paboyle
2a54c9aaab Merge branch 'feature/block-cg' into develop 2017-04-13 14:12:24 +01:00
paboyle
0957378679 Fixing conditional ugly way 2017-04-13 13:47:56 +01:00
paboyle
2ed6c76fc5 Getting multiline if then fi working 2017-04-13 13:43:13 +01:00
paboyle
d3b9a7fa14 F16c apparently requires AVX, even if the 128 bit are used.
Seems odd.
2017-04-13 13:19:11 +01:00
paboyle
75ea306ce9 Another try at travis 2017-04-13 13:05:32 +01:00
paboyle
4226c633c4 Default to FP16 off again 2017-04-13 12:51:39 +01:00
paboyle
5a4eafbf7e .travis 2017-04-13 12:50:43 +01:00
paboyle
eb8e26018b Travis update for macos 2017-04-13 12:35:11 +01:00
paboyle
db5ea001a3 Update to use Xcode 8.3 since -mfp16 causes SIGILL 2017-04-13 12:22:40 +01:00
paboyle
2846f079e5 Predicate tests on fp16 being enabled 2017-04-13 12:08:05 +01:00
paboyle
1d502e4ed6 FP16 optional compile time 2017-04-13 11:55:24 +01:00
paboyle
73cdf0fffe Drop f16c from SSE because of a macos compile error on travis 2017-04-13 11:23:41 +01:00
paboyle
1c25773319 Trap illegal instructions 2017-04-13 10:51:40 +01:00
paboyle
c38400b26f Trap signals 2017-04-13 10:35:20 +01:00
paboyle
9c3065b860 Debug flags off again 2017-04-13 10:01:32 +01:00
paboyle
94eb829d08 Align cast fixed for __mm128i gcc complained 2017-04-13 08:40:44 +01:00
paboyle
68392ddb5b Exchange in generic
Precision change in AVX, SSE, AVX512, Generic. QPX still to do.
2017-04-13 08:38:12 +01:00
paboyle
cb6b81ae82 Half precision conversion 2017-04-12 19:32:37 +01:00
Lanny91
c382c351a5 Quark test output correction. 2017-04-12 14:36:15 +01:00
Lanny91
af2d6ce2e0 Encapsulated 4D->5D and 5D->4D conversions in separate functions & added corresponding tests. 2017-04-12 14:36:02 +01:00
90ec6eda0c Rare K test solver name fix 2017-04-10 17:48:58 +01:00
Lanny91
ac1253bb76 Corrected solver in rare kaon test 2017-04-10 17:42:55 +01:00
fe8d625694 Merge commit '5e477ec553aa48d7d19b5a7c45d41acbb3392bcb' into feature/rare_kaon 2017-04-10 17:23:37 +01:00
53e76b41d2 Merge branch 'develop' into feature/hadrons 2017-04-10 17:00:53 +01:00
8ef4300412 spurious .dirstamp files removed 2017-04-10 17:00:22 +01:00
98a24ebf31 The macro “magics” is very intensive for the preprocessor in the measurement code which has numerous serialisable classes. Reducing the number of serialisable fields to 64 (instead of 1024) helps a lot, this is enough for now and can be extended trivially if needed in the future. 2017-04-10 16:58:54 +01:00
James Harrison
e4a105a30b Merge branch 'feature/qed-fvol' of https://github.com/paboyle/Grid into feature/qed-fvol 2017-04-10 16:35:01 +01:00
James Harrison
26ebe41fef QedFVol: Implement charged propagator calculation within ScalarVP module 2017-04-10 16:33:54 +01:00
paboyle
b12dc89d26 Commenting and clean up 2017-04-10 20:38:20 +09:00
paboyle
d80d802f9d MultiRHS solver test 2017-04-10 00:12:12 +09:00
paboyle
3d99b09dba Start of blockCG 2017-04-09 23:42:10 +09:00
paboyle
db5f6d3ae3 Verbose fix 2017-04-09 23:41:30 +09:00
paboyle
683550f116 Const args improvement 2017-04-09 23:41:04 +09:00
Lanny91
5e477ec553 Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/rare_kaon 2017-04-07 11:51:09 +01:00
paboyle
55d0329624 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-04-07 11:08:14 +09:00
paboyle
86aaa35294 Christoph needs SchurDiagTwoKappa which is mobius specific. 2017-04-07 11:07:40 +09:00
Guido Cossu
363611ae21 Merge branch 'develop' into feature/clover 2017-04-05 16:26:04 +01:00
Guido Cossu
172d3dc93a Correcting names in tests 2017-04-05 16:24:04 +01:00
Guido Cossu
3b8a791e28 Merge branch 'develop' into feature/clover 2017-04-05 16:20:28 +01:00
Guido Cossu
7b03d8d087 Fixing the remaining merge conflicts 2017-04-05 16:17:46 +01:00
Guido Cossu
4b759b8f2a Merge branch 'feature/hmc_generalise' into feature/scalar_adjointFT 2017-04-05 14:50:28 +01:00
Guido Cossu
8c540333d5 Merge branch 'develop' into feature/hmc_generalise 2017-04-05 14:41:04 +01:00
Guido Cossu
6fd82228bf Working on the derivative 2017-04-05 10:51:44 +01:00
paboyle
5592f7b8c1 Creation mode better implementation 2017-04-05 02:35:34 +09:00
paboyle
35da4ece0b UID fix 2017-04-05 02:18:15 +09:00
paboyle
061b15b9e9 Merge branch 'feature/sitmo-skipahead' into develop 2017-04-05 01:24:49 +09:00
Guido Cossu
ca6efc685e Merge branch 'develop' into feature/clover 2017-04-04 10:19:02 +01:00
1e496fee74 Merge branch 'develop' into feature/qed-fvol
# Conflicts:
#	lib/qcd/action/fermion/Fermion.h
2017-04-03 19:02:57 +01:00
ff4e54ef80 Merge branch 'develop' into feature/hadrons 2017-04-03 18:56:21 +01:00
paboyle
561426f6eb Clean up 2017-04-02 23:13:48 +09:00
paboyle
83f6fab8fa Big/Small crush test, and fast SITMO rng init, faster but not ideal
MT and Ranlux init.
2017-04-02 12:10:51 +09:00
paboyle
0fade84ab2 No random device 2017-04-02 00:29:40 +09:00
paboyle
9dc7ca4c3b Sitmo fast init 2017-04-02 00:28:22 +09:00
paboyle
935d82f5b1 sanity checks 2017-04-02 00:27:28 +09:00
paboyle
9cbcdd65d7 No random device seed 2017-04-02 00:26:57 +09:00
paboyle
f18f5ed926 Drop random device 2017-04-02 00:26:26 +09:00
paboyle
d1d63a4f2d sitmo default 2017-04-02 00:26:05 +09:00
paboyle
7e5faa0f34 Multiple RNGs 2017-04-02 00:25:44 +09:00
paboyle
6af459cae4 Christoph's coefficients. 2017-03-31 17:07:43 +09:00
paboyle
1c4bc7ed38 Debugged staggered conventions 2017-03-31 14:41:48 +09:00
Lanny91
cd1bd921bd Reduced code duplication for Weak Hamiltonian contraction modules 2017-03-30 18:02:14 +01:00
Guido Cossu
b8ae787b5e Correcting a simple typo 2017-03-30 11:33:15 +01:00
Guido Cossu
fbe2c3b5f9 ]Merge branch 'develop' into feature/clover 2017-03-30 11:18:31 +01:00
Guido Cossu
1ed69816b9 First steps for the force term 2017-03-30 11:14:27 +01:00
Lanny91
fff5751b1a HADRONS: Updated rare kaon test program, including all contractions. Sink smearing still to be implemented. 2017-03-30 10:57:01 +01:00
Lanny91
2c81696fdd HADRONS: 4pt Weak + current disconnected topology (e.g. for rare neutral kaon decays) 2017-03-30 10:37:17 +01:00
Lanny91
c9dc22efa1 HADRONS: Standalone disconnected loop contraction. 2017-03-30 10:33:18 +01:00
Lanny91
0ab04a000f HADRONS: 3pt contraction with gamma insertion between two propagators. 2017-03-30 10:30:58 +01:00
paboyle
93ea5d9468 Pretty code 2017-03-30 15:00:03 +09:00
paboyle
1ec5d32369 Chulwoo's test to zmobius helped me shake out 2017-03-30 13:45:13 +09:00
paboyle
9fd23faadf Pretty layout 2017-03-30 13:44:45 +09:00
paboyle
10e4fa0dc8 Template instantiation improvements 2017-03-30 13:44:25 +09:00
paboyle
c4aca1dde4 Conjugate coefficients on adjoint 2017-03-30 13:44:05 +09:00
paboyle
b9e8ea3aaa conjugate coefficient on the dagger 2017-03-30 13:43:13 +09:00
paboyle
077aa728b9 Fix the ZMobius (I think) 2017-03-30 13:42:09 +09:00
paboyle
a8d83d886e Macro controls 2017-03-30 13:31:34 +09:00
paboyle
7fd46eeec4 Trailing whitespace removal 2017-03-30 13:31:10 +09:00
paboyle
e0c4eeb3ec Compiles again 2017-03-30 13:30:45 +09:00
paboyle
cb9a297a0a Chulwoo's Zmobius test 2017-03-30 13:30:25 +09:00
paboyle
2b115929dc Small AVX512 asm ifdef patch 2017-03-29 18:51:23 +09:00
paboyle
5c6571dab1 Merge branch 'feature/bgq-asm' into develop 2017-03-29 18:48:55 +09:00
paboyle
417ec56cca Release candidate 2017-03-29 05:45:33 -04:00
paboyle
756bc25008 Verbose header print by default 2017-03-29 04:44:17 -04:00
paboyle
35695ba57a Bug fix in MPI3 2017-03-29 04:43:55 -04:00
paboyle
81ead48850 Log any errors to a file 2017-03-29 04:39:52 -04:00
paboyle
d805867e02 Better init 2017-03-28 13:25:05 -04:00
paboyle
e55a751e23 Merge branch 'feature/bgq-asm' of https://github.com/paboyle/Grid into feature/bgq-asm 2017-03-28 12:20:12 -04:00
paboyle
358eb75995 Shorten loop 2017-03-28 12:20:02 -04:00
paboyle
98f9318279 Build on AVX2 and MPI passing with clang++ 2017-03-28 23:16:04 +09:00
paboyle
4b17e8eba8 Merge branch 'develop' into feature/bgq-asm
Conflicts:
	lib/qcd/action/fermion/Fermion.h
	lib/qcd/action/fermion/WilsonFermion.cc
	lib/util/Init.cc
	tests/Test_cayley_even_odd_vec.cc
2017-03-28 04:49:30 -04:00
paboyle
75112a632a IO improvements to fail on IO error 2017-03-28 02:28:04 -04:00
paboyle
18bde08d1b Merge branch 'feature/staggering' into develop 2017-03-28 15:25:55 +09:00
James Harrison
9f755e0379 Add functions momD1 and momD2 to ScalarVP 2017-03-27 16:49:18 +01:00
James Harrison
4512dbdf58 Rename module ScalarFV to ScalarVP 2017-03-27 15:02:16 +01:00
James Harrison
483fd3cfa1 Add propagator expansion terms as inputs to ScalarFV 2017-03-27 13:24:51 +01:00
Guido Cossu
3750b9ffee Deleting MPI test for OSX in Travis 2017-03-27 16:53:32 +09:00
Guido Cossu
5e549ebd8b Adding force terms 2017-03-27 16:43:15 +09:00
Guido Cossu
fff484eca5 Populating Clover fermions methods 2017-03-27 15:12:57 +09:00
Guido Cossu
5fdc05782b More in the clover fermion class 2017-03-27 10:54:16 +09:00
paboyle
d45cd7e677 Adding a simple read of NERSC test 2017-03-26 09:24:26 -04:00
paboyle
4e96679797 Added a bnl log 2017-03-25 09:25:46 -04:00
James Harrison
85516e9c7c Output all terms of scalar propagator separately 2017-03-24 17:13:55 +00:00
James Harrison
0c006fbfaa Add ScalarFV inputs to ScalarFV.hpp 2017-03-24 11:59:09 +00:00
James Harrison
54c10a42cc Add source and emField inputs to ScalarFV module 2017-03-24 11:42:32 +00:00
Guido Cossu
a04eb7df5d Starting Clover term 2017-03-24 12:43:28 +09:00
Guido Cossu
4c1ea8677e Small cosmetic changes and vscode gitignore 2017-03-23 14:09:35 +09:00
paboyle
fc93f0b2ec Save some code for static huge tlb's. It is ifdef'ed out but an interesting root only experiment.
No gain from it.
2017-03-21 22:30:29 -04:00
paboyle
8c8473998d Average over whole cluster the comm time. 2017-03-21 22:29:51 -04:00
James Harrison
ef0fe2bcc1 Added empty ScalarFV module 2017-03-21 11:39:46 +00:00
Guido Cossu
120fb59978 Adding tests for WilsonFlow classes 2017-03-21 16:11:35 +09:00
Guido Cossu
fd56b3ff38 Merge branch 'develop' into feature/hmc_generalise 2017-03-21 13:33:41 +09:00
Guido Cossu
0ec6829edc Fixing compilation errors for the WilsonFlow 2017-03-21 13:06:32 +09:00
Guido Cossu
18b7845b7b Adding WilsonFlow smearing 2017-03-21 11:52:05 +09:00
Guido Cossu
3d0fe15374 Added topological charge measurement 2017-03-17 16:14:57 +09:00
Guido Cossu
91886068fe Fixed seg fault for observable modules 2017-03-17 13:59:31 +09:00
Guido Cossu
6d1e9e5f92 Small cleanup of the observables 2017-03-17 11:42:55 +09:00
Guido Cossu
b640230b1e Moving hmc observables in a different directory 2017-03-17 11:40:17 +09:00
paboyle
e7c36771ed ZMobius prep for asm 2017-03-15 14:23:33 -04:00
Guido Cossu
038b6ee9cd Fixing JSON compilation error 2017-03-16 01:09:24 +09:00
Guido Cossu
38806343a8 Improving efficiency of the force term 2017-03-15 15:16:16 +09:00
Guido Cossu
831ca4e3bf Added Scalar action for fields in the adjoint representation 2017-03-14 14:55:18 +09:00
paboyle
8dc57a1e25 Layout change 2017-03-13 11:11:46 +00:00
paboyle
f57bd770b0 Merge branch 'bugfix/dminus' into feature/bgq-asm 2017-03-13 11:11:03 +00:00
paboyle
4ed10a3d06 Merge branch 'develop' into feature/bgq-asm 2017-03-13 11:10:10 +00:00
Peter Boyle
dfefc70b57 Merge pull request #93 from Lanny91/hotfix/qpx
Some fixes for QPX and generic SIMD types.
2017-03-13 09:31:26 +00:00
paboyle
b64e004555 MPI run fail on macos 2017-03-13 01:59:01 +00:00
paboyle
447c5e6cd7 Z mobius hermiticity correction 2017-03-13 01:30:43 +00:00
paboyle
8b99d80d8c Merge branch 'bgq-asm-shmemfixes' into feature/bgq-asm 2017-03-12 23:30:09 +00:00
Guido Cossu
b3dede4dd3 Merge branch 'develop' into feature/hmc_generalise 2017-03-10 23:57:37 +09:00
Guido Cossu
4e34132f4d Correcting modules use in test files 2017-03-10 23:54:53 +09:00
Guido Cossu
c07cb10247 Merge branch 'feature/hmc_generalise' of https://github.com/paboyle/Grid into feature/hmc_generalise 2017-03-10 22:37:25 +09:00
Guido Cossu
d7767a2a62 Few more tests 2017-03-10 22:33:48 +09:00
Guido Cossu
ec035983fd Fixing the implicit integration 2017-03-01 11:56:35 +00:00
paboyle
3901b17ade timeings from BNL 2017-02-28 17:06:45 -05:00
paboyle
af230a1fb8 Average the time across the whole machine for outliers 2017-02-28 17:05:22 -05:00
Christopher Kelly
06a132e3f9 Fixes to SHMEM comms 2017-02-28 13:31:54 -08:00
Guido Cossu
596dcd85b2 Auxiliary fields 2017-02-27 13:16:38 +00:00
paboyle
96d44d5c55 Header fix 2017-02-24 19:12:11 -05:00
Guido Cossu
7270c6a150 Integrator works now 2017-02-24 17:03:42 +00:00
Lanny91
7fe797daf8 SIMD vector length sanity checks 2017-02-23 16:49:44 +00:00
Lanny91
486a01294a Corrected QPX SIMD width 2017-02-23 16:47:56 +00:00
paboyle
586a7c90b7 Merge branch 'develop' into feature/bgq-asm 2017-02-23 00:26:59 +00:00
paboyle
e099dcdae7 Merge branch 'develop' into feature/bgq-asm 2017-02-23 00:25:29 +00:00
paboyle
4e7ab3166f Refactoring header layout 2017-02-22 18:09:33 +00:00
paboyle
aac80cbb44 Bug fix from Chris K 2017-02-22 12:19:09 -05:00
Lanny91
c80948411b Added tRotate function and MaddRealPart struct for generic SIMD, bugfix in MultRealPart and minor cosmetic changes. 2017-02-22 14:57:10 +00:00
Lanny91
95625a7bd1 Use Grid Integer type 2017-02-22 13:09:32 +00:00
Lanny91
0796696733 Emulated integer vector type for QPX and generic SIMD instruction sets. 2017-02-22 12:01:36 +00:00
Peter Boyle
f8b9ad7d50 Merge pull request #91 from sunpho84/public_modules_memebers
making public same serializable parameters in HMC Module
2017-02-22 00:53:20 +00:00
Peter Boyle
04a1959895 Merge pull request #90 from sunpho84/liming
adding --with switch to pass lime path
2017-02-22 00:52:53 +00:00
azusayamaguchi
1c30e9a961 Verified 2017-02-21 23:01:25 +00:00
Francesco Sanfilippo
93cc270016 making public same serializable parameters in HMC Module
RNGModuleParameters
GridModuleParameters
2017-02-21 23:11:56 +01:00
Francesco Sanfilippo
29b60f7e1a adding --with switch to pass lime path 2017-02-21 23:09:39 +01:00
azusayamaguchi
bf7e3f20d4 Staggaered fermion optimised version 2017-02-21 14:35:42 +00:00
Guido Cossu
902afcfbaf Adding metric and the implicit steps 2017-02-21 11:30:57 +00:00
paboyle
3ae92fa2e6 Global changes to parallel_for structure.
Move the comms flags to more sensible names
2017-02-21 05:24:27 -05:00
paboyle
3906cd2149 Stencil fix on BNL KNL system 2017-02-20 17:51:31 -05:00
paboyle
5a1fb29db7 Useful debug code info to preserve 2017-02-20 17:49:23 -05:00
paboyle
661fc4d3d1 Debug AVX512 exchange code paths 2017-02-20 17:48:36 -05:00
paboyle
41009cc142 Move excange into the stencil only; keep Cshift fully general 2017-02-20 17:48:04 -05:00
paboyle
37720c4db7 Count bytes off node only 2017-02-20 17:47:40 -05:00
paboyle
1a30455a10 1000 iters on bmark for more accurate timing 2017-02-20 17:47:01 -05:00
Guido Cossu
97a6b61551 Covariant laplacian and implicit integration 2017-02-20 11:17:27 +00:00
paboyle
cd0da81196 Merge branch 'feature/bgq-asm' of https://github.com/paboyle/Grid into feature/bgq-asm 2017-02-16 18:52:30 -05:00
paboyle
f246fe3304 Improvements to avx for invertible to avoid latent bug 2017-02-16 23:52:44 +00:00
paboyle
8a29c16bde Faster gather exchange 2017-02-16 23:52:22 +00:00
paboyle
d68907fc3e Debug temp 2017-02-16 18:51:35 -05:00
paboyle
5c0adf7bf2 Make clang happy with parenthesis 2017-02-16 23:51:33 +00:00
paboyle
be3a8249c6 Faster gather 2017-02-16 23:51:15 +00:00
paboyle
bd600702cf Vectorise the XYZT face gathering better.
Hard coded for simd_layout <= 2 in any given spread out direction; full generality is inconsistent
with efficiency.
2017-02-15 11:11:04 +00:00
Lanny91
f011bdb869 Fixed overwrite of pminus projection in construction of 4d propagator from 5d. 2017-02-14 14:07:17 +00:00
Guido Cossu
bafb101e4f Testing different versions of the Laplacian 2017-02-13 15:38:11 +00:00
Guido Cossu
08fdf05528 Added and tested the covariant laplacian + CG solver 2017-02-13 15:05:01 +00:00
paboyle
aca7a3ef0a Optimisation control improvements 2017-02-10 18:22:31 -05:00
Guido Cossu
9e72a6b22e Reverting to Xcode 7.3 2017-02-10 12:57:03 +00:00
Guido Cossu
1c12c5612c Xcode 8.2 for travis 2017-02-10 12:12:01 +00:00
Guido Cossu
a8193c4bcb Correcting travis compilation on gcc 2017-02-10 10:59:30 +00:00
Guido Cossu
c3d7ec65fa All tests compile. 2017-02-10 10:27:51 +00:00
Guido Cossu
8b6a6c8236 Resolving small merge conflict 2017-02-09 16:20:24 +00:00
Guido Cossu
e0571c872b Merge branch 'develop' into feature/hmc_generalise 2017-02-09 16:12:00 +00:00
Guido Cossu
c67f41887b Reverting parameters to original 2017-02-09 15:59:56 +00:00
Guido Cossu
84687ccf1f Handling an Intel compiler warning for Json class 2017-02-09 15:33:33 +00:00
Guido Cossu
3274561cf8 Cleanup 2017-02-09 15:18:38 +00:00
e08fbb3771 Merge pull request #84 from Lanny91/feature/rare_kaon
Rare Kaon decay contraction code
2017-02-08 08:23:34 -08:00
Lanny91
d7464aa0fe Switched from XmlWriter to CorrWriter in contraction code 2017-02-08 16:13:44 +00:00
Lanny91
00d29153f0 Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/rare_kaon 2017-02-08 16:11:15 +00:00
2ce989f220 Hadrons: default I/O to HDF5 2017-02-08 07:50:05 -08:00
Lanny91
d7a1dc85be Revert "Hadrons: test for rare kaon contraction code."
This reverts commit 1e257a1251.
2017-02-08 13:23:05 +00:00
Lanny91
fc19503673 Removed MSink namespace. 2017-02-08 13:17:39 +00:00
Lanny91
beba824136 Make use of GammaL class in Weak Hamiltonian contractions 2017-02-08 12:45:39 +00:00
Lanny91
6ebf8b12b6 Removed unnecessary repeat of write in Weak Hamiltonian contractions 2017-02-08 12:43:13 +00:00
Lanny91
e5a7ed4362 Moved write outside of loop, some physics corrections 2017-02-08 12:29:33 +00:00
Lanny91
b9f7ea47c3 Access hasModule function directly from Environment instance. 2017-02-08 10:10:06 +00:00
Lanny91
06f7ee202e Revert "Add function to say whether or not a module exists in application class"
This reverts commit 522f6bf91a.
2017-02-08 10:08:18 +00:00
Lanny91
2b2fc6453f Fixed single precision compatibility issues 2017-02-07 13:59:29 +00:00
Lanny91
bdd2765461 Added missing allocation of Weak Hamiltonian result vector 2017-02-07 13:06:42 +00:00
paboyle
2c246551d0 Overlap comms and compute options in wilson kernels 2017-02-07 01:37:10 -05:00
paboyle
71ac2e7940 Faster RNG init 2017-02-07 01:33:23 -05:00
paboyle
2bf4688e83 Running on BNL KNL 2017-02-07 01:32:10 -05:00
paboyle
a48ee6f0f2 Don't use MPI3_leader any more. No real gain and complex 2017-02-07 01:31:24 -05:00
paboyle
73547cca66 MPI3 working i think 2017-02-07 01:30:02 -05:00
paboyle
123c673db7 Policy to control async or sync SendRecv 2017-02-07 01:24:54 -05:00
paboyle
61f82216e2 Communicator Policy, NodeCount distinct from Rank count 2017-02-07 01:22:53 -05:00
paboyle
8e7ca92278 Debugged cshift case 2017-02-07 01:21:32 -05:00
paboyle
485ad6fde0 Stencil working in SHM MPI3 2017-02-07 01:20:39 -05:00
paboyle
6ea2184e18 OMP define change 2017-02-07 01:17:16 -05:00
paboyle
fdc170b8a3 Parallel fors in lattice transfer 2017-02-07 01:16:39 -05:00
paboyle
060da786e9 Comms benchmark improvements 2017-02-07 01:07:39 -05:00
paboyle
85c7bc4321 Bug fixes for cases that physics code couldn't hit but latent
and discovered on KNL (long vector, y SIMD dir) and checker dir set to y.
Remove the assertions on these code paths now they are tested.
2017-02-07 01:01:15 -05:00
paboyle
0883d6a7ce Overlap comms compute support; make reg naming consistent with bgq aasm 2017-02-07 00:59:32 -05:00
paboyle
9ff97b4711 Improved stencil tests passing all on KNL multinode 2017-02-07 00:58:34 -05:00
paboyle
b5e9c900a4 Better printing and signal handling options 2017-02-07 00:57:55 -05:00
paboyle
4bbdfb434c Overlap comms compute modifications 2017-02-07 00:57:01 -05:00
Lanny91
4a45c06dd7 Code cleaning and addition of Weak Hamiltonian contraction log message 2017-02-06 20:12:30 +00:00
Lanny91
d6a7d7d1e0 Hadrons: added missing momentum parameter in rare kaon contraction test 2017-02-06 18:15:49 +00:00
Lanny91
1a122a0dd8 Hadrons: corrected gamma matrix inputs in rare kaon test 2017-02-06 17:35:41 +00:00
Lanny91
20e20733e8 Merge branch 'feature/hadrons' into feature/rare_kaon 2017-02-06 14:12:21 +00:00
Lanny91
b7cd1a19e3 Utilities for reading and writing "pair" objects. 2017-02-06 14:08:59 +00:00
Lanny91
f510002a62 Merge remote-tracking branch 'paboyle/feature/hadrons' into feature/hadrons 2017-02-03 14:37:34 +00:00
eedcaf6470 Merge branch 'feature/hadrons' into feature/qed-fvol 2017-02-01 15:53:10 -08:00
Lanny91
1e257a1251 Hadrons: test for rare kaon contraction code. 2017-02-01 16:36:40 +00:00
Lanny91
522f6bf91a Add function to say whether or not a module exists in application class 2017-02-01 16:36:08 +00:00
Lanny91
d35d87d2c2 Weak Hamiltonian Eye-type contraction execution 2017-02-01 16:33:24 +00:00
Lanny91
74a5cda84b Removed unnecessary "3pt" labels 2017-02-01 15:03:49 +00:00
Lanny91
5be05d85b8 Fixed collision of Wall source and sink header ifdefs 2017-02-01 13:56:22 +00:00
Lanny91
35ac85aea8 Updated Weak Hamiltonian contractions to use zero-flop gamma matrices 2017-02-01 12:57:34 +00:00
Lanny91
fa237401ff Consistent variable name in macro 2017-02-01 12:56:55 +00:00
Lanny91
97053adcb5 Merge branch 'feature/hadrons' into feature/rare_kaon 2017-02-01 10:13:29 +00:00
Lanny91
f8fbe4d7a3 Merge remote-tracking branch 'paboyle/feature/hadrons' into feature/hadrons
# Conflicts:
#	extras/Hadrons/Modules/MContraction/Meson.hpp
#	tests/hadrons/Test_hadrons_meson_3pt.cc

Updated Meson.hpp to utilise zero-flop gamma matrices.
2017-02-01 09:27:00 +00:00
Lanny91
ef31c012bf Merge remote-tracking branch 'paboyle/develop' into feature/hadrons 2017-01-31 17:36:10 +00:00
Lanny91
9e9f621d5d Hadrons: added Weak Hamiltonian module dependencies, some reformatting. 2017-01-30 17:54:21 +00:00
Lanny91
651e1a7cbc Hadrons: Momentum inserted as multiples of 2*pi/L 2017-01-30 17:14:33 +00:00
Lanny91
c4d3672720 Hadrons: Momentum projection in meson module. 2017-01-30 17:09:04 +00:00
Guido Cossu
16be6d378c Now action factory support different Fields (templated) 2017-01-30 14:22:41 +00:00
Guido Cossu
f05d0565aa Adding ScalarField theory 2017-01-30 10:59:28 +00:00
b39f0d1fb6 Hadrons: default I/O to HDF5 if possible, XML otherwise 2017-01-27 18:12:35 -08:00
9f1267dfe6 Merge branch 'feature/qed-fvol' of github.com:paboyle/Grid into feature/qed-fvol 2017-01-27 17:06:34 -08:00
2e90285232 Merge pull request #80 from jch1g10/feature/qed-fvol
ChargedProp: remove ScalarField fs
2017-01-27 17:06:13 -08:00
e254de982e Merge branch 'feature/qed-fvol' of github.com:paboyle/Grid into feature/qed-fvol 2017-01-27 17:02:35 -08:00
28d99b5297 Merge branch 'develop' into feature/qed-fvol 2017-01-27 16:59:53 -08:00
Lanny91
9bf4108d1f Weak Hamiltonian contraction modules, for Eye and NonEye contraction topologies. Execution for NonEye type diagrams has been implemented, but not yet for Eye type. 2017-01-27 16:58:11 +00:00
James Harrison
ee93f0218b ChargedProp: remove ScalarField fs 2017-01-27 12:22:48 +00:00
Guido Cossu
6929a84c70 Reformatting files 2017-01-27 11:54:44 +00:00
Guido Cossu
5c779a789b Moving registrations in an independent file 2017-01-27 11:23:51 +00:00
161ed102a5 Merge pull request #79 from jch1g10/feature/qed-fvol
Fixed bug in ChargedProp
2017-01-26 19:49:14 -08:00
Guido Cossu
e863a948e3 Cleaning up files and directories 2017-01-26 15:24:49 +00:00
James Harrison
f65a585236 ChargedProp: Switch to HDF5 output 2017-01-26 15:02:30 +00:00
Lanny91
977f34dca6 Added missing typename 2017-01-26 13:18:33 +00:00
Lanny91
90ad956340 Merge branch 'develop' of https://github.com/paboyle/Grid into feature/rare_kaon 2017-01-26 12:08:41 +00:00
Guido Cossu
7996f06335 Commented out registrations.
Move to an independent file that is linked only for the factory managed HMC
2017-01-25 18:27:45 +00:00
Guido Cossu
7b40a3e3e5 Reorganizing files 2017-01-25 18:09:46 +00:00
Guido Cossu
f7fbbaaca3 Compiles after merging 2017-01-25 12:11:58 +00:00
Guido Cossu
17629b8d9e Merge branch 'develop' into feature/hmc_generalise 2017-01-25 11:33:53 +00:00
Guido Cossu
0baa20d292 Againg fixing compilation on Travis, no LIME lib present 2017-01-25 11:18:44 +00:00
Guido Cossu
4571c918a4 Fixing compilation error when compiling without LIME 2017-01-25 11:14:43 +00:00
Guido Cossu
5251ea4d30 Adding more fermion action modules, generalised DWF 2017-01-25 11:10:44 +00:00
Guido Cossu
7f456b4173 👷 Added all pseudofermion actions to the serialiser 2017-01-24 13:57:32 +00:00
James Harrison
ae99e99da2 Fixed bug in ChargedProp 2017-01-23 17:27:50 +00:00
Lanny91
c291ef77b5 Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/hadrons 2017-01-23 15:24:47 +00:00
Lanny91
7dd2764bb2 Wall sink smearing 2017-01-23 15:17:54 +00:00
Guido Cossu
244f8fb6dc Added JSON parser (without NextElement) 2017-01-23 14:57:38 +00:00
azusayamaguchi
05c1924819 Timing loop change 2017-01-23 10:43:45 +00:00
f3ca29af6c Merge branch 'feature/hadrons' into feature/qed-fvol 2017-01-21 13:41:05 -08:00
37988221a8 Merge branch 'feature/serialisation-hdf5' into feature/qed-fvol 2017-01-20 14:04:20 -08:00
Guido Cossu
27dfe816fa Added TwoFlavorsEO
Had to remove a conformability check in the Derivative of SchurDiff,
see the comments in the file
2017-01-20 16:59:31 +00:00
Lanny91
af29be2c90 Simplified operation of meson module. Result has been modified to output one contraction at a time for each pair of gamma insertions at source and sink. 2017-01-20 16:38:50 +00:00
Guido Cossu
f96fac0aee All functionalities ready.
Todo: add all the fermion action modules
2017-01-20 12:56:20 +00:00
7a327a3f28 Merge branch 'develop' into feature/qed-fvol 2017-01-19 14:22:36 -08:00
Lanny91
07f2ebea1b Meson module now takes list of gamma matrices to insert at source and sink. 2017-01-19 22:18:42 +00:00
Guido Cossu
851f2ad8ef Adding fermions actions support in the factories 2017-01-19 10:00:02 +00:00
Guido Cossu
23e0561dd6 Added all required functionalities, time for cleaning
All actions to be added
2017-01-18 16:31:51 +00:00
Lanny91
8ae1a95ec6 Legal banners and module descriptions 2017-01-17 18:14:20 +00:00
Lanny91
82b7d4eaf0 Added noise loop dependencies 2017-01-17 15:58:32 +00:00
Lanny91
78774fbdc0 Construct loop propagator 2017-01-17 15:29:45 +00:00
Guido Cossu
924130833e Moved more parameters to serialization 2017-01-17 13:22:18 +00:00
Guido Cossu
0157274762 HMC factories 2017-01-17 10:46:49 +00:00
Guido Cossu
87e8aad5a0 Added support for input file HMC modules (missing the actions yet) 2017-01-16 16:07:12 +00:00
Guido Cossu
c6f59c2933 Adding factories 2017-01-16 10:18:09 +00:00
Lanny91
b7f90aa011 Added momentum choice for wall source 2017-01-13 15:54:19 +00:00
92f8950a56 Charged scalar prop: cleaning and output 2017-01-13 13:30:56 +00:00
65987a8a58 First implementation of the scalar QED propagator, runs but absolutely not checked 2017-01-12 20:44:23 +00:00
889d828bc2 Code cleaning 2017-01-12 18:17:44 +00:00
Lanny91
f22b79da8f Added missing type aliases 2017-01-12 12:52:12 +00:00
Lanny91
3855673ebf Added header for wall source 2017-01-12 11:42:37 +00:00
Lanny91
4db82da0db Wall sources 2017-01-12 11:41:10 +00:00
Lanny91
0cdc3d2fa5 Merge remote-tracking branch 'refs/remotes/paboyle/feature/hadrons' into feature/hadrons 2017-01-12 11:26:55 +00:00
ad98b6193d creating the necessary caches for the FFT EM scalar propagator 2017-01-11 18:40:43 +00:00
fc760016b3 More uniform cache name for scalar momentum propagators 2017-01-11 18:39:58 +00:00
2da86f7dae Merge branch 'feature/hadrons' into feature/qed-fvol 2017-01-11 18:38:05 +00:00
Guido Cossu
0dfda4bb90 Working on the RNGModule 2017-01-09 11:06:18 +00:00
Guido Cossu
1189ebc8b5 Cleaning up the checkpointers interface 2017-01-05 15:52:52 +00:00
97843e2b58 Hadrons: free scalar buffer fix and output 2017-01-05 14:58:55 +00:00
82b3f54697 scalar free propagator fix 2017-01-05 14:58:07 +00:00
Guido Cossu
1bb8578173 Added module for checkpointers 2017-01-05 13:09:32 +00:00
Peter Boyle
c3b6d573b9 Merge branch 'feature/bgq-asm' of https://github.com/paboyle/Grid into feature/bgq-asm 2016-12-30 22:42:17 +00:00
673994b281 Hadrons: modules for scalar propagators 2016-12-29 22:44:58 +01:00
bbc0eff078 Hadrons: scalar sources 2016-12-29 22:44:22 +01:00
4c60e31070 Hadrons: code cleaning 2016-12-29 22:44:08 +01:00
afbf7d4c37 QED Gimpl moved in Photon.h 2016-12-29 22:43:38 +01:00
8c3cc32364 Scalar action 2016-12-29 22:42:58 +01:00
Peter Boyle
1e179c903d Worried about integer; suspect where statements are broken 2016-12-27 17:46:38 +00:00
Peter Boyle
669cfca9b7 No inline 2016-12-27 17:45:40 +00:00
Peter Boyle
ff2f559a57 Remove inline on gather optimised path 2016-12-27 17:45:19 +00:00
Peter Boyle
03c81bd902 Merge branch 'feature/bgq-asm' of https://github.com/paboyle/Grid into feature/bgq-asm 2016-12-27 11:25:35 +00:00
Peter Boyle
a869addef1 Stats switch off 2016-12-27 11:25:22 +00:00
Peter Boyle
1caa3fbc2d LOCK UNLOCK only 2016-12-27 11:24:45 +00:00
Peter Boyle
3d21297bbb Call the fast path compressor for wilson kernels to avoid if else on projector 2016-12-27 11:23:13 +00:00
Peter Boyle
25efefc5b4 Back to original thread policy post test 2016-12-23 09:49:04 +00:00
Peter Boyle
eabf316ed9 BGQ performance ASM 2016-12-22 21:56:08 +00:00
Peter Boyle
04ae7929a3 BGQ or KNL assembler now 2016-12-22 17:53:22 +00:00
Peter Boyle
caba0d42a5 L1p controls 2016-12-22 17:52:55 +00:00
Peter Boyle
9ae81c06d2 L1p controls for BG/Q 2016-12-22 17:52:21 +00:00
Peter Boyle
0903c48caa Hot start SU3 2016-12-22 17:51:45 +00:00
Peter Boyle
7dc36628a1 QPX finishing 2016-12-22 17:50:48 +00:00
Peter Boyle
b8cdb3e90a Debug hack; raises from 62GF/s to 72 GF/s per node on BG/Q 2016-12-22 17:50:14 +00:00
Peter Boyle
5241245534 Default to static scheduling 2016-12-22 17:49:21 +00:00
Dr Peter Boyle
960316e207 type conversion in printf 2016-12-22 17:27:01 +00:00
Guido Cossu
5214846341 Adding a resource manager 2016-12-22 12:41:56 +00:00
4c3fd9fa3f stochastic QED field module in Hadrons 2016-12-22 00:29:41 +01:00
17b3a10d46 stochastic QED: function to cache 1/sqrt(khat^2) 2016-12-22 00:29:19 +01:00
149a46b92c Merge branch 'feature/hadrons' into feature/qed-fvol 2016-12-22 00:26:43 +01:00
Guido Cossu
ce1a115e0b Removing redundant arguments for integrator functions, step 1 2016-12-20 17:51:30 +00:00
db9c28a773 qed-fvol: Photon parameter name fix 2016-12-20 12:41:39 +01:00
9ac3ac41df serialisable Photon parameters 2016-12-20 12:41:01 +01:00
2af9ab9034 old Makefile cleaning 2016-12-20 12:40:26 +01:00
6f1ea96293 Merge branch 'develop' into feature/qed-fvol 2016-12-20 12:33:02 +01:00
paboyle
3f2d53a994 BGQ assembler beginning 2016-12-20 10:21:26 +00:00
azusayamaguchi
df9108154d Debugged 2 versions of assembler; ls vectorised, xyzt vectorised 2016-12-17 23:47:51 +00:00
azusayamaguchi
b3e7f600da Partial implementation of 4d vectorisation assembler 2016-12-16 23:50:30 +00:00
azusayamaguchi
d4071daf2a Template specialise 2016-12-16 22:28:29 +00:00
azusayamaguchi
a2a6329094 AVX512 only for ASM compilation 2016-12-16 22:03:29 +00:00
azusayamaguchi
eabc577940 Assembler possibly working 2016-12-16 16:55:36 +00:00
2e3c5890b6 qed-fvol: build fix 2016-12-15 20:06:46 +00:00
bc6678732f Merge branch 'feature/hadrons' into feature/qed-fvol
# Conflicts:
#	Makefile.am
#	configure.ac
#	lib/qcd/action/gauge/Photon.h
2016-12-15 19:53:00 +00:00
b10ae00c8a Merge commit '6ad73145bc9754a5f26093eee5a34473ba0cff82' into feature/qed-fvol 2016-12-15 19:48:58 +00:00
Azusa Yamaguchi
0cd6b1858c Merge branch 'develop' of https://github.com/paboyle/Grid into feature/staggering 2016-12-14 09:23:22 +00:00
Guido Cossu
0bd296dda4 Adding check of the Dag part in the benchmark 2016-12-14 03:15:09 +00:00
Guido Cossu
af0ccdd8e9 Moving output order 2016-12-14 02:02:42 +00:00
Guido Cossu
2fb92dbc6e Cleaning up previous debug lines 2016-12-13 07:53:43 +00:00
Guido Cossu
5c74b6028b Commit for debugging, lot of IO 2016-12-13 06:35:30 +00:00
Guido Cossu
e0be2b6e6c Adding a new tests for the Ls vec CG 2016-12-13 04:59:18 +00:00
Guido Cossu
ef72f322d2 consistency of tests 2016-12-13 02:24:20 +00:00
Azusa Yamaguchi
426197e446 Nc=3 2016-12-12 09:10:54 +00:00
Azusa Yamaguchi
99e2c1e666 Kernels options 2016-12-12 09:08:53 +00:00
Azusa Yamaguchi
1440565a10 Decrease verbosity 2016-12-12 09:08:04 +00:00
Azusa Yamaguchi
e9f0c0ea39 Staggered kernels options 2016-12-12 09:07:38 +00:00
Guido Cossu
7bc2065113 Adding report at the end of the DWF HMC tests 2016-12-12 04:21:34 +00:00
Guido Cossu
2bd4233919 Completed testing of the HMC for Ls vectorised version (on AVX2) 2016-12-07 04:56:37 +00:00
Guido Cossu
143c70e29f Debugged the threaded version. Cleaning up 2016-12-07 04:40:25 +00:00
Guido Cossu
b812d5e39c Added single threaded version of the derivative for the Ls vectorised DWF 2016-12-06 16:31:13 +00:00
Guido Cossu
01480da0a8 Merge branch 'develop' into feature/hmc_generalise 2016-12-05 05:10:27 +00:00
James Harrison
6ad73145bc Calculate Wilson loop average over multiple configurations. 2016-11-30 15:17:22 +00:00
Azusa Yamaguchi
c097fd041a Merge branch 'develop' of https://github.com/paboyle/Grid into feature/staggering 2016-11-29 13:44:17 +00:00
Azusa Yamaguchi
77fb25fb29 Push 5d tests 2016-11-29 13:43:56 +00:00
Azusa Yamaguchi
389e0a77bd Staggerd Fermion 5D 2016-11-29 13:13:56 +00:00
f7293f2ddb Merge pull request #69 from jch1g10/feature/qed-fvol 2016-11-26 07:04:04 +09:00
Azusa Yamaguchi
95f43d27ae Merge branch 'develop' of https://github.com/paboyle/Grid into feature/staggering 2016-11-22 13:49:22 +00:00
Azusa Yamaguchi
668ca57702 Merge branch 'develop' of https://github.com/paboyle/Grid into feature/staggering 2016-11-22 13:49:11 +00:00
Guido Cossu
62749d05a6 Naming the scalar action 2016-11-17 12:26:20 +00:00
Guido Cossu
3834feb4b7 Adding action names 2016-11-16 16:46:49 +00:00
James Harrison
6b8ee7bae0 Merge branch 'feature/feynman-rules' into feature/qed-fvol 2016-11-15 13:08:08 +00:00
James Harrison
739c2308b5 Set imaginary part of stochastic QED field to zero using real() instead of conjugate(). 2016-11-15 13:07:52 +00:00
Guido Cossu
454302414d Small modif at the test hmc 2016-11-15 12:31:13 +00:00
James Harrison
a71b69389b QedFVol: calculate square Wilson loops up to 10x10 2016-11-14 18:23:04 +00:00
James Harrison
d49e502f53 Merge branch 'feature/feynman-rules' into feature/qed-fvol 2016-11-14 18:00:33 +00:00
James Harrison
92ec3404f8 Set imaginary part of stochastic QED field to zero after FFT into position space 2016-11-14 17:59:02 +00:00
James Harrison
f4ebea3381 QedFVol: add functions for computing spatial and timelike Wilson loops 2016-11-14 17:51:53 +00:00
James Harrison
cf167d0cd1 QedFVol: implement exponentiation of photon field 2016-11-14 17:02:29 +00:00
Guido Cossu
6f8b771a37 Adding date of the last commit 2016-11-10 18:52:00 +00:00
Guido Cossu
4e1ffdd17c Adding git info to the configure output 2016-11-10 18:44:36 +00:00
Guido Cossu
a783282b8b Merge branch 'develop' into feature/hmc_generalise 2016-11-10 18:13:07 +00:00
Guido Cossu
19b85d8486 Some comments in the hmc files 2016-11-10 17:55:58 +00:00
paboyle
c363bdd784 Merge branch 'release/v0.6.0' 2016-11-09 12:43:14 +00:00
James Harrison
c30d96ea50 QedFVol: x86intrin.h namespace fix 2016-11-09 11:06:20 +00:00
James Harrison
7ffe17ada1 Merge branch 'feature/feynman-rules' into feature/qed-fvol 2016-11-08 14:52:43 +00:00
Azusa Yamaguchi
ee686a7d85 Compiles now 2016-11-03 16:58:23 +00:00
Azusa Yamaguchi
1c5b7a6be5 Staggered phases first cut, c1, c2, u0 2016-11-03 16:26:56 +00:00
330a9b3f4c Merge pull request #65 from jch1g10/feature/qed-fvol 2016-11-01 19:53:25 +00:00
James Harrison
28ff66a381 Merge branch 'feature/feynman-rules' into feature/qed-fvol 2016-11-01 16:07:46 +00:00
James Harrison
78c7bcee36 QedFVol: Change variables of type "double" to type "Real". 2016-11-01 16:06:05 +00:00
Azusa Yamaguchi
164d3691db Staggered 2016-11-01 14:24:22 +00:00
00a7b95631 Merge remote-tracking branch 'gh-james/feature/qed-fvol' into feature/qed-fvol 2016-10-31 18:46:23 +00:00
94d8321d01 Merge branch 'feature/feynman-rules' into feature/qed-fvol 2016-10-31 18:41:30 +00:00
James Harrison
ac24cc9f99 Merge branch 'feature/feynman-rules' into feature/qed-fvol 2016-10-29 11:05:26 +01:00
Guido Cossu
1d666771f9 Debugging the RNG, eliminate the barrier after broadcast 2016-10-26 16:08:23 +01:00
Guido Cossu
d50055cd96 Making the ILDG support optional 2016-10-26 09:48:01 +01:00
James Harrison
3ab4c8c0bb QedFVol: calculate plaquette and 2x2 Wilson loop of stochastic QED field 2016-10-25 13:32:02 +01:00
Guido Cossu
47c7159177 ILDG reader/writer works
Fill the xml header with the required information, todo.
2016-10-24 21:57:54 +01:00
Guido Cossu
f415db583a Adding ILDG format 2016-10-24 15:48:22 +01:00
Guido Cossu
f55c16f984 Adding a barrier in the RNG save 2016-10-24 11:02:14 +01:00
Guido Cossu
df67e013ca More debug output for the RNG 2016-10-22 13:34:17 +01:00
Guido Cossu
3e990c9d0a Reverting the broadcast change 2016-10-22 13:26:43 +01:00
Guido Cossu
4b740fc8fd Debugging the RNG state save 2016-10-22 13:06:00 +01:00
Guido Cossu
cccd14b09e Small cleanup 2016-10-21 17:20:54 +01:00
Guido Cossu
e6acffdfc2 Fixing the plaquette computation 2016-10-21 16:06:34 +01:00
26d124283e Merge branch 'feature/feynman-rules' into feature/qed-fvol 2016-10-21 15:23:31 +01:00
0d889b7041 QedFVol: first attempt at generating a QED field 2016-10-21 15:21:32 +01:00
ab31ad006a Merge branch 'feature/feynman-rules' into feature/qed-fvol 2016-10-21 14:42:18 +01:00
Guido Cossu
392130a537 Working on the 5d 2016-10-21 14:22:25 +01:00
Guido Cossu
deef2673b2 Separating the Lattice theories stub from the QCD.h file 2016-10-20 17:24:08 +01:00
Guido Cossu
977b0a6dd9 Merge branch 'develop' into feature/hmc_generalise 2016-10-20 17:04:41 +01:00
Guido Cossu
977d844394 Few modifications on stdout messages 2016-10-20 17:01:59 +01:00
6e4a06e180 qed-fvol: initial commit 2016-10-20 15:04:00 +01:00
Guido Cossu
590675e2ca Csum in hex format 2016-10-19 17:26:25 +01:00
Guido Cossu
8c65bdf6d3 Printing checksum for the RNG file 2016-10-19 16:56:11 +01:00
Guido Cossu
74f1ed3bc5 Adding some documentation for HMC 2016-10-19 10:51:13 +01:00
Guido Cossu
79270ef510 Added a test for EODWF Scaled Shamir with general HMC 2016-10-14 17:34:26 +01:00
Guido Cossu
e250e6b7bb Moving parameters outside of the HMCrunner 2016-10-14 17:22:32 +01:00
Guido Cossu
261342c15f Adding gh-pages 2016-10-13 11:51:25 +01:00
Guido Cossu
eda4dd622e Some more edit 2016-10-11 15:45:20 +01:00
Guido Cossu
c68a2b9637 Minor fix 2016-10-10 11:54:58 +01:00
Guido Cossu
293df6cd20 Generalising the HMCRunner and moving parameters to the user level 2016-10-10 11:49:55 +01:00
Guido Cossu
65f61bb3bf Reset QCD colours to 3 2016-10-10 09:46:17 +01:00
Guido Cossu
26b9740d53 Some fix for the GenericHMCrunner 2016-10-10 09:43:05 +01:00
Guido Cossu
6eb873dd96 Added scalar action phi^4
Check Norm2 output (Complex type assumption)
2016-10-07 17:28:46 +01:00
Guido Cossu
11b4c80b27 Added support for hmc and binary IO for a general field 2016-10-07 13:37:29 +01:00
Guido Cossu
c065e454c3 Adding Binrary IO, untested 2016-10-06 10:12:11 +01:00
Guido Cossu
d9b5fbd374 In the middle of adding a general binary writer 2016-10-04 11:24:08 +01:00
Guido Cossu
cfbc1a26b8 Now the gauge implementation has to take care of the Nexp 2016-10-03 16:20:06 +01:00
Guido Cossu
257f69f931 One more function to generalise the HMC integrator 2016-10-03 15:50:04 +01:00
Guido Cossu
e415260961 First cut on generalised HMC
Backward compatibility OK
2016-10-03 15:28:00 +01:00
paboyle
446c768cd3 Merge branch 'hotfix/v0.5.1'
Double precision compile fix
2016-07-01 16:33:59 +01:00
835 changed files with 135243 additions and 24533 deletions

23
.gitignore vendored
View File

@@ -83,6 +83,7 @@ ltmain.sh
.Trashes
ehthumbs.db
Thumbs.db
.dirstamp
# build directory #
###################
@@ -92,28 +93,24 @@ build*/*
#####################
*.xcodeproj/*
build.sh
.vscode
*.code-workspace
# Eigen source #
################
lib/Eigen/*
# FFTW source #
################
lib/fftw/*
Grid/Eigen
Eigen/*
# libtool macros #
##################
m4/lt*
m4/libtool.m4
# Buck files #
##############
.buck*
buck-out
BUCK
make-bin-BUCK.sh
# github pages #
################
gh-pages/
# generated sources #
#####################
lib/qcd/spin/gamma-gen/*.h
lib/qcd/spin/gamma-gen/*.cc
Grid/qcd/spin/gamma-gen/*.h
Grid/qcd/spin/gamma-gen/*.cc

View File

@@ -7,64 +7,13 @@ cache:
matrix:
include:
- os: osx
osx_image: xcode7.2
osx_image: xcode8.3
compiler: clang
- compiler: gcc
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-4.9
- libmpfr-dev
- libgmp-dev
- libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev
env: VERSION=-4.9
- compiler: gcc
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-5
- libmpfr-dev
- libgmp-dev
- libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev
env: VERSION=-5
- compiler: clang
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-4.8
- libmpfr-dev
- libgmp-dev
- libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev
env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
- compiler: clang
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-4.8
- libmpfr-dev
- libgmp-dev
- libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev
env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
env: PREC=single
- os: osx
osx_image: xcode8.3
compiler: clang
env: PREC=double
before_install:
- export GRIDDIR=`pwd`
@@ -72,35 +21,41 @@ before_install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc openssl; fi
install:
- export CWD=`pwd`
- echo $CWD
- export CC=$CC$VERSION
- export CXX=$CXX$VERSION
- echo $PATH
- which autoconf
- autoconf --version
- which automake
- automake --version
- which $CC
- $CC --version
- which $CXX
- $CXX --version
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export EXTRACONF='--with-openssl=/usr/local/opt/openssl'; fi
script:
- ./bootstrap.sh
- mkdir build
- cd build
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
- mkdir lime
- cd lime
- mkdir build
- cd build
- wget http://usqcd-software.github.io/downloads/c-lime/lime-1.3.2.tar.gz
- tar xf lime-1.3.2.tar.gz
- cd lime-1.3.2
- ./configure --prefix=$CWD/build/lime/install
- make -j4
- ./benchmarks/Benchmark_dwf --threads 1
- echo make clean
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
- make install
- cd $CWD/build
- ../configure --enable-precision=$PREC --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF}
- make -j4
- ./benchmarks/Benchmark_dwf --threads 1
- echo make clean
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto
- make -j4
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
- make check

37
Grid/DisableWarnings.h Normal file
View File

@@ -0,0 +1,37 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/DisableWarnings.h
Copyright (C) 2016
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef DISABLE_WARNINGS_H
#define DISABLE_WARNINGS_H
//disables and intel compiler specific warning (in json.hpp)
#pragma warning disable 488
#endif

View File

@@ -2,12 +2,13 @@
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/stencil/Stencil_common.cc
Source file: ./lib/Grid.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: azusayamaguchi <ayamaguc@YAMAKAZE.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -26,9 +27,23 @@ Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
//
// Grid.h
// simd
//
// Created by Peter Boyle on 09/05/2014.
// Copyright (c) 2014 University of Edinburgh. All rights reserved.
//
namespace Grid {
}
#ifndef GRID_H
#define GRID_H
#include <Grid/GridCore.h>
#include <Grid/GridQCDcore.h>
#include <Grid/qcd/action/Action.h>
#include <Grid/qcd/utils/GaugeFix.h>
#include <Grid/qcd/smearing/Smearing.h>
#include <Grid/parallelIO/MetaData.h>
#include <Grid/qcd/hmc/HMC_aggregate.h>
#endif

View File

@@ -35,55 +35,27 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
// Copyright (c) 2014 University of Edinburgh. All rights reserved.
//
#ifndef GRID_H
#define GRID_H
#ifndef GRID_BASE_H
#define GRID_BASE_H
///////////////////
// Std C++ dependencies
///////////////////
#include <cassert>
#include <complex>
#include <vector>
#include <iostream>
#include <iomanip>
#include <random>
#include <functional>
#include <stdio.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <ctime>
#include <sys/time.h>
#include <chrono>
#include <Grid/GridStd.h>
///////////////////
// Grid headers
///////////////////
#include "Config.h"
#include <Grid/Timer.h>
#include <Grid/PerfCount.h>
#include <Grid/Log.h>
#include <Grid/AlignedAllocator.h>
#include <Grid/Simd.h>
#include <Grid/perfmon/Timer.h>
#include <Grid/perfmon/PerfCount.h>
#include <Grid/log/Log.h>
#include <Grid/allocator/AlignedAllocator.h>
#include <Grid/simd/Simd.h>
#include <Grid/serialisation/Serialisation.h>
#include <Grid/Threads.h>
#include <Grid/Lexicographic.h>
#include <Grid/Init.h>
#include <Grid/Communicator.h>
#include <Grid/Cartesian.h>
#include <Grid/Tensors.h>
#include <Grid/Lattice.h>
#include <Grid/Cshift.h>
#include <Grid/Stencil.h>
#include <Grid/Algorithms.h>
#include <Grid/threads/Threads.h>
#include <Grid/util/Util.h>
#include <Grid/util/Sha.h>
#include <Grid/communicator/Communicator.h>
#include <Grid/cartesian/Cartesian.h>
#include <Grid/tensors/Tensors.h>
#include <Grid/lattice/Lattice.h>
#include <Grid/cshift/Cshift.h>
#include <Grid/stencil/Stencil.h>
#include <Grid/parallelIO/BinaryIO.h>
#include <Grid/FFT.h>
#include <Grid/qcd/QCD.h>
#include <Grid/parallelIO/NerscIO.h>
#include <Grid/qcd/hmc/NerscCheckpointer.h>
#include <Grid/qcd/hmc/HmcRunner.h>
#include <Grid/algorithms/Algorithms.h>
#endif

View File

@@ -2,12 +2,12 @@
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/hmc/HMC.cc
Source file: ./lib/Grid.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
Author: azusayamaguchi <ayamaguc@YAMAKAZE.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
@@ -27,10 +27,16 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#ifndef GRID_QCD_CORE_H
#define GRID_QCD_CORE_H
namespace Grid{
namespace QCD{
/////////////////////////
// Core Grid QCD headers
/////////////////////////
#include <Grid/GridCore.h>
#include <Grid/qcd/QCD.h>
#include <Grid/qcd/spin/Spin.h>
#include <Grid/qcd/utils/Utils.h>
#include <Grid/qcd/representations/Representations.h>
}
}
#endif

29
Grid/GridStd.h Normal file
View File

@@ -0,0 +1,29 @@
#ifndef GRID_STD_H
#define GRID_STD_H
///////////////////
// Std C++ dependencies
///////////////////
#include <cassert>
#include <complex>
#include <vector>
#include <string>
#include <iostream>
#include <iomanip>
#include <random>
#include <functional>
#include <stdio.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <ctime>
#include <sys/time.h>
#include <chrono>
#include <zlib.h>
///////////////////
// Grid config
///////////////////
#include "Config.h"
#endif /* GRID_STD_H */

14
Grid/Grid_Eigen_Dense.h Normal file
View File

@@ -0,0 +1,14 @@
#pragma once
// Force Eigen to use MKL if Grid has been configured with --enable-mkl
#ifdef USE_MKL
#define EIGEN_USE_MKL_ALL
#endif
#if defined __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
#include <Grid/Eigen/Dense>
#if defined __GNUC__
#pragma GCC diagnostic pop
#endif

63
Grid/Makefile.am Normal file
View File

@@ -0,0 +1,63 @@
extra_sources=
extra_headers=
if BUILD_COMMS_MPI3
extra_sources+=communicator/Communicator_mpi3.cc
extra_sources+=communicator/Communicator_base.cc
extra_sources+=communicator/SharedMemoryMPI.cc
extra_sources+=communicator/SharedMemory.cc
endif
if BUILD_COMMS_NONE
extra_sources+=communicator/Communicator_none.cc
extra_sources+=communicator/Communicator_base.cc
extra_sources+=communicator/SharedMemoryNone.cc
extra_sources+=communicator/SharedMemory.cc
endif
if BUILD_HDF5
extra_sources+=serialisation/Hdf5IO.cc
extra_headers+=serialisation/Hdf5IO.h
extra_headers+=serialisation/Hdf5Type.h
endif
all: version-cache
version-cache:
@if [ `git status --porcelain | grep -v '??' | wc -l` -gt 0 ]; then\
a="uncommited changes";\
else\
a="clean";\
fi;\
echo "`git log -n 1 --format=format:"#define GITHASH \\"%H:%d $$a\\"%n" HEAD`" > vertmp;\
if [ -e version-cache ]; then\
d=`diff vertmp version-cache`;\
if [ "$${d}" != "" ]; then\
mv vertmp version-cache;\
rm -f Version.h;\
fi;\
else\
mv vertmp version-cache;\
rm -f Version.h;\
fi;\
rm -f vertmp
Version.h:
cp version-cache Version.h
.PHONY: version-cache
#
# Libraries
#
include Make.inc
include Eigen.inc
lib_LIBRARIES = libGrid.a
CCFILES += $(extra_sources)
HFILES += $(extra_headers) Config.h Version.h
libGrid_a_SOURCES = $(CCFILES)
libGrid_adir = $(includedir)/Grid
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) $(eigen_unsupp_files)

View File

@@ -37,39 +37,31 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/approx/Chebyshev.h>
#include <Grid/algorithms/approx/Remez.h>
#include <Grid/algorithms/approx/MultiShiftFunction.h>
#include <Grid/algorithms/approx/Forecast.h>
#include <Grid/algorithms/iterative/Deflation.h>
#include <Grid/algorithms/iterative/ConjugateGradient.h>
#include <Grid/algorithms/iterative/ConjugateGradientShifted.h>
#include <Grid/algorithms/iterative/ConjugateResidual.h>
#include <Grid/algorithms/iterative/NormalEquations.h>
#include <Grid/algorithms/iterative/SchurRedBlack.h>
#include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
// Lanczos support
#include <Grid/algorithms/iterative/MatrixUtils.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
#include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
#include <Grid/algorithms/iterative/MinimalResidual.h>
#include <Grid/algorithms/iterative/GeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/CoarsenedMatrix.h>
#include <Grid/algorithms/FFT.h>
// Eigen/lanczos
// EigCg
// MCR
// Pcg
// Multishift CG
// Hdcg
// GCR
// etc..
// integrator/Leapfrog
// integrator/Omelyan
// integrator/ForceGradient
// montecarlo/hmc
// montecarlo/rhmc
// montecarlo/metropolis
// etc...
#endif

View File

@@ -103,29 +103,32 @@ namespace Grid {
GridBase *CoarseGrid;
GridBase *FineGrid;
std::vector<Lattice<Fobj> > subspace;
int checkerboard;
Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid) :
Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :
CoarseGrid(_CoarseGrid),
FineGrid(_FineGrid),
subspace(nbasis,_FineGrid)
subspace(nbasis,_FineGrid),
checkerboard(_checkerboard)
{
};
void Orthogonalise(void){
CoarseScalar InnerProd(CoarseGrid);
std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
blockOrthogonalise(InnerProd,subspace);
std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
blockOrthogonalise(InnerProd,subspace);
// std::cout << GridLogMessage <<" Gramm-Schmidt checking orthogonality"<<std::endl;
// CheckOrthogonal();
}
void CheckOrthogonal(void){
CoarseVector iProj(CoarseGrid);
CoarseVector eProj(CoarseGrid);
Lattice<CComplex> pokey(CoarseGrid);
for(int i=0;i<nbasis;i++){
blockProject(iProj,subspace[i],subspace);
eProj=zero;
for(int ss=0;ss<CoarseGrid->oSites();ss++){
parallel_for(int ss=0;ss<CoarseGrid->oSites();ss++){
eProj._odata[ss](i)=CComplex(1.0);
}
eProj=eProj - iProj;
@@ -137,6 +140,7 @@ namespace Grid {
blockProject(CoarseVec,FineVec,subspace);
}
void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){
FineVec.checkerboard = subspace[0].checkerboard;
blockPromote(CoarseVec,FineVec,subspace);
}
void CreateSubspaceRandom(GridParallelRNG &RNG){
@@ -147,6 +151,7 @@ namespace Grid {
Orthogonalise();
}
/*
virtual void CreateSubspaceLanczos(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis)
{
// Run a Lanczos with sloppy convergence
@@ -195,7 +200,7 @@ namespace Grid {
std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
}
}
*/
virtual void CreateSubspace(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
RealD scale;
@@ -206,6 +211,7 @@ namespace Grid {
for(int b=0;b<nn;b++){
subspace[b] = zero;
gaussian(RNG,noise);
scale = std::pow(norm2(noise),-0.5);
noise=noise*scale;
@@ -267,8 +273,7 @@ namespace Grid {
SimpleCompressor<siteVector> compressor;
Stencil.HaloExchange(in,compressor);
PARALLEL_FOR_LOOP
for(int ss=0;ss<Grid()->oSites();ss++){
parallel_for(int ss=0;ss<Grid()->oSites();ss++){
siteVector res = zero;
siteVector nbr;
int ptype;
@@ -292,12 +297,57 @@ PARALLEL_FOR_LOOP
};
RealD Mdag (const CoarseVector &in, CoarseVector &out){
return M(in,out);
// // corresponds to Petrov-Galerkin coarsening
// return M(in,out);
// corresponds to Galerkin coarsening
CoarseVector tmp(Grid());
G5C(tmp, in);
M(tmp, out);
G5C(out, out);
return norm2(out);
};
// Defer support for further coarsening for now
void Mdiag (const CoarseVector &in, CoarseVector &out){};
void Mdir (const CoarseVector &in, CoarseVector &out,int dir, int disp){};
void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
conformable(_grid,in._grid);
conformable(in._grid,out._grid);
SimpleCompressor<siteVector> compressor;
Stencil.HaloExchange(in,compressor);
auto point = [dir, disp](){
if(dir == 0 and disp == 0)
return 8;
else
return (4 * dir + 1 - disp) / 2;
}();
parallel_for(int ss=0;ss<Grid()->oSites();ss++){
siteVector res = zero;
siteVector nbr;
int ptype;
StencilEntry *SE;
SE=Stencil.GetEntry(ptype,point,ss);
if(SE->_is_local&&SE->_permute) {
permute(nbr,in._odata[SE->_offset],ptype);
} else if(SE->_is_local) {
nbr = in._odata[SE->_offset];
} else {
nbr = Stencil.CommBuf()[SE->_offset];
}
res = res + A[point]._odata[ss]*nbr;
vstream(out._odata[ss],res);
}
};
void Mdiag(const CoarseVector &in, CoarseVector &out){
Mdir(in, out, 0, 0); // use the self coupling (= last) point of the stencil
};
CoarsenedMatrix(GridCartesian &CoarseGrid) :
@@ -380,8 +430,7 @@ PARALLEL_FOR_LOOP
Subspace.ProjectToSubspace(oProj,oblock);
// blockProject(iProj,iblock,Subspace.subspace);
// blockProject(oProj,oblock,Subspace.subspace);
PARALLEL_FOR_LOOP
for(int ss=0;ss<Grid()->oSites();ss++){
parallel_for(int ss=0;ss<Grid()->oSites();ss++){
for(int j=0;j<nbasis;j++){
if( disp!= 0 ) {
A[p]._odata[ss](j,i) = oProj._odata[ss](j);
@@ -414,7 +463,7 @@ PARALLEL_FOR_LOOP
std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
#endif
// ForceHermitian();
AssertHermitian();
// AssertHermitian();
// ForceDiagonal();
}
void ForceDiagonal(void) {
@@ -427,7 +476,7 @@ PARALLEL_FOR_LOOP
A[p]=zero;
}
GridParallelRNG RNG(Grid()); RNG.SeedRandomDevice();
GridParallelRNG RNG(Grid()); RNG.SeedFixedIntegers(std::vector<int>({55,72,19,17,34}));
Lattice<iScalar<CComplex> > val(Grid()); random(RNG,val);
Complex one(1.0);

View File

@@ -230,6 +230,7 @@ namespace Grid {
// Barrel shift and collect global pencil
std::vector<int> lcoor(Nd), gcoor(Nd);
result = source;
int pc = processor_coor[dim];
for(int p=0;p<processors[dim];p++) {
PARALLEL_REGION
{
@@ -240,7 +241,8 @@ namespace Grid {
for(int idx=0;idx<sgrid->lSites();idx++) {
sgrid->LocalIndexToLocalCoor(idx,cbuf);
peekLocalSite(s,result,cbuf);
cbuf[dim]+=p*L;
cbuf[dim]+=((pc+p) % processors[dim])*L;
// cbuf[dim]+=p*L;
pokeLocalSite(s,pgbuf,cbuf);
}
}
@@ -278,7 +280,6 @@ namespace Grid {
flops+= flops_call*NN;
// writing out result
int pc = processor_coor[dim];
PARALLEL_REGION
{
std::vector<int> clbuf(Nd), cgbuf(Nd);

View File

@@ -162,15 +162,10 @@ namespace Grid {
_Mat.M(in,out);
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
ComplexD dot;
_Mat.M(in,out);
dot= innerProduct(in,out);
n1=real(dot);
dot = innerProduct(out,out);
n2=real(dot);
ComplexD dot= innerProduct(in,out); n1=real(dot);
n2=norm2(out);
}
void HermOp(const Field &in, Field &out){
_Mat.M(in,out);
@@ -189,13 +184,15 @@ namespace Grid {
virtual RealD MpcDag (const Field &in, Field &out) =0;
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
Field tmp(in._grid);
tmp.checkerboard = in.checkerboard;
ni=Mpc(in,tmp);
no=MpcDag(tmp,out);
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
out.checkerboard = in.checkerboard;
MpcDagMpc(in,out,n1,n2);
}
void HermOp(const Field &in, Field &out){
virtual void HermOp(const Field &in, Field &out){
RealD n1,n2;
HermOpAndNorm(in,out,n1,n2);
}
@@ -212,7 +209,6 @@ namespace Grid {
void OpDir (const Field &in, Field &out,int dir,int disp) {
assert(0);
}
};
template<class Matrix,class Field>
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
@@ -222,12 +218,14 @@ namespace Grid {
SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
virtual RealD Mpc (const Field &in, Field &out) {
Field tmp(in._grid);
tmp.checkerboard = !in.checkerboard;
//std::cout <<"grid pointers: in._grid="<< in._grid << " out._grid=" << out._grid << " _Mat.Grid=" << _Mat.Grid() << " _Mat.RedBlackGrid=" << _Mat.RedBlackGrid() << std::endl;
_Mat.Meooe(in,tmp);
_Mat.MooeeInv(tmp,out);
_Mat.Meooe(out,tmp);
//std::cout << "cb in " << in.checkerboard << " cb out " << out.checkerboard << std::endl;
_Mat.Mooee(in,out);
return axpy_norm(out,-1.0,tmp,out);
}
@@ -270,7 +268,6 @@ namespace Grid {
return axpy_norm(out,-1.0,tmp,in);
}
};
template<class Matrix,class Field>
class SchurDiagTwoOperator : public SchurOperatorBase<Field> {
protected:
@@ -299,6 +296,82 @@ namespace Grid {
return axpy_norm(out,-1.0,tmp,in);
}
};
///////////////////////////////////////////////////////////////////////////////////////////////////
// Left handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta --> ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta
// Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta --> ( 1 - Moe Mee^-1 Meo ) Moo^-1 phi=eta ; psi = Moo^-1 phi
///////////////////////////////////////////////////////////////////////////////////////////////////
template<class Matrix,class Field> using SchurDiagOneRH = SchurDiagTwoOperator<Matrix,Field> ;
template<class Matrix,class Field> using SchurDiagOneLH = SchurDiagOneOperator<Matrix,Field> ;
///////////////////////////////////////////////////////////////////////////////////////////////////
// Staggered use
///////////////////////////////////////////////////////////////////////////////////////////////////
template<class Matrix,class Field>
class SchurStaggeredOperator : public SchurOperatorBase<Field> {
protected:
Matrix &_Mat;
Field tmp;
RealD mass;
double tMpc;
double tIP;
double tMeo;
double taxpby_norm;
uint64_t ncall;
public:
void Report(void)
{
std::cout << GridLogMessage << " HermOpAndNorm.Mpc "<< tMpc/ncall<<" usec "<<std::endl;
std::cout << GridLogMessage << " HermOpAndNorm.IP "<< tIP /ncall<<" usec "<<std::endl;
std::cout << GridLogMessage << " Mpc.MeoMoe "<< tMeo/ncall<<" usec "<<std::endl;
std::cout << GridLogMessage << " Mpc.axpby_norm "<< taxpby_norm/ncall<<" usec "<<std::endl;
}
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())
{
assert( _Mat.isTrivialEE() );
mass = _Mat.Mass();
tMpc=0;
tIP =0;
tMeo=0;
taxpby_norm=0;
ncall=0;
}
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
ncall++;
tMpc-=usecond();
n2 = Mpc(in,out);
tMpc+=usecond();
tIP-=usecond();
ComplexD dot= innerProduct(in,out);
tIP+=usecond();
n1 = real(dot);
}
virtual void HermOp(const Field &in, Field &out){
ncall++;
tMpc-=usecond();
_Mat.Meooe(in,out);
_Mat.Meooe(out,tmp);
tMpc+=usecond();
taxpby_norm-=usecond();
axpby(out,-1.0,mass*mass,tmp,in);
taxpby_norm+=usecond();
}
virtual RealD Mpc (const Field &in, Field &out) {
tMeo-=usecond();
_Mat.Meooe(in,out);
_Mat.Meooe(out,tmp);
tMeo+=usecond();
taxpby_norm-=usecond();
RealD nn=axpby_norm(out,-1.0,mass*mass,tmp,in);
taxpby_norm+=usecond();
return nn;
}
virtual RealD MpcDag (const Field &in, Field &out){
return Mpc(in,out);
}
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
assert(0);// Never need with staggered
}
};
template<class Matrix,class Field> using SchurStagOperator = SchurStaggeredOperator<Matrix,Field>;
/////////////////////////////////////////////////////////////
@@ -307,6 +380,12 @@ namespace Grid {
template<class Field> class OperatorFunction {
public:
virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
virtual void operator() (LinearOperatorBase<Field> &Linop, const std::vector<Field> &in,std::vector<Field> &out) {
assert(in.size()==out.size());
for(int k=0;k<in.size();k++){
(*this)(Linop,in[k],out[k]);
}
};
};
template<class Field> class LinearFunction {
@@ -314,6 +393,14 @@ namespace Grid {
virtual void operator() (const Field &in, Field &out) = 0;
};
template<class Field> class IdentityLinearFunction : public LinearFunction<Field> {
public:
void operator() (const Field &in, Field &out){
out = in;
};
};
/////////////////////////////////////////////////////////////
// Base classes for Multishift solvers for operators
/////////////////////////////////////////////////////////////
@@ -336,6 +423,64 @@ namespace Grid {
};
*/
////////////////////////////////////////////////////////////////////////////////////////////
// Hermitian operator Linear function and operator function
////////////////////////////////////////////////////////////////////////////////////////////
template<class Field>
class HermOpOperatorFunction : public OperatorFunction<Field> {
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
Linop.HermOp(in,out);
};
};
template<typename Field>
class PlainHermOp : public LinearFunction<Field> {
public:
LinearOperatorBase<Field> &_Linop;
PlainHermOp(LinearOperatorBase<Field>& linop) : _Linop(linop)
{}
void operator()(const Field& in, Field& out) {
_Linop.HermOp(in,out);
}
};
template<typename Field>
class FunctionHermOp : public LinearFunction<Field> {
public:
OperatorFunction<Field> & _poly;
LinearOperatorBase<Field> &_Linop;
FunctionHermOp(OperatorFunction<Field> & poly,LinearOperatorBase<Field>& linop)
: _poly(poly), _Linop(linop) {};
void operator()(const Field& in, Field& out) {
_poly(_Linop,in,out);
}
};
template<class Field>
class Polynomial : public OperatorFunction<Field> {
private:
std::vector<RealD> Coeffs;
public:
Polynomial(std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { };
// Implement the required interface
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
Field AtoN(in._grid);
Field Mtmp(in._grid);
AtoN = in;
out = AtoN*Coeffs[0];
for(int n=1;n<Coeffs.size();n++){
Mtmp = AtoN;
Linop.HermOp(Mtmp,AtoN);
out=out+AtoN*Coeffs[n];
}
};
};
}

View File

@@ -55,6 +55,14 @@ namespace Grid {
template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrixBase<Field> {
public:
virtual GridBase *RedBlackGrid(void)=0;
//////////////////////////////////////////////////////////////////////
// Query the even even properties to make algorithmic decisions
//////////////////////////////////////////////////////////////////////
virtual RealD Mass(void) { return 0.0; };
virtual int ConstEE(void) { return 0; }; // Disable assumptions unless overridden
virtual int isTrivialEE(void) { return 0; }; // by a derived class that knows better
// half checkerboard operaions
virtual void Meooe (const Field &in, Field &out)=0;
virtual void Mooee (const Field &in, Field &out)=0;

View File

@@ -8,6 +8,7 @@
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Christoph Lehner <clehner@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -33,40 +34,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
namespace Grid {
////////////////////////////////////////////////////////////////////////////////////////////
// Simple general polynomial with user supplied coefficients
////////////////////////////////////////////////////////////////////////////////////////////
template<class Field>
class HermOpOperatorFunction : public OperatorFunction<Field> {
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
Linop.HermOp(in,out);
};
};
template<class Field>
class Polynomial : public OperatorFunction<Field> {
private:
std::vector<RealD> Coeffs;
public:
Polynomial(std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { };
// Implement the required interface
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
Field AtoN(in._grid);
Field Mtmp(in._grid);
AtoN = in;
out = AtoN*Coeffs[0];
// std::cout <<"Poly in " <<norm2(in)<<" size "<< Coeffs.size()<<std::endl;
// std::cout <<"Coeffs[0]= "<<Coeffs[0]<< " 0 " <<norm2(out)<<std::endl;
for(int n=1;n<Coeffs.size();n++){
Mtmp = AtoN;
Linop.HermOp(Mtmp,AtoN);
out=out+AtoN*Coeffs[n];
// std::cout <<"Coeffs "<<n<<"= "<< Coeffs[n]<< " 0 " <<std::endl;
// std::cout << n<<" " <<norm2(out)<<std::endl;
}
};
struct ChebyParams : Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(ChebyParams,
RealD, alpha,
RealD, beta,
int, Npoly);
};
////////////////////////////////////////////////////////////////////////////////////////////
@@ -83,7 +55,9 @@ namespace Grid {
public:
void csv(std::ostream &out){
RealD diff = hi-lo;
for (RealD x=lo-0.2*diff; x<hi+0.2*diff; x+=(hi-lo)/1000) {
RealD delta = (hi-lo)*1.0e-9;
for (RealD x=lo; x<hi; x+=delta) {
delta*=1.1;
RealD f = approx(x);
out<< x<<" "<<f<<std::endl;
}
@@ -99,6 +73,7 @@ namespace Grid {
};
Chebyshev(){};
Chebyshev(ChebyParams p){ Init(p.alpha,p.beta,p.Npoly);};
Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);};
Chebyshev(RealD _lo,RealD _hi,int _order) {Init(_lo,_hi,_order);};
@@ -193,12 +168,54 @@ namespace Grid {
return sum;
};
RealD approxD(RealD x)
{
RealD Un;
RealD Unm;
RealD Unp;
RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
RealD U0=1;
RealD U1=2*y;
RealD sum;
sum = Coeffs[1]*U0;
sum+= Coeffs[2]*U1*2.0;
Un =U1;
Unm=U0;
for(int i=2;i<order-1;i++){
Unp=2*y*Un-Unm;
Unm=Un;
Un =Unp;
sum+= Un*Coeffs[i+1]*(i+1.0);
}
return sum/(0.5*(hi-lo));
};
RealD approxInv(RealD z, RealD x0, int maxiter, RealD resid) {
RealD x = x0;
RealD eps;
int i;
for (i=0;i<maxiter;i++) {
eps = approx(x) - z;
if (fabs(eps / z) < resid)
return x;
x = x - eps / approxD(x);
}
return std::numeric_limits<double>::quiet_NaN();
}
// Implement the required interface
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
GridBase *grid=in._grid;
// std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl;
//<<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
//std::cout <<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
int vol=grid->gSites();

View File

@@ -0,0 +1,152 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/approx/Forecast.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef INCLUDED_FORECAST_H
#define INCLUDED_FORECAST_H
namespace Grid {
// Abstract base class.
// Takes a matrix (Mat), a source (phi), and a vector of Fields (chi)
// and returns a forecasted solution to the system D*psi = phi (psi).
template<class Matrix, class Field>
class Forecast
{
public:
virtual Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& chi) = 0;
};
// Implementation of Brower et al.'s chronological inverter (arXiv:hep-lat/9509012),
// used to forecast solutions across poles of the EOFA heatbath.
//
// Modified from CPS (cps_pp/src/util/dirac_op/d_op_base/comsrc/minresext.C)
template<class Matrix, class Field>
class ChronoForecast : public Forecast<Matrix,Field>
{
public:
Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& prev_solns)
{
int degree = prev_solns.size();
Field chi(phi); // forecasted solution
// Trivial cases
if(degree == 0){ chi = zero; return chi; }
else if(degree == 1){ return prev_solns[0]; }
RealD dot;
ComplexD xp;
Field r(phi); // residual
Field Mv(phi);
std::vector<Field> v(prev_solns); // orthonormalized previous solutions
std::vector<Field> MdagMv(degree,phi);
// Array to hold the matrix elements
std::vector<std::vector<ComplexD>> G(degree, std::vector<ComplexD>(degree));
// Solution and source vectors
std::vector<ComplexD> a(degree);
std::vector<ComplexD> b(degree);
// Orthonormalize the vector basis
for(int i=0; i<degree; i++){
v[i] *= 1.0/std::sqrt(norm2(v[i]));
for(int j=i+1; j<degree; j++){ v[j] -= innerProduct(v[i],v[j]) * v[i]; }
}
// Perform sparse matrix multiplication and construct rhs
for(int i=0; i<degree; i++){
b[i] = innerProduct(v[i],phi);
Mat.M(v[i],Mv);
Mat.Mdag(Mv,MdagMv[i]);
G[i][i] = innerProduct(v[i],MdagMv[i]);
}
// Construct the matrix
for(int j=0; j<degree; j++){
for(int k=j+1; k<degree; k++){
G[j][k] = innerProduct(v[j],MdagMv[k]);
G[k][j] = std::conj(G[j][k]);
}}
// Gauss-Jordan elimination with partial pivoting
for(int i=0; i<degree; i++){
// Perform partial pivoting
int k = i;
for(int j=i+1; j<degree; j++){ if(std::abs(G[j][j]) > std::abs(G[k][k])){ k = j; } }
if(k != i){
xp = b[k];
b[k] = b[i];
b[i] = xp;
for(int j=0; j<degree; j++){
xp = G[k][j];
G[k][j] = G[i][j];
G[i][j] = xp;
}
}
// Convert matrix to upper triangular form
for(int j=i+1; j<degree; j++){
xp = G[j][i]/G[i][i];
b[j] -= xp * b[i];
for(int k=0; k<degree; k++){ G[j][k] -= xp*G[i][k]; }
}
}
// Use Gaussian elimination to solve equations and calculate initial guess
chi = zero;
r = phi;
for(int i=degree-1; i>=0; i--){
a[i] = 0.0;
for(int j=i+1; j<degree; j++){ a[i] += G[i][j] * a[j]; }
a[i] = (b[i]-a[i])/G[i][i];
chi += a[i]*v[i];
r -= a[i]*MdagMv[i];
}
RealD true_r(0.0);
ComplexD tmp;
for(int i=0; i<degree; i++){
tmp = -b[i];
for(int j=0; j<degree; j++){ tmp += G[i][j]*a[j]; }
tmp = std::conj(tmp)*tmp;
true_r += std::sqrt(tmp.real());
}
RealD error = std::sqrt(norm2(r)/norm2(phi));
std::cout << GridLogMessage << "ChronoForecast: |res|/|src| = " << error << std::endl;
return chi;
};
};
}
#endif

View File

@@ -25,7 +25,7 @@ Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/GridCore.h>
namespace Grid {
double MultiShiftFunction::approx(double x)

View File

@@ -16,7 +16,7 @@
#define INCLUDED_ALG_REMEZ_H
#include <stddef.h>
#include <Config.h>
#include <Grid/GridStd.h>
#ifdef HAVE_LIBGMP
#include "bigfloat.h"

View File

@@ -0,0 +1,698 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h
Copyright (C) 2017
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H
#define GRID_BLOCK_CONJUGATE_GRADIENT_H
namespace Grid {
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS, BlockCGVec, BlockCGrQVec };
//////////////////////////////////////////////////////////////////////////
// Block conjugate gradient. Dimension zero should be the block direction
//////////////////////////////////////////////////////////////////////////
template <class Field>
class BlockConjugateGradient : public OperatorFunction<Field> {
public:
typedef typename Field::scalar_type scomplex;
int blockDim ;
int Nblock;
BlockCGtype CGtype;
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
// Defaults true.
RealD Tolerance;
Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
Integer PrintInterval; //GridLogMessages or Iterative
BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true)
: Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv),PrintInterval(100)
{};
////////////////////////////////////////////////////////////////////////////////////////////////////
// Thin QR factorisation (google it)
////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
//Dimensions
// R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock
//
// Rdag R = m_rr = Herm = L L^dag <-- Cholesky decomposition (LLT routine in Eigen)
//
// Q C = R => Q = R C^{-1}
//
// Want Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock}
//
// Set C = L^{dag}, and then Q^dag Q = ident
//
// Checks:
// Cdag C = Rdag R ; passes.
// QdagQ = 1 ; passes
////////////////////////////////////////////////////////////////////////////////////////////////////
void ThinQRfact (Eigen::MatrixXcd &m_rr,
Eigen::MatrixXcd &C,
Eigen::MatrixXcd &Cinv,
Field & Q,
const Field & R)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
sliceInnerProductMatrix(m_rr,R,R,Orthog);
// Force manifest hermitian to avoid rounding related
m_rr = 0.5*(m_rr+m_rr.adjoint());
Eigen::MatrixXcd L = m_rr.llt().matrixL();
C = L.adjoint();
Cinv = C.inverse();
////////////////////////////////////////////////////////////////////////////////////////////////////
// Q = R C^{-1}
//
// Q_j = R_i Cinv(i,j)
//
// NB maddMatrix conventions are Right multiplication X[j] a[j,i] already
////////////////////////////////////////////////////////////////////////////////////////////////////
sliceMulMatrix(Q,Cinv,R,Orthog);
}
// see comments above
void ThinQRfact (Eigen::MatrixXcd &m_rr,
Eigen::MatrixXcd &C,
Eigen::MatrixXcd &Cinv,
std::vector<Field> & Q,
const std::vector<Field> & R)
{
InnerProductMatrix(m_rr,R,R);
m_rr = 0.5*(m_rr+m_rr.adjoint());
Eigen::MatrixXcd L = m_rr.llt().matrixL();
C = L.adjoint();
Cinv = C.inverse();
MulMatrix(Q,Cinv,R);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Call one of several implementations
////////////////////////////////////////////////////////////////////////////////////////////////////
void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
if ( CGtype == BlockCGrQ ) {
BlockCGrQsolve(Linop,Src,Psi);
} else if (CGtype == CGmultiRHS ) {
CGmultiRHSsolve(Linop,Src,Psi);
} else {
assert(0);
}
}
virtual void operator()(LinearOperatorBase<Field> &Linop, const std::vector<Field> &Src, std::vector<Field> &Psi)
{
if ( CGtype == BlockCGrQVec ) {
BlockCGrQsolveVec(Linop,Src,Psi);
} else {
assert(0);
}
}
////////////////////////////////////////////////////////////////////////////
// BlockCGrQ implementation:
//--------------------------
// X is guess/Solution
// B is RHS
// Solve A X_i = B_i ; i refers to Nblock index
////////////////////////////////////////////////////////////////////////////
void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
Nblock = B._grid->_fdimensions[Orthog];
/* FAKE */
Nblock=8;
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
X.checkerboard = B.checkerboard;
conformable(X, B);
Field tmp(B);
Field Q(B);
Field D(B);
Field Z(B);
Field AD(B);
Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);
// Initial residual computation & set up
std::vector<RealD> residuals(Nblock);
std::vector<RealD> ssq(Nblock);
sliceNorm(ssq,B,Orthog);
RealD sssum=0;
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
sliceNorm(residuals,B,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
sliceNorm(residuals,X,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
/************************************************************************
* Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
************************************************************************
* Dimensions:
*
* X,B==(Nferm x Nblock)
* A==(Nferm x Nferm)
*
* Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
*
* QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
* for k:
* Z = AD
* M = [D^dag Z]^{-1}
* X = X + D MC
* QS = Q - ZM
* D = Q + D S^dag
* C = S C
*/
///////////////////////////////////////
// Initial block: initial search dir is guess
///////////////////////////////////////
std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl;
//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
Linop.HermOp(X, AD);
tmp = B - AD;
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
D=Q;
std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl;
///////////////////////////////////////
// Timers
///////////////////////////////////////
GridStopWatch sliceInnerTimer;
GridStopWatch sliceMaddTimer;
GridStopWatch QRTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
//3. Z = AD
MatrixTimer.Start();
Linop.HermOp(D, Z);
MatrixTimer.Stop();
//4. M = [D^dag Z]^{-1}
sliceInnerTimer.Start();
sliceInnerProductMatrix(m_DZ,D,Z,Orthog);
sliceInnerTimer.Stop();
m_M = m_DZ.inverse();
//5. X = X + D MC
m_tmp = m_M * m_C;
sliceMaddTimer.Start();
sliceMaddMatrix(X,m_tmp, D,X,Orthog);
sliceMaddTimer.Stop();
//6. QS = Q - ZM
sliceMaddTimer.Start();
sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0);
sliceMaddTimer.Stop();
QRTimer.Start();
ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
QRTimer.Stop();
//7. D = Q + D S^dag
m_tmp = m_S.adjoint();
sliceMaddTimer.Start();
sliceMaddMatrix(D,m_tmp,D,Q,Orthog);
sliceMaddTimer.Stop();
//8. C = S C
m_C = m_S*m_C;
/*********************
* convergence monitor
*********************
*/
m_rr = m_C.adjoint() * m_C;
RealD max_resid=0;
RealD rrsum=0;
RealD rr;
for(int b=0;b<Nblock;b++) {
rrsum+=real(m_rr(b,b));
rr = real(m_rr(b,b))/ssq[b];
if ( rr > max_resid ) max_resid = rr;
}
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
<<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl;
if ( max_resid < Tolerance*Tolerance ) {
SolverTimer.Stop();
std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
Linop.HermOp(X, AD);
AD = AD-B;
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl;
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
//////////////////////////////////////////////////////////////////////////
// multiRHS conjugate gradient. Dimension zero should be the block direction
// Use this for spread out across nodes
//////////////////////////////////////////////////////////////////////////
void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
int Orthog = blockDim; // First dimension is block dim
Nblock = Src._grid->_fdimensions[Orthog];
std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
Psi.checkerboard = Src.checkerboard;
conformable(Psi, Src);
Field P(Src);
Field AP(Src);
Field R(Src);
std::vector<ComplexD> v_pAp(Nblock);
std::vector<RealD> v_rr (Nblock);
std::vector<RealD> v_rr_inv(Nblock);
std::vector<RealD> v_alpha(Nblock);
std::vector<RealD> v_beta(Nblock);
// Initial residual computation & set up
std::vector<RealD> residuals(Nblock);
std::vector<RealD> ssq(Nblock);
sliceNorm(ssq,Src,Orthog);
RealD sssum=0;
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
sliceNorm(residuals,Src,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
sliceNorm(residuals,Psi,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
// Initial search dir is guess
Linop.HermOp(Psi, AP);
R = Src - AP;
P = R;
sliceNorm(v_rr,R,Orthog);
GridStopWatch sliceInnerTimer;
GridStopWatch sliceMaddTimer;
GridStopWatch sliceNormTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
RealD rrsum=0;
for(int b=0;b<Nblock;b++) rrsum+=real(v_rr[b]);
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
<<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
MatrixTimer.Start();
Linop.HermOp(P, AP);
MatrixTimer.Stop();
// Alpha
sliceInnerTimer.Start();
sliceInnerProductVector(v_pAp,P,AP,Orthog);
sliceInnerTimer.Stop();
for(int b=0;b<Nblock;b++){
v_alpha[b] = v_rr[b]/real(v_pAp[b]);
}
// Psi, R update
sliceMaddTimer.Start();
sliceMaddVector(Psi,v_alpha, P,Psi,Orthog); // add alpha * P to psi
sliceMaddVector(R ,v_alpha,AP, R,Orthog,-1.0);// sub alpha * AP to resid
sliceMaddTimer.Stop();
// Beta
for(int b=0;b<Nblock;b++){
v_rr_inv[b] = 1.0/v_rr[b];
}
sliceNormTimer.Start();
sliceNorm(v_rr,R,Orthog);
sliceNormTimer.Stop();
for(int b=0;b<Nblock;b++){
v_beta[b] = v_rr_inv[b] *v_rr[b];
}
// Search update
sliceMaddTimer.Start();
sliceMaddVector(P,v_beta,P,R,Orthog);
sliceMaddTimer.Stop();
/*********************
* convergence monitor
*********************
*/
RealD max_resid=0;
for(int b=0;b<Nblock;b++){
RealD rr = v_rr[b]/ssq[b];
if ( rr > max_resid ) max_resid = rr;
}
if ( max_resid < Tolerance*Tolerance ) {
SolverTimer.Stop();
std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
Linop.HermOp(Psi, AP);
AP = AP-Src;
std::cout <<GridLogMessage << "\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tNorm " << sliceNormTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "MultiRHSConjugateGradient did NOT converge" << std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
void InnerProductMatrix(Eigen::MatrixXcd &m , const std::vector<Field> &X, const std::vector<Field> &Y){
for(int b=0;b<Nblock;b++){
for(int bp=0;bp<Nblock;bp++) {
m(b,bp) = innerProduct(X[b],Y[bp]);
}}
}
void MaddMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X,const std::vector<Field> &Y,RealD scale=1.0){
// Should make this cache friendly with site outermost, parallel_for
// Deal with case AP aliases with either Y or X
std::vector<Field> tmp(Nblock,X[0]);
for(int b=0;b<Nblock;b++){
tmp[b] = Y[b];
for(int bp=0;bp<Nblock;bp++) {
tmp[b] = tmp[b] + (scale*m(bp,b))*X[bp];
}
}
for(int b=0;b<Nblock;b++){
AP[b] = tmp[b];
}
}
void MulMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X){
// Should make this cache friendly with site outermost, parallel_for
for(int b=0;b<Nblock;b++){
AP[b] = zero;
for(int bp=0;bp<Nblock;bp++) {
AP[b] += (m(bp,b))*X[bp];
}
}
}
double normv(const std::vector<Field> &P){
double nn = 0.0;
for(int b=0;b<Nblock;b++) {
nn+=norm2(P[b]);
}
return nn;
}
////////////////////////////////////////////////////////////////////////////
// BlockCGrQvec implementation:
//--------------------------
// X is guess/Solution
// B is RHS
// Solve A X_i = B_i ; i refers to Nblock index
////////////////////////////////////////////////////////////////////////////
void BlockCGrQsolveVec(LinearOperatorBase<Field> &Linop, const std::vector<Field> &B, std::vector<Field> &X)
{
Nblock = B.size();
assert(Nblock == X.size());
std::cout<<GridLogMessage<<" Block Conjugate Gradient Vec rQ : Nblock "<<Nblock<<std::endl;
for(int b=0;b<Nblock;b++){
X[b].checkerboard = B[b].checkerboard;
conformable(X[b], B[b]);
conformable(X[b], X[0]);
}
Field Fake(B[0]);
std::vector<Field> tmp(Nblock,Fake);
std::vector<Field> Q(Nblock,Fake);
std::vector<Field> D(Nblock,Fake);
std::vector<Field> Z(Nblock,Fake);
std::vector<Field> AD(Nblock,Fake);
Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);
// Initial residual computation & set up
std::vector<RealD> residuals(Nblock);
std::vector<RealD> ssq(Nblock);
RealD sssum=0;
for(int b=0;b<Nblock;b++){ ssq[b] = norm2(B[b]);}
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
for(int b=0;b<Nblock;b++){ residuals[b] = norm2(B[b]);}
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
for(int b=0;b<Nblock;b++){ residuals[b] = norm2(X[b]);}
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
/************************************************************************
* Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
************************************************************************
* Dimensions:
*
* X,B==(Nferm x Nblock)
* A==(Nferm x Nferm)
*
* Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
*
* QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
* for k:
* Z = AD
* M = [D^dag Z]^{-1}
* X = X + D MC
* QS = Q - ZM
* D = Q + D S^dag
* C = S C
*/
///////////////////////////////////////
// Initial block: initial search dir is guess
///////////////////////////////////////
std::cout << GridLogMessage<<"BlockCGrQvec algorithm initialisation " <<std::endl;
//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
for(int b=0;b<Nblock;b++) {
Linop.HermOp(X[b], AD[b]);
tmp[b] = B[b] - AD[b];
}
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
for(int b=0;b<Nblock;b++) D[b]=Q[b];
std::cout << GridLogMessage<<"BlockCGrQ vec computed initial residual and QR fact " <<std::endl;
///////////////////////////////////////
// Timers
///////////////////////////////////////
GridStopWatch sliceInnerTimer;
GridStopWatch sliceMaddTimer;
GridStopWatch QRTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
//3. Z = AD
MatrixTimer.Start();
for(int b=0;b<Nblock;b++) Linop.HermOp(D[b], Z[b]);
MatrixTimer.Stop();
//4. M = [D^dag Z]^{-1}
sliceInnerTimer.Start();
InnerProductMatrix(m_DZ,D,Z);
sliceInnerTimer.Stop();
m_M = m_DZ.inverse();
//5. X = X + D MC
m_tmp = m_M * m_C;
sliceMaddTimer.Start();
MaddMatrix(X,m_tmp, D,X);
sliceMaddTimer.Stop();
//6. QS = Q - ZM
sliceMaddTimer.Start();
MaddMatrix(tmp,m_M,Z,Q,-1.0);
sliceMaddTimer.Stop();
QRTimer.Start();
ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
QRTimer.Stop();
//7. D = Q + D S^dag
m_tmp = m_S.adjoint();
sliceMaddTimer.Start();
MaddMatrix(D,m_tmp,D,Q);
sliceMaddTimer.Stop();
//8. C = S C
m_C = m_S*m_C;
/*********************
* convergence monitor
*********************
*/
m_rr = m_C.adjoint() * m_C;
RealD max_resid=0;
RealD rrsum=0;
RealD rr;
for(int b=0;b<Nblock;b++) {
rrsum+=real(m_rr(b,b));
rr = real(m_rr(b,b))/ssq[b];
if ( rr > max_resid ) max_resid = rr;
}
std::cout << GridLogIterative << "\t Block Iteration "<<k<<" ave resid "<< sqrt(rrsum/sssum) << " max "<< sqrt(max_resid) <<std::endl;
if ( max_resid < Tolerance*Tolerance ) {
SolverTimer.Stop();
std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
for(int b=0;b<Nblock;b++) Linop.HermOp(X[b], AD[b]);
for(int b=0;b<Nblock;b++) AD[b] = AD[b]-B[b];
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(normv(AD)/normv(B)) <<std::endl;
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
};
}
#endif

View File

@@ -0,0 +1,244 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
template<class Field>
class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<Field> {
public:
bool ErrorOnNoConverge; // Throw an assert when CAGMRES fails to converge,
// defaults to true
RealD Tolerance;
Integer MaxIterations;
Integer RestartLength;
Integer MaxNumberOfRestarts;
Integer IterationCount; // Number of iterations the CAGMRES took to finish,
// filled in upon completion
GridStopWatch MatrixTimer;
GridStopWatch LinalgTimer;
GridStopWatch QrTimer;
GridStopWatch CompSolutionTimer;
Eigen::MatrixXcd H;
std::vector<std::complex<double>> y;
std::vector<std::complex<double>> gamma;
std::vector<std::complex<double>> c;
std::vector<std::complex<double>> s;
CommunicationAvoidingGeneralisedMinimalResidual(RealD tol,
Integer maxit,
Integer restart_length,
bool err_on_no_conv = true)
: Tolerance(tol)
, MaxIterations(maxit)
, RestartLength(restart_length)
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
, ErrorOnNoConverge(err_on_no_conv)
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
, y(RestartLength + 1, 0.)
, gamma(RestartLength + 1, 0.)
, c(RestartLength + 1, 0.)
, s(RestartLength + 1, 0.) {};
void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular GMRES" << std::endl;
psi.checkerboard = src.checkerboard;
conformable(psi, src);
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
RealD cp;
RealD ssq = norm2(src);
RealD rsq = Tolerance * Tolerance * ssq;
Field r(src._grid);
std::cout << std::setprecision(4) << std::scientific;
std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: src " << ssq << std::endl;
MatrixTimer.Reset();
LinalgTimer.Reset();
QrTimer.Reset();
CompSolutionTimer.Reset();
GridStopWatch SolverTimer;
SolverTimer.Start();
IterationCount = 0;
for (int k=0; k<MaxNumberOfRestarts; k++) {
cp = outerLoopBody(LinOp, src, psi, rsq);
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
LinOp.Op(psi,r);
axpy(r,-1.0,src,r);
RealD srcnorm = sqrt(ssq);
RealD resnorm = sqrt(norm2(r));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount
<< " computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual
<< " target " << Tolerance << std::endl;
std::cout << GridLogMessage << "CAGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "CAGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "CAGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "CAGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "CAGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
return;
}
}
std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual did NOT converge" << std::endl;
if (ErrorOnNoConverge)
assert(0);
}
RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
RealD cp = 0;
Field w(src._grid);
Field r(src._grid);
// this should probably be made a class member so that it is only allocated once, not in every restart
std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
MatrixTimer.Start();
LinOp.Op(psi, w);
MatrixTimer.Stop();
LinalgTimer.Start();
r = src - w;
gamma[0] = sqrt(norm2(r));
v[0] = (1. / gamma[0]) * r;
LinalgTimer.Stop();
for (int i=0; i<RestartLength; i++) {
IterationCount++;
arnoldiStep(LinOp, v, w, i);
qrUpdate(i);
cp = std::norm(gamma[i+1]);
std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount
<< " residual " << cp << " target " << rsq << std::endl;
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
computeSolution(v, psi, i);
return cp;
}
}
assert(0); // Never reached
return cp;
}
void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &w, int iter) {
MatrixTimer.Start();
LinOp.Op(v[iter], w);
MatrixTimer.Stop();
LinalgTimer.Start();
for (int i = 0; i <= iter; ++i) {
H(iter, i) = innerProduct(v[i], w);
w = w - H(iter, i) * v[i];
}
H(iter, iter + 1) = sqrt(norm2(w));
v[iter + 1] = (1. / H(iter, iter + 1)) * w;
LinalgTimer.Stop();
}
void qrUpdate(int iter) {
QrTimer.Start();
for (int i = 0; i < iter ; ++i) {
auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
H(iter, i + 1) = tmp;
}
// Compute new Givens Rotation
ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
c[iter] = H(iter, iter) / nu;
s[iter] = H(iter, iter + 1) / nu;
// Apply new Givens rotation
H(iter, iter) = nu;
H(iter, iter + 1) = 0.;
gamma[iter + 1] = -s[iter] * gamma[iter];
gamma[iter] = std::conj(c[iter]) * gamma[iter];
QrTimer.Stop();
}
void computeSolution(std::vector<Field> const &v, Field &psi, int iter) {
CompSolutionTimer.Start();
for (int i = iter; i >= 0; i--) {
y[i] = gamma[i];
for (int k = i + 1; k <= iter; k++)
y[i] = y[i] - H(k, i) * y[k];
y[i] = y[i] / H(i, i);
}
for (int i = 0; i <= iter; i++)
psi = psi + v[i] * y[i];
CompSolutionTimer.Stop();
}
};
}
#endif

View File

@@ -45,13 +45,16 @@ class ConjugateGradient : public OperatorFunction<Field> {
// Defaults true.
RealD Tolerance;
Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
: Tolerance(tol),
MaxIterations(maxit),
ErrorOnNoConverge(err_on_no_conv){};
void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
Field &psi) {
void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);
@@ -68,7 +71,6 @@ class ConjugateGradient : public OperatorFunction<Field> {
Linop.HermOpAndNorm(psi, mmp, d, b);
r = src - mmp;
p = r;
@@ -76,18 +78,12 @@ class ConjugateGradient : public OperatorFunction<Field> {
cp = a;
ssq = norm2(src);
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: guess " << guess << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: src " << ssq << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: mp " << d << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: mmp " << b << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: cp,r " << cp << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: p " << a << std::endl;
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: guess " << guess << std::endl;
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: src " << ssq << std::endl;
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: mp " << d << std::endl;
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: mmp " << b << std::endl;
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: cp,r " << cp << std::endl;
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: p " << a << std::endl;
RealD rsq = Tolerance * Tolerance * ssq;
@@ -96,40 +92,48 @@ class ConjugateGradient : public OperatorFunction<Field> {
return;
}
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq
<< std::endl;
std::cout << GridLogIterative << std::setprecision(8)
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
GridStopWatch LinalgTimer;
GridStopWatch InnerTimer;
GridStopWatch AxpyNormTimer;
GridStopWatch LinearCombTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
for (k = 1; k <= MaxIterations*1000; k++) {
c = cp;
MatrixTimer.Start();
Linop.HermOpAndNorm(p, mmp, d, qq);
Linop.HermOp(p, mmp);
MatrixTimer.Stop();
LinalgTimer.Start();
// RealD qqck = norm2(mmp);
// ComplexD dck = innerProduct(p,mmp);
InnerTimer.Start();
ComplexD dc = innerProduct(p,mmp);
InnerTimer.Stop();
d = dc.real();
a = c / d;
b_pred = a * (a * qq - d) / c;
AxpyNormTimer.Start();
cp = axpy_norm(r, -a, mmp, r);
AxpyNormTimer.Stop();
b = cp / c;
// Fuse these loops ; should be really easy
psi = a * p + psi;
p = p * b + r;
LinearCombTimer.Start();
parallel_for(int ss=0;ss<src._grid->oSites();ss++){
vstream(psi[ss], a * p[ss] + psi[ss]);
vstream(p [ss], b * p[ss] + r[ss]);
}
LinearCombTimer.Stop();
LinalgTimer.Stop();
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
<< " residual " << cp << " target " << rsq << std::endl;
<< " residual^2 " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;
// Stopping condition
if (cp <= rsq) {
@@ -137,31 +141,36 @@ class ConjugateGradient : public OperatorFunction<Field> {
Linop.HermOpAndNorm(psi, mmp, d, qq);
p = mmp - src;
RealD mmpnorm = sqrt(norm2(mmp));
RealD psinorm = sqrt(norm2(psi));
RealD srcnorm = sqrt(norm2(src));
RealD resnorm = sqrt(norm2(p));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage
<< "ConjugateGradient: Converged on iteration " << k << std::endl;
std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual << " target "
<< Tolerance << std::endl;
std::cout << GridLogMessage << "Time elapsed: Iterations "
<< SolverTimer.Elapsed() << " Matrix "
<< MatrixTimer.Elapsed() << " Linalg "
<< LinalgTimer.Elapsed();
std::cout << std::endl;
std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl;
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
std::cout << GridLogMessage << "Time breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInner " << InnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
<< std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
};
}

View File

@@ -35,6 +35,7 @@ namespace Grid {
class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
public:
RealD Tolerance;
RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
Integer MaxInnerIterations;
Integer MaxOuterIterations;
GridBase* SinglePrecGrid; //Grid for single-precision fields
@@ -42,12 +43,16 @@ namespace Grid {
LinearOperatorBase<FieldF> &Linop_f;
LinearOperatorBase<FieldD> &Linop_d;
Integer TotalInnerIterations; //Number of inner CG iterations
Integer TotalOuterIterations; //Number of restarts
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
LinearFunction<FieldF> *guesser;
MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) :
Linop_f(_Linop_f), Linop_d(_Linop_d),
Tolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
OuterLoopNormMult(100.), guesser(NULL){ };
void useGuesser(LinearFunction<FieldF> &g){
@@ -55,9 +60,8 @@ namespace Grid {
}
void operator() (const FieldD &src_d_in, FieldD &sol_d){
(*this)(src_d_in,sol_d,NULL);
}
void operator() (const FieldD &src_d_in, FieldD &sol_d, RealD *shift){
TotalInnerIterations = 0;
GridStopWatch TotalTimer;
TotalTimer.Start();
@@ -77,7 +81,7 @@ namespace Grid {
FieldD src_d(DoublePrecGrid);
src_d = src_d_in; //source for next inner iteration, computed from residual during operation
RealD inner_tol = Tolerance;
RealD inner_tol = InnerTolerance;
FieldF src_f(SinglePrecGrid);
src_f.checkerboard = cb;
@@ -85,17 +89,18 @@ namespace Grid {
FieldF sol_f(SinglePrecGrid);
sol_f.checkerboard = cb;
ConjugateGradientShifted<FieldF> CG_f(inner_tol, MaxInnerIterations);
ConjugateGradient<FieldF> CG_f(inner_tol, MaxInnerIterations);
CG_f.ErrorOnNoConverge = false;
GridStopWatch InnerCGtimer;
GridStopWatch PrecChangeTimer;
for(Integer outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count
for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
//Compute double precision rsd and also new RHS vector.
Linop_d.HermOp(sol_d, tmp_d);
if(shift) axpy(tmp_d,*shift,sol_d,tmp_d);
RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " <<outer_iter<<" residual "<< norm<< " target "<< stop<<std::endl;
@@ -119,8 +124,9 @@ namespace Grid {
//Inner CG
CG_f.Tolerance = inner_tol;
InnerCGtimer.Start();
CG_f(Linop_f, src_f, sol_f,shift);
CG_f(Linop_f, src_f, sol_f);
InnerCGtimer.Stop();
TotalInnerIterations += CG_f.IterationsToComplete;
//Convert sol back to double and add to double prec solution
PrecChangeTimer.Start();
@@ -133,11 +139,13 @@ namespace Grid {
//Final trial CG
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting final patch-up double-precision solve"<<std::endl;
ConjugateGradientShifted<FieldD> CG_d(Tolerance, MaxInnerIterations);
CG_d(Linop_d, src_d_in, sol_d,shift);
ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations);
CG_d(Linop_d, src_d_in, sol_d);
TotalFinalStepIterations = CG_d.IterationsToComplete;
TotalTimer.Stop();
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl;
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total time " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
}
};

View File

@@ -43,9 +43,9 @@ namespace Grid {
public:
RealD Tolerance;
Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
int verbose;
MultiShiftFunction shifts;
int iter;
ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) :
MaxIterations(maxit),
@@ -61,7 +61,6 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, Field &psi)
std::vector<Field> results(nshift,grid);
(*this)(Linop,src,results,psi);
}
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector<Field> &results, Field &psi)
{
int nshift = shifts.order;
@@ -107,12 +106,11 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
RealD a,b,c,d;
RealD cp,bp,qq; //prev
int cb=src.checkerboard;
// Matrix mult fields
Field r(grid);
Field p(grid); p.checkerboard = src.checkerboard;
Field p(grid);
Field tmp(grid);
Field mmp(grid);mmp.checkerboard = src.checkerboard;
Field mmp(grid);
// Check lightest mass
for(int s=0;s<nshift;s++){
@@ -135,9 +133,6 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
p=src;
//MdagM+m[0]
std::cout << "p.checkerboard " << p.checkerboard
<< "mmp.checkerboard " << mmp.checkerboard << std::endl;
Linop.HermOpAndNorm(p,mmp,d,qq);
axpy(mmp,mass[0],p,mmp);
RealD rn = norm2(p);
@@ -170,6 +165,15 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
axpby(psi[s],0.,-bs[s]*alpha[s],src,src);
}
///////////////////////////////////////
// Timers
///////////////////////////////////////
GridStopWatch AXPYTimer;
GridStopWatch ShiftTimer;
GridStopWatch QRTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
// Iteration loop
int k;
@@ -177,7 +181,9 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
for (k=1;k<=MaxIterations;k++){
a = c /cp;
AXPYTimer.Start();
axpy(p,a,p,r);
AXPYTimer.Stop();
// Note to self - direction ps is iterated seperately
// for each shift. Does not appear to have any scope
@@ -186,6 +192,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
// However SAME r is used. Could load "r" and update
// ALL ps[s]. 2/3 Bandwidth saving
// New Kernel: Load r, vector of coeffs, vector of pointers ps
AXPYTimer.Start();
for(int s=0;s<nshift;s++){
if ( ! converged[s] ) {
if (s==0){
@@ -196,22 +203,34 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
}
}
}
AXPYTimer.Stop();
cp=c;
MatrixTimer.Start();
//Linop.HermOpAndNorm(p,mmp,d,qq); // d is used
// The below is faster on KNL
Linop.HermOp(p,mmp);
d=real(innerProduct(p,mmp));
Linop.HermOpAndNorm(p,mmp,d,qq);
MatrixTimer.Stop();
AXPYTimer.Start();
axpy(mmp,mass[0],p,mmp);
AXPYTimer.Stop();
RealD rn = norm2(p);
d += rn*mass[0];
bp=b;
b=-cp/d;
AXPYTimer.Start();
c=axpy_norm(r,b,mmp,r);
AXPYTimer.Stop();
// Toggle the recurrence history
bs[0] = b;
iz = 1-iz;
ShiftTimer.Start();
for(int s=1;s<nshift;s++){
if((!converged[s])){
RealD z0 = z[s][1-iz];
@@ -221,6 +240,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
}
}
ShiftTimer.Stop();
for(int s=0;s<nshift;s++){
int ss = s;
@@ -263,6 +283,9 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
if ( all_converged ){
SolverTimer.Stop();
std::cout<<GridLogMessage<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
std::cout<<GridLogMessage<< "CGMultiShift: Checking solutions"<<std::endl;
@@ -275,9 +298,19 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
RealD cn = norm2(src);
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
}
iter = k;
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tAXPY " << AXPYTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMarix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tShift " << ShiftTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return;
}
}
// ugly hack
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;

View File

@@ -0,0 +1,256 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ConjugateGradientReliableUpdate.h
Copyright (C) 2015
Author: Christopher Kelly <ckelly@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H
#define GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H
namespace Grid {
template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
class ConjugateGradientReliableUpdate : public LinearFunction<FieldD> {
public:
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
// Defaults true.
RealD Tolerance;
Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
Integer ReliableUpdatesPerformed;
bool DoFinalCleanup; //Final DP cleanup, defaults to true
Integer IterationsToCleanup; //Final DP cleanup step iterations
LinearOperatorBase<FieldF> &Linop_f;
LinearOperatorBase<FieldD> &Linop_d;
GridBase* SinglePrecGrid;
RealD Delta; //reliable update parameter
//Optional ability to switch to a different linear operator once the tolerance reaches a certain point. Useful for single/half -> single/single
LinearOperatorBase<FieldF> *Linop_fallback;
RealD fallback_transition_tol;
ConjugateGradientReliableUpdate(RealD tol, Integer maxit, RealD _delta, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d, bool err_on_no_conv = true)
: Tolerance(tol),
MaxIterations(maxit),
Delta(_delta),
Linop_f(_Linop_f),
Linop_d(_Linop_d),
SinglePrecGrid(_sp_grid),
ErrorOnNoConverge(err_on_no_conv),
DoFinalCleanup(true),
Linop_fallback(NULL)
{};
void setFallbackLinop(LinearOperatorBase<FieldF> &_Linop_fallback, const RealD _fallback_transition_tol){
Linop_fallback = &_Linop_fallback;
fallback_transition_tol = _fallback_transition_tol;
}
void operator()(const FieldD &src, FieldD &psi) {
LinearOperatorBase<FieldF> *Linop_f_use = &Linop_f;
bool using_fallback = false;
psi.checkerboard = src.checkerboard;
conformable(psi, src);
RealD cp, c, a, d, b, ssq, qq, b_pred;
FieldD p(src);
FieldD mmp(src);
FieldD r(src);
// Initial residual computation & set up
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
Linop_d.HermOpAndNorm(psi, mmp, d, b);
r = src - mmp;
p = r;
a = norm2(p);
cp = a;
ssq = norm2(src);
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: guess " << guess << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: src " << ssq << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: mp " << d << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: mmp " << b << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: cp,r " << cp << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: p " << a << std::endl;
RealD rsq = Tolerance * Tolerance * ssq;
// Check if guess is really REALLY good :)
if (cp <= rsq) {
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate guess was REALLY good\n";
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
return;
}
//Single prec initialization
FieldF r_f(SinglePrecGrid);
r_f.checkerboard = r.checkerboard;
precisionChange(r_f, r);
FieldF psi_f(r_f);
psi_f = zero;
FieldF p_f(r_f);
FieldF mmp_f(r_f);
RealD MaxResidSinceLastRelUp = cp; //initial residual
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
GridStopWatch LinalgTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k = 0;
int l = 0;
for (k = 1; k <= MaxIterations; k++) {
c = cp;
MatrixTimer.Start();
Linop_f_use->HermOpAndNorm(p_f, mmp_f, d, qq);
MatrixTimer.Stop();
LinalgTimer.Start();
a = c / d;
b_pred = a * (a * qq - d) / c;
cp = axpy_norm(r_f, -a, mmp_f, r_f);
b = cp / c;
// Fuse these loops ; should be really easy
psi_f = a * p_f + psi_f;
//p_f = p_f * b + r_f;
LinalgTimer.Stop();
std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: Iteration " << k
<< " residual " << cp << " target " << rsq << std::endl;
std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl;
std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl;
if(cp > MaxResidSinceLastRelUp){
std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: updating MaxResidSinceLastRelUp : " << MaxResidSinceLastRelUp << " -> " << cp << std::endl;
MaxResidSinceLastRelUp = cp;
}
// Stopping condition
if (cp <= rsq) {
//Although not written in the paper, I assume that I have to add on the final solution
precisionChange(mmp, psi_f);
psi = psi + mmp;
SolverTimer.Stop();
Linop_d.HermOpAndNorm(psi, mmp, d, qq);
p = mmp - src;
RealD srcnorm = sqrt(norm2(src));
RealD resnorm = sqrt(norm2(p));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate Converged on iteration " << k << " after " << l << " reliable updates" << std::endl;
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
std::cout << GridLogMessage << "Time breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
ReliableUpdatesPerformed = l;
if(DoFinalCleanup){
//Do a final CG to cleanup
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate performing final cleanup.\n";
ConjugateGradient<FieldD> CG(Tolerance,MaxIterations);
CG.ErrorOnNoConverge = ErrorOnNoConverge;
CG(Linop_d,src,psi);
IterationsToCleanup = CG.IterationsToComplete;
}
else if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate complete.\n";
return;
}
else if(cp < Delta * MaxResidSinceLastRelUp) { //reliable update
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate "
<< cp << "(residual) < " << Delta << "(Delta) * " << MaxResidSinceLastRelUp << "(MaxResidSinceLastRelUp) on iteration " << k << " : performing reliable update\n";
precisionChange(mmp, psi_f);
psi = psi + mmp;
Linop_d.HermOpAndNorm(psi, mmp, d, qq);
r = src - mmp;
psi_f = zero;
precisionChange(r_f, r);
cp = norm2(r);
MaxResidSinceLastRelUp = cp;
b = cp/c;
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate new residual " << cp << std::endl;
l = l+1;
}
p_f = p_f * b + r_f; //update search vector after reliable update appears to help convergence
if(!using_fallback && Linop_fallback != NULL && cp < fallback_transition_tol){
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate switching to fallback linear operator on iteration " << k << " at residual " << cp << std::endl;
Linop_f_use = Linop_fallback;
using_fallback = true;
}
}
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate did NOT converge"
<< std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
ReliableUpdatesPerformed = l;
}
};
};
#endif

View File

@@ -0,0 +1,104 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_DEFLATION_H
#define GRID_DEFLATION_H
namespace Grid {
template<class Field>
class ZeroGuesser: public LinearFunction<Field> {
public:
virtual void operator()(const Field &src, Field &guess) { guess = zero; };
};
template<class Field>
class SourceGuesser: public LinearFunction<Field> {
public:
virtual void operator()(const Field &src, Field &guess) { guess = src; };
};
////////////////////////////////
// Fine grid deflation
////////////////////////////////
template<class Field>
class DeflatedGuesser: public LinearFunction<Field> {
private:
const std::vector<Field> &evec;
const std::vector<RealD> &eval;
public:
DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};
virtual void operator()(const Field &src,Field &guess) {
guess = zero;
assert(evec.size()==eval.size());
auto N = evec.size();
for (int i=0;i<N;i++) {
const Field& tmp = evec[i];
axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
}
guess.checkerboard = src.checkerboard;
}
};
template<class FineField, class CoarseField>
class LocalCoherenceDeflatedGuesser: public LinearFunction<FineField> {
private:
const std::vector<FineField> &subspace;
const std::vector<CoarseField> &evec_coarse;
const std::vector<RealD> &eval_coarse;
public:
LocalCoherenceDeflatedGuesser(const std::vector<FineField> &_subspace,
const std::vector<CoarseField> &_evec_coarse,
const std::vector<RealD> &_eval_coarse)
: subspace(_subspace),
evec_coarse(_evec_coarse),
eval_coarse(_eval_coarse)
{
}
void operator()(const FineField &src,FineField &guess) {
int N = (int)evec_coarse.size();
CoarseField src_coarse(evec_coarse[0]._grid);
CoarseField guess_coarse(evec_coarse[0]._grid); guess_coarse = zero;
blockProject(src_coarse,src,subspace);
for (int i=0;i<N;i++) {
const CoarseField & tmp = evec_coarse[i];
axpy(guess_coarse,TensorRemove(innerProduct(tmp,src_coarse)) / eval_coarse[i],tmp,guess_coarse);
}
blockPromote(guess_coarse,guess,subspace);
guess.checkerboard = src.checkerboard;
};
};
}
#endif

View File

@@ -0,0 +1,256 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_FLEXIBLE_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_FLEXIBLE_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
template<class Field>
class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<Field> {
public:
bool ErrorOnNoConverge; // Throw an assert when FCAGMRES fails to converge,
// defaults to true
RealD Tolerance;
Integer MaxIterations;
Integer RestartLength;
Integer MaxNumberOfRestarts;
Integer IterationCount; // Number of iterations the FCAGMRES took to finish,
// filled in upon completion
GridStopWatch MatrixTimer;
GridStopWatch PrecTimer;
GridStopWatch LinalgTimer;
GridStopWatch QrTimer;
GridStopWatch CompSolutionTimer;
Eigen::MatrixXcd H;
std::vector<std::complex<double>> y;
std::vector<std::complex<double>> gamma;
std::vector<std::complex<double>> c;
std::vector<std::complex<double>> s;
LinearFunction<Field> &Preconditioner;
FlexibleCommunicationAvoidingGeneralisedMinimalResidual(RealD tol,
Integer maxit,
LinearFunction<Field> &Prec,
Integer restart_length,
bool err_on_no_conv = true)
: Tolerance(tol)
, MaxIterations(maxit)
, RestartLength(restart_length)
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
, ErrorOnNoConverge(err_on_no_conv)
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
, y(RestartLength + 1, 0.)
, gamma(RestartLength + 1, 0.)
, c(RestartLength + 1, 0.)
, s(RestartLength + 1, 0.)
, Preconditioner(Prec) {};
void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular FGMRES" << std::endl;
psi.checkerboard = src.checkerboard;
conformable(psi, src);
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
RealD cp;
RealD ssq = norm2(src);
RealD rsq = Tolerance * Tolerance * ssq;
Field r(src._grid);
std::cout << std::setprecision(4) << std::scientific;
std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: src " << ssq << std::endl;
PrecTimer.Reset();
MatrixTimer.Reset();
LinalgTimer.Reset();
QrTimer.Reset();
CompSolutionTimer.Reset();
GridStopWatch SolverTimer;
SolverTimer.Start();
IterationCount = 0;
for (int k=0; k<MaxNumberOfRestarts; k++) {
cp = outerLoopBody(LinOp, src, psi, rsq);
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
LinOp.Op(psi,r);
axpy(r,-1.0,src,r);
RealD srcnorm = sqrt(ssq);
RealD resnorm = sqrt(norm2(r));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount
<< " computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual
<< " target " << Tolerance << std::endl;
std::cout << GridLogMessage << "FCAGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FCAGMRES Time elapsed: Precon " << PrecTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FCAGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FCAGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FCAGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FCAGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
return;
}
}
std::cout << GridLogMessage << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual did NOT converge" << std::endl;
if (ErrorOnNoConverge)
assert(0);
}
RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
RealD cp = 0;
Field w(src._grid);
Field r(src._grid);
// these should probably be made class members so that they are only allocated once, not in every restart
std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
std::vector<Field> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;
MatrixTimer.Start();
LinOp.Op(psi, w);
MatrixTimer.Stop();
LinalgTimer.Start();
r = src - w;
gamma[0] = sqrt(norm2(r));
v[0] = (1. / gamma[0]) * r;
LinalgTimer.Stop();
for (int i=0; i<RestartLength; i++) {
IterationCount++;
arnoldiStep(LinOp, v, z, w, i);
qrUpdate(i);
cp = std::norm(gamma[i+1]);
std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount
<< " residual " << cp << " target " << rsq << std::endl;
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
computeSolution(z, psi, i);
return cp;
}
}
assert(0); // Never reached
return cp;
}
void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) {
PrecTimer.Start();
Preconditioner(v[iter], z[iter]);
PrecTimer.Stop();
MatrixTimer.Start();
LinOp.Op(z[iter], w);
MatrixTimer.Stop();
LinalgTimer.Start();
for (int i = 0; i <= iter; ++i) {
H(iter, i) = innerProduct(v[i], w);
w = w - H(iter, i) * v[i];
}
H(iter, iter + 1) = sqrt(norm2(w));
v[iter + 1] = (1. / H(iter, iter + 1)) * w;
LinalgTimer.Stop();
}
void qrUpdate(int iter) {
QrTimer.Start();
for (int i = 0; i < iter ; ++i) {
auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
H(iter, i + 1) = tmp;
}
// Compute new Givens Rotation
ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
c[iter] = H(iter, iter) / nu;
s[iter] = H(iter, iter + 1) / nu;
// Apply new Givens rotation
H(iter, iter) = nu;
H(iter, iter + 1) = 0.;
gamma[iter + 1] = -s[iter] * gamma[iter];
gamma[iter] = std::conj(c[iter]) * gamma[iter];
QrTimer.Stop();
}
void computeSolution(std::vector<Field> const &z, Field &psi, int iter) {
CompSolutionTimer.Start();
for (int i = iter; i >= 0; i--) {
y[i] = gamma[i];
for (int k = i + 1; k <= iter; k++)
y[i] = y[i] - H(k, i) * y[k];
y[i] = y[i] / H(i, i);
}
for (int i = 0; i <= iter; i++)
psi = psi + z[i] * y[i];
CompSolutionTimer.Stop();
}
};
}
#endif

View File

@@ -0,0 +1,254 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
template<class Field>
class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
public:
bool ErrorOnNoConverge; // Throw an assert when FGMRES fails to converge,
// defaults to true
RealD Tolerance;
Integer MaxIterations;
Integer RestartLength;
Integer MaxNumberOfRestarts;
Integer IterationCount; // Number of iterations the FGMRES took to finish,
// filled in upon completion
GridStopWatch MatrixTimer;
GridStopWatch PrecTimer;
GridStopWatch LinalgTimer;
GridStopWatch QrTimer;
GridStopWatch CompSolutionTimer;
Eigen::MatrixXcd H;
std::vector<std::complex<double>> y;
std::vector<std::complex<double>> gamma;
std::vector<std::complex<double>> c;
std::vector<std::complex<double>> s;
LinearFunction<Field> &Preconditioner;
FlexibleGeneralisedMinimalResidual(RealD tol,
Integer maxit,
LinearFunction<Field> &Prec,
Integer restart_length,
bool err_on_no_conv = true)
: Tolerance(tol)
, MaxIterations(maxit)
, RestartLength(restart_length)
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
, ErrorOnNoConverge(err_on_no_conv)
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
, y(RestartLength + 1, 0.)
, gamma(RestartLength + 1, 0.)
, c(RestartLength + 1, 0.)
, s(RestartLength + 1, 0.)
, Preconditioner(Prec) {};
void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
RealD cp;
RealD ssq = norm2(src);
RealD rsq = Tolerance * Tolerance * ssq;
Field r(src._grid);
std::cout << std::setprecision(4) << std::scientific;
std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: guess " << guess << std::endl;
std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: src " << ssq << std::endl;
PrecTimer.Reset();
MatrixTimer.Reset();
LinalgTimer.Reset();
QrTimer.Reset();
CompSolutionTimer.Reset();
GridStopWatch SolverTimer;
SolverTimer.Start();
IterationCount = 0;
for (int k=0; k<MaxNumberOfRestarts; k++) {
cp = outerLoopBody(LinOp, src, psi, rsq);
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
LinOp.Op(psi,r);
axpy(r,-1.0,src,r);
RealD srcnorm = sqrt(ssq);
RealD resnorm = sqrt(norm2(r));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual: Converged on iteration " << IterationCount
<< " computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual
<< " target " << Tolerance << std::endl;
std::cout << GridLogMessage << "FGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FGMRES Time elapsed: Precon " << PrecTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "FGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
return;
}
}
std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual did NOT converge" << std::endl;
if (ErrorOnNoConverge)
assert(0);
}
RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
RealD cp = 0;
Field w(src._grid);
Field r(src._grid);
// these should probably be made class members so that they are only allocated once, not in every restart
std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
std::vector<Field> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;
MatrixTimer.Start();
LinOp.Op(psi, w);
MatrixTimer.Stop();
LinalgTimer.Start();
r = src - w;
gamma[0] = sqrt(norm2(r));
v[0] = (1. / gamma[0]) * r;
LinalgTimer.Stop();
for (int i=0; i<RestartLength; i++) {
IterationCount++;
arnoldiStep(LinOp, v, z, w, i);
qrUpdate(i);
cp = std::norm(gamma[i+1]);
std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: Iteration " << IterationCount
<< " residual " << cp << " target " << rsq << std::endl;
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
computeSolution(z, psi, i);
return cp;
}
}
assert(0); // Never reached
return cp;
}
void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) {
PrecTimer.Start();
Preconditioner(v[iter], z[iter]);
PrecTimer.Stop();
MatrixTimer.Start();
LinOp.Op(z[iter], w);
MatrixTimer.Stop();
LinalgTimer.Start();
for (int i = 0; i <= iter; ++i) {
H(iter, i) = innerProduct(v[i], w);
w = w - H(iter, i) * v[i];
}
H(iter, iter + 1) = sqrt(norm2(w));
v[iter + 1] = (1. / H(iter, iter + 1)) * w;
LinalgTimer.Stop();
}
void qrUpdate(int iter) {
QrTimer.Start();
for (int i = 0; i < iter ; ++i) {
auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
H(iter, i + 1) = tmp;
}
// Compute new Givens Rotation
ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
c[iter] = H(iter, iter) / nu;
s[iter] = H(iter, iter + 1) / nu;
// Apply new Givens rotation
H(iter, iter) = nu;
H(iter, iter + 1) = 0.;
gamma[iter + 1] = -s[iter] * gamma[iter];
gamma[iter] = std::conj(c[iter]) * gamma[iter];
QrTimer.Stop();
}
void computeSolution(std::vector<Field> const &z, Field &psi, int iter) {
CompSolutionTimer.Start();
for (int i = iter; i >= 0; i--) {
y[i] = gamma[i];
for (int k = i + 1; k <= iter; k++)
y[i] = y[i] - H(k, i) * y[k];
y[i] = y[i] / H(i, i);
}
for (int i = 0; i <= iter; i++)
psi = psi + z[i] * y[i];
CompSolutionTimer.Stop();
}
};
}
#endif

View File

@@ -0,0 +1,242 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/GeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
template<class Field>
class GeneralisedMinimalResidual : public OperatorFunction<Field> {
public:
bool ErrorOnNoConverge; // Throw an assert when GMRES fails to converge,
// defaults to true
RealD Tolerance;
Integer MaxIterations;
Integer RestartLength;
Integer MaxNumberOfRestarts;
Integer IterationCount; // Number of iterations the GMRES took to finish,
// filled in upon completion
GridStopWatch MatrixTimer;
GridStopWatch LinalgTimer;
GridStopWatch QrTimer;
GridStopWatch CompSolutionTimer;
Eigen::MatrixXcd H;
std::vector<std::complex<double>> y;
std::vector<std::complex<double>> gamma;
std::vector<std::complex<double>> c;
std::vector<std::complex<double>> s;
GeneralisedMinimalResidual(RealD tol,
Integer maxit,
Integer restart_length,
bool err_on_no_conv = true)
: Tolerance(tol)
, MaxIterations(maxit)
, RestartLength(restart_length)
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
, ErrorOnNoConverge(err_on_no_conv)
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
, y(RestartLength + 1, 0.)
, gamma(RestartLength + 1, 0.)
, c(RestartLength + 1, 0.)
, s(RestartLength + 1, 0.) {};
void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
RealD cp;
RealD ssq = norm2(src);
RealD rsq = Tolerance * Tolerance * ssq;
Field r(src._grid);
std::cout << std::setprecision(4) << std::scientific;
std::cout << GridLogIterative << "GeneralisedMinimalResidual: guess " << guess << std::endl;
std::cout << GridLogIterative << "GeneralisedMinimalResidual: src " << ssq << std::endl;
MatrixTimer.Reset();
LinalgTimer.Reset();
QrTimer.Reset();
CompSolutionTimer.Reset();
GridStopWatch SolverTimer;
SolverTimer.Start();
IterationCount = 0;
for (int k=0; k<MaxNumberOfRestarts; k++) {
cp = outerLoopBody(LinOp, src, psi, rsq);
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
LinOp.Op(psi,r);
axpy(r,-1.0,src,r);
RealD srcnorm = sqrt(ssq);
RealD resnorm = sqrt(norm2(r));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "GeneralisedMinimalResidual: Converged on iteration " << IterationCount
<< " computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual
<< " target " << Tolerance << std::endl;
std::cout << GridLogMessage << "GMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "GMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "GMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "GMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "GMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
return;
}
}
std::cout << GridLogMessage << "GeneralisedMinimalResidual did NOT converge" << std::endl;
if (ErrorOnNoConverge)
assert(0);
}
RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
RealD cp = 0;
Field w(src._grid);
Field r(src._grid);
// this should probably be made a class member so that it is only allocated once, not in every restart
std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
MatrixTimer.Start();
LinOp.Op(psi, w);
MatrixTimer.Stop();
LinalgTimer.Start();
r = src - w;
gamma[0] = sqrt(norm2(r));
v[0] = (1. / gamma[0]) * r;
LinalgTimer.Stop();
for (int i=0; i<RestartLength; i++) {
IterationCount++;
arnoldiStep(LinOp, v, w, i);
qrUpdate(i);
cp = std::norm(gamma[i+1]);
std::cout << GridLogIterative << "GeneralisedMinimalResidual: Iteration " << IterationCount
<< " residual " << cp << " target " << rsq << std::endl;
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
computeSolution(v, psi, i);
return cp;
}
}
assert(0); // Never reached
return cp;
}
void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &w, int iter) {
MatrixTimer.Start();
LinOp.Op(v[iter], w);
MatrixTimer.Stop();
LinalgTimer.Start();
for (int i = 0; i <= iter; ++i) {
H(iter, i) = innerProduct(v[i], w);
w = w - H(iter, i) * v[i];
}
H(iter, iter + 1) = sqrt(norm2(w));
v[iter + 1] = (1. / H(iter, iter + 1)) * w;
LinalgTimer.Stop();
}
void qrUpdate(int iter) {
QrTimer.Start();
for (int i = 0; i < iter ; ++i) {
auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
H(iter, i + 1) = tmp;
}
// Compute new Givens Rotation
ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
c[iter] = H(iter, iter) / nu;
s[iter] = H(iter, iter + 1) / nu;
// Apply new Givens rotation
H(iter, iter) = nu;
H(iter, iter + 1) = 0.;
gamma[iter + 1] = -s[iter] * gamma[iter];
gamma[iter] = std::conj(c[iter]) * gamma[iter];
QrTimer.Stop();
}
void computeSolution(std::vector<Field> const &v, Field &psi, int iter) {
CompSolutionTimer.Start();
for (int i = iter; i >= 0; i--) {
y[i] = gamma[i];
for (int k = i + 1; k <= iter; k++)
y[i] = y[i] - H(k, i) * y[k];
y[i] = y[i] / H(i, i);
}
for (int i = 0; i <= iter; i++)
psi = psi + v[i] * y[i];
CompSolutionTimer.Stop();
}
};
}
#endif

View File

@@ -0,0 +1,842 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Chulwoo Jung <chulwoo@bnl.gov>
Author: Christoph Lehner <clehner@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_BIRL_H
#define GRID_BIRL_H
#include <string.h> //memset
//#include <zlib.h>
#include <sys/stat.h>
namespace Grid {
////////////////////////////////////////////////////////
// Move following 100 LOC to lattice/Lattice_basis.h
////////////////////////////////////////////////////////
template<class Field>
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k)
{
for(int j=0; j<k; ++j){
auto ip = innerProduct(basis[j],w);
w = w - ip*basis[j];
}
}
template<class Field>
void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm)
{
typedef typename Field::vector_object vobj;
GridBase* grid = basis[0]._grid;
parallel_region
{
std::vector < vobj , commAllocator<vobj> > B(Nm); // Thread private
parallel_for_internal(int ss=0;ss < grid->oSites();ss++){
for(int j=j0; j<j1; ++j) B[j]=0.;
for(int j=j0; j<j1; ++j){
for(int k=k0; k<k1; ++k){
B[j] +=Qt(j,k) * basis[k]._odata[ss];
}
}
for(int j=j0; j<j1; ++j){
basis[j]._odata[ss] = B[j];
}
}
}
}
// Extract a single rotated vector
template<class Field>
void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm)
{
typedef typename Field::vector_object vobj;
GridBase* grid = basis[0]._grid;
result.checkerboard = basis[0].checkerboard;
parallel_for(int ss=0;ss < grid->oSites();ss++){
vobj B = zero;
for(int k=k0; k<k1; ++k){
B +=Qt(j,k) * basis[k]._odata[ss];
}
result._odata[ss] = B;
}
}
template<class Field>
void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx)
{
int vlen = idx.size();
assert(vlen>=1);
assert(vlen<=sort_vals.size());
assert(vlen<=_v.size());
for (size_t i=0;i<vlen;i++) {
if (idx[i] != i) {
//////////////////////////////////////
// idx[i] is a table of desired sources giving a permutation.
// Swap v[i] with v[idx[i]].
// Find j>i for which _vnew[j] = _vold[i],
// track the move idx[j] => idx[i]
// track the move idx[i] => i
//////////////////////////////////////
size_t j;
for (j=i;j<idx.size();j++)
if (idx[j]==i)
break;
assert(idx[i] > i); assert(j!=idx.size()); assert(idx[j]==i);
std::swap(_v[i]._odata,_v[idx[i]]._odata); // should use vector move constructor, no data copy
std::swap(sort_vals[i],sort_vals[idx[i]]);
idx[j] = idx[i];
idx[i] = i;
}
}
}
inline std::vector<int> basisSortGetIndex(std::vector<RealD>& sort_vals)
{
std::vector<int> idx(sort_vals.size());
std::iota(idx.begin(), idx.end(), 0);
// sort indexes based on comparing values in v
std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) {
return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);
});
return idx;
}
template<class Field>
void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, bool reverse)
{
std::vector<int> idx = basisSortGetIndex(sort_vals);
if (reverse)
std::reverse(idx.begin(), idx.end());
basisReorderInPlace(_v,sort_vals,idx);
}
/////////////////////////////////////////////////////////////
// Implicitly restarted lanczos
/////////////////////////////////////////////////////////////
template<class Field> class ImplicitlyRestartedLanczosTester
{
public:
virtual int TestConvergence(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0;
virtual int ReconstructEval(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0;
};
enum IRLdiagonalisation {
IRLdiagonaliseWithDSTEGR,
IRLdiagonaliseWithQR,
IRLdiagonaliseWithEigen
};
template<class Field> class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester<Field>
{
public:
LinearFunction<Field> &_HermOp;
ImplicitlyRestartedLanczosHermOpTester(LinearFunction<Field> &HermOp) : _HermOp(HermOp) { };
int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox)
{
return TestConvergence(j,resid,B,eval,evalMaxApprox);
}
int TestConvergence(int j,RealD eresid,Field &B, RealD &eval,RealD evalMaxApprox)
{
Field v(B);
RealD eval_poly = eval;
// Apply operator
_HermOp(B,v);
RealD vnum = real(innerProduct(B,v)); // HermOp.
RealD vden = norm2(B);
RealD vv0 = norm2(v);
eval = vnum/vden;
v -= eval*B;
RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);
std::cout.precision(13);
std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] "
<<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
<<std::endl;
int conv=0;
if( (vv<eresid*eresid) ) conv = 1;
return conv;
}
};
template<class Field>
class ImplicitlyRestartedLanczos {
private:
const RealD small = 1.0e-8;
int MaxIter;
int MinRestart; // Minimum number of restarts; only check for convergence after
int Nstop; // Number of evecs checked for convergence
int Nk; // Number of converged sought
// int Np; // Np -- Number of spare vecs in krylov space // == Nm - Nk
int Nm; // Nm -- total number of vectors
IRLdiagonalisation diagonalisation;
int orth_period;
RealD OrthoTime;
RealD eresid, betastp;
////////////////////////////////
// Embedded objects
////////////////////////////////
LinearFunction<Field> &_PolyOp;
LinearFunction<Field> &_HermOp;
ImplicitlyRestartedLanczosTester<Field> &_Tester;
// Default tester provided (we need a ref to something in default case)
ImplicitlyRestartedLanczosHermOpTester<Field> SimpleTester;
/////////////////////////
// Constructor
/////////////////////////
public:
//////////////////////////////////////////////////////////////////
// PAB:
//////////////////////////////////////////////////////////////////
// Too many options & knobs.
// Eliminate:
// orth_period
// betastp
// MinRestart
//
// Do we really need orth_period
// What is the theoretical basis & guarantees of betastp ?
// Nstop=Nk viable?
// MinRestart avoidable with new convergence test?
// Could cut to PolyOp, HermOp, Tester, Nk, Nm, resid, maxiter (+diagonalisation)
// HermOp could be eliminated if we dropped the Power method for max eval.
// -- also: The eval, eval2, eval2_copy stuff is still unnecessarily unclear
//////////////////////////////////////////////////////////////////
ImplicitlyRestartedLanczos(LinearFunction<Field> & PolyOp,
LinearFunction<Field> & HermOp,
ImplicitlyRestartedLanczosTester<Field> & Tester,
int _Nstop, // sought vecs
int _Nk, // sought vecs
int _Nm, // spare vecs
RealD _eresid, // resid in lmdue deficit
int _MaxIter, // Max iterations
RealD _betastp=0.0, // if beta(k) < betastp: converged
int _MinRestart=1, int _orth_period = 1,
IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(Tester),
Nstop(_Nstop) , Nk(_Nk), Nm(_Nm),
eresid(_eresid), betastp(_betastp),
MaxIter(_MaxIter) , MinRestart(_MinRestart),
orth_period(_orth_period), diagonalisation(_diagonalisation) { };
ImplicitlyRestartedLanczos(LinearFunction<Field> & PolyOp,
LinearFunction<Field> & HermOp,
int _Nstop, // sought vecs
int _Nk, // sought vecs
int _Nm, // spare vecs
RealD _eresid, // resid in lmdue deficit
int _MaxIter, // Max iterations
RealD _betastp=0.0, // if beta(k) < betastp: converged
int _MinRestart=1, int _orth_period = 1,
IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(SimpleTester),
Nstop(_Nstop) , Nk(_Nk), Nm(_Nm),
eresid(_eresid), betastp(_betastp),
MaxIter(_MaxIter) , MinRestart(_MinRestart),
orth_period(_orth_period), diagonalisation(_diagonalisation) { };
////////////////////////////////
// Helpers
////////////////////////////////
template<typename T> static RealD normalise(T& v)
{
RealD nn = norm2(v);
nn = sqrt(nn);
v = v * (1.0/nn);
return nn;
}
void orthogonalize(Field& w, std::vector<Field>& evec,int k)
{
OrthoTime-=usecond()/1e6;
basisOrthogonalize(evec,w,k);
normalise(w);
OrthoTime+=usecond()/1e6;
}
/* Rudy Arthur's thesis pp.137
------------------------
Require: M > K P = M K †
Compute the factorization AVM = VM HM + fM eM
repeat
Q=I
for i = 1,...,P do
QiRi =HM θiI Q = QQi
H M = Q †i H M Q i
end for
βK =HM(K+1,K) σK =Q(M,K)
r=vK+1βK +rσK
VK =VM(1:M)Q(1:M,1:K)
HK =HM(1:K,1:K)
→AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM
until convergence
*/
void calc(std::vector<RealD>& eval, std::vector<Field>& evec, const Field& src, int& Nconv, bool reverse=false)
{
GridBase *grid = src._grid;
assert(grid == evec[0]._grid);
GridLogIRL.TimingMode(1);
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl;
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL <<" -- seek Nk = " << Nk <<" vectors"<< std::endl;
std::cout << GridLogIRL <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl;
std::cout << GridLogIRL <<" -- total Nm = " << Nm <<" vectors"<< std::endl;
std::cout << GridLogIRL <<" -- size of eval = " << eval.size() << std::endl;
std::cout << GridLogIRL <<" -- size of evec = " << evec.size() << std::endl;
if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) {
std::cout << GridLogIRL << "Diagonalisation is DSTEGR "<<std::endl;
} else if ( diagonalisation == IRLdiagonaliseWithQR ) {
std::cout << GridLogIRL << "Diagonalisation is QR "<<std::endl;
} else if ( diagonalisation == IRLdiagonaliseWithEigen ) {
std::cout << GridLogIRL << "Diagonalisation is Eigen "<<std::endl;
}
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
assert(Nm <= evec.size() && Nm <= eval.size());
// quickly get an idea of the largest eigenvalue to more properly normalize the residuum
RealD evalMaxApprox = 0.0;
{
auto src_n = src;
auto tmp = src;
const int _MAX_ITER_IRL_MEVAPP_ = 50;
for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) {
normalise(src_n);
_HermOp(src_n,tmp);
RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
RealD vden = norm2(src_n);
RealD na = vnum/vden;
if (fabs(evalMaxApprox/na - 1.0) < 0.05)
i=_MAX_ITER_IRL_MEVAPP_;
evalMaxApprox = na;
std::cout << GridLogIRL << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
src_n = tmp;
}
}
std::vector<RealD> lme(Nm);
std::vector<RealD> lme2(Nm);
std::vector<RealD> eval2(Nm);
std::vector<RealD> eval2_copy(Nm);
Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm);
Field f(grid);
Field v(grid);
int k1 = 1;
int k2 = Nk;
RealD beta_k;
Nconv = 0;
// Set initial vector
evec[0] = src;
normalise(evec[0]);
// Initial Nk steps
OrthoTime=0.;
for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
std::cout<<GridLogIRL <<"Initial "<< Nk <<"steps done "<<std::endl;
std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
//////////////////////////////////
// Restarting loop begins
//////////////////////////////////
int iter;
for(iter = 0; iter<MaxIter; ++iter){
OrthoTime=0.;
std::cout<< GridLogMessage <<" **********************"<< std::endl;
std::cout<< GridLogMessage <<" Restart iteration = "<< iter << std::endl;
std::cout<< GridLogMessage <<" **********************"<< std::endl;
std::cout<<GridLogIRL <<" running "<<Nm-Nk <<" steps: "<<std::endl;
for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k);
f *= lme[Nm-1];
std::cout<<GridLogIRL <<" "<<Nm-Nk <<" steps done "<<std::endl;
std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
//////////////////////////////////
// getting eigenvalues
//////////////////////////////////
for(int k=0; k<Nm; ++k){
eval2[k] = eval[k+k1-1];
lme2[k] = lme[k+k1-1];
}
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
std::cout<<GridLogIRL <<" diagonalized "<<std::endl;
//////////////////////////////////
// sorting
//////////////////////////////////
eval2_copy = eval2;
std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end(),std::greater<RealD>());
std::cout<<GridLogIRL <<" evals sorted "<<std::endl;
const int chunk=8;
for(int io=0; io<k2;io+=chunk){
std::cout<<GridLogIRL << "eval "<< std::setw(3) << io ;
for(int ii=0;ii<chunk;ii++){
if ( (io+ii)<k2 )
std::cout<< " "<< std::setw(12)<< eval2[io+ii];
}
std::cout << std::endl;
}
//////////////////////////////////
// Implicitly shifted QR transformations
//////////////////////////////////
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
for(int ip=k2; ip<Nm; ++ip){
QR_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
}
std::cout<<GridLogIRL <<"QR decomposed "<<std::endl;
assert(k2<Nm); assert(k2<Nm); assert(k1>0);
basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis
std::cout<<GridLogIRL <<"basisRotated by Qt"<<std::endl;
////////////////////////////////////////////////////
// Compressed vector f and beta(k2)
////////////////////////////////////////////////////
f *= Qt(k2-1,Nm-1);
f += lme[k2-1] * evec[k2];
beta_k = norm2(f);
beta_k = sqrt(beta_k);
std::cout<<GridLogIRL<<" beta(k) = "<<beta_k<<std::endl;
RealD betar = 1.0/beta_k;
evec[k2] = betar * f;
lme[k2-1] = beta_k;
////////////////////////////////////////////////////
// Convergence test
////////////////////////////////////////////////////
for(int k=0; k<Nm; ++k){
eval2[k] = eval[k];
lme2[k] = lme[k];
}
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
std::cout<<GridLogIRL <<" Diagonalized "<<std::endl;
Nconv = 0;
if (iter >= MinRestart) {
std::cout << GridLogIRL << "Test convergence: rotate subset of vectors to test convergence " << std::endl;
Field B(grid); B.checkerboard = evec[0].checkerboard;
// power of two search pattern; not every evalue in eval2 is assessed.
int allconv =1;
for(int jj = 1; jj<=Nstop; jj*=2){
int j = Nstop-jj;
RealD e = eval2_copy[j]; // Discard the evalue
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
allconv=0;
}
}
// Do evec[0] for good measure
{
int j=0;
RealD e = eval2_copy[0];
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) allconv=0;
}
if ( allconv ) Nconv = Nstop;
// test if we converged, if so, terminate
std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl;
// if( Nconv>=Nstop || beta_k < betastp){
if( Nconv>=Nstop){
goto converged;
}
} else {
std::cout << GridLogIRL << "iter < MinRestart: do not yet test for convergence\n";
} // end of iter loop
}
std::cout<<GridLogError<<"\n NOT converged.\n";
abort();
converged:
{
Field B(grid); B.checkerboard = evec[0].checkerboard;
basisRotate(evec,Qt,0,Nk,0,Nk,Nm);
std::cout << GridLogIRL << " Rotated basis"<<std::endl;
Nconv=0;
//////////////////////////////////////////////////////////////////////
// Full final convergence test; unconditionally applied
//////////////////////////////////////////////////////////////////////
for(int j = 0; j<=Nk; j++){
B=evec[j];
if( _Tester.ReconstructEval(j,eresid,B,eval2[j],evalMaxApprox) ) {
Nconv++;
}
}
if ( Nconv < Nstop )
std::cout << GridLogIRL << "Nconv ("<<Nconv<<") < Nstop ("<<Nstop<<")"<<std::endl;
eval=eval2;
//Keep only converged
eval.resize(Nconv);// Nstop?
evec.resize(Nconv,grid);// Nstop?
basisSortInPlace(evec,eval,reverse);
}
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL << "ImplicitlyRestartedLanczos CONVERGED ; Summary :\n";
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL << " -- Iterations = "<< iter << "\n";
std::cout << GridLogIRL << " -- beta(k) = "<< beta_k << "\n";
std::cout << GridLogIRL << " -- Nconv = "<< Nconv << "\n";
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
}
private:
/* Saad PP. 195
1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0
2. For k = 1,2,...,m Do:
3. wk:=Avkβkv_{k1}
4. αk:=(wk,vk) //
5. wk:=wkαkvk // wk orthog vk
6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
7. vk+1 := wk/βk+1
8. EndDo
*/
void step(std::vector<RealD>& lmd,
std::vector<RealD>& lme,
std::vector<Field>& evec,
Field& w,int Nm,int k)
{
const RealD tiny = 1.0e-20;
assert( k< Nm );
GridStopWatch gsw_op,gsw_o;
Field& evec_k = evec[k];
_PolyOp(evec_k,w); std::cout<<GridLogIRL << "PolyOp" <<std::endl;
if(k>0) w -= lme[k-1] * evec[k-1];
ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk)
RealD alph = real(zalph);
w = w - alph * evec_k;// 5. wk:=wkαkvk
RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
// 7. vk+1 := wk/βk+1
lmd[k] = alph;
lme[k] = beta;
if (k>0 && k % orth_period == 0) {
orthogonalize(w,evec,k); // orthonormalise
std::cout<<GridLogIRL << "Orthogonalised " <<std::endl;
}
if(k < Nm-1) evec[k+1] = w;
std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
if ( beta < tiny )
std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl;
}
void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme,
int Nk, int Nm,
Eigen::MatrixXd & Qt, // Nm x Nm
GridBase *grid)
{
Eigen::MatrixXd TriDiag = Eigen::MatrixXd::Zero(Nk,Nk);
for(int i=0;i<Nk;i++) TriDiag(i,i) = lmd[i];
for(int i=0;i<Nk-1;i++) TriDiag(i,i+1) = lme[i];
for(int i=0;i<Nk-1;i++) TriDiag(i+1,i) = lme[i];
Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> eigensolver(TriDiag);
for (int i = 0; i < Nk; i++) {
lmd[Nk-1-i] = eigensolver.eigenvalues()(i);
}
for (int i = 0; i < Nk; i++) {
for (int j = 0; j < Nk; j++) {
Qt(Nk-1-i,j) = eigensolver.eigenvectors()(j,i);
}
}
}
///////////////////////////////////////////////////////////////////////////
// File could end here if settle on Eigen ??? !!!
///////////////////////////////////////////////////////////////////////////
void QR_decomp(std::vector<RealD>& lmd, // Nm
std::vector<RealD>& lme, // Nm
int Nk, int Nm, // Nk, Nm
Eigen::MatrixXd& Qt, // Nm x Nm matrix
RealD Dsh, int kmin, int kmax)
{
int k = kmin-1;
RealD x;
RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]);
RealD c = ( lmd[k] -Dsh) *Fden;
RealD s = -lme[k] *Fden;
RealD tmpa1 = lmd[k];
RealD tmpa2 = lmd[k+1];
RealD tmpb = lme[k];
lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
x =-s*lme[k+1];
lme[k+1] = c*lme[k+1];
for(int i=0; i<Nk; ++i){
RealD Qtmp1 = Qt(k,i);
RealD Qtmp2 = Qt(k+1,i);
Qt(k,i) = c*Qtmp1 - s*Qtmp2;
Qt(k+1,i)= s*Qtmp1 + c*Qtmp2;
}
// Givens transformations
for(int k = kmin; k < kmax-1; ++k){
RealD Fden = 1.0/hypot(x,lme[k-1]);
RealD c = lme[k-1]*Fden;
RealD s = - x*Fden;
RealD tmpa1 = lmd[k];
RealD tmpa2 = lmd[k+1];
RealD tmpb = lme[k];
lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
lme[k-1] = c*lme[k-1] -s*x;
if(k != kmax-2){
x = -s*lme[k+1];
lme[k+1] = c*lme[k+1];
}
for(int i=0; i<Nk; ++i){
RealD Qtmp1 = Qt(k,i);
RealD Qtmp2 = Qt(k+1,i);
Qt(k,i) = c*Qtmp1 -s*Qtmp2;
Qt(k+1,i) = s*Qtmp1 +c*Qtmp2;
}
}
}
void diagonalize(std::vector<RealD>& lmd, std::vector<RealD>& lme,
int Nk, int Nm,
Eigen::MatrixXd & Qt,
GridBase *grid)
{
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) {
diagonalize_lapack(lmd,lme,Nk,Nm,Qt,grid);
} else if ( diagonalisation == IRLdiagonaliseWithQR ) {
diagonalize_QR(lmd,lme,Nk,Nm,Qt,grid);
} else if ( diagonalisation == IRLdiagonaliseWithEigen ) {
diagonalize_Eigen(lmd,lme,Nk,Nm,Qt,grid);
} else {
assert(0);
}
}
#ifdef USE_LAPACK
void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
double *vl, double *vu, int *il, int *iu, double *abstol,
int *m, double *w, double *z, int *ldz, int *isuppz,
double *work, int *lwork, int *iwork, int *liwork,
int *info);
#endif
void diagonalize_lapack(std::vector<RealD>& lmd,
std::vector<RealD>& lme,
int Nk, int Nm,
Eigen::MatrixXd& Qt,
GridBase *grid)
{
#ifdef USE_LAPACK
const int size = Nm;
int NN = Nk;
double evals_tmp[NN];
double evec_tmp[NN][NN];
memset(evec_tmp[0],0,sizeof(double)*NN*NN);
double DD[NN];
double EE[NN];
for (int i = 0; i< NN; i++) {
for (int j = i - 1; j <= i + 1; j++) {
if ( j < NN && j >= 0 ) {
if (i==j) DD[i] = lmd[i];
if (i==j) evals_tmp[i] = lmd[i];
if (j==(i-1)) EE[j] = lme[j];
}
}
}
int evals_found;
int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ;
int liwork = 3+NN*10 ;
int iwork[liwork];
double work[lwork];
int isuppz[2*NN];
char jobz = 'V'; // calculate evals & evecs
char range = 'I'; // calculate all evals
// char range = 'A'; // calculate all evals
char uplo = 'U'; // refer to upper half of original matrix
char compz = 'I'; // Compute eigenvectors of tridiagonal matrix
int ifail[NN];
int info;
int total = grid->_Nprocessors;
int node = grid->_processor;
int interval = (NN/total)+1;
double vl = 0.0, vu = 0.0;
int il = interval*node+1 , iu = interval*(node+1);
if (iu > NN) iu=NN;
double tol = 0.0;
if (1) {
memset(evals_tmp,0,sizeof(double)*NN);
if ( il <= NN){
LAPACK_dstegr(&jobz, &range, &NN,
(double*)DD, (double*)EE,
&vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A'
&tol, // tolerance
&evals_found, evals_tmp, (double*)evec_tmp, &NN,
isuppz,
work, &lwork, iwork, &liwork,
&info);
for (int i = iu-1; i>= il-1; i--){
evals_tmp[i] = evals_tmp[i - (il-1)];
if (il>1) evals_tmp[i-(il-1)]=0.;
for (int j = 0; j< NN; j++){
evec_tmp[i][j] = evec_tmp[i - (il-1)][j];
if (il>1) evec_tmp[i-(il-1)][j]=0.;
}
}
}
{
grid->GlobalSumVector(evals_tmp,NN);
grid->GlobalSumVector((double*)evec_tmp,NN*NN);
}
}
// Safer to sort instead of just reversing it,
// but the document of the routine says evals are sorted in increasing order.
// qr gives evals in decreasing order.
for(int i=0;i<NN;i++){
lmd [NN-1-i]=evals_tmp[i];
for(int j=0;j<NN;j++){
Qt((NN-1-i),j)=evec_tmp[i][j];
}
}
#else
assert(0);
#endif
}
void diagonalize_QR(std::vector<RealD>& lmd, std::vector<RealD>& lme,
int Nk, int Nm,
Eigen::MatrixXd & Qt,
GridBase *grid)
{
int QRiter = 100*Nm;
int kmin = 1;
int kmax = Nk;
// (this should be more sophisticated)
for(int iter=0; iter<QRiter; ++iter){
// determination of 2x2 leading submatrix
RealD dsub = lmd[kmax-1]-lmd[kmax-2];
RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]);
RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub)));
// (Dsh: shift)
// transformation
QR_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax); // Nk, Nm
// Convergence criterion (redef of kmin and kamx)
for(int j=kmax-1; j>= kmin; --j){
RealD dds = fabs(lmd[j-1])+fabs(lmd[j]);
if(fabs(lme[j-1])+dds > dds){
kmax = j+1;
goto continued;
}
}
QRiter = iter;
return;
continued:
for(int j=0; j<kmax-1; ++j){
RealD dds = fabs(lmd[j])+fabs(lmd[j+1]);
if(fabs(lme[j])+dds > dds){
kmin = j+1;
break;
}
}
}
std::cout << GridLogError << "[QL method] Error - Too many iteration: "<<QRiter<<"\n";
abort();
}
};
}
#endif

View File

@@ -0,0 +1,406 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/LocalCoherenceLanczos.h
Copyright (C) 2015
Author: Christoph Lehner <clehner@bnl.gov>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_LOCAL_COHERENCE_IRL_H
#define GRID_LOCAL_COHERENCE_IRL_H
namespace Grid {
struct LanczosParams : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams,
ChebyParams, Cheby,/*Chebyshev*/
int, Nstop, /*Vecs in Lanczos must converge Nstop < Nk < Nm*/
int, Nk, /*Vecs in Lanczos seek converge*/
int, Nm, /*Total vecs in Lanczos include restart*/
RealD, resid, /*residual*/
int, MaxIt,
RealD, betastp, /* ? */
int, MinRes); // Must restart
};
struct LocalCoherenceLanczosParams : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
bool, saveEvecs,
bool, doFine,
bool, doFineRead,
bool, doCoarse,
bool, doCoarseRead,
LanczosParams, FineParams,
LanczosParams, CoarseParams,
ChebyParams, Smoother,
RealD , coarse_relax_tol,
std::vector<int>, blockSize,
std::string, config,
std::vector < std::complex<double> >, omega,
RealD, mass,
RealD, M5);
};
// Duplicate functionality; ProjectedFunctionHermOp could be used with the trivial function
template<class Fobj,class CComplex,int nbasis>
class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
public:
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
typedef Lattice<Fobj> FineField;
LinearOperatorBase<FineField> &_Linop;
std::vector<FineField> &subspace;
ProjectedHermOp(LinearOperatorBase<FineField>& linop, std::vector<FineField> & _subspace) :
_Linop(linop), subspace(_subspace)
{
assert(subspace.size() >0);
};
void operator()(const CoarseField& in, CoarseField& out) {
GridBase *FineGrid = subspace[0]._grid;
int checkerboard = subspace[0].checkerboard;
FineField fin (FineGrid); fin.checkerboard= checkerboard;
FineField fout(FineGrid); fout.checkerboard = checkerboard;
blockPromote(in,fin,subspace); std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl;
_Linop.HermOp(fin,fout); std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl;
blockProject(out,fout,subspace); std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl;
}
};
template<class Fobj,class CComplex,int nbasis>
class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
public:
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
typedef Lattice<Fobj> FineField;
OperatorFunction<FineField> & _poly;
LinearOperatorBase<FineField> &_Linop;
std::vector<FineField> &subspace;
ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,
LinearOperatorBase<FineField>& linop,
std::vector<FineField> & _subspace) :
_poly(poly),
_Linop(linop),
subspace(_subspace)
{ };
void operator()(const CoarseField& in, CoarseField& out) {
GridBase *FineGrid = subspace[0]._grid;
int checkerboard = subspace[0].checkerboard;
FineField fin (FineGrid); fin.checkerboard =checkerboard;
FineField fout(FineGrid);fout.checkerboard =checkerboard;
blockPromote(in,fin,subspace); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl;
_poly(_Linop,fin,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl;
blockProject(out,fout,subspace); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl;
}
};
template<class Fobj,class CComplex,int nbasis>
class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanczosTester<Lattice<iVector<CComplex,nbasis > > >
{
public:
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
typedef Lattice<Fobj> FineField;
LinearFunction<CoarseField> & _Poly;
OperatorFunction<FineField> & _smoother;
LinearOperatorBase<FineField> &_Linop;
RealD _coarse_relax_tol;
std::vector<FineField> &_subspace;
ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField> &Poly,
OperatorFunction<FineField> &smoother,
LinearOperatorBase<FineField> &Linop,
std::vector<FineField> &subspace,
RealD coarse_relax_tol=5.0e3)
: _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace),
_coarse_relax_tol(coarse_relax_tol)
{ };
int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
{
CoarseField v(B);
RealD eval_poly = eval;
// Apply operator
_Poly(B,v);
RealD vnum = real(innerProduct(B,v)); // HermOp.
RealD vden = norm2(B);
RealD vv0 = norm2(v);
eval = vnum/vden;
v -= eval*B;
RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);
std::cout.precision(13);
std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] "
<<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
<<std::endl;
int conv=0;
if( (vv<eresid*eresid) ) conv = 1;
return conv;
}
int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
{
GridBase *FineGrid = _subspace[0]._grid;
int checkerboard = _subspace[0].checkerboard;
FineField fB(FineGrid);fB.checkerboard =checkerboard;
FineField fv(FineGrid);fv.checkerboard =checkerboard;
blockPromote(B,fv,_subspace);
_smoother(_Linop,fv,fB);
RealD eval_poly = eval;
_Linop.HermOp(fB,fv);
RealD vnum = real(innerProduct(fB,fv)); // HermOp.
RealD vden = norm2(fB);
RealD vv0 = norm2(fv);
eval = vnum/vden;
fv -= eval*fB;
RealD vv = norm2(fv) / ::pow(evalMaxApprox,2.0);
std::cout.precision(13);
std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] "
<<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
<<std::endl;
if ( j > nbasis ) eresid = eresid*_coarse_relax_tol;
if( (vv<eresid*eresid) ) return 1;
return 0;
}
};
////////////////////////////////////////////
// Make serializable Lanczos params
////////////////////////////////////////////
template<class Fobj,class CComplex,int nbasis>
class LocalCoherenceLanczos
{
public:
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<Fobj> FineField;
protected:
GridBase *_CoarseGrid;
GridBase *_FineGrid;
int _checkerboard;
LinearOperatorBase<FineField> & _FineOp;
std::vector<RealD> &evals_fine;
std::vector<RealD> &evals_coarse;
std::vector<FineField> &subspace;
std::vector<CoarseField> &evec_coarse;
private:
std::vector<RealD> _evals_fine;
std::vector<RealD> _evals_coarse;
std::vector<FineField> _subspace;
std::vector<CoarseField> _evec_coarse;
public:
LocalCoherenceLanczos(GridBase *FineGrid,
GridBase *CoarseGrid,
LinearOperatorBase<FineField> &FineOp,
int checkerboard) :
_CoarseGrid(CoarseGrid),
_FineGrid(FineGrid),
_FineOp(FineOp),
_checkerboard(checkerboard),
evals_fine (_evals_fine),
evals_coarse(_evals_coarse),
subspace (_subspace),
evec_coarse(_evec_coarse)
{
evals_fine.resize(0);
evals_coarse.resize(0);
};
//////////////////////////////////////////////////////////////////////////
// Alternate constructore, external storage for use by Hadrons module
//////////////////////////////////////////////////////////////////////////
LocalCoherenceLanczos(GridBase *FineGrid,
GridBase *CoarseGrid,
LinearOperatorBase<FineField> &FineOp,
int checkerboard,
std::vector<FineField> &ext_subspace,
std::vector<CoarseField> &ext_coarse,
std::vector<RealD> &ext_eval_fine,
std::vector<RealD> &ext_eval_coarse
) :
_CoarseGrid(CoarseGrid),
_FineGrid(FineGrid),
_FineOp(FineOp),
_checkerboard(checkerboard),
evals_fine (ext_eval_fine),
evals_coarse(ext_eval_coarse),
subspace (ext_subspace),
evec_coarse (ext_coarse)
{
evals_fine.resize(0);
evals_coarse.resize(0);
};
void Orthogonalise(void ) {
CoarseScalar InnerProd(_CoarseGrid);
std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
blockOrthogonalise(InnerProd,subspace);
std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
blockOrthogonalise(InnerProd,subspace);
};
template<typename T> static RealD normalise(T& v)
{
RealD nn = norm2(v);
nn = ::sqrt(nn);
v = v * (1.0/nn);
return nn;
}
/*
void fakeFine(void)
{
int Nk = nbasis;
subspace.resize(Nk,_FineGrid);
subspace[0]=1.0;
subspace[0].checkerboard=_checkerboard;
normalise(subspace[0]);
PlainHermOp<FineField> Op(_FineOp);
for(int k=1;k<Nk;k++){
subspace[k].checkerboard=_checkerboard;
Op(subspace[k-1],subspace[k]);
normalise(subspace[k]);
}
}
*/
void testFine(RealD resid)
{
assert(evals_fine.size() == nbasis);
assert(subspace.size() == nbasis);
PlainHermOp<FineField> Op(_FineOp);
ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op);
for(int k=0;k<nbasis;k++){
assert(SimpleTester.ReconstructEval(k,resid,subspace[k],evals_fine[k],1.0)==1);
}
}
void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax)
{
assert(evals_fine.size() == nbasis);
assert(subspace.size() == nbasis);
//////////////////////////////////////////////////////////////////////////////////////////////////
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
//////////////////////////////////////////////////////////////////////////////////////////////////
Chebyshev<FineField> ChebySmooth(cheby_smooth);
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,subspace);
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
for(int k=0;k<evec_coarse.size();k++){
if ( k < nbasis ) {
assert(ChebySmoothTester.ReconstructEval(k,resid,evec_coarse[k],evals_coarse[k],1.0)==1);
} else {
assert(ChebySmoothTester.ReconstructEval(k,resid*relax,evec_coarse[k],evals_coarse[k],1.0)==1);
}
}
}
void calcFine(ChebyParams cheby_parms,int Nstop,int Nk,int Nm,RealD resid,
RealD MaxIt, RealD betastp, int MinRes)
{
assert(nbasis<=Nm);
Chebyshev<FineField> Cheby(cheby_parms);
FunctionHermOp<FineField> ChebyOp(Cheby,_FineOp);
PlainHermOp<FineField> Op(_FineOp);
evals_fine.resize(Nm);
subspace.resize(Nm,_FineGrid);
ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard;
int Nconv;
IRL.calc(evals_fine,subspace,src,Nconv,false);
// Shrink down to number saved
assert(Nstop>=nbasis);
assert(Nconv>=nbasis);
evals_fine.resize(nbasis);
subspace.resize(nbasis,_FineGrid);
}
void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax,
int Nstop, int Nk, int Nm,RealD resid,
RealD MaxIt, RealD betastp, int MinRes)
{
Chebyshev<FineField> Cheby(cheby_op);
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,subspace);
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace);
//////////////////////////////////////////////////////////////////////////////////////////////////
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
//////////////////////////////////////////////////////////////////////////////////////////////////
Chebyshev<FineField> ChebySmooth(cheby_smooth);
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
evals_coarse.resize(Nm);
evec_coarse.resize(Nm,_CoarseGrid);
CoarseField src(_CoarseGrid); src=1.0;
ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
int Nconv=0;
IRL.calc(evals_coarse,evec_coarse,src,Nconv,false);
assert(Nconv>=Nstop);
evals_coarse.resize(Nstop);
evec_coarse.resize (Nstop,_CoarseGrid);
for (int i=0;i<Nstop;i++){
std::cout << i << " Coarse eval = " << evals_coarse[i] << std::endl;
}
}
};
}
#endif

View File

@@ -0,0 +1,156 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/MinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_MINIMAL_RESIDUAL_H
#define GRID_MINIMAL_RESIDUAL_H
namespace Grid {
template<class Field> class MinimalResidual : public OperatorFunction<Field> {
public:
bool ErrorOnNoConverge; // throw an assert when the MR fails to converge.
// Defaults true.
RealD Tolerance;
Integer MaxIterations;
RealD overRelaxParam;
Integer IterationsToComplete; // Number of iterations the MR took to finish.
// Filled in upon completion
MinimalResidual(RealD tol, Integer maxit, Real ovrelparam = 1.0, bool err_on_no_conv = true)
: Tolerance(tol), MaxIterations(maxit), overRelaxParam(ovrelparam), ErrorOnNoConverge(err_on_no_conv){};
void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);
Complex a, c;
Real d;
Field Mr(src);
Field r(src);
// Initial residual computation & set up
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
RealD ssq = norm2(src);
RealD rsq = Tolerance * Tolerance * ssq;
Linop.Op(psi, Mr);
r = src - Mr;
RealD cp = norm2(r);
std::cout << std::setprecision(4) << std::scientific;
std::cout << GridLogIterative << "MinimalResidual: guess " << guess << std::endl;
std::cout << GridLogIterative << "MinimalResidual: src " << ssq << std::endl;
std::cout << GridLogIterative << "MinimalResidual: mp " << d << std::endl;
std::cout << GridLogIterative << "MinimalResidual: cp,r " << cp << std::endl;
if (cp <= rsq) {
return;
}
std::cout << GridLogIterative << "MinimalResidual: k=0 residual " << cp << " target " << rsq << std::endl;
GridStopWatch LinalgTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
MatrixTimer.Start();
Linop.Op(r, Mr);
MatrixTimer.Stop();
LinalgTimer.Start();
c = innerProduct(Mr, r);
d = norm2(Mr);
a = c / d;
a = a * overRelaxParam;
psi = psi + r * a;
r = r - Mr * a;
cp = norm2(r);
LinalgTimer.Stop();
std::cout << GridLogIterative << "MinimalResidual: Iteration " << k
<< " residual " << cp << " target " << rsq << std::endl;
std::cout << GridLogDebug << "a = " << a << " c = " << c << " d = " << d << std::endl;
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
Linop.Op(psi, Mr);
r = src - Mr;
RealD srcnorm = sqrt(ssq);
RealD resnorm = sqrt(norm2(r));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "MinimalResidual Converged on iteration " << k
<< " computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual
<< " target " << Tolerance << std::endl;
std::cout << GridLogMessage << "MR Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MR Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MR Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
if (ErrorOnNoConverge)
assert(true_residual / Tolerance < 10000.0);
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "MinimalResidual did NOT converge"
<< std::endl;
if (ErrorOnNoConverge)
assert(0);
IterationsToComplete = k;
}
};
} // namespace Grid
#endif

View File

@@ -0,0 +1,273 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_MIXED_PRECISION_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_MIXED_PRECISION_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
template<class FieldD, class FieldF, typename std::enable_if<getPrecision<FieldD>::value == 2, int>::type = 0, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction<FieldD> {
public:
bool ErrorOnNoConverge; // Throw an assert when MPFGMRES fails to converge,
// defaults to true
RealD Tolerance;
Integer MaxIterations;
Integer RestartLength;
Integer MaxNumberOfRestarts;
Integer IterationCount; // Number of iterations the MPFGMRES took to finish,
// filled in upon completion
GridStopWatch MatrixTimer;
GridStopWatch PrecTimer;
GridStopWatch LinalgTimer;
GridStopWatch QrTimer;
GridStopWatch CompSolutionTimer;
GridStopWatch ChangePrecTimer;
Eigen::MatrixXcd H;
std::vector<std::complex<double>> y;
std::vector<std::complex<double>> gamma;
std::vector<std::complex<double>> c;
std::vector<std::complex<double>> s;
GridBase* SinglePrecGrid;
LinearFunction<FieldF> &Preconditioner;
MixedPrecisionFlexibleGeneralisedMinimalResidual(RealD tol,
Integer maxit,
GridBase * sp_grid,
LinearFunction<FieldF> &Prec,
Integer restart_length,
bool err_on_no_conv = true)
: Tolerance(tol)
, MaxIterations(maxit)
, RestartLength(restart_length)
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
, ErrorOnNoConverge(err_on_no_conv)
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
, y(RestartLength + 1, 0.)
, gamma(RestartLength + 1, 0.)
, c(RestartLength + 1, 0.)
, s(RestartLength + 1, 0.)
, SinglePrecGrid(sp_grid)
, Preconditioner(Prec) {};
void operator()(LinearOperatorBase<FieldD> &LinOp, const FieldD &src, FieldD &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
RealD cp;
RealD ssq = norm2(src);
RealD rsq = Tolerance * Tolerance * ssq;
FieldD r(src._grid);
std::cout << std::setprecision(4) << std::scientific;
std::cout << GridLogIterative << "MPFGMRES: guess " << guess << std::endl;
std::cout << GridLogIterative << "MPFGMRES: src " << ssq << std::endl;
PrecTimer.Reset();
MatrixTimer.Reset();
LinalgTimer.Reset();
QrTimer.Reset();
CompSolutionTimer.Reset();
ChangePrecTimer.Reset();
GridStopWatch SolverTimer;
SolverTimer.Start();
IterationCount = 0;
for (int k=0; k<MaxNumberOfRestarts; k++) {
cp = outerLoopBody(LinOp, src, psi, rsq);
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
LinOp.Op(psi,r);
axpy(r,-1.0,src,r);
RealD srcnorm = sqrt(ssq);
RealD resnorm = sqrt(norm2(r));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "MPFGMRES: Converged on iteration " << IterationCount
<< " computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual
<< " target " << Tolerance << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: Precon " << PrecTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
std::cout << GridLogMessage << "MPFGMRES Time elapsed: PrecChange " << ChangePrecTimer.Elapsed() << std::endl;
return;
}
}
std::cout << GridLogMessage << "MPFGMRES did NOT converge" << std::endl;
if (ErrorOnNoConverge)
assert(0);
}
RealD outerLoopBody(LinearOperatorBase<FieldD> &LinOp, const FieldD &src, FieldD &psi, RealD rsq) {
RealD cp = 0;
FieldD w(src._grid);
FieldD r(src._grid);
// these should probably be made class members so that they are only allocated once, not in every restart
std::vector<FieldD> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
std::vector<FieldD> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;
MatrixTimer.Start();
LinOp.Op(psi, w);
MatrixTimer.Stop();
LinalgTimer.Start();
r = src - w;
gamma[0] = sqrt(norm2(r));
v[0] = (1. / gamma[0]) * r;
LinalgTimer.Stop();
for (int i=0; i<RestartLength; i++) {
IterationCount++;
arnoldiStep(LinOp, v, z, w, i);
qrUpdate(i);
cp = std::norm(gamma[i+1]);
std::cout << GridLogIterative << "MPFGMRES: Iteration " << IterationCount
<< " residual " << cp << " target " << rsq << std::endl;
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
computeSolution(z, psi, i);
return cp;
}
}
assert(0); // Never reached
return cp;
}
void arnoldiStep(LinearOperatorBase<FieldD> &LinOp, std::vector<FieldD> &v, std::vector<FieldD> &z, FieldD &w, int iter) {
FieldF v_f(SinglePrecGrid);
FieldF z_f(SinglePrecGrid);
ChangePrecTimer.Start();
precisionChange(v_f, v[iter]);
precisionChange(z_f, z[iter]);
ChangePrecTimer.Stop();
PrecTimer.Start();
Preconditioner(v_f, z_f);
PrecTimer.Stop();
ChangePrecTimer.Start();
precisionChange(z[iter], z_f);
ChangePrecTimer.Stop();
MatrixTimer.Start();
LinOp.Op(z[iter], w);
MatrixTimer.Stop();
LinalgTimer.Start();
for (int i = 0; i <= iter; ++i) {
H(iter, i) = innerProduct(v[i], w);
w = w - H(iter, i) * v[i];
}
H(iter, iter + 1) = sqrt(norm2(w));
v[iter + 1] = (1. / H(iter, iter + 1)) * w;
LinalgTimer.Stop();
}
void qrUpdate(int iter) {
QrTimer.Start();
for (int i = 0; i < iter ; ++i) {
auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
H(iter, i + 1) = tmp;
}
// Compute new Givens Rotation
ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
c[iter] = H(iter, iter) / nu;
s[iter] = H(iter, iter + 1) / nu;
// Apply new Givens rotation
H(iter, iter) = nu;
H(iter, iter + 1) = 0.;
gamma[iter + 1] = -s[iter] * gamma[iter];
gamma[iter] = std::conj(c[iter]) * gamma[iter];
QrTimer.Stop();
}
void computeSolution(std::vector<FieldD> const &z, FieldD &psi, int iter) {
CompSolutionTimer.Start();
for (int i = iter; i >= 0; i--) {
y[i] = gamma[i];
for (int k = i + 1; k <= iter; k++)
y[i] = y[i] - H(k, i) * y[k];
y[i] = y[i] / H(i, i);
}
for (int i = 0; i <= iter; i++)
psi = psi + z[i] * y[i];
CompSolutionTimer.Stop();
}
};
}
#endif

View File

@@ -139,7 +139,10 @@ namespace Grid {
MatTimer.Start();
Linop.HermOpAndNorm(psi,Az,zAz,zAAz);
MatTimer.Stop();
LinalgTimer.Start();
r=src-Az;
LinalgTimer.Stop();
/////////////////////
// p = Prec(r)
@@ -152,8 +155,10 @@ namespace Grid {
Linop.HermOp(z,tmp);
MatTimer.Stop();
LinalgTimer.Start();
ttmp=tmp;
tmp=tmp-r;
LinalgTimer.Stop();
/*
std::cout<<GridLogMessage<<r<<std::endl;
@@ -166,12 +171,14 @@ namespace Grid {
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
MatTimer.Stop();
LinalgTimer.Start();
//p[0],q[0],qq[0]
p[0]= z;
q[0]= Az;
qq[0]= zAAz;
cp =norm2(r);
LinalgTimer.Stop();
for(int k=0;k<nstep;k++){
@@ -181,12 +188,14 @@ namespace Grid {
int peri_k = k %mmax;
int peri_kp= kp%mmax;
LinalgTimer.Start();
rq= real(innerProduct(r,q[peri_k])); // what if rAr not real?
a = rq/qq[peri_k];
axpy(psi,a,p[peri_k],psi);
cp = axpy_norm(r,-a,q[peri_k],r);
LinalgTimer.Stop();
if((k==nstep-1)||(cp<rsq)){
return cp;
@@ -202,6 +211,8 @@ namespace Grid {
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
Linop.HermOp(z,tmp);
MatTimer.Stop();
LinalgTimer.Start();
tmp=tmp-r;
std::cout<<GridLogMessage<< " Preconditioner resid " <<sqrt(norm2(tmp)/norm2(r))<<std::endl;
@@ -219,9 +230,9 @@ namespace Grid {
}
qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm
LinalgTimer.Stop();
}
assert(0); // never reached
return cp;
}

View File

@@ -0,0 +1,473 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/SchurRedBlack.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_SCHUR_RED_BLACK_H
#define GRID_SCHUR_RED_BLACK_H
/*
* Red black Schur decomposition
*
* M = (Mee Meo) = (1 0 ) (Mee 0 ) (1 Mee^{-1} Meo)
* (Moe Moo) (Moe Mee^-1 1 ) (0 Moo-Moe Mee^-1 Meo) (0 1 )
* = L D U
*
* L^-1 = (1 0 )
* (-MoeMee^{-1} 1 )
* L^{dag} = ( 1 Mee^{-dag} Moe^{dag} )
* ( 0 1 )
* L^{-d} = ( 1 -Mee^{-dag} Moe^{dag} )
* ( 0 1 )
*
* U^-1 = (1 -Mee^{-1} Meo)
* (0 1 )
* U^{dag} = ( 1 0)
* (Meo^dag Mee^{-dag} 1)
* U^{-dag} = ( 1 0)
* (-Meo^dag Mee^{-dag} 1)
***********************
* M psi = eta
***********************
*Odd
* i) D_oo psi_o = L^{-1} eta_o
* eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
*
* Wilson:
* (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o
* Stag:
* D_oo psi_o = L^{-1} eta = (eta_o - Moe Mee^{-1} eta_e)
*
* L^-1 eta_o= (1 0 ) (e
* (-MoeMee^{-1} 1 )
*
*Even
* ii) Mee psi_e + Meo psi_o = src_e
*
* => sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
*
*
* TODO: Other options:
*
* a) change checkerboards for Schur e<->o
*
* Left precon by Moo^-1
* b) Doo^{dag} M_oo^-dag Moo^-1 Doo psi_0 = (D_oo)^dag M_oo^-dag Moo^-1 L^{-1} eta_o
* eta_o' = (D_oo)^dag M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e)
*
* Right precon by Moo^-1
* c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1} eta_o
* eta_o' = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
* psi_o = M_oo^-1 phi_o
* TODO: Deflation
*/
namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Use base class to share code
///////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Take a matrix and form a Red Black solver calling a Herm solver
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SchurRedBlackBase {
protected:
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
OperatorFunction<Field> & _HermitianRBSolver;
int CBfactorise;
bool subGuess;
public:
SchurRedBlackBase(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
_HermitianRBSolver(HermitianRBSolver)
{
CBfactorise = 0;
subtractGuess(initSubGuess);
};
void subtractGuess(const bool initSubGuess)
{
subGuess = initSubGuess;
}
bool isSubtractGuess(void)
{
return subGuess;
}
/////////////////////////////////////////////////////////////
// Shared code
/////////////////////////////////////////////////////////////
void operator() (Matrix & _Matrix,const Field &in, Field &out){
ZeroGuesser<Field> guess;
(*this)(_Matrix,in,out,guess);
}
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out)
{
ZeroGuesser<Field> guess;
(*this)(_Matrix,in,out,guess);
}
template<class Guesser>
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)
{
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
int nblock = in.size();
std::vector<Field> src_o(nblock,grid);
std::vector<Field> sol_o(nblock,grid);
std::vector<Field> guess_save;
Field resid(fgrid);
Field tmp(grid);
////////////////////////////////////////////////
// Prepare RedBlack source
////////////////////////////////////////////////
for(int b=0;b<nblock;b++){
RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
}
////////////////////////////////////////////////
// Make the guesses
////////////////////////////////////////////////
if ( subGuess ) guess_save.resize(nblock,grid);
for(int b=0;b<nblock;b++){
guess(src_o[b],sol_o[b]);
if ( subGuess ) {
guess_save[b] = sol_o[b];
}
}
//////////////////////////////////////////////////////////////
// Call the block solver
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlackBase calling the solver for "<<nblock<<" RHS" <<std::endl;
RedBlackSolve(_Matrix,src_o,sol_o);
////////////////////////////////////////////////
// A2A boolean behavioural control & reconstruct other checkerboard
////////////////////////////////////////////////
for(int b=0;b<nblock;b++) {
if (subGuess) sol_o[b] = sol_o[b] - guess_save[b];
///////// Needs even source //////////////
pickCheckerboard(Even,tmp,in[b]);
RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]);
/////////////////////////////////////////////////
// Check unprec residual if possible
/////////////////////////////////////////////////
if ( ! subGuess ) {
_Matrix.M(out[b],resid);
resid = resid-in[b];
RealD ns = norm2(in[b]);
RealD nr = norm2(resid);
std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl;
} else {
std::cout<<GridLogMessage<< "SchurRedBlackBase Guess subtracted after solve["<<b<<"] " << std::endl;
}
}
}
template<class Guesser>
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
// FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
Field resid(fgrid);
Field src_o(grid);
Field src_e(grid);
Field sol_o(grid);
////////////////////////////////////////////////
// RedBlack source
////////////////////////////////////////////////
RedBlackSource(_Matrix,in,src_e,src_o);
////////////////////////////////
// Construct the guess
////////////////////////////////
Field tmp(grid);
guess(src_o,sol_o);
Field guess_save(grid);
guess_save = sol_o;
//////////////////////////////////////////////////////////////
// Call the red-black solver
//////////////////////////////////////////////////////////////
RedBlackSolve(_Matrix,src_o,sol_o);
////////////////////////////////////////////////
// Fionn A2A boolean behavioural control
////////////////////////////////////////////////
if (subGuess) sol_o= sol_o-guess_save;
///////////////////////////////////////////////////
// RedBlack solution needs the even source
///////////////////////////////////////////////////
RedBlackSolution(_Matrix,sol_o,src_e,out);
// Verify the unprec residual
if ( ! subGuess ) {
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout<<GridLogMessage << "SchurRedBlackBase solver true unprec resid "<< std::sqrt(nr/ns) << std::endl;
} else {
std::cout << GridLogMessage << "SchurRedBlackBase Guess subtracted after solve." << std::endl;
}
}
/////////////////////////////////////////////////////////////
// Override in derived. Not virtual as template methods
/////////////////////////////////////////////////////////////
virtual void RedBlackSource (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) =0;
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol) =0;
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o) =0;
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)=0;
};
template<class Field> class SchurRedBlackStaggeredSolve : public SchurRedBlackBase<Field> {
public:
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
: SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess)
{
}
//////////////////////////////////////////////////////
// Override RedBlack specialisation
//////////////////////////////////////////////////////
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
{
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
Field tmp(grid);
Field Mtmp(grid);
pickCheckerboard(Even,src_e,src);
pickCheckerboard(Odd ,src_o,src);
/////////////////////////////////////////////////////
// src_o = (source_o - Moe MeeInv source_e)
/////////////////////////////////////////////////////
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
_Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm.
}
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e_c,Field &sol)
{
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
Field tmp(grid);
Field sol_e(grid);
Field src_e(grid);
src_e = src_e_c; // Const correctness
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
///////////////////////////////////////////////////
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(sol,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(sol,sol_o); assert( sol_o.checkerboard ==Odd );
}
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
{
SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
};
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
{
SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
}
};
template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Site diagonal has Mooee on it.
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SchurRedBlackDiagMooeeSolve : public SchurRedBlackBase<Field> {
public:
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
: SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess) {};
//////////////////////////////////////////////////////
// Override RedBlack specialisation
//////////////////////////////////////////////////////
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
{
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
Field tmp(grid);
Field Mtmp(grid);
pickCheckerboard(Even,src_e,src);
pickCheckerboard(Odd ,src_o,src);
/////////////////////////////////////////////////////
// src_o = Mdag * (source_o - Moe MeeInv source_e)
/////////////////////////////////////////////////////
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
// get the right MpcDag
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
}
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
{
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
Field tmp(grid);
Field sol_e(grid);
Field src_e_i(grid);
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
///////////////////////////////////////////////////
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
src_e_i = src_e-tmp; assert( src_e_i.checkerboard ==Even);
_Matrix.MooeeInv(src_e_i,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(sol,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(sol,sol_o); assert( sol_o.checkerboard ==Odd );
}
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
{
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
};
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
{
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
}
};
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Site diagonal is identity, right preconditioned by Mee^inv
// ( 1 - Meo Moo^inv Moe Mee^inv ) phi =( 1 - Meo Moo^inv Moe Mee^inv ) Mee psi = = eta = eta
//=> psi = MeeInv phi
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SchurRedBlackDiagTwoSolve : public SchurRedBlackBase<Field> {
public:
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
/////////////////////////////////////////////////////
// Wrap the usual normal equations Schur trick
/////////////////////////////////////////////////////
SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
: SchurRedBlackBase<Field>(HermitianRBSolver,initSubGuess) {};
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
{
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
Field tmp(grid);
Field Mtmp(grid);
pickCheckerboard(Even,src_e,src);
pickCheckerboard(Odd ,src_o,src);
/////////////////////////////////////////////////////
// src_o = Mdag * (source_o - Moe MeeInv source_e)
/////////////////////////////////////////////////////
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
// get the right MpcDag
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
}
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
{
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
Field sol_o_i(grid);
Field tmp(grid);
Field sol_e(grid);
////////////////////////////////////////////////
// MooeeInv due to pecond
////////////////////////////////////////////////
_Matrix.MooeeInv(sol_o,tmp);
sol_o_i = tmp;
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
///////////////////////////////////////////////////
_Matrix.Meooe(sol_o_i,tmp); assert( tmp.checkerboard ==Even);
tmp = src_e-tmp; assert( src_e.checkerboard ==Even);
_Matrix.MooeeInv(tmp,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(sol,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(sol,sol_o_i); assert( sol_o_i.checkerboard ==Odd );
};
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
{
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
};
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
{
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
}
};
}
#endif

View File

@@ -0,0 +1,125 @@
#include <Grid/GridCore.h>
#include <fcntl.h>
namespace Grid {
MemoryStats *MemoryProfiler::stats = nullptr;
bool MemoryProfiler::debug = false;
int PointerCache::victim;
PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];
void *PointerCache::Insert(void *ptr,size_t bytes) {
if (bytes < 4096 ) return ptr;
#ifdef GRID_OMP
assert(omp_in_parallel()==0);
#endif
void * ret = NULL;
int v = -1;
for(int e=0;e<Ncache;e++) {
if ( Entries[e].valid==0 ) {
v=e;
break;
}
}
if ( v==-1 ) {
v=victim;
victim = (victim+1)%Ncache;
}
if ( Entries[v].valid ) {
ret = Entries[v].address;
Entries[v].valid = 0;
Entries[v].address = NULL;
Entries[v].bytes = 0;
}
Entries[v].address=ptr;
Entries[v].bytes =bytes;
Entries[v].valid =1;
return ret;
}
void *PointerCache::Lookup(size_t bytes) {
if (bytes < 4096 ) return NULL;
#ifdef _OPENMP
assert(omp_in_parallel()==0);
#endif
for(int e=0;e<Ncache;e++){
if ( Entries[e].valid && ( Entries[e].bytes == bytes ) ) {
Entries[e].valid = 0;
return Entries[e].address;
}
}
return NULL;
}
void check_huge_pages(void *Buf,uint64_t BYTES)
{
#ifdef __linux__
int fd = open("/proc/self/pagemap", O_RDONLY);
assert(fd >= 0);
const int page_size = 4096;
uint64_t virt_pfn = (uint64_t)Buf / page_size;
off_t offset = sizeof(uint64_t) * virt_pfn;
uint64_t npages = (BYTES + page_size-1) / page_size;
uint64_t pagedata[npages];
uint64_t ret = lseek(fd, offset, SEEK_SET);
assert(ret == offset);
ret = ::read(fd, pagedata, sizeof(uint64_t)*npages);
assert(ret == sizeof(uint64_t) * npages);
int nhugepages = npages / 512;
int n4ktotal, nnothuge;
n4ktotal = 0;
nnothuge = 0;
for (int i = 0; i < nhugepages; ++i) {
uint64_t baseaddr = (pagedata[i*512] & 0x7fffffffffffffULL) * page_size;
for (int j = 0; j < 512; ++j) {
uint64_t pageaddr = (pagedata[i*512+j] & 0x7fffffffffffffULL) * page_size;
++n4ktotal;
if (pageaddr != baseaddr + j * page_size)
++nnothuge;
}
}
int rank = CartesianCommunicator::RankWorld();
printf("rank %d Allocated %d 4k pages, %d not in huge pages\n", rank, n4ktotal, nnothuge);
#endif
}
std::string sizeString(const size_t bytes)
{
constexpr unsigned int bufSize = 256;
const char *suffixes[7] = {"", "K", "M", "G", "T", "P", "E"};
char buf[256];
size_t s = 0;
double count = bytes;
while (count >= 1024 && s < 7)
{
s++;
count /= 1024;
}
if (count - floor(count) == 0.0)
{
snprintf(buf, bufSize, "%d %sB", (int)count, suffixes[s]);
}
else
{
snprintf(buf, bufSize, "%.1f %sB", count, suffixes[s]);
}
return std::string(buf);
}
}

View File

@@ -64,6 +64,66 @@ namespace Grid {
};
std::string sizeString(size_t bytes);
struct MemoryStats
{
size_t totalAllocated{0}, maxAllocated{0},
currentlyAllocated{0}, totalFreed{0};
};
class MemoryProfiler
{
public:
static MemoryStats *stats;
static bool debug;
};
#define memString(bytes) std::to_string(bytes) + " (" + sizeString(bytes) + ")"
#define profilerDebugPrint \
if (MemoryProfiler::stats)\
{\
auto s = MemoryProfiler::stats;\
std::cout << GridLogDebug << "[Memory debug] Stats " << MemoryProfiler::stats << std::endl;\
std::cout << GridLogDebug << "[Memory debug] total : " << memString(s->totalAllocated) \
<< std::endl;\
std::cout << GridLogDebug << "[Memory debug] max : " << memString(s->maxAllocated) \
<< std::endl;\
std::cout << GridLogDebug << "[Memory debug] current: " << memString(s->currentlyAllocated) \
<< std::endl;\
std::cout << GridLogDebug << "[Memory debug] freed : " << memString(s->totalFreed) \
<< std::endl;\
}
#define profilerAllocate(bytes)\
if (MemoryProfiler::stats)\
{\
auto s = MemoryProfiler::stats;\
s->totalAllocated += (bytes);\
s->currentlyAllocated += (bytes);\
s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated);\
}\
if (MemoryProfiler::debug)\
{\
std::cout << GridLogDebug << "[Memory debug] allocating " << memString(bytes) << std::endl;\
profilerDebugPrint;\
}
#define profilerFree(bytes)\
if (MemoryProfiler::stats)\
{\
auto s = MemoryProfiler::stats;\
s->totalFreed += (bytes);\
s->currentlyAllocated -= (bytes);\
}\
if (MemoryProfiler::debug)\
{\
std::cout << GridLogDebug << "[Memory debug] freeing " << memString(bytes) << std::endl;\
profilerDebugPrint;\
}
void check_huge_pages(void *Buf,uint64_t BYTES);
////////////////////////////////////////////////////////////////////
// A lattice of something, but assume the something is SIMDized.
////////////////////////////////////////////////////////////////////
@@ -90,20 +150,39 @@ public:
pointer allocate(size_type __n, const void* _p= 0)
{
size_type bytes = __n*sizeof(_Tp);
profilerAllocate(bytes);
_Tp *ptr = (_Tp *) PointerCache::Lookup(bytes);
// if ( ptr != NULL )
// std::cout << "alignedAllocator "<<__n << " cache hit "<< std::hex << ptr <<std::dec <<std::endl;
//////////////////
// Hack 2MB align; could make option probably doesn't need configurability
//////////////////
//define GRID_ALLOC_ALIGN (128)
#define GRID_ALLOC_ALIGN (2*1024*1024)
#ifdef HAVE_MM_MALLOC_H
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,128);
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,GRID_ALLOC_ALIGN);
#else
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(128,bytes);
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,bytes);
#endif
// std::cout << "alignedAllocator " << std::hex << ptr <<std::dec <<std::endl;
// First touch optimise in threaded loop
uint8_t *cp = (uint8_t *)ptr;
#ifdef GRID_OMP
#pragma omp parallel for
#endif
for(size_type n=0;n<bytes;n+=4096){
cp[n]=0;
}
return ptr;
}
void deallocate(pointer __p, size_type __n) {
size_type bytes = __n * sizeof(_Tp);
profilerFree(bytes);
pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes);
#ifdef HAVE_MM_MALLOC_H
@@ -154,10 +233,13 @@ public:
#ifdef GRID_COMMS_SHMEM
pointer allocate(size_type __n, const void* _p= 0)
{
size_type bytes = __n*sizeof(_Tp);
profilerAllocate(bytes);
#ifdef CRAY
_Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64);
_Tp *ptr = (_Tp *) shmem_align(bytes,64);
#else
_Tp *ptr = (_Tp *) shmem_align(64,__n*sizeof(_Tp));
_Tp *ptr = (_Tp *) shmem_align(64,bytes);
#endif
#ifdef PARANOID_SYMMETRIC_HEAP
static void * bcast;
@@ -175,20 +257,39 @@ public:
#endif
return ptr;
}
void deallocate(pointer __p, size_type) {
void deallocate(pointer __p, size_type __n) {
size_type bytes = __n*sizeof(_Tp);
profilerFree(bytes);
shmem_free((void *)__p);
}
#else
pointer allocate(size_type __n, const void* _p= 0)
{
size_type bytes = __n*sizeof(_Tp);
profilerAllocate(bytes);
#ifdef HAVE_MM_MALLOC_H
_Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
_Tp * ptr = (_Tp *) _mm_malloc(bytes, GRID_ALLOC_ALIGN);
#else
_Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
_Tp * ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN, bytes);
#endif
uint8_t *cp = (uint8_t *)ptr;
if ( ptr ) {
// One touch per 4k page, static OMP loop to catch same loop order
#ifdef GRID_OMP
#pragma omp parallel for schedule(static)
#endif
for(size_type n=0;n<bytes;n+=4096){
cp[n]=0;
}
}
return ptr;
}
void deallocate(pointer __p, size_type) {
void deallocate(pointer __p, size_type __n) {
size_type bytes = __n*sizeof(_Tp);
profilerFree(bytes);
#ifdef HAVE_MM_MALLOC_H
_mm_free((void *)__p);
#else

View File

@@ -8,6 +8,7 @@
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -43,16 +44,25 @@ namespace Grid{
class GridBase : public CartesianCommunicator , public GridThread {
public:
int dummy;
// Give Lattice access
template<class object> friend class Lattice;
GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {};
GridBase(const std::vector<int> & processor_grid,
const CartesianCommunicator &parent,
int &split_rank)
: CartesianCommunicator(processor_grid,parent,split_rank) {};
GridBase(const std::vector<int> & processor_grid,
const CartesianCommunicator &parent)
: CartesianCommunicator(processor_grid,parent,dummy) {};
virtual ~GridBase() = default;
// Physics Grid information.
std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes.
std::vector<int> _fdimensions;// Global dimensions of array prior to cb removal
std::vector<int> _fdimensions;// (full) Global dimensions of array prior to cb removal
std::vector<int> _gdimensions;// Global dimensions of array after cb removal
std::vector<int> _ldimensions;// local dimensions of array with processor images removed
std::vector<int> _rdimensions;// Reduced local dimensions with simd lane images and processor images removed
@@ -66,9 +76,10 @@ public:
std::vector<int> _slice_stride;
std::vector<int> _slice_nblock;
// Might need these at some point
// std::vector<int> _lstart; // local start of array in gcoors. _processor_coor[d]*_ldimensions[d]
// std::vector<int> _lend; // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1
std::vector<int> _lstart; // local start of array in gcoors _processor_coor[d]*_ldimensions[d]
std::vector<int> _lend ; // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1
bool _isCheckerBoarded;
public:
@@ -121,6 +132,11 @@ public:
Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions);
}
inline void InOutCoorToLocalCoor (std::vector<int> &ocoor, std::vector<int> &icoor, std::vector<int> &lcoor) {
lcoor.resize(_ndimension);
for (int d = 0; d < _ndimension; d++)
lcoor[d] = ocoor[d] + _rdimensions[d] * icoor[d];
}
//////////////////////////////////////////////////////////
// SIMD lane addressing
@@ -129,6 +145,7 @@ public:
{
Lexicographic::CoorFromIndex(coor,lane,_simd_layout);
}
inline int PermuteDim(int dimension){
return _simd_layout[dimension]>1;
}
@@ -169,18 +186,40 @@ public:
inline int gSites(void) const { return _isites*_osites*_Nprocessors; };
inline int Nd (void) const { return _ndimension;};
inline const std::vector<int> LocalStarts(void) { return _lstart; };
inline const std::vector<int> &FullDimensions(void) { return _fdimensions;};
inline const std::vector<int> &GlobalDimensions(void) { return _gdimensions;};
inline const std::vector<int> &LocalDimensions(void) { return _ldimensions;};
inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;};
////////////////////////////////////////////////////////////////
// Utility to print the full decomposition details
////////////////////////////////////////////////////////////////
void show_decomposition(){
std::cout << GridLogMessage << "\tFull Dimensions : " << _fdimensions << std::endl;
std::cout << GridLogMessage << "\tSIMD layout : " << _simd_layout << std::endl;
std::cout << GridLogMessage << "\tGlobal Dimensions : " << _gdimensions << std::endl;
std::cout << GridLogMessage << "\tLocal Dimensions : " << _ldimensions << std::endl;
std::cout << GridLogMessage << "\tReduced Dimensions : " << _rdimensions << std::endl;
std::cout << GridLogMessage << "\tOuter strides : " << _ostride << std::endl;
std::cout << GridLogMessage << "\tInner strides : " << _istride << std::endl;
std::cout << GridLogMessage << "\tiSites : " << _isites << std::endl;
std::cout << GridLogMessage << "\toSites : " << _osites << std::endl;
std::cout << GridLogMessage << "\tlSites : " << lSites() << std::endl;
std::cout << GridLogMessage << "\tgSites : " << gSites() << std::endl;
std::cout << GridLogMessage << "\tNd : " << _ndimension << std::endl;
}
////////////////////////////////////////////////////////////////
// Global addressing
////////////////////////////////////////////////////////////////
void GlobalIndexToGlobalCoor(int gidx,std::vector<int> &gcoor){
assert(gidx< gSites());
Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions);
}
void LocalIndexToLocalCoor(int lidx,std::vector<int> &lcoor){
assert(lidx<lSites());
Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
}
void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){
@@ -207,16 +246,16 @@ public:
std::vector<int> lcoor;
GlobalCoorToProcessorCoorLocalCoor(pcoor,lcoor,gcoor);
rank = RankFromProcessorCoor(pcoor);
/*
std::vector<int> cblcoor(lcoor);
for(int d=0;d<cblcoor.size();d++){
if( this->CheckerBoarded(d) ) {
cblcoor[d] = lcoor[d]/2;
}
}
i_idx= iIndex(cblcoor);// this does not imply divide by 2 on checker dim
o_idx= oIndex(lcoor); // this implies divide by 2 on checkerdim
*/
i_idx= iIndex(lcoor);
o_idx= oIndex(lcoor);
}
void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector<int> &gcoor)

View File

@@ -0,0 +1,174 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/cartesian/Cartesian_full.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_CARTESIAN_FULL_H
#define GRID_CARTESIAN_FULL_H
namespace Grid{
/////////////////////////////////////////////////////////////////////////////////////////
// Grid Support.
/////////////////////////////////////////////////////////////////////////////////////////
class GridCartesian: public GridBase {
public:
int dummy;
virtual int CheckerBoardFromOindexTable (int Oindex) {
return 0;
}
virtual int CheckerBoardFromOindex (int Oindex)
{
return 0;
}
virtual int CheckerBoarded(int dim){
return 0;
}
virtual int CheckerBoard(const std::vector<int> &site){
return 0;
}
virtual int CheckerBoardDestination(int cb,int shift,int dim){
return 0;
}
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift, int ocb){
return shift;
}
virtual int CheckerBoardShift(int source_cb,int dim,int shift, int osite){
return shift;
}
/////////////////////////////////////////////////////////////////////////
// Constructor takes a parent grid and possibly subdivides communicator.
/////////////////////////////////////////////////////////////////////////
GridCartesian(const std::vector<int> &dimensions,
const std::vector<int> &simd_layout,
const std::vector<int> &processor_grid,
const GridCartesian &parent) : GridBase(processor_grid,parent,dummy)
{
Init(dimensions,simd_layout,processor_grid);
}
GridCartesian(const std::vector<int> &dimensions,
const std::vector<int> &simd_layout,
const std::vector<int> &processor_grid,
const GridCartesian &parent,int &split_rank) : GridBase(processor_grid,parent,split_rank)
{
Init(dimensions,simd_layout,processor_grid);
}
/////////////////////////////////////////////////////////////////////////
// Construct from comm world
/////////////////////////////////////////////////////////////////////////
GridCartesian(const std::vector<int> &dimensions,
const std::vector<int> &simd_layout,
const std::vector<int> &processor_grid) : GridBase(processor_grid)
{
Init(dimensions,simd_layout,processor_grid);
}
virtual ~GridCartesian() = default;
void Init(const std::vector<int> &dimensions,
const std::vector<int> &simd_layout,
const std::vector<int> &processor_grid)
{
///////////////////////
// Grid information
///////////////////////
_isCheckerBoarded = false;
_ndimension = dimensions.size();
_fdimensions.resize(_ndimension);
_gdimensions.resize(_ndimension);
_ldimensions.resize(_ndimension);
_rdimensions.resize(_ndimension);
_simd_layout.resize(_ndimension);
_lstart.resize(_ndimension);
_lend.resize(_ndimension);
_ostride.resize(_ndimension);
_istride.resize(_ndimension);
_fsites = _gsites = _osites = _isites = 1;
for (int d = 0; d < _ndimension; d++)
{
_fdimensions[d] = dimensions[d]; // Global dimensions
_gdimensions[d] = _fdimensions[d]; // Global dimensions
_simd_layout[d] = simd_layout[d];
_fsites = _fsites * _fdimensions[d];
_gsites = _gsites * _gdimensions[d];
// Use a reduced simd grid
_ldimensions[d] = _gdimensions[d] / _processors[d]; //local dimensions
//std::cout << _ldimensions[d] << " " << _gdimensions[d] << " " << _processors[d] << std::endl;
assert(_ldimensions[d] * _processors[d] == _gdimensions[d]);
_rdimensions[d] = _ldimensions[d] / _simd_layout[d]; //overdecomposition
assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]);
_lstart[d] = _processor_coor[d] * _ldimensions[d];
_lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1;
_osites *= _rdimensions[d];
_isites *= _simd_layout[d];
// Addressing support
if (d == 0)
{
_ostride[d] = 1;
_istride[d] = 1;
}
else
{
_ostride[d] = _ostride[d - 1] * _rdimensions[d - 1];
_istride[d] = _istride[d - 1] * _simd_layout[d - 1];
}
}
///////////////////////
// subplane information
///////////////////////
_slice_block.resize(_ndimension);
_slice_stride.resize(_ndimension);
_slice_nblock.resize(_ndimension);
int block = 1;
int nblock = 1;
for (int d = 0; d < _ndimension; d++)
nblock *= _rdimensions[d];
for (int d = 0; d < _ndimension; d++)
{
nblock /= _rdimensions[d];
_slice_block[d] = block;
_slice_stride[d] = _ostride[d] * _rdimensions[d];
_slice_nblock[d] = nblock;
block = block * _rdimensions[d];
}
};
};
}
#endif

View File

@@ -112,33 +112,67 @@ public:
}
};
GridRedBlackCartesian(const GridBase *base) : GridRedBlackCartesian(base->_fdimensions,base->_simd_layout,base->_processors) {};
////////////////////////////////////////////////////////////
// Create Redblack from original grid; require full grid pointer ?
////////////////////////////////////////////////////////////
GridRedBlackCartesian(const GridBase *base) : GridBase(base->_processors,*base)
{
int dims = base->_ndimension;
std::vector<int> checker_dim_mask(dims,1);
int checker_dim = 0;
Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim);
};
GridRedBlackCartesian(const std::vector<int> &dimensions,
////////////////////////////////////////////////////////////
// Create redblack from original grid, with non-trivial checker dim mask
////////////////////////////////////////////////////////////
GridRedBlackCartesian(const GridBase *base,
const std::vector<int> &checker_dim_mask,
int checker_dim
) : GridBase(base->_processors,*base)
{
Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim) ;
}
virtual ~GridRedBlackCartesian() = default;
#if 0
////////////////////////////////////////////////////////////
// Create redblack grid ;; deprecate these. Should not
// need direct creation of redblack without a full grid to base on
////////////////////////////////////////////////////////////
GridRedBlackCartesian(const GridBase *base,
const std::vector<int> &dimensions,
const std::vector<int> &simd_layout,
const std::vector<int> &processor_grid,
const std::vector<int> &checker_dim_mask,
int checker_dim
) : GridBase(processor_grid)
) : GridBase(processor_grid,*base)
{
Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim);
}
GridRedBlackCartesian(const std::vector<int> &dimensions,
////////////////////////////////////////////////////////////
// Create redblack grid
////////////////////////////////////////////////////////////
GridRedBlackCartesian(const GridBase *base,
const std::vector<int> &dimensions,
const std::vector<int> &simd_layout,
const std::vector<int> &processor_grid) : GridBase(processor_grid)
const std::vector<int> &processor_grid) : GridBase(processor_grid,*base)
{
std::vector<int> checker_dim_mask(dimensions.size(),1);
Init(dimensions,simd_layout,processor_grid,checker_dim_mask,0);
int checker_dim = 0;
Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim);
}
#endif
void Init(const std::vector<int> &dimensions,
const std::vector<int> &simd_layout,
const std::vector<int> &processor_grid,
const std::vector<int> &checker_dim_mask,
int checker_dim)
{
///////////////////////
// Grid information
///////////////////////
_isCheckerBoarded = true;
_checker_dim = checker_dim;
assert(checker_dim_mask[checker_dim] == 1);
_ndimension = dimensions.size();
@@ -151,6 +185,8 @@ public:
_ldimensions.resize(_ndimension);
_rdimensions.resize(_ndimension);
_simd_layout.resize(_ndimension);
_lstart.resize(_ndimension);
_lend.resize(_ndimension);
_ostride.resize(_ndimension);
_istride.resize(_ndimension);
@@ -159,26 +195,36 @@ public:
_checker_dim_mask = checker_dim_mask;
for(int d=0;d<_ndimension;d++){
for (int d = 0; d < _ndimension; d++)
{
_fdimensions[d] = dimensions[d];
_gdimensions[d] = _fdimensions[d];
_fsites = _fsites * _fdimensions[d];
_gsites = _gsites * _gdimensions[d];
if (d==_checker_dim) {
if (d == _checker_dim)
{
assert((_gdimensions[d] & 0x1) == 0);
_gdimensions[d] = _gdimensions[d] / 2; // Remove a checkerboard
_gsites /= 2;
}
_ldimensions[d] = _gdimensions[d] / _processors[d];
assert(_ldimensions[d] * _processors[d] == _gdimensions[d]);
_lstart[d] = _processor_coor[d] * _ldimensions[d];
_lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1;
// Use a reduced simd grid
_simd_layout[d] = simd_layout[d];
_rdimensions[d]= _ldimensions[d]/_simd_layout[d];
_rdimensions[d] = _ldimensions[d] / _simd_layout[d]; // this is not checking if this is integer
assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]);
assert(_rdimensions[d] > 0);
// all elements of a simd vector must have same checkerboard.
// If Ls vectorised, this must still be the case; e.g. dwf rb5d
if ( _simd_layout[d]>1 ) {
if ( checker_dim_mask[d] ) {
if (_simd_layout[d] > 1)
{
if (checker_dim_mask[d])
{
assert((_rdimensions[d] & 0x1) == 0);
}
}
@@ -187,15 +233,16 @@ public:
_isites *= _simd_layout[d];
// Addressing support
if ( d==0 ) {
if (d == 0)
{
_ostride[d] = 1;
_istride[d] = 1;
} else {
}
else
{
_ostride[d] = _ostride[d - 1] * _rdimensions[d - 1];
_istride[d] = _istride[d - 1] * _simd_layout[d - 1];
}
}
////////////////////////////////////////////////////////////////////////////////////////////
@@ -207,9 +254,11 @@ public:
int block = 1;
int nblock = 1;
for(int d=0;d<_ndimension;d++) nblock*=_rdimensions[d];
for (int d = 0; d < _ndimension; d++)
nblock *= _rdimensions[d];
for(int d=0;d<_ndimension;d++){
for (int d = 0; d < _ndimension; d++)
{
nblock /= _rdimensions[d];
_slice_block[d] = block;
_slice_stride[d] = _ostride[d] * _rdimensions[d];
@@ -221,23 +270,29 @@ public:
// Create a checkerboard lookup table
////////////////////////////////////////////////
int rvol = 1;
for(int d=0;d<_ndimension;d++){
for (int d = 0; d < _ndimension; d++)
{
rvol = rvol * _rdimensions[d];
}
_checker_board.resize(rvol);
for(int osite=0;osite<_osites;osite++){
for (int osite = 0; osite < _osites; osite++)
{
_checker_board[osite] = CheckerBoardFromOindex(osite);
}
};
protected:
virtual int oIndex(std::vector<int> &coor)
{
int idx = 0;
for(int d=0;d<_ndimension;d++) {
if( d==_checker_dim ) {
for (int d = 0; d < _ndimension; d++)
{
if (d == _checker_dim)
{
idx += _ostride[d] * ((coor[d] / 2) % _rdimensions[d]);
} else {
}
else
{
idx += _ostride[d] * (coor[d] % _rdimensions[d]);
}
}
@@ -247,16 +302,19 @@ protected:
virtual int iIndex(std::vector<int> &lcoor)
{
int idx = 0;
for(int d=0;d<_ndimension;d++) {
if( d==_checker_dim ) {
for (int d = 0; d < _ndimension; d++)
{
if (d == _checker_dim)
{
idx += _istride[d] * (lcoor[d] / (2 * _rdimensions[d]));
} else {
}
else
{
idx += _istride[d] * (lcoor[d] / _rdimensions[d]);
}
}
return idx;
}
};
}
#endif

View File

@@ -28,6 +28,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_COMMUNICATOR_H
#define GRID_COMMUNICATOR_H
#include <Grid/communicator/SharedMemory.h>
#include <Grid/communicator/Communicator_base.h>
#endif

View File

@@ -25,39 +25,25 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/GridCore.h>
#include <fcntl.h>
#include <unistd.h>
#include <limits.h>
#include <sys/mman.h>
namespace Grid {
///////////////////////////////////////////////////////////////
// Info that is setup once and indept of cartesian layout
///////////////////////////////////////////////////////////////
void * CartesianCommunicator::ShmCommBuf;
uint64_t CartesianCommunicator::MAX_MPI_SHM_BYTES = 128*1024*1024;
/////////////////////////////////
// Alloc, free shmem region
/////////////////////////////////
void *CartesianCommunicator::ShmBufferMalloc(size_t bytes){
// bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
void *ptr = (void *)heap_top;
heap_top += bytes;
heap_bytes+= bytes;
if (heap_bytes >= MAX_MPI_SHM_BYTES) {
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
std::cout<< " Current value is " << (MAX_MPI_SHM_BYTES/(1024*1024)) <<std::endl;
assert(heap_bytes<MAX_MPI_SHM_BYTES);
}
return ptr;
}
void CartesianCommunicator::ShmBufferFreeAll(void) {
heap_top =(size_t)ShmBufferSelf();
heap_bytes=0;
}
CartesianCommunicator::CommunicatorPolicy_t
CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent;
int CartesianCommunicator::nCommThreads = -1;
/////////////////////////////////
// Grid information queries
/////////////////////////////////
int CartesianCommunicator::Dimensions(void) { return _ndimension; };
int CartesianCommunicator::IsBoss(void) { return _processor==0; };
int CartesianCommunicator::BossRank(void) { return 0; };
int CartesianCommunicator::ThisRank(void) { return _processor; };
@@ -86,39 +72,5 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
GlobalSumVector((double *)c,2*N);
}
#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L)
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes)
{
SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall)
{
SendToRecvFromComplete(waitall);
}
void CartesianCommunicator::StencilBarrier(void){};
commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector;
void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; }
void *CartesianCommunicator::ShmBuffer(int rank) {
return NULL;
}
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) {
return NULL;
}
void CartesianCommunicator::ShmInitGeneric(void){
ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES);
ShmCommBuf=(void *)&ShmBufStorageVector[0];
}
#endif
}

View File

@@ -32,98 +32,33 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
///////////////////////////////////
// Processor layout information
///////////////////////////////////
#ifdef GRID_COMMS_MPI
#include <mpi.h>
#endif
#ifdef GRID_COMMS_MPI3
#include <mpi.h>
#endif
#ifdef GRID_COMMS_MPI3L
#include <mpi.h>
#endif
#ifdef GRID_COMMS_SHMEM
#include <mpp/shmem.h>
#endif
#include <Grid/communicator/SharedMemory.h>
namespace Grid {
class CartesianCommunicator {
class CartesianCommunicator : public SharedMemory {
public:
// 65536 ranks per node adequate for now
// 128MB shared memory for comms enought for 48^4 local vol comms
// Give external control (command line override?) of this
static const int MAXLOG2RANKSPERNODE = 16;
static uint64_t MAX_MPI_SHM_BYTES;
////////////////////////////////////////////
// Policies
////////////////////////////////////////////
enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential };
static CommunicatorPolicy_t CommunicatorPolicy;
static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; }
static int nCommThreads;
////////////////////////////////////////////
// Communicator should know nothing of the physics grid, only processor grid.
////////////////////////////////////////////
int _Nprocessors; // How many in all
std::vector<int> _processors; // Which dimensions get relayed out over processors lanes.
int _processor; // linear processor rank
std::vector<int> _processor_coor; // linear processor coordinate
unsigned long _ndimension;
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPI3L)
static MPI_Comm communicator_world;
MPI_Comm communicator;
typedef MPI_Request CommsRequest_t;
#else
typedef int CommsRequest_t;
#endif
////////////////////////////////////////////////////////////////////
// Helper functionality for SHM Windows common to all other impls
////////////////////////////////////////////////////////////////////
// Longer term; drop this in favour of a master / slave model with
// cartesian communicator on a subset of ranks, slave ranks controlled
// by group leader with data xfer via shared memory
////////////////////////////////////////////////////////////////////
#ifdef GRID_COMMS_MPI3
static int ShmRank;
static int ShmSize;
static int GroupRank;
static int GroupSize;
static int WorldRank;
static int WorldSize;
std::vector<int> WorldDims;
std::vector<int> GroupDims;
std::vector<int> ShmDims;
std::vector<int> GroupCoor;
std::vector<int> ShmCoor;
std::vector<int> WorldCoor;
static std::vector<int> GroupRanks;
static std::vector<int> MyGroup;
static int ShmSetup;
static MPI_Win ShmWindow;
static MPI_Comm ShmComm;
std::vector<int> LexicographicToWorldRank;
static std::vector<void *> ShmCommBufs;
#else
static void ShmInitGeneric(void);
static commVector<uint8_t> ShmBufStorageVector;
#endif
/////////////////////////////////
// Grid information and queries
// Implemented in Communicator_base.C
/////////////////////////////////
static void * ShmCommBuf;
size_t heap_top;
size_t heap_bytes;
void *ShmBufferSelf(void);
void *ShmBuffer(int rank);
void *ShmBufferTranslate(int rank,void * local_p);
void *ShmBufferMalloc(size_t bytes);
void ShmBufferFreeAll(void) ;
static Grid_MPI_Comm communicator_world;
Grid_MPI_Comm communicator;
std::vector<Grid_MPI_Comm> communicator_halo;
////////////////////////////////////////////////
// Must call in Grid startup
@@ -131,9 +66,23 @@ class CartesianCommunicator {
static void Init(int *argc, char ***argv);
////////////////////////////////////////////////
// Constructor of any given grid
// Constructors to sub-divide a parent communicator
// and default to comm world
////////////////////////////////////////////////
CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank);
CartesianCommunicator(const std::vector<int> &pdimensions_in);
virtual ~CartesianCommunicator();
private:
////////////////////////////////////////////////
// Private initialise from an MPI communicator
// Can use after an MPI_Comm_split, but hidden from user so private
////////////////////////////////////////////////
void InitFromMPICommunicator(const std::vector<int> &processors, Grid_MPI_Comm communicator_base);
public:
////////////////////////////////////////////////////////////////////////////////////////
// Wraps MPI_Cart routines, or implements equivalent on other impls
@@ -142,6 +91,7 @@ class CartesianCommunicator {
int RankFromProcessorCoor(std::vector<int> &coor);
void ProcessorCoorFromRank(int rank,std::vector<int> &coor);
int Dimensions(void) ;
int IsBoss(void) ;
int BossRank(void) ;
int ThisRank(void) ;
@@ -168,6 +118,8 @@ class CartesianCommunicator {
void GlobalSumVector(ComplexF *c,int N);
void GlobalSum(ComplexD &c);
void GlobalSumVector(ComplexD *c,int N);
void GlobalXOR(uint32_t &);
void GlobalXOR(uint64_t &);
template<class obj> void GlobalSum(obj &o){
typedef typename obj::scalar_type scalar_type;
@@ -200,14 +152,21 @@ class CartesianCommunicator {
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
void StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
double StencilSendToRecvFrom(void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes,int dir);
double StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes);
int bytes,int dir);
void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int i);
void StencilBarrier(void);
////////////////////////////////////////////////////////////
@@ -220,6 +179,23 @@ class CartesianCommunicator {
////////////////////////////////////////////////////////////
void Broadcast(int root,void* data, int bytes);
////////////////////////////////////////////////////////////
// All2All down one dimension
////////////////////////////////////////////////////////////
template<class T> void AllToAll(int dim,std::vector<T> &in, std::vector<T> &out){
assert(dim>=0);
assert(dim<_ndimension);
assert(in.size()==out.size());
int numnode = _processors[dim];
uint64_t bytes=sizeof(T);
uint64_t words=in.size()/numnode;
assert(numnode * words == in.size());
assert(words < (1ULL<<31));
AllToAll(dim,(void *)&in[0],(void *)&out[0],words,bytes);
}
void AllToAll(int dim ,void *in,void *out,uint64_t words,uint64_t bytes);
void AllToAll(void *in,void *out,uint64_t words ,uint64_t bytes);
template<class obj> void Broadcast(int root,obj &data)
{
Broadcast(root,(void *)&data,sizeof(data));

View File

@@ -0,0 +1,508 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/communicator/Communicator_mpi.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/GridCore.h>
#include <Grid/communicator/SharedMemory.h>
namespace Grid {
Grid_MPI_Comm CartesianCommunicator::communicator_world;
////////////////////////////////////////////
// First initialise of comms system
////////////////////////////////////////////
void CartesianCommunicator::Init(int *argc, char ***argv)
{
int flag;
int provided;
MPI_Initialized(&flag); // needed to coexist with other libs apparently
if ( !flag ) {
MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
//If only 1 comms thread we require any threading mode other than SINGLE, but for multiple comms threads we need MULTIPLE
if( (nCommThreads == 1 && provided == MPI_THREAD_SINGLE) ||
(nCommThreads > 1 && provided != MPI_THREAD_MULTIPLE) )
assert(0);
}
// Never clean up as done once.
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
GlobalSharedMemory::Init(communicator_world);
GlobalSharedMemory::SharedMemoryAllocate(
GlobalSharedMemory::MAX_MPI_SHM_BYTES,
GlobalSharedMemory::Hugepages);
}
///////////////////////////////////////////////////////////////////////////
// Use cartesian communicators now even in MPI3
///////////////////////////////////////////////////////////////////////////
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
{
int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest);
assert(ierr==0);
}
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
{
int rank;
int ierr=MPI_Cart_rank (communicator, &coor[0], &rank);
assert(ierr==0);
return rank;
}
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
{
coor.resize(_ndimension);
int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]);
assert(ierr==0);
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
// Initialises from communicator_world
////////////////////////////////////////////////////////////////////////////////////////////////////////
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
MPI_Comm optimal_comm;
////////////////////////////////////////////////////
// Remap using the shared memory optimising routine
// The remap creates a comm which must be freed
////////////////////////////////////////////////////
GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm);
InitFromMPICommunicator(processors,optimal_comm);
SetCommunicator(optimal_comm);
///////////////////////////////////////////////////
// Free the temp communicator
///////////////////////////////////////////////////
MPI_Comm_free(&optimal_comm);
}
//////////////////////////////////
// Try to subdivide communicator
//////////////////////////////////
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
{
_ndimension = processors.size();
int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
std::vector<int> parent_processor_coor(_ndimension,0);
std::vector<int> parent_processors (_ndimension,1);
// Can make 5d grid from 4d etc...
int pad = _ndimension-parent_ndimension;
for(int d=0;d<parent_ndimension;d++){
parent_processor_coor[pad+d]=parent._processor_coor[d];
parent_processors [pad+d]=parent._processors[d];
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
// split the communicator
//////////////////////////////////////////////////////////////////////////////////////////////////////
// int Nparent = parent._processors ;
int Nparent;
MPI_Comm_size(parent.communicator,&Nparent);
int childsize=1;
for(int d=0;d<processors.size();d++) {
childsize *= processors[d];
}
int Nchild = Nparent/childsize;
assert (childsize * Nchild == Nparent);
std::vector<int> ccoor(_ndimension); // coor within subcommunicator
std::vector<int> scoor(_ndimension); // coor of split within parent
std::vector<int> ssize(_ndimension); // coor of split within parent
for(int d=0;d<_ndimension;d++){
ccoor[d] = parent_processor_coor[d] % processors[d];
scoor[d] = parent_processor_coor[d] / processors[d];
ssize[d] = parent_processors[d] / processors[d];
}
// rank within subcomm ; srank is rank of subcomm within blocks of subcomms
int crank;
// Mpi uses the reverse Lexico convention to us; so reversed routines called
Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); // processors is the split grid dimensions
Lexicographic::IndexFromCoorReversed(scoor,srank,ssize); // ssize is the number of split grids
MPI_Comm comm_split;
if ( Nchild > 1 ) {
if(0){
std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] ";
for(int d=0;d<parent._ndimension;d++) std::cout << parent._processors[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << processors[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< parent._ndimension <<"] ";
for(int d=0;d<parent._ndimension;d++) std::cout << parent._processor_coor[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" new split "<< srank<<" scoor ["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << scoor[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << ccoor[d] << " ";
std::cout<<std::endl;
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Declare victory
//////////////////////////////////////////////////////////////////////////////////////////////////////
std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
<< Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
std::cout << " Split communicator " <<comm_split <<std::endl;
}
////////////////////////////////////////////////////////////////
// Split the communicator
////////////////////////////////////////////////////////////////
int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split);
assert(ierr==0);
} else {
srank = 0;
int ierr = MPI_Comm_dup (parent.communicator,&comm_split);
assert(ierr==0);
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Set up from the new split communicator
//////////////////////////////////////////////////////////////////////////////////////////////////////
InitFromMPICommunicator(processors,comm_split);
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Take the right SHM buffers
//////////////////////////////////////////////////////////////////////////////////////////////////////
SetCommunicator(comm_split);
///////////////////////////////////////////////
// Free the temp communicator
///////////////////////////////////////////////
MPI_Comm_free(&comm_split);
if(0){
std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
for(int d=0;d<processors.size();d++){
std::cout << d<< " " << _processor_coor[d] <<" " << ccoor[d]<<std::endl;
}
}
for(int d=0;d<processors.size();d++){
assert(_processor_coor[d] == ccoor[d] );
}
}
void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
{
////////////////////////////////////////////////////
// Creates communicator, and the communicator_halo
////////////////////////////////////////////////////
_ndimension = processors.size();
_processor_coor.resize(_ndimension);
/////////////////////////////////
// Count the requested nodes
/////////////////////////////////
_Nprocessors=1;
_processors = processors;
for(int i=0;i<_ndimension;i++){
_Nprocessors*=_processors[i];
}
std::vector<int> periodic(_ndimension,1);
MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator);
MPI_Comm_rank(communicator,&_processor);
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
if ( 0 && (communicator_base != communicator_world) ) {
std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"<<std::endl;
std::cout << " new communicator rank "<<_processor<< " coor ["<<_ndimension<<"] ";
for(int d=0;d<_processors.size();d++){
std::cout << _processor_coor[d]<<" ";
}
std::cout << std::endl;
}
int Size;
MPI_Comm_size(communicator,&Size);
communicator_halo.resize (2*_ndimension);
for(int i=0;i<_ndimension*2;i++){
MPI_Comm_dup(communicator,&communicator_halo[i]);
}
assert(Size==_Nprocessors);
}
CartesianCommunicator::~CartesianCommunicator()
{
int MPI_is_finalised;
MPI_Finalized(&MPI_is_finalised);
if (communicator && !MPI_is_finalised) {
MPI_Comm_free(&communicator);
for(int i=0;i<communicator_halo.size();i++){
MPI_Comm_free(&communicator_halo[i]);
}
}
}
void CartesianCommunicator::GlobalSum(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(uint64_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint64_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(float &f){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSumVector(float *f,int N)
{
int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(double &d)
{
int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSumVector(double *d,int N)
{
int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
assert(ierr==0);
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFrom(void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
std::vector<CommsRequest_t> reqs(0);
// unsigned long xcrc = crc32(0L, Z_NULL, 0);
// unsigned long rcrc = crc32(0L, Z_NULL, 0);
// xcrc = crc32(xcrc,(unsigned char *)xmit,bytes);
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
SendToRecvFromComplete(reqs);
// rcrc = crc32(rcrc,(unsigned char *)recv,bytes);
// printf("proc %d SendToRecvFrom %d bytes %lx %lx\n",_processor,bytes,xcrc,rcrc);
}
void CartesianCommunicator::SendRecvPacket(void *xmit,
void *recv,
int sender,
int receiver,
int bytes)
{
MPI_Status stat;
assert(sender != receiver);
int tag = sender;
if ( _processor == sender ) {
MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
}
if ( _processor == receiver ) {
MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
}
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
int myrank = _processor;
int ierr;
if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
MPI_Request xrq;
MPI_Request rrq;
ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
assert(ierr==0);
list.push_back(xrq);
list.push_back(rrq);
} else {
// Give the CPU to MPI immediately; can use threads to overlap optionally
ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
recv,bytes,MPI_CHAR,from, from,
communicator,MPI_STATUS_IGNORE);
assert(ierr==0);
}
}
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
int dest,
void *recv,
int from,
int bytes,int dir)
{
std::vector<CommsRequest_t> list;
double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir);
StencilSendToRecvFromComplete(list,dir);
return offbytes;
}
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes,int dir)
{
int ncomm =communicator_halo.size();
int commdir=dir%ncomm;
MPI_Request xrq;
MPI_Request rrq;
int ierr;
int gdest = ShmRanks[dest];
int gfrom = ShmRanks[from];
int gme = ShmRanks[_processor];
assert(dest != _processor);
assert(from != _processor);
assert(gme == ShmRank);
double off_node_bytes=0.0;
if ( gfrom ==MPI_UNDEFINED) {
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[commdir],&rrq);
assert(ierr==0);
list.push_back(rrq);
off_node_bytes+=bytes;
}
if ( gdest == MPI_UNDEFINED ) {
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[commdir],&xrq);
assert(ierr==0);
list.push_back(xrq);
off_node_bytes+=bytes;
}
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
this->StencilSendToRecvFromComplete(list,dir);
}
return off_node_bytes;
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
{
SendToRecvFromComplete(waitall);
}
void CartesianCommunicator::StencilBarrier(void)
{
MPI_Barrier (ShmComm);
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
int nreq=list.size();
if (nreq==0) return;
std::vector<MPI_Status> status(nreq);
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
assert(ierr==0);
list.resize(0);
}
void CartesianCommunicator::Barrier(void)
{
int ierr = MPI_Barrier(communicator);
assert(ierr==0);
}
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
{
int ierr=MPI_Bcast(data,
bytes,
MPI_BYTE,
root,
communicator);
assert(ierr==0);
}
int CartesianCommunicator::RankWorld(void){
int r;
MPI_Comm_rank(communicator_world,&r);
return r;
}
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
{
int ierr= MPI_Bcast(data,
bytes,
MPI_BYTE,
root,
communicator_world);
assert(ierr==0);
}
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes)
{
std::vector<int> row(_ndimension,1);
assert(dim>=0 && dim<_ndimension);
// Split the communicator
row[dim] = _processors[dim];
int me;
CartesianCommunicator Comm(row,*this,me);
Comm.AllToAll(in,out,words,bytes);
}
void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes)
{
// MPI is a pain and uses "int" arguments
// 64*64*64*128*16 == 500Million elements of data.
// When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug.
// (Turns up on 32^3 x 64 Gparity too)
MPI_Datatype object;
int iwords;
int ibytes;
iwords = words;
ibytes = bytes;
assert(words == iwords); // safe to cast to int ?
assert(bytes == ibytes); // safe to cast to int ?
MPI_Type_contiguous(ibytes,MPI_BYTE,&object);
MPI_Type_commit(&object);
MPI_Alltoall(in,iwords,object,out,iwords,object,communicator);
MPI_Type_free(&object);
}
}

View File

@@ -25,16 +25,28 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/GridCore.h>
namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////////////
// Info that is setup once and indept of cartesian layout
///////////////////////////////////////////////////////////////////////////////////////////////////
Grid_MPI_Comm CartesianCommunicator::communicator_world;
void CartesianCommunicator::Init(int *argc, char *** arv)
{
ShmInitGeneric();
GlobalSharedMemory::Init(communicator_world);
GlobalSharedMemory::SharedMemoryAllocate(
GlobalSharedMemory::MAX_MPI_SHM_BYTES,
GlobalSharedMemory::Hugepages);
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
: CartesianCommunicator(processors)
{
srank=0;
SetCommunicator(communicator_world);
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
@@ -50,14 +62,19 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
assert(_processors[d]==1);
_processor_coor[d] = 0;
}
SetCommunicator(communicator_world);
}
CartesianCommunicator::~CartesianCommunicator(){}
void CartesianCommunicator::GlobalSum(float &){}
void CartesianCommunicator::GlobalSumVector(float *,int N){}
void CartesianCommunicator::GlobalSum(double &){}
void CartesianCommunicator::GlobalSum(uint32_t &){}
void CartesianCommunicator::GlobalSum(uint64_t &){}
void CartesianCommunicator::GlobalSumVector(double *,int N){}
void CartesianCommunicator::GlobalXOR(uint32_t &){}
void CartesianCommunicator::GlobalXOR(uint64_t &){}
void CartesianCommunicator::SendRecvPacket(void *xmit,
void *recv,
@@ -87,10 +104,19 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
{
assert(0);
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
assert(0);
}
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes)
{
bcopy(in,out,bytes*words);
}
void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes)
{
bcopy(in,out,bytes*words);
}
int CartesianCommunicator::RankWorld(void){return 0;}
void CartesianCommunicator::Barrier(void){}
@@ -104,6 +130,36 @@ void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest
dest=0;
}
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes, int dir)
{
std::vector<CommsRequest_t> list;
// Discard the "dir"
SendToRecvFromBegin (list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
SendToRecvFromComplete(list);
return 2.0*bytes;
}
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes, int dir)
{
// Discard the "dir"
SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
return 2.0*bytes;
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
{
SendToRecvFromComplete(waitall);
}
void CartesianCommunicator::StencilBarrier(void){};
}

View File

@@ -0,0 +1,92 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/communicator/SharedMemory.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/GridCore.h>
namespace Grid {
// static data
uint64_t GlobalSharedMemory::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL;
int GlobalSharedMemory::Hugepages = 0;
int GlobalSharedMemory::_ShmSetup;
int GlobalSharedMemory::_ShmAlloc;
uint64_t GlobalSharedMemory::_ShmAllocBytes;
std::vector<void *> GlobalSharedMemory::WorldShmCommBufs;
Grid_MPI_Comm GlobalSharedMemory::WorldShmComm;
int GlobalSharedMemory::WorldShmRank;
int GlobalSharedMemory::WorldShmSize;
std::vector<int> GlobalSharedMemory::WorldShmRanks;
Grid_MPI_Comm GlobalSharedMemory::WorldComm;
int GlobalSharedMemory::WorldSize;
int GlobalSharedMemory::WorldRank;
int GlobalSharedMemory::WorldNodes;
int GlobalSharedMemory::WorldNode;
void GlobalSharedMemory::SharedMemoryFree(void)
{
assert(_ShmAlloc);
assert(_ShmAllocBytes>0);
for(int r=0;r<WorldShmSize;r++){
munmap(WorldShmCommBufs[r],_ShmAllocBytes);
}
_ShmAlloc = 0;
_ShmAllocBytes = 0;
}
/////////////////////////////////
// Alloc, free shmem region
/////////////////////////////////
void *SharedMemory::ShmBufferMalloc(size_t bytes){
// bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
void *ptr = (void *)heap_top;
heap_top += bytes;
heap_bytes+= bytes;
if (heap_bytes >= heap_size) {
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
assert(heap_bytes<heap_size);
}
return ptr;
}
void SharedMemory::ShmBufferFreeAll(void) {
heap_top =(size_t)ShmBufferSelf();
heap_bytes=0;
}
void *SharedMemory::ShmBufferSelf(void)
{
return ShmCommBufs[ShmRank];
}
}

View File

@@ -0,0 +1,165 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/communicator/SharedMemory.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
// TODO
// 1) move includes into SharedMemory.cc
//
// 2) split shared memory into a) optimal communicator creation from comm world
//
// b) shared memory buffers container
// -- static globally shared; init once
// -- per instance set of buffers.
//
#pragma once
#include <Grid/GridCore.h>
#if defined (GRID_COMMS_MPI3)
#include <mpi.h>
#endif
#include <semaphore.h>
#include <fcntl.h>
#include <unistd.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/mman.h>
#include <zlib.h>
#ifdef HAVE_NUMAIF_H
#include <numaif.h>
#endif
namespace Grid {
#if defined (GRID_COMMS_MPI3)
typedef MPI_Comm Grid_MPI_Comm;
typedef MPI_Request CommsRequest_t;
#else
typedef int CommsRequest_t;
typedef int Grid_MPI_Comm;
#endif
class GlobalSharedMemory {
private:
static const int MAXLOG2RANKSPERNODE = 16;
// Init once lock on the buffer allocation
static int _ShmSetup;
static int _ShmAlloc;
static uint64_t _ShmAllocBytes;
public:
static int ShmSetup(void) { return _ShmSetup; }
static int ShmAlloc(void) { return _ShmAlloc; }
static uint64_t ShmAllocBytes(void) { return _ShmAllocBytes; }
static uint64_t MAX_MPI_SHM_BYTES;
static int Hugepages;
static std::vector<void *> WorldShmCommBufs;
static Grid_MPI_Comm WorldComm;
static int WorldRank;
static int WorldSize;
static Grid_MPI_Comm WorldShmComm;
static int WorldShmRank;
static int WorldShmSize;
static int WorldNodes;
static int WorldNode;
static std::vector<int> WorldShmRanks;
//////////////////////////////////////////////////////////////////////////////////////
// Create an optimal reordered communicator that makes MPI_Cart_create get it right
//////////////////////////////////////////////////////////////////////////////////////
static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD
static void OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian
///////////////////////////////////////////////////
// Provide shared memory facilities off comm world
///////////////////////////////////////////////////
static void SharedMemoryAllocate(uint64_t bytes, int flags);
static void SharedMemoryFree(void);
};
//////////////////////////////
// one per communicator
//////////////////////////////
class SharedMemory
{
private:
static const int MAXLOG2RANKSPERNODE = 16;
size_t heap_top;
size_t heap_bytes;
size_t heap_size;
protected:
Grid_MPI_Comm ShmComm; // for barriers
int ShmRank;
int ShmSize;
std::vector<void *> ShmCommBufs;
std::vector<int> ShmRanks;// Mapping comm ranks to Shm ranks
public:
SharedMemory() {};
~SharedMemory();
///////////////////////////////////////////////////////////////////////////////////////
// set the buffers & sizes
///////////////////////////////////////////////////////////////////////////////////////
void SetCommunicator(Grid_MPI_Comm comm);
////////////////////////////////////////////////////////////////////////
// For this instance ; disjoint buffer sets between splits if split grid
////////////////////////////////////////////////////////////////////////
void ShmBarrier(void);
///////////////////////////////////////////////////
// Call on any instance
///////////////////////////////////////////////////
void SharedMemoryTest(void);
void *ShmBufferSelf(void);
void *ShmBuffer (int rank);
void *ShmBufferTranslate(int rank,void * local_p);
void *ShmBufferMalloc(size_t bytes);
void ShmBufferFreeAll(void) ;
//////////////////////////////////////////////////////////////////////////
// Make info on Nodes & ranks and Shared memory available
//////////////////////////////////////////////////////////////////////////
int NodeCount(void) { return GlobalSharedMemory::WorldNodes;};
int RankCount(void) { return GlobalSharedMemory::WorldSize;};
};
}

View File

@@ -0,0 +1,651 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/communicator/SharedMemory.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/GridCore.h>
#include <pwd.h>
namespace Grid {
/*Construct from an MPI communicator*/
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
{
assert(_ShmSetup==0);
WorldComm = comm;
MPI_Comm_rank(WorldComm,&WorldRank);
MPI_Comm_size(WorldComm,&WorldSize);
// WorldComm, WorldSize, WorldRank
/////////////////////////////////////////////////////////////////////
// Split into groups that can share memory
/////////////////////////////////////////////////////////////////////
MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&WorldShmComm);
MPI_Comm_rank(WorldShmComm ,&WorldShmRank);
MPI_Comm_size(WorldShmComm ,&WorldShmSize);
// WorldShmComm, WorldShmSize, WorldShmRank
// WorldNodes
WorldNodes = WorldSize/WorldShmSize;
assert( (WorldNodes * WorldShmSize) == WorldSize );
// FIXME: Check all WorldShmSize are the same ?
/////////////////////////////////////////////////////////////////////
// find world ranks in our SHM group (i.e. which ranks are on our node)
/////////////////////////////////////////////////////////////////////
MPI_Group WorldGroup, ShmGroup;
MPI_Comm_group (WorldComm, &WorldGroup);
MPI_Comm_group (WorldShmComm, &ShmGroup);
std::vector<int> world_ranks(WorldSize); for(int r=0;r<WorldSize;r++) world_ranks[r]=r;
WorldShmRanks.resize(WorldSize);
MPI_Group_translate_ranks (WorldGroup,WorldSize,&world_ranks[0],ShmGroup, &WorldShmRanks[0]);
///////////////////////////////////////////////////////////////////
// Identify who is in my group and nominate the leader
///////////////////////////////////////////////////////////////////
int g=0;
std::vector<int> MyGroup;
MyGroup.resize(WorldShmSize);
for(int rank=0;rank<WorldSize;rank++){
if(WorldShmRanks[rank]!=MPI_UNDEFINED){
assert(g<WorldShmSize);
MyGroup[g++] = rank;
}
}
std::sort(MyGroup.begin(),MyGroup.end(),std::less<int>());
int myleader = MyGroup[0];
std::vector<int> leaders_1hot(WorldSize,0);
std::vector<int> leaders_group(WorldNodes,0);
leaders_1hot [ myleader ] = 1;
///////////////////////////////////////////////////////////////////
// global sum leaders over comm world
///////////////////////////////////////////////////////////////////
int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,WorldComm);
assert(ierr==0);
///////////////////////////////////////////////////////////////////
// find the group leaders world rank
///////////////////////////////////////////////////////////////////
int group=0;
for(int l=0;l<WorldSize;l++){
if(leaders_1hot[l]){
leaders_group[group++] = l;
}
}
///////////////////////////////////////////////////////////////////
// Identify the node of the group in which I (and my leader) live
///////////////////////////////////////////////////////////////////
WorldNode=-1;
for(int g=0;g<WorldNodes;g++){
if (myleader == leaders_group[g]){
WorldNode=g;
}
}
assert(WorldNode!=-1);
_ShmSetup=1;
}
// Gray encode support
int BinaryToGray (int binary) {
int gray = (binary>>1)^binary;
return gray;
}
int Log2Size(int TwoToPower,int MAXLOG2)
{
int log2size = -1;
for(int i=0;i<=MAXLOG2;i++){
if ( (0x1<<i) == TwoToPower ) {
log2size = i;
break;
}
}
return log2size;
}
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
{
#ifdef HYPERCUBE
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
////////////////////////////////////////////////////////////////
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
assert(log2size != -1);
////////////////////////////////////////////////////////////////
// Identify the hypercube coordinate of this node using hostname
////////////////////////////////////////////////////////////////
// n runs 0...7 9...16 18...25 27...34 (8*4) 5 bits
// i runs 0..7 3 bits
// r runs 0..3 2 bits
// 2^10 = 1024 nodes
const int maxhdim = 10;
std::vector<int> HyperCubeCoords(maxhdim,0);
std::vector<int> RootHyperCubeCoords(maxhdim,0);
int R;
int I;
int N;
const int namelen = _POSIX_HOST_NAME_MAX;
char name[namelen];
// Parse ICE-XA hostname to get hypercube location
gethostname(name,namelen);
int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
assert(nscan==3);
int nlo = N%9;
int nhi = N/9;
uint32_t hypercoor = (R<<8)|(I<<5)|(nhi<<3)|nlo ;
uint32_t rootcoor = hypercoor;
//////////////////////////////////////////////////////////////////
// Print debug info
//////////////////////////////////////////////////////////////////
for(int d=0;d<maxhdim;d++){
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
}
std::string hname(name);
std::cout << "hostname "<<hname<<std::endl;
std::cout << "R " << R << " I " << I << " N "<< N
<< " hypercoor 0x"<<std::hex<<hypercoor<<std::dec<<std::endl;
//////////////////////////////////////////////////////////////////
// broadcast node 0's base coordinate for this partition.
//////////////////////////////////////////////////////////////////
MPI_Bcast(&rootcoor, sizeof(rootcoor), MPI_BYTE, 0, WorldComm);
hypercoor=hypercoor-rootcoor;
assert(hypercoor<WorldSize);
assert(hypercoor>=0);
//////////////////////////////////////
// Printing
//////////////////////////////////////
for(int d=0;d<maxhdim;d++){
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
}
////////////////////////////////////////////////////////////////
// Identify subblock of ranks on node spreading across dims
// in a maximally symmetrical way
////////////////////////////////////////////////////////////////
int ndimension = processors.size();
std::vector<int> processor_coor(ndimension);
std::vector<int> WorldDims = processors; std::vector<int> ShmDims (ndimension,1); std::vector<int> NodeDims (ndimension);
std::vector<int> ShmCoor (ndimension); std::vector<int> NodeCoor (ndimension); std::vector<int> WorldCoor(ndimension);
std::vector<int> HyperCoor(ndimension);
int dim = 0;
for(int l2=0;l2<log2size;l2++){
while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
ShmDims[dim]*=2;
dim=(dim+1)%ndimension;
}
////////////////////////////////////////////////////////////////
// Establish torus of processes and nodes with sub-blockings
////////////////////////////////////////////////////////////////
for(int d=0;d<ndimension;d++){
NodeDims[d] = WorldDims[d]/ShmDims[d];
}
////////////////////////////////////////////////////////////////
// Map Hcube according to physical lattice
// must partition. Loop over dims and find out who would join.
////////////////////////////////////////////////////////////////
int hcoor = hypercoor;
for(int d=0;d<ndimension;d++){
int bits = Log2Size(NodeDims[d],MAXLOG2RANKSPERNODE);
int msk = (0x1<<bits)-1;
HyperCoor[d]=hcoor & msk;
HyperCoor[d]=BinaryToGray(HyperCoor[d]); // Space filling curve magic
hcoor = hcoor >> bits;
}
////////////////////////////////////////////////////////////////
// Check processor counts match
////////////////////////////////////////////////////////////////
int Nprocessors=1;
for(int i=0;i<ndimension;i++){
Nprocessors*=processors[i];
}
assert(WorldSize==Nprocessors);
////////////////////////////////////////////////////////////////
// Establish mapping between lexico physics coord and WorldRank
////////////////////////////////////////////////////////////////
int rank;
Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode ,NodeDims);
for(int d=0;d<ndimension;d++) NodeCoor[d]=HyperCoor[d];
Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);
/////////////////////////////////////////////////////////////////
// Build the new communicator
/////////////////////////////////////////////////////////////////
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
assert(ierr==0);
#else
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
////////////////////////////////////////////////////////////////
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
assert(log2size != -1);
////////////////////////////////////////////////////////////////
// Identify subblock of ranks on node spreading across dims
// in a maximally symmetrical way
////////////////////////////////////////////////////////////////
int ndimension = processors.size();
std::vector<int> processor_coor(ndimension);
std::vector<int> WorldDims = processors; std::vector<int> ShmDims (ndimension,1); std::vector<int> NodeDims (ndimension);
std::vector<int> ShmCoor (ndimension); std::vector<int> NodeCoor (ndimension); std::vector<int> WorldCoor(ndimension);
int dim = 0;
for(int l2=0;l2<log2size;l2++){
while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
ShmDims[dim]*=2;
dim=(dim+1)%ndimension;
}
////////////////////////////////////////////////////////////////
// Establish torus of processes and nodes with sub-blockings
////////////////////////////////////////////////////////////////
for(int d=0;d<ndimension;d++){
NodeDims[d] = WorldDims[d]/ShmDims[d];
}
////////////////////////////////////////////////////////////////
// Check processor counts match
////////////////////////////////////////////////////////////////
int Nprocessors=1;
for(int i=0;i<ndimension;i++){
Nprocessors*=processors[i];
}
assert(WorldSize==Nprocessors);
////////////////////////////////////////////////////////////////
// Establish mapping between lexico physics coord and WorldRank
////////////////////////////////////////////////////////////////
int rank;
Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode ,NodeDims);
Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);
/////////////////////////////////////////////////////////////////
// Build the new communicator
/////////////////////////////////////////////////////////////////
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
assert(ierr==0);
#endif
}
////////////////////////////////////////////////////////////////////////////////////////////
// SHMGET
////////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_MPI3_SHMGET
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// allocate the shared windows for our group
//////////////////////////////////////////////////////////////////////////////////////////////////////////
MPI_Barrier(WorldShmComm);
WorldShmCommBufs.resize(WorldShmSize);
std::vector<int> shmids(WorldShmSize);
if ( WorldShmRank == 0 ) {
for(int r=0;r<WorldShmSize;r++){
size_t size = bytes;
key_t key = IPC_PRIVATE;
int flags = IPC_CREAT | SHM_R | SHM_W;
#ifdef SHM_HUGETLB
if (Hugepages) flags|=SHM_HUGETLB;
#endif
if ((shmids[r]= shmget(key,size, flags)) ==-1) {
int errsv = errno;
printf("Errno %d\n",errsv);
printf("key %d\n",key);
printf("size %lld\n",size);
printf("flags %d\n",flags);
perror("shmget");
exit(1);
}
}
}
MPI_Barrier(WorldShmComm);
MPI_Bcast(&shmids[0],WorldShmSize*sizeof(int),MPI_BYTE,0,WorldShmComm);
MPI_Barrier(WorldShmComm);
for(int r=0;r<WorldShmSize;r++){
WorldShmCommBufs[r] = (uint64_t *)shmat(shmids[r], NULL,0);
if (WorldShmCommBufs[r] == (uint64_t *)-1) {
perror("Shared memory attach failure");
shmctl(shmids[r], IPC_RMID, NULL);
exit(2);
}
}
MPI_Barrier(WorldShmComm);
///////////////////////////////////
// Mark for clean up
///////////////////////////////////
for(int r=0;r<WorldShmSize;r++){
shmctl(shmids[r], IPC_RMID,(struct shmid_ds *)NULL);
}
MPI_Barrier(WorldShmComm);
_ShmAlloc=1;
_ShmAllocBytes = bytes;
}
#endif
////////////////////////////////////////////////////////////////////////////////////////////
// Hugetlbfs mapping intended
////////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_MPI3_SHMMMAP
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// allocate the shared windows for our group
//////////////////////////////////////////////////////////////////////////////////////////////////////////
MPI_Barrier(WorldShmComm);
WorldShmCommBufs.resize(WorldShmSize);
////////////////////////////////////////////////////////////////////////////////////////////
// Hugetlbfs and others map filesystems as mappable huge pages
////////////////////////////////////////////////////////////////////////////////////////////
char shm_name [NAME_MAX];
for(int r=0;r<WorldShmSize;r++){
sprintf(shm_name,GRID_SHM_PATH "/Grid_mpi3_shm_%d_%d",WorldNode,r);
int fd=open(shm_name,O_RDWR|O_CREAT,0666);
if ( fd == -1) {
printf("open %s failed\n",shm_name);
perror("open hugetlbfs");
exit(0);
}
int mmap_flag = MAP_SHARED ;
#ifdef MAP_POPULATE
mmap_flag|=MAP_POPULATE;
#endif
#ifdef MAP_HUGETLB
if ( flags ) mmap_flag |= MAP_HUGETLB;
#endif
void *ptr = (void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);
if ( ptr == (void *)MAP_FAILED ) {
printf("mmap %s failed\n",shm_name);
perror("failed mmap"); assert(0);
}
assert(((uint64_t)ptr&0x3F)==0);
close(fd);
WorldShmCommBufs[r] =ptr;
// std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
}
_ShmAlloc=1;
_ShmAllocBytes = bytes;
};
#endif // MMAP
#ifdef GRID_MPI3_SHM_NONE
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// allocate the shared windows for our group
//////////////////////////////////////////////////////////////////////////////////////////////////////////
MPI_Barrier(WorldShmComm);
WorldShmCommBufs.resize(WorldShmSize);
////////////////////////////////////////////////////////////////////////////////////////////
// Hugetlbf and others map filesystems as mappable huge pages
////////////////////////////////////////////////////////////////////////////////////////////
char shm_name [NAME_MAX];
assert(WorldShmSize == 1);
for(int r=0;r<WorldShmSize;r++){
int fd=-1;
int mmap_flag = MAP_SHARED |MAP_ANONYMOUS ;
#ifdef MAP_POPULATE
mmap_flag|=MAP_POPULATE;
#endif
#ifdef MAP_HUGETLB
if ( flags ) mmap_flag |= MAP_HUGETLB;
#endif
void *ptr = (void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);
if ( ptr == (void *)MAP_FAILED ) {
printf("mmap %s failed\n",shm_name);
perror("failed mmap"); assert(0);
}
assert(((uint64_t)ptr&0x3F)==0);
close(fd);
WorldShmCommBufs[r] =ptr;
// std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
}
_ShmAlloc=1;
_ShmAllocBytes = bytes;
};
#endif // MMAP
#ifdef GRID_MPI3_SHMOPEN
////////////////////////////////////////////////////////////////////////////////////////////
// POSIX SHMOPEN ; as far as I know Linux does not allow EXPLICIT HugePages with this case
// tmpfs (Larry Meadows says) does not support explicit huge page, and this is used for
// the posix shm virtual file system
////////////////////////////////////////////////////////////////////////////////////////////
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
MPI_Barrier(WorldShmComm);
WorldShmCommBufs.resize(WorldShmSize);
char shm_name [NAME_MAX];
if ( WorldShmRank == 0 ) {
for(int r=0;r<WorldShmSize;r++){
size_t size = bytes;
struct passwd *pw = getpwuid (getuid());
sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
shm_unlink(shm_name);
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
if ( fd < 0 ) { perror("failed shm_open"); assert(0); }
ftruncate(fd, size);
int mmap_flag = MAP_SHARED;
#ifdef MAP_POPULATE
mmap_flag |= MAP_POPULATE;
#endif
#ifdef MAP_HUGETLB
if (flags) mmap_flag |= MAP_HUGETLB;
#endif
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
// std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl;
if ( ptr == (void * )MAP_FAILED ) {
perror("failed mmap");
assert(0);
}
assert(((uint64_t)ptr&0x3F)==0);
WorldShmCommBufs[r] =ptr;
close(fd);
}
}
MPI_Barrier(WorldShmComm);
if ( WorldShmRank != 0 ) {
for(int r=0;r<WorldShmSize;r++){
size_t size = bytes ;
struct passwd *pw = getpwuid (getuid());
sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
int fd=shm_open(shm_name,O_RDWR,0666);
if ( fd<0 ) { perror("failed shm_open"); assert(0); }
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); }
assert(((uint64_t)ptr&0x3F)==0);
WorldShmCommBufs[r] =ptr;
close(fd);
}
}
_ShmAlloc=1;
_ShmAllocBytes = bytes;
}
#endif
////////////////////////////////////////////////////////
// Global shared functionality finished
// Now move to per communicator functionality
////////////////////////////////////////////////////////
void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
{
int rank, size;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&size);
ShmRanks.resize(size);
/////////////////////////////////////////////////////////////////////
// Split into groups that can share memory
/////////////////////////////////////////////////////////////////////
MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&ShmComm);
MPI_Comm_rank(ShmComm ,&ShmRank);
MPI_Comm_size(ShmComm ,&ShmSize);
ShmCommBufs.resize(ShmSize);
//////////////////////////////////////////////////////////////////////
// Map ShmRank to WorldShmRank and use the right buffer
//////////////////////////////////////////////////////////////////////
assert (GlobalSharedMemory::ShmAlloc()==1);
heap_size = GlobalSharedMemory::ShmAllocBytes();
for(int r=0;r<ShmSize;r++){
uint32_t wsr = (r==ShmRank) ? GlobalSharedMemory::WorldShmRank : 0 ;
MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,ShmComm);
ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[wsr];
// std::cout << "SetCommunicator ShmCommBufs ["<< r<< "] = "<< ShmCommBufs[r]<< " wsr = "<<wsr<<std::endl;
}
ShmBufferFreeAll();
/////////////////////////////////////////////////////////////////////
// find comm ranks in our SHM group (i.e. which ranks are on our node)
/////////////////////////////////////////////////////////////////////
MPI_Group FullGroup, ShmGroup;
MPI_Comm_group (comm , &FullGroup);
MPI_Comm_group (ShmComm, &ShmGroup);
std::vector<int> ranks(size); for(int r=0;r<size;r++) ranks[r]=r;
MPI_Group_translate_ranks (FullGroup,size,&ranks[0],ShmGroup, &ShmRanks[0]);
}
//////////////////////////////////////////////////////////////////
// On node barrier
//////////////////////////////////////////////////////////////////
void SharedMemory::ShmBarrier(void)
{
MPI_Barrier (ShmComm);
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// Test the shared memory is working
//////////////////////////////////////////////////////////////////////////////////////////////////////////
void SharedMemory::SharedMemoryTest(void)
{
ShmBarrier();
if ( ShmRank == 0 ) {
for(int r=0;r<ShmSize;r++){
uint64_t * check = (uint64_t *) ShmCommBufs[r];
check[0] = GlobalSharedMemory::WorldNode;
check[1] = r;
check[2] = 0x5A5A5A;
}
}
ShmBarrier();
for(int r=0;r<ShmSize;r++){
uint64_t * check = (uint64_t *) ShmCommBufs[r];
assert(check[0]==GlobalSharedMemory::WorldNode);
assert(check[1]==r);
assert(check[2]==0x5A5A5A);
}
ShmBarrier();
}
void *SharedMemory::ShmBuffer(int rank)
{
int gpeer = ShmRanks[rank];
if (gpeer == MPI_UNDEFINED){
return NULL;
} else {
return ShmCommBufs[gpeer];
}
}
void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
{
static int count =0;
int gpeer = ShmRanks[rank];
assert(gpeer!=ShmRank); // never send to self
if (gpeer == MPI_UNDEFINED){
return NULL;
} else {
uint64_t offset = (uint64_t)local_p - (uint64_t)ShmCommBufs[ShmRank];
uint64_t remote = (uint64_t)ShmCommBufs[gpeer]+offset;
return (void *) remote;
}
}
SharedMemory::~SharedMemory()
{
int MPI_is_finalised; MPI_Finalized(&MPI_is_finalised);
if ( !MPI_is_finalised ) {
MPI_Comm_free(&ShmComm);
}
};
}

View File

@@ -0,0 +1,128 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/communicator/SharedMemory.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/GridCore.h>
namespace Grid {
/*Construct from an MPI communicator*/
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
{
assert(_ShmSetup==0);
WorldComm = 0;
WorldRank = 0;
WorldSize = 1;
WorldShmComm = 0 ;
WorldShmRank = 0 ;
WorldShmSize = 1 ;
WorldNodes = 1 ;
WorldNode = 0 ;
WorldShmRanks.resize(WorldSize); WorldShmRanks[0] = 0;
WorldShmCommBufs.resize(1);
_ShmSetup=1;
}
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
{
optimal_comm = WorldComm;
}
////////////////////////////////////////////////////////////////////////////////////////////
// Hugetlbfs mapping intended, use anonymous mmap
////////////////////////////////////////////////////////////////////////////////////////////
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
void * ShmCommBuf ;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
int mmap_flag =0;
#ifdef MAP_ANONYMOUS
mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS;
#endif
#ifdef MAP_ANON
mmap_flag = mmap_flag| MAP_SHARED | MAP_ANON;
#endif
#ifdef MAP_HUGETLB
if ( flags ) mmap_flag |= MAP_HUGETLB;
#endif
ShmCommBuf =(void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag, -1, 0);
if (ShmCommBuf == (void *)MAP_FAILED) {
perror("mmap failed ");
exit(EXIT_FAILURE);
}
#ifdef MADV_HUGEPAGE
if (!Hugepages ) madvise(ShmCommBuf,bytes,MADV_HUGEPAGE);
#endif
bzero(ShmCommBuf,bytes);
WorldShmCommBufs[0] = ShmCommBuf;
_ShmAllocBytes=bytes;
_ShmAlloc=1;
};
////////////////////////////////////////////////////////
// Global shared functionality finished
// Now move to per communicator functionality
////////////////////////////////////////////////////////
void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
{
assert(GlobalSharedMemory::ShmAlloc()==1);
ShmRanks.resize(1);
ShmCommBufs.resize(1);
ShmRanks[0] = 0;
ShmRank = 0;
ShmSize = 1;
//////////////////////////////////////////////////////////////////////
// Map ShmRank to WorldShmRank and use the right buffer
//////////////////////////////////////////////////////////////////////
ShmCommBufs[0] = GlobalSharedMemory::WorldShmCommBufs[0];
heap_size = GlobalSharedMemory::ShmAllocBytes();
ShmBufferFreeAll();
return;
}
//////////////////////////////////////////////////////////////////
// On node barrier
//////////////////////////////////////////////////////////////////
void SharedMemory::ShmBarrier(void){ return ; }
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// Test the shared memory is working
//////////////////////////////////////////////////////////////////////////////////////////////////////////
void SharedMemory::SharedMemoryTest(void) { return; }
void *SharedMemory::ShmBuffer(int rank)
{
return NULL;
}
void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
{
return NULL;
}
SharedMemory::~SharedMemory()
{};
}

View File

@@ -42,7 +42,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/cshift/Cshift_mpi.h>
#endif
#ifdef GRID_COMMS_MPI3L
#ifdef GRID_COMMS_MPIT
#include <Grid/cshift/Cshift_mpi.h>
#endif

View File

@@ -1,4 +1,3 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@@ -31,21 +30,11 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
namespace Grid {
template<class vobj>
class SimpleCompressor {
public:
void Point(int) {};
vobj operator() (const vobj &arg) {
return arg;
}
};
///////////////////////////////////////////////////////////////////
// Gather for when there is no need to SIMD split with compression
// Gather for when there is no need to SIMD split
///////////////////////////////////////////////////////////////////
template<class vobj,class cobj,class compressor> void
Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimension,int plane,int cbmask,compressor &compress, int off=0)
template<class vobj> void
Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimension,int plane,int cbmask, int off=0)
{
int rd = rhs._grid->_rdimensions[dimension];
@@ -54,45 +43,43 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimen
}
int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
int ent = 0;
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
int stride=rhs._grid->_slice_stride[dimension];
if ( cbmask == 0x3 ) {
PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*stride;
int bo = n*e2;
buffer[off+bo+b]=compress(rhs._odata[so+o+b]);
table[ent++] = std::pair<int,int>(off+bo+b,so+o+b);
}
}
} else {
int bo=0;
std::vector<std::pair<int,int> > table;
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*stride;
int ocb=1<<rhs._grid->CheckerBoardFromOindexTable(o+b);
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
if ( ocb &cbmask ) {
table.push_back(std::pair<int,int> (bo++,o+b));
table[ent++]=std::pair<int,int> (off+bo++,so+o+b);
}
}
}
PARALLEL_FOR_LOOP
for(int i=0;i<table.size();i++){
buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]);
}
parallel_for(int i=0;i<ent;i++){
buffer[table[i].first]=rhs._odata[table[i].second];
}
}
///////////////////////////////////////////////////////////////////
// Gather for when there *is* need to SIMD split with compression
// Gather for when there *is* need to SIMD split
///////////////////////////////////////////////////////////////////
template<class cobj,class vobj,class compressor> void
Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_object *> pointers,int dimension,int plane,int cbmask,compressor &compress)
template<class vobj> void
Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename vobj::scalar_object *> pointers,int dimension,int plane,int cbmask)
{
int rd = rhs._grid->_rdimensions[dimension];
@@ -105,57 +92,40 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
int n1=rhs._grid->_slice_stride[dimension];
int n2=rhs._grid->_slice_block[dimension];
if ( cbmask ==0x3){
PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
parallel_for_nest2(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*n1;
int offset = b+n*n2;
cobj temp =compress(rhs._odata[so+o+b]);
int offset = b+n*e2;
extract<cobj>(temp,pointers,offset);
vobj temp =rhs._odata[so+o+b];
extract<vobj>(temp,pointers,offset);
}
}
} else {
assert(0); //Fixme think this is buggy
for(int n=0;n<e1;n++){
// Case of SIMD split AND checker dim cannot currently be hit, except in
// Test_cshift_red_black code.
std::cout << " Dense packed buffer WARNING " <<std::endl;
parallel_for_nest2(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o=n*rhs._grid->_slice_stride[dimension];
int o=n*n1;
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
int offset = b+n*rhs._grid->_slice_block[dimension];
int offset = b+n*e2;
if ( ocb & cbmask ) {
cobj temp =compress(rhs._odata[so+o+b]);
extract<cobj>(temp,pointers,offset);
vobj temp =rhs._odata[so+o+b];
extract<vobj>(temp,pointers,offset);
}
}
}
}
}
//////////////////////////////////////////////////////
// Gather for when there is no need to SIMD split
//////////////////////////////////////////////////////
template<class vobj> void Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer, int dimension,int plane,int cbmask)
{
SimpleCompressor<vobj> dontcompress;
Gather_plane_simple (rhs,buffer,dimension,plane,cbmask,dontcompress);
}
//////////////////////////////////////////////////////
// Gather for when there *is* need to SIMD split
//////////////////////////////////////////////////////
template<class vobj> void Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename vobj::scalar_object *> pointers,int dimension,int plane,int cbmask)
{
SimpleCompressor<vobj> dontcompress;
Gather_plane_extract<vobj,vobj,decltype(dontcompress)>(rhs,pointers,dimension,plane,cbmask,dontcompress);
}
//////////////////////////////////////////////////////
// Scatter for when there is no need to SIMD split
//////////////////////////////////////////////////////
@@ -171,35 +141,43 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
int stride=rhs._grid->_slice_stride[dimension];
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
int ent =0;
if ( cbmask ==0x3 ) {
PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension];
int bo =n*rhs._grid->_slice_block[dimension];
rhs._odata[so+o+b]=buffer[bo+b];
table[ent++] = std::pair<int,int>(so+o+b,bo+b);
}
}
} else {
int bo=0;
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension];
int bo =n*rhs._grid->_slice_block[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
if ( ocb & cbmask ) {
rhs._odata[so+o+b]=buffer[bo++];
table[ent++]=std::pair<int,int> (so+o+b,bo++);
}
}
}
}
parallel_for(int i=0;i<ent;i++){
rhs._odata[table[i].first]=buffer[table[i].second];
}
}
//////////////////////////////////////////////////////
// Scatter for when there *is* need to SIMD split
//////////////////////////////////////////////////////
template<class vobj,class cobj> void Scatter_plane_merge(Lattice<vobj> &rhs,std::vector<cobj *> pointers,int dimension,int plane,int cbmask)
template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,std::vector<typename vobj::scalar_object *> pointers,int dimension,int plane,int cbmask)
{
int rd = rhs._grid->_rdimensions[dimension];
@@ -213,8 +191,7 @@ PARALLEL_NESTED_LOOP2
int e2=rhs._grid->_slice_block[dimension];
if(cbmask ==0x3 ) {
PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
parallel_for_nest2(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*rhs._grid->_slice_stride[dimension];
int offset = b+n*rhs._grid->_slice_block[dimension];
@@ -222,7 +199,11 @@ PARALLEL_NESTED_LOOP2
}
}
} else {
assert(0); // think this is buggy FIXME
// Case of SIMD split AND checker dim cannot currently be hit, except in
// Test_cshift_red_black code.
// std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;// think this is buggy FIXME
std::cout<<" Unthreaded warning -- buffer is not densely packed ??"<<std::endl;
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*rhs._grid->_slice_stride[dimension];
@@ -253,31 +234,32 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
int e2=rhs._grid->_slice_block[dimension];
int stride = rhs._grid->_slice_stride[dimension];
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
int ent=0;
if(cbmask == 0x3 ){
PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride+b;
//lhs._odata[lo+o]=rhs._odata[ro+o];
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
table[ent++] = std::pair<int,int>(lo+o,ro+o);
}
}
} else {
PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride+b;
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
if ( ocb&cbmask ) {
//lhs._odata[lo+o]=rhs._odata[ro+o];
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
table[ent++] = std::pair<int,int>(lo+o,ro+o);
}
}
}
}
parallel_for(int i=0;i<ent;i++){
lhs._odata[table[i].first]=rhs._odata[table[i].second];
}
}
template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
@@ -295,17 +277,29 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block [dimension];
int stride = rhs._grid->_slice_stride[dimension];
PARALLEL_NESTED_LOOP2
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
int ent=0;
double t_tab,t_perm;
if ( cbmask == 0x3 ) {
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride;
table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
}}
} else {
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride;
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
if ( ocb&cbmask ) {
permute(lhs._odata[lo+o+b],rhs._odata[ro+o+b],permute_type);
if ( ocb&cbmask ) table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
}}
}
}}
parallel_for(int i=0;i<ent;i++){
permute(lhs._odata[table[i].first],rhs._odata[table[i].second],permute_type);
}
}
//////////////////////////////////////////////////////
@@ -318,6 +312,8 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
double t_local;
if ( sshift[0] == sshift[1] ) {
Cshift_local(ret,rhs,dimension,shift,0x3);
} else {
@@ -326,7 +322,7 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
}
}
template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
template<class vobj> void Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
{
GridBase *grid = rhs._grid;
int fd = grid->_fdimensions[dimension];
@@ -338,8 +334,8 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
// Map to always positive shift modulo global full dimension.
shift = (shift+fd)%fd;
ret.checkerboard = grid->CheckerBoardDestination(rhs.checkerboard,shift,dimension);
// the permute type
ret.checkerboard = grid->CheckerBoardDestination(rhs.checkerboard,shift,dimension);
int permute_dim =grid->PermuteDim(dimension);
int permute_type=grid->PermuteType(dimension);
int permute_type_dist;
@@ -348,22 +344,31 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
int o = 0;
int bo = x * grid->_ostride[dimension];
int cb= (cbmask==0x2)? Odd : Even;
int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
int sx = (x+sshift)%rd;
// FIXME : This must change where we have a
// Rotate slice.
// Document how this works ; why didn't I do this when I first wrote it...
// wrap is whether sshift > rd.
// num is sshift mod rd.
//
// shift 7
//
// XoXo YcYc
// oXoX cYcY
// XoXo YcYc
// oXoX cYcY
//
// sshift --
//
// XX YY ; 3
// XX YY ; 0
// XX YY ; 3
// XX YY ; 0
//
int permute_slice=0;
if(permute_dim){
int wrap = sshift/rd;
int wrap = sshift/rd; wrap=wrap % ly;
int num = sshift%rd;
if ( x< rd-num ) permute_slice=wrap;
@@ -375,15 +380,12 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
} else {
permute_type_dist = permute_type;
}
}
if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
else Copy_plane(ret,rhs,dimension,x,sx,cbmask);
}
return ret;
}
}
#endif

View File

@@ -54,13 +54,13 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
if ( !comm_dim ) {
// std::cout << "Cshift_local" <<std::endl;
//std::cout << "CSHIFT: Cshift_local" <<std::endl;
Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
} else if ( splice_dim ) {
// std::cout << "Cshift_comms_simd" <<std::endl;
//std::cout << "CSHIFT: Cshift_comms_simd call - splice_dim = " << splice_dim << " shift " << shift << " dimension = " << dimension << std::endl;
Cshift_comms_simd(ret,rhs,dimension,shift);
} else {
// std::cout << "Cshift_comms" <<std::endl;
//std::cout << "CSHIFT: Cshift_comms" <<std::endl;
Cshift_comms(ret,rhs,dimension,shift);
}
return ret;
@@ -74,7 +74,6 @@ template<class vobj> void Cshift_comms(Lattice<vobj>& ret,const Lattice<vobj> &r
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
// std::cout << "Cshift_comms dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
if ( sshift[0] == sshift[1] ) {
// std::cout << "Single pass Cshift_comms" <<std::endl;
Cshift_comms(ret,rhs,dimension,shift,0x3);
@@ -92,9 +91,12 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,const Lattice<vob
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
//std::cout << "Cshift_comms_simd dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
if ( sshift[0] == sshift[1] ) {
//std::cout << "Single pass Cshift_comms" <<std::endl;
Cshift_comms_simd(ret,rhs,dimension,shift,0x3);
} else {
//std::cout << "Two pass Cshift_comms" <<std::endl;
Cshift_comms_simd(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
Cshift_comms_simd(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
}
@@ -154,10 +156,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
(void *)&recv_buf[0],
recv_from_rank,
bytes);
grid->Barrier();
// for(int i=0;i<words;i++){
// std::cout << "SendRecv ["<<i<<"] snd "<<send_buf[i]<<" rcv " << recv_buf[i] << " 0x" << cbmask<<std::endl;
// }
Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask);
}
}
@@ -178,6 +178,10 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
int simd_layout = grid->_simd_layout[dimension];
int comm_dim = grid->_processors[dimension] >1 ;
//std::cout << "Cshift_comms_simd dim "<< dimension << " fd "<<fd<<" rd "<<rd
// << " ld "<<ld<<" pd " << pd<<" simd_layout "<<simd_layout
// << " comm_dim " << comm_dim << " cbmask " << cbmask <<std::endl;
assert(comm_dim==1);
assert(simd_layout==2);
assert(shift>=0);
@@ -243,7 +247,7 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
(void *)&recv_buf_extract[i][0],
recv_from_rank,
bytes);
grid->Barrier();
rpointers[i] = &recv_buf_extract[i][0];
} else {
rpointers[i] = &send_buf_extract[nbr_lane][0];

18920
Grid/json/json.hpp Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -39,8 +39,7 @@ namespace Grid {
ret.checkerboard = lhs.checkerboard;
conformable(ret,rhs);
conformable(lhs,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
mult(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
@@ -56,8 +55,7 @@ PARALLEL_FOR_LOOP
ret.checkerboard = lhs.checkerboard;
conformable(ret,rhs);
conformable(lhs,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
mac(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
@@ -73,8 +71,7 @@ PARALLEL_FOR_LOOP
ret.checkerboard = lhs.checkerboard;
conformable(ret,rhs);
conformable(lhs,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
sub(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
@@ -89,8 +86,7 @@ PARALLEL_FOR_LOOP
ret.checkerboard = lhs.checkerboard;
conformable(ret,rhs);
conformable(lhs,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
add(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
@@ -108,8 +104,7 @@ PARALLEL_FOR_LOOP
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
ret.checkerboard = lhs.checkerboard;
conformable(lhs,ret);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
obj1 tmp;
mult(&tmp,&lhs._odata[ss],&rhs);
vstream(ret._odata[ss],tmp);
@@ -120,8 +115,7 @@ PARALLEL_FOR_LOOP
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
ret.checkerboard = lhs.checkerboard;
conformable(ret,lhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
obj1 tmp;
mac(&tmp,&lhs._odata[ss],&rhs);
vstream(ret._odata[ss],tmp);
@@ -132,8 +126,7 @@ PARALLEL_FOR_LOOP
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
ret.checkerboard = lhs.checkerboard;
conformable(ret,lhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
sub(&tmp,&lhs._odata[ss],&rhs);
@@ -147,8 +140,7 @@ PARALLEL_FOR_LOOP
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
ret.checkerboard = lhs.checkerboard;
conformable(lhs,ret);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
add(&tmp,&lhs._odata[ss],&rhs);
@@ -166,8 +158,7 @@ PARALLEL_FOR_LOOP
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
ret.checkerboard = rhs.checkerboard;
conformable(ret,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
mult(&tmp,&lhs,&rhs._odata[ss]);
@@ -182,8 +173,7 @@ PARALLEL_FOR_LOOP
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
ret.checkerboard = rhs.checkerboard;
conformable(ret,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
mac(&tmp,&lhs,&rhs._odata[ss]);
@@ -198,8 +188,7 @@ PARALLEL_FOR_LOOP
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
ret.checkerboard = rhs.checkerboard;
conformable(ret,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
sub(&tmp,&lhs,&rhs._odata[ss]);
@@ -213,8 +202,7 @@ PARALLEL_FOR_LOOP
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
ret.checkerboard = rhs.checkerboard;
conformable(ret,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
add(&tmp,&lhs,&rhs._odata[ss]);
@@ -230,8 +218,7 @@ PARALLEL_FOR_LOOP
ret.checkerboard = x.checkerboard;
conformable(ret,x);
conformable(x,y);
PARALLEL_FOR_LOOP
for(int ss=0;ss<x._grid->oSites();ss++){
parallel_for(int ss=0;ss<x._grid->oSites();ss++){
#ifdef STREAMING_STORES
vobj tmp = a*x._odata[ss]+y._odata[ss];
vstream(ret._odata[ss],tmp);
@@ -245,8 +232,7 @@ PARALLEL_FOR_LOOP
ret.checkerboard = x.checkerboard;
conformable(ret,x);
conformable(x,y);
PARALLEL_FOR_LOOP
for(int ss=0;ss<x._grid->oSites();ss++){
parallel_for(int ss=0;ss<x._grid->oSites();ss++){
#ifdef STREAMING_STORES
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
vstream(ret._odata[ss],tmp);
@@ -258,19 +244,11 @@ PARALLEL_FOR_LOOP
template<class sobj,class vobj> strong_inline
RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
ret.checkerboard = x.checkerboard;
conformable(ret,x);
conformable(x,y);
axpy(ret,a,x,y);
return norm2(ret);
return axpy_norm_fast(ret,a,x,y);
}
template<class sobj,class vobj> strong_inline
RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
ret.checkerboard = x.checkerboard;
conformable(ret,x);
conformable(x,y);
axpby(ret,a,b,x,y);
return norm2(ret); // FIXME implement parallel norm in ss loop
return axpby_norm_fast(ret,a,b,x,y);
}
}

View File

@@ -121,8 +121,7 @@ public:
assert( (cb==Odd) || (cb==Even));
checkerboard=cb;
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
#ifdef STREAMING_STORES
vobj tmp = eval(ss,expr);
vstream(_odata[ss] ,tmp);
@@ -144,8 +143,7 @@ PARALLEL_FOR_LOOP
assert( (cb==Odd) || (cb==Even));
checkerboard=cb;
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
#ifdef STREAMING_STORES
vobj tmp = eval(ss,expr);
vstream(_odata[ss] ,tmp);
@@ -167,8 +165,7 @@ PARALLEL_FOR_LOOP
assert( (cb==Odd) || (cb==Even));
checkerboard=cb;
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
#ifdef STREAMING_STORES
//vobj tmp = eval(ss,expr);
vstream(_odata[ss] ,eval(ss,expr));
@@ -191,8 +188,7 @@ PARALLEL_FOR_LOOP
checkerboard=cb;
_odata.resize(_grid->oSites());
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
#ifdef STREAMING_STORES
vobj tmp = eval(ss,expr);
vstream(_odata[ss] ,tmp);
@@ -213,8 +209,7 @@ PARALLEL_FOR_LOOP
checkerboard=cb;
_odata.resize(_grid->oSites());
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
#ifdef STREAMING_STORES
vobj tmp = eval(ss,expr);
vstream(_odata[ss] ,tmp);
@@ -235,8 +230,7 @@ PARALLEL_FOR_LOOP
checkerboard=cb;
_odata.resize(_grid->oSites());
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
vstream(_odata[ss] ,eval(ss,expr));
}
};
@@ -258,34 +252,65 @@ PARALLEL_FOR_LOOP
_grid = r._grid;
checkerboard = r.checkerboard;
_odata.resize(_grid->oSites());// essential
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
_odata[ss]=r._odata[ss];
}
}
Lattice(Lattice&& r){ // move constructor
_grid = r._grid;
checkerboard = r.checkerboard;
_odata=std::move(r._odata);
}
inline Lattice<vobj> & operator = (Lattice<vobj> && r)
{
_grid = r._grid;
checkerboard = r.checkerboard;
_odata =std::move(r._odata);
return *this;
}
virtual ~Lattice(void) = default;
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
_grid = r._grid;
checkerboard = r.checkerboard;
_odata.resize(_grid->oSites());// essential
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r;
parallel_for(int ss=0;ss<_grid->oSites();ss++){
_odata[ss]=r._odata[ss];
}
return *this;
}
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
this->checkerboard = r.checkerboard;
conformable(*this,r);
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r._odata[ss];
}
return *this;
}
virtual ~Lattice(void) = default;
void reset(GridBase* grid) {
if (_grid != grid) {
_grid = grid;
_odata.resize(grid->oSites());
checkerboard = 0;
}
}
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r;
}
return *this;
}
// *=,+=,-= operators inherit behvour from correspond */+/- operation
template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
*this = (*this)*r;

View File

@@ -47,8 +47,7 @@ namespace Grid {
inline Lattice<vInteger> LLComparison(vfunctor op,const Lattice<lobj> &lhs,const Lattice<robj> &rhs)
{
Lattice<vInteger> ret(rhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=op(lhs._odata[ss],rhs._odata[ss]);
}
return ret;
@@ -60,8 +59,7 @@ PARALLEL_FOR_LOOP
inline Lattice<vInteger> LSComparison(vfunctor op,const Lattice<lobj> &lhs,const robj &rhs)
{
Lattice<vInteger> ret(lhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites(); ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites(); ss++){
ret._odata[ss]=op(lhs._odata[ss],rhs);
}
return ret;
@@ -73,8 +71,7 @@ PARALLEL_FOR_LOOP
inline Lattice<vInteger> SLComparison(vfunctor op,const lobj &lhs,const Lattice<robj> &rhs)
{
Lattice<vInteger> ret(rhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=op(lhs._odata[ss],rhs);
}
return ret;
@@ -168,6 +165,5 @@ PARALLEL_FOR_LOOP
inline Lattice<vInteger> operator != (const lobj & lhs, const Lattice<robj> & rhs) {
return SLComparison(vne<lobj,robj>(),lhs,rhs);
}
}
#endif

View File

@@ -179,7 +179,7 @@ namespace Grid {
return ret;
}
#define DECLARE_RELATIONAL(op,functor) \
#define DECLARE_RELATIONAL_EQ(op,functor) \
template<class vsimd,IfSimd<vsimd> = 0>\
inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\
{\
@@ -198,11 +198,6 @@ namespace Grid {
typedef typename vsimd::scalar_type scalar;\
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
}\
template<class vsimd>\
inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
{ \
return lhs._internal op rhs._internal; \
} \
template<class vsimd>\
inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs) \
{ \
@@ -212,14 +207,21 @@ namespace Grid {
inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
{ \
return lhs op rhs._internal; \
}
} \
#define DECLARE_RELATIONAL(op,functor) \
DECLARE_RELATIONAL_EQ(op,functor) \
template<class vsimd>\
inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
{ \
return lhs._internal op rhs._internal; \
}
DECLARE_RELATIONAL(<,slt);
DECLARE_RELATIONAL(<=,sle);
DECLARE_RELATIONAL(>,sgt);
DECLARE_RELATIONAL(>=,sge);
DECLARE_RELATIONAL(==,seq);
DECLARE_RELATIONAL_EQ(==,seq);
DECLARE_RELATIONAL(!=,sne);
#undef DECLARE_RELATIONAL

View File

@@ -52,23 +52,5 @@ namespace Grid {
}
};
// LatticeCoordinate();
// FIXME for debug; deprecate this; made obscelete by
template<class vobj> void lex_sites(Lattice<vobj> &l){
Real *v_ptr = (Real *)&l._odata[0];
size_t o_len = l._grid->oSites();
size_t v_len = sizeof(vobj)/sizeof(vRealF);
size_t vec_len = vRealF::Nsimd();
for(int i=0;i<o_len;i++){
for(int j=0;j<v_len;j++){
for(int vv=0;vv<vec_len;vv+=2){
v_ptr[i*v_len*vec_len+j*vec_len+vv ]= i+vv*500;
v_ptr[i*v_len*vec_len+j*vec_len+vv+1]= i+vv*500;
}
}}
}
}
#endif

View File

@@ -43,8 +43,7 @@ namespace Grid {
inline auto localNorm2 (const Lattice<vobj> &rhs)-> Lattice<typename vobj::tensor_reduced>
{
Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=innerProduct(rhs._odata[ss],rhs._odata[ss]);
}
return ret;
@@ -55,8 +54,7 @@ PARALLEL_FOR_LOOP
inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs) -> Lattice<typename vobj::tensor_reduced>
{
Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=innerProduct(lhs._odata[ss],rhs._odata[ss]);
}
return ret;
@@ -68,13 +66,10 @@ PARALLEL_FOR_LOOP
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))>
{
Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))> ret(rhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]);
}
return ret;
}
}
#endif

View File

@@ -37,8 +37,7 @@ namespace Grid {
inline Lattice<vobj> operator -(const Lattice<vobj> &r)
{
Lattice<vobj> ret(r._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<r._grid->oSites();ss++){
parallel_for(int ss=0;ss<r._grid->oSites();ss++){
vstream(ret._odata[ss], -r._odata[ss]);
}
return ret;
@@ -74,8 +73,7 @@ PARALLEL_FOR_LOOP
inline auto operator * (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
{
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
decltype(lhs*rhs._odata[0]) tmp=lhs*rhs._odata[ss];
vstream(ret._odata[ss],tmp);
// ret._odata[ss]=lhs*rhs._odata[ss];
@@ -86,8 +84,7 @@ PARALLEL_FOR_LOOP
inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs+rhs._odata[0])>
{
Lattice<decltype(lhs+rhs._odata[0])> ret(rhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
decltype(lhs+rhs._odata[0]) tmp =lhs-rhs._odata[ss];
vstream(ret._odata[ss],tmp);
// ret._odata[ss]=lhs+rhs._odata[ss];
@@ -98,11 +95,9 @@ PARALLEL_FOR_LOOP
inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs-rhs._odata[0])>
{
Lattice<decltype(lhs-rhs._odata[0])> ret(rhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
decltype(lhs-rhs._odata[0]) tmp=lhs-rhs._odata[ss];
vstream(ret._odata[ss],tmp);
// ret._odata[ss]=lhs-rhs._odata[ss];
}
return ret;
}
@@ -110,8 +105,7 @@ PARALLEL_FOR_LOOP
inline auto operator * (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
{
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites(); ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites(); ss++){
decltype(lhs._odata[0]*rhs) tmp =lhs._odata[ss]*rhs;
vstream(ret._odata[ss],tmp);
// ret._odata[ss]=lhs._odata[ss]*rhs;
@@ -122,8 +116,7 @@ PARALLEL_FOR_LOOP
inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]+rhs)>
{
Lattice<decltype(lhs._odata[0]+rhs)> ret(lhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
decltype(lhs._odata[0]+rhs) tmp=lhs._odata[ss]+rhs;
vstream(ret._odata[ss],tmp);
// ret._odata[ss]=lhs._odata[ss]+rhs;
@@ -134,15 +127,12 @@ PARALLEL_FOR_LOOP
inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]-rhs)>
{
Lattice<decltype(lhs._odata[0]-rhs)> ret(lhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
decltype(lhs._odata[0]-rhs) tmp=lhs._odata[ss]-rhs;
vstream(ret._odata[ss],tmp);
// ret._odata[ss]=lhs._odata[ss]-rhs;
}
return ret;
}
}
#endif

View File

@@ -44,8 +44,7 @@ namespace Grid {
{
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))> ret(lhs._grid);
ret.checkerboard=lhs.checkerboard;
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i);
}
return ret;
@@ -55,8 +54,7 @@ PARALLEL_FOR_LOOP
{
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))> ret(lhs._grid);
ret.checkerboard=lhs.checkerboard;
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i,j);
}
return ret;
@@ -68,16 +66,14 @@ PARALLEL_FOR_LOOP
template<int Index,class vobj>
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0))> & rhs,int i)
{
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i);
}
}
template<int Index,class vobj>
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0,0))> & rhs,int i,int j)
{
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i,j);
}
}
@@ -131,9 +127,6 @@ PARALLEL_FOR_LOOP
assert( l.checkerboard == l._grid->CheckerBoard(site));
// FIXME
// assert( sizeof(sobj)*Nsimd == sizeof(vobj));
int rank,odx,idx;
grid->GlobalCoorToRankIndex(rank,odx,idx,site);

View File

@@ -40,8 +40,7 @@ namespace Grid {
template<class vobj> inline Lattice<vobj> adj(const Lattice<vobj> &lhs){
Lattice<vobj> ret(lhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = adj(lhs._odata[ss]);
}
return ret;
@@ -49,13 +48,10 @@ PARALLEL_FOR_LOOP
template<class vobj> inline Lattice<vobj> conjugate(const Lattice<vobj> &lhs){
Lattice<vobj> ret(lhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = conjugate(lhs._odata[ss]);
}
return ret;
};
}
#endif

View File

@@ -0,0 +1,733 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/lattice/Lattice_reduction.h
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_LATTICE_REDUCTION_H
#define GRID_LATTICE_REDUCTION_H
#include <Grid/Grid_Eigen_Dense.h>
namespace Grid {
#ifdef GRID_WARN_SUBOPTIMAL
#warning "Optimisation alert all these reduction loops are NOT threaded "
#endif
////////////////////////////////////////////////////////////////////////////////////////////////////
// Deterministic Reduction operations
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
auto nrm = innerProduct(arg,arg);
return std::real(nrm);
}
// Double inner product
template<class vobj>
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
{
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_typeD vector_type;
GridBase *grid = left._grid;
const int pad = 8;
ComplexD inner;
Vector<ComplexD> sumarray(grid->SumArraySize()*pad);
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
int nwork, mywork, myoff;
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
decltype(innerProductD(left._odata[0],right._odata[0])) vinner=zero; // private to thread; sub summation
for(int ss=myoff;ss<mywork+myoff; ss++){
vinner = vinner + innerProductD(left._odata[ss],right._odata[ss]);
}
// All threads sum across SIMD; reduce serial work at end
// one write per cacheline with streaming store
ComplexD tmp = Reduce(TensorRemove(vinner)) ;
vstream(sumarray[thr*pad],tmp);
}
inner=0.0;
for(int i=0;i<grid->SumArraySize();i++){
inner = inner+sumarray[i*pad];
}
right._grid->GlobalSum(inner);
return inner;
}
/////////////////////////
// Fast axpby_norm
// z = a x + b y
// return norm z
/////////////////////////
template<class sobj,class vobj> strong_inline RealD
axpy_norm_fast(Lattice<vobj> &z,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y)
{
sobj one(1.0);
return axpby_norm_fast(z,a,one,x,y);
}
template<class sobj,class vobj> strong_inline RealD
axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
{
const int pad = 8;
z.checkerboard = x.checkerboard;
conformable(z,x);
conformable(x,y);
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_typeD vector_type;
RealD nrm;
GridBase *grid = x._grid;
Vector<RealD> sumarray(grid->SumArraySize()*pad);
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
int nwork, mywork, myoff;
GridThread::GetWork(x._grid->oSites(),thr,mywork,myoff);
// private to thread; sub summation
decltype(innerProductD(z._odata[0],z._odata[0])) vnrm=zero;
for(int ss=myoff;ss<mywork+myoff; ss++){
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
vnrm = vnrm + innerProductD(tmp,tmp);
vstream(z._odata[ss],tmp);
}
vstream(sumarray[thr*pad],real(Reduce(TensorRemove(vnrm)))) ;
}
nrm = 0.0; // sum across threads; linear in thread count but fast
for(int i=0;i<grid->SumArraySize();i++){
nrm = nrm+sumarray[i*pad];
}
z._grid->GlobalSum(nrm);
return nrm;
}
template<class Op,class T1>
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
{
return sum(closure(expr));
}
template<class Op,class T1,class T2>
inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object
{
return sum(closure(expr));
}
template<class Op,class T1,class T2,class T3>
inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
eval(0,std::get<1>(expr.second)),
eval(0,std::get<2>(expr.second))
))::scalar_object
{
return sum(closure(expr));
}
template<class vobj>
inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
{
GridBase *grid=arg._grid;
int Nsimd = grid->Nsimd();
std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize());
for(int i=0;i<grid->SumArraySize();i++){
sumarray[i]=zero;
}
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
int nwork, mywork, myoff;
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
vobj vvsum=zero;
for(int ss=myoff;ss<mywork+myoff; ss++){
vvsum = vvsum + arg._odata[ss];
}
sumarray[thr]=vvsum;
}
vobj vsum=zero; // sum across threads
for(int i=0;i<grid->SumArraySize();i++){
vsum = vsum+sumarray[i];
}
typedef typename vobj::scalar_object sobj;
sobj ssum=zero;
std::vector<sobj> buf(Nsimd);
extract(vsum,buf);
for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];
arg._grid->GlobalSum(ssum);
return ssum;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// sliceSum, sliceInnerProduct, sliceAxpy, sliceNorm etc...
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
{
///////////////////////////////////////////////////////
// FIXME precision promoted summation
// may be important for correlation functions
// But easily avoided by using double precision fields
///////////////////////////////////////////////////////
typedef typename vobj::scalar_object sobj;
GridBase *grid = Data._grid;
assert(grid!=NULL);
const int Nd = grid->_ndimension;
const int Nsimd = grid->Nsimd();
assert(orthogdim >= 0);
assert(orthogdim < Nd);
int fd=grid->_fdimensions[orthogdim];
int ld=grid->_ldimensions[orthogdim];
int rd=grid->_rdimensions[orthogdim];
std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
std::vector<sobj> lsSum(ld,zero); // sum across these down to scalars
std::vector<sobj> extracted(Nsimd); // splitting the SIMD
result.resize(fd); // And then global sum to return the same vector to every node
for(int r=0;r<rd;r++){
lvSum[r]=zero;
}
int e1= grid->_slice_nblock[orthogdim];
int e2= grid->_slice_block [orthogdim];
int stride=grid->_slice_stride[orthogdim];
// sum over reduced dimension planes, breaking out orthog dir
// Parallel over orthog direction
parallel_for(int r=0;r<rd;r++){
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int ss= so+n*stride+b;
lvSum[r]=lvSum[r]+Data._odata[ss];
}
}
}
// Sum across simd lanes in the plane, breaking out orthog dir.
std::vector<int> icoor(Nd);
for(int rt=0;rt<rd;rt++){
extract(lvSum[rt],extracted);
for(int idx=0;idx<Nsimd;idx++){
grid->iCoorFromIindex(icoor,idx);
int ldx =rt+icoor[orthogdim]*rd;
lsSum[ldx]=lsSum[ldx]+extracted[idx];
}
}
// sum over nodes.
sobj gsum;
for(int t=0;t<fd;t++){
int pt = t/ld; // processor plane
int lt = t%ld;
if ( pt == grid->_processor_coor[orthogdim] ) {
gsum=lsSum[lt];
} else {
gsum=zero;
}
grid->GlobalSum(gsum);
result[t]=gsum;
}
}
template<class vobj>
static void mySliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
{
// std::cout << GridLogMessage << "Start mySliceInnerProductVector" << std::endl;
typedef typename vobj::scalar_type scalar_type;
std::vector<scalar_type> lsSum;
localSliceInnerProductVector(result, lhs, rhs, lsSum, orthogdim);
globalSliceInnerProductVector(result, lhs, lsSum, orthogdim);
// std::cout << GridLogMessage << "End mySliceInnerProductVector" << std::endl;
}
template <class vobj>
static void localSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, const Lattice<vobj> &rhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
{
// std::cout << GridLogMessage << "Start prep" << std::endl;
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
GridBase *grid = lhs._grid;
assert(grid!=NULL);
conformable(grid,rhs._grid);
const int Nd = grid->_ndimension;
const int Nsimd = grid->Nsimd();
assert(orthogdim >= 0);
assert(orthogdim < Nd);
int fd=grid->_fdimensions[orthogdim];
int ld=grid->_ldimensions[orthogdim];
int rd=grid->_rdimensions[orthogdim];
// std::cout << GridLogMessage << "Start alloc" << std::endl;
std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first
lsSum.resize(ld,scalar_type(0.0)); // sum across these down to scalars
std::vector<iScalar<scalar_type>> extracted(Nsimd); // splitting the SIMD
// std::cout << GridLogMessage << "End alloc" << std::endl;
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
for(int r=0;r<rd;r++){
lvSum[r]=zero;
}
int e1= grid->_slice_nblock[orthogdim];
int e2= grid->_slice_block [orthogdim];
int stride=grid->_slice_stride[orthogdim];
// std::cout << GridLogMessage << "End prep" << std::endl;
// std::cout << GridLogMessage << "Start parallel inner product, _rd = " << rd << std::endl;
vector_type vv;
parallel_for(int r=0;r<rd;r++)
{
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int ss = so + n * stride + b;
vv = TensorRemove(innerProduct(lhs._odata[ss], rhs._odata[ss]));
lvSum[r] = lvSum[r] + vv;
}
}
}
// std::cout << GridLogMessage << "End parallel inner product" << std::endl;
// Sum across simd lanes in the plane, breaking out orthog dir.
std::vector<int> icoor(Nd);
for(int rt=0;rt<rd;rt++){
iScalar<vector_type> temp;
temp._internal = lvSum[rt];
extract(temp,extracted);
for(int idx=0;idx<Nsimd;idx++){
grid->iCoorFromIindex(icoor,idx);
int ldx =rt+icoor[orthogdim]*rd;
lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
}
}
// std::cout << GridLogMessage << "End sum over simd lanes" << std::endl;
}
template <class vobj>
static void globalSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
{
typedef typename vobj::scalar_type scalar_type;
GridBase *grid = lhs._grid;
int fd = result.size();
int ld = lsSum.size();
// sum over nodes.
std::vector<scalar_type> gsum;
gsum.resize(fd, scalar_type(0.0));
// std::cout << GridLogMessage << "Start of gsum[t] creation:" << std::endl;
for(int t=0;t<fd;t++){
int pt = t/ld; // processor plane
int lt = t%ld;
if ( pt == grid->_processor_coor[orthogdim] ) {
gsum[t]=lsSum[lt];
}
}
// std::cout << GridLogMessage << "End of gsum[t] creation:" << std::endl;
// std::cout << GridLogMessage << "Start of GlobalSumVector:" << std::endl;
grid->GlobalSumVector(&gsum[0], fd);
// std::cout << GridLogMessage << "End of GlobalSumVector:" << std::endl;
result = gsum;
}
template<class vobj>
static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
{
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
GridBase *grid = lhs._grid;
assert(grid!=NULL);
conformable(grid,rhs._grid);
const int Nd = grid->_ndimension;
const int Nsimd = grid->Nsimd();
assert(orthogdim >= 0);
assert(orthogdim < Nd);
int fd=grid->_fdimensions[orthogdim];
int ld=grid->_ldimensions[orthogdim];
int rd=grid->_rdimensions[orthogdim];
std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first
std::vector<scalar_type > lsSum(ld,scalar_type(0.0)); // sum across these down to scalars
std::vector<iScalar<scalar_type> > extracted(Nsimd); // splitting the SIMD
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
for(int r=0;r<rd;r++){
lvSum[r]=zero;
}
int e1= grid->_slice_nblock[orthogdim];
int e2= grid->_slice_block [orthogdim];
int stride=grid->_slice_stride[orthogdim];
parallel_for(int r=0;r<rd;r++){
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int ss= so+n*stride+b;
vector_type vv = TensorRemove(innerProduct(lhs._odata[ss],rhs._odata[ss]));
lvSum[r]=lvSum[r]+vv;
}
}
}
// Sum across simd lanes in the plane, breaking out orthog dir.
std::vector<int> icoor(Nd);
for(int rt=0;rt<rd;rt++){
iScalar<vector_type> temp;
temp._internal = lvSum[rt];
extract(temp,extracted);
for(int idx=0;idx<Nsimd;idx++){
grid->iCoorFromIindex(icoor,idx);
int ldx =rt+icoor[orthogdim]*rd;
lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
}
}
// sum over nodes.
scalar_type gsum;
for(int t=0;t<fd;t++){
int pt = t/ld; // processor plane
int lt = t%ld;
if ( pt == grid->_processor_coor[orthogdim] ) {
gsum=lsSum[lt];
} else {
gsum=scalar_type(0.0);
}
grid->GlobalSum(gsum);
result[t]=gsum;
}
}
template<class vobj>
static void sliceNorm (std::vector<RealD> &sn,const Lattice<vobj> &rhs,int Orthog)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
int Nblock = rhs._grid->GlobalDimensions()[Orthog];
std::vector<ComplexD> ip(Nblock);
sn.resize(Nblock);
sliceInnerProductVector(ip,rhs,rhs,Orthog);
for(int ss=0;ss<Nblock;ss++){
sn[ss] = real(ip[ss]);
}
};
template<class vobj>
static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y,
int orthogdim,RealD scale=1.0)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
typedef typename vobj::tensor_reduced tensor_reduced;
scalar_type zscale(scale);
GridBase *grid = X._grid;
int Nsimd =grid->Nsimd();
int Nblock =grid->GlobalDimensions()[orthogdim];
int fd =grid->_fdimensions[orthogdim];
int ld =grid->_ldimensions[orthogdim];
int rd =grid->_rdimensions[orthogdim];
int e1 =grid->_slice_nblock[orthogdim];
int e2 =grid->_slice_block [orthogdim];
int stride =grid->_slice_stride[orthogdim];
std::vector<int> icoor;
for(int r=0;r<rd;r++){
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
vector_type av;
for(int l=0;l<Nsimd;l++){
grid->iCoorFromIindex(icoor,l);
int ldx =r+icoor[orthogdim]*rd;
scalar_type *as =(scalar_type *)&av;
as[l] = scalar_type(a[ldx])*zscale;
}
tensor_reduced at; at=av;
parallel_for_nest2(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int ss= so+n*stride+b;
R._odata[ss] = at*X._odata[ss]+Y._odata[ss];
}
}
}
};
/*
inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog)
{
int NN = BlockSolverGrid->_ndimension;
int nsimd = BlockSolverGrid->Nsimd();
std::vector<int> latt_phys(0);
std::vector<int> simd_phys(0);
std::vector<int> mpi_phys(0);
for(int d=0;d<NN;d++){
if( d!=Orthog ) {
latt_phys.push_back(BlockSolverGrid->_fdimensions[d]);
simd_phys.push_back(BlockSolverGrid->_simd_layout[d]);
mpi_phys.push_back(BlockSolverGrid->_processors[d]);
}
}
return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);
}
*/
template<class vobj>
static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
int Nblock = X._grid->GlobalDimensions()[Orthog];
GridBase *FullGrid = X._grid;
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
// Lattice<vobj> Xslice(SliceGrid);
// Lattice<vobj> Rslice(SliceGrid);
assert( FullGrid->_simd_layout[Orthog]==1);
int nh = FullGrid->_ndimension;
// int nl = SliceGrid->_ndimension;
int nl = nh-1;
//FIXME package in a convenient iterator
//Should loop over a plane orthogonal to direction "Orthog"
int stride=FullGrid->_slice_stride[Orthog];
int block =FullGrid->_slice_block [Orthog];
int nblock=FullGrid->_slice_nblock[Orthog];
int ostride=FullGrid->_ostride[Orthog];
#pragma omp parallel
{
std::vector<vobj> s_x(Nblock);
#pragma omp for collapse(2)
for(int n=0;n<nblock;n++){
for(int b=0;b<block;b++){
int o = n*stride + b;
for(int i=0;i<Nblock;i++){
s_x[i] = X[o+i*ostride];
}
vobj dot;
for(int i=0;i<Nblock;i++){
dot = Y[o+i*ostride];
for(int j=0;j<Nblock;j++){
dot = dot + s_x[j]*(scale*aa(j,i));
}
R[o+i*ostride]=dot;
}
}}
}
};
template<class vobj>
static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,int Orthog,RealD scale=1.0)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
int Nblock = X._grid->GlobalDimensions()[Orthog];
GridBase *FullGrid = X._grid;
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
// Lattice<vobj> Xslice(SliceGrid);
// Lattice<vobj> Rslice(SliceGrid);
assert( FullGrid->_simd_layout[Orthog]==1);
int nh = FullGrid->_ndimension;
// int nl = SliceGrid->_ndimension;
int nl=1;
//FIXME package in a convenient iterator
//Should loop over a plane orthogonal to direction "Orthog"
int stride=FullGrid->_slice_stride[Orthog];
int block =FullGrid->_slice_block [Orthog];
int nblock=FullGrid->_slice_nblock[Orthog];
int ostride=FullGrid->_ostride[Orthog];
#pragma omp parallel
{
std::vector<vobj> s_x(Nblock);
#pragma omp for collapse(2)
for(int n=0;n<nblock;n++){
for(int b=0;b<block;b++){
int o = n*stride + b;
for(int i=0;i<Nblock;i++){
s_x[i] = X[o+i*ostride];
}
vobj dot;
for(int i=0;i<Nblock;i++){
dot = s_x[0]*(scale*aa(0,i));
for(int j=1;j<Nblock;j++){
dot = dot + s_x[j]*(scale*aa(j,i));
}
R[o+i*ostride]=dot;
}
}}
}
};
template<class vobj>
static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
GridBase *FullGrid = lhs._grid;
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
int Nblock = FullGrid->GlobalDimensions()[Orthog];
// Lattice<vobj> Lslice(SliceGrid);
// Lattice<vobj> Rslice(SliceGrid);
mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);
assert( FullGrid->_simd_layout[Orthog]==1);
int nh = FullGrid->_ndimension;
// int nl = SliceGrid->_ndimension;
int nl = nh-1;
//FIXME package in a convenient iterator
//Should loop over a plane orthogonal to direction "Orthog"
int stride=FullGrid->_slice_stride[Orthog];
int block =FullGrid->_slice_block [Orthog];
int nblock=FullGrid->_slice_nblock[Orthog];
int ostride=FullGrid->_ostride[Orthog];
typedef typename vobj::vector_typeD vector_typeD;
#pragma omp parallel
{
std::vector<vobj> Left(Nblock);
std::vector<vobj> Right(Nblock);
Eigen::MatrixXcd mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock);
#pragma omp for collapse(2)
for(int n=0;n<nblock;n++){
for(int b=0;b<block;b++){
int o = n*stride + b;
for(int i=0;i<Nblock;i++){
Left [i] = lhs[o+i*ostride];
Right[i] = rhs[o+i*ostride];
}
for(int i=0;i<Nblock;i++){
for(int j=0;j<Nblock;j++){
auto tmp = innerProduct(Left[i],Right[j]);
auto rtmp = TensorRemove(tmp);
mat_thread(i,j) += Reduce(rtmp);
}}
}}
#pragma omp critical
{
mat += mat_thread;
}
}
for(int i=0;i<Nblock;i++){
for(int j=0;j<Nblock;j++){
ComplexD sum = mat(i,j);
FullGrid->GlobalSum(sum);
mat(i,j)=sum;
}}
return;
}
} /*END NAMESPACE GRID*/
#endif

516
Grid/lattice/Lattice_rng.h Normal file
View File

@@ -0,0 +1,516 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/lattice/Lattice_rng.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_LATTICE_RNG_H
#define GRID_LATTICE_RNG_H
#include <random>
#ifdef RNG_SITMO
#include <Grid/sitmo_rng/sitmo_prng_engine.hpp>
#endif
#if defined(RNG_SITMO)
#define RNG_FAST_DISCARD
#else
#undef RNG_FAST_DISCARD
#endif
namespace Grid {
//////////////////////////////////////////////////////////////
// Allow the RNG state to be less dense than the fine grid
//////////////////////////////////////////////////////////////
inline int RNGfillable(GridBase *coarse,GridBase *fine)
{
int rngdims = coarse->_ndimension;
// trivially extended in higher dims, with locality guaranteeing RNG state is local to node
int lowerdims = fine->_ndimension - coarse->_ndimension;
assert(lowerdims >= 0);
for(int d=0;d<lowerdims;d++){
assert(fine->_simd_layout[d]==1);
assert(fine->_processors[d]==1);
}
int multiplicity=1;
for(int d=0;d<lowerdims;d++){
multiplicity=multiplicity*fine->_rdimensions[d];
}
// local and global volumes subdivide cleanly after SIMDization
for(int d=0;d<rngdims;d++){
int fd= d+lowerdims;
assert(coarse->_processors[d] == fine->_processors[fd]);
assert(coarse->_simd_layout[d] == fine->_simd_layout[fd]);
assert(((fine->_rdimensions[fd] / coarse->_rdimensions[d])* coarse->_rdimensions[d])==fine->_rdimensions[fd]);
multiplicity = multiplicity *fine->_rdimensions[fd] / coarse->_rdimensions[d];
}
return multiplicity;
}
// merge of April 11 2017
// this function is necessary for the LS vectorised field
inline int RNGfillable_general(GridBase *coarse,GridBase *fine)
{
int rngdims = coarse->_ndimension;
// trivially extended in higher dims, with locality guaranteeing RNG state is local to node
int lowerdims = fine->_ndimension - coarse->_ndimension; assert(lowerdims >= 0);
// assumes that the higher dimensions are not using more processors
// all further divisions are local
for(int d=0;d<lowerdims;d++) assert(fine->_processors[d]==1);
for(int d=0;d<rngdims;d++) assert(coarse->_processors[d] == fine->_processors[d+lowerdims]);
// then divide the number of local sites
// check that the total number of sims agree, meanse the iSites are the same
assert(fine->Nsimd() == coarse->Nsimd());
// check that the two grids divide cleanly
assert( (fine->lSites() / coarse->lSites() ) * coarse->lSites() == fine->lSites() );
return fine->lSites() / coarse->lSites();
}
// real scalars are one component
template<class scalar,class distribution,class generator>
void fillScalar(scalar &s,distribution &dist,generator & gen)
{
s=dist(gen);
}
template<class distribution,class generator>
void fillScalar(ComplexF &s,distribution &dist, generator &gen)
{
s=ComplexF(dist(gen),dist(gen));
}
template<class distribution,class generator>
void fillScalar(ComplexD &s,distribution &dist,generator &gen)
{
s=ComplexD(dist(gen),dist(gen));
}
class GridRNGbase {
public:
// One generator per site.
// Uniform and Gaussian distributions from these generators.
#ifdef RNG_RANLUX
typedef std::ranlux48 RngEngine;
typedef uint64_t RngStateType;
static const int RngStateCount = 15;
#endif
#ifdef RNG_MT19937
typedef std::mt19937 RngEngine;
typedef uint32_t RngStateType;
static const int RngStateCount = std::mt19937::state_size;
#endif
#ifdef RNG_SITMO
typedef sitmo::prng_engine RngEngine;
typedef uint64_t RngStateType;
static const int RngStateCount = 13;
#endif
std::vector<RngEngine> _generators;
std::vector<std::uniform_real_distribution<RealD> > _uniform;
std::vector<std::normal_distribution<RealD> > _gaussian;
std::vector<std::discrete_distribution<int32_t> > _bernoulli;
std::vector<std::uniform_int_distribution<uint32_t> > _uid;
///////////////////////
// support for parallel init
///////////////////////
#ifdef RNG_FAST_DISCARD
static void Skip(RngEngine &eng,uint64_t site)
{
/////////////////////////////////////////////////////////////////////////////////////
// Skip by 2^40 elements between successive lattice sites
// This goes by 10^12.
// Consider quenched updating; likely never exceeding rate of 1000 sweeps
// per second on any machine. This gives us of order 10^9 seconds, or 100 years
// skip ahead.
// For HMC unlikely to go at faster than a solve per second, and
// tens of seconds per trajectory so this is clean in all reasonable cases,
// and margin of safety is orders of magnitude.
// We could hack Sitmo to skip in the higher order words of state if necessary
//
// Replace with 2^30 ; avoid problem on large volumes
//
/////////////////////////////////////////////////////////////////////////////////////
// uint64_t skip = site+1; // Old init Skipped then drew. Checked compat with faster init
const int shift = 30;
uint64_t skip = site;
skip = skip<<shift;
assert((skip >> shift)==site); // check for overflow
eng.discard(skip);
// std::cout << " Engine " <<site << " state " <<eng<<std::endl;
}
#endif
static RngEngine Reseed(RngEngine &eng)
{
std::vector<uint32_t> newseed;
std::uniform_int_distribution<uint32_t> uid;
return Reseed(eng,newseed,uid);
}
static RngEngine Reseed(RngEngine &eng,std::vector<uint32_t> & newseed,
std::uniform_int_distribution<uint32_t> &uid)
{
const int reseeds=4;
newseed.resize(reseeds);
for(int i=0;i<reseeds;i++){
newseed[i] = uid(eng);
}
std::seed_seq sseq(newseed.begin(),newseed.end());
return RngEngine(sseq);
}
void GetState(std::vector<RngStateType> & saved,RngEngine &eng) {
saved.resize(RngStateCount);
std::stringstream ss;
ss<<eng;
ss.seekg(0,ss.beg);
for(int i=0;i<RngStateCount;i++){
ss>>saved[i];
}
}
void GetState(std::vector<RngStateType> & saved,int gen) {
GetState(saved,_generators[gen]);
}
void SetState(std::vector<RngStateType> & saved,RngEngine &eng){
assert(saved.size()==RngStateCount);
std::stringstream ss;
for(int i=0;i<RngStateCount;i++){
ss<< saved[i]<<" ";
}
ss.seekg(0,ss.beg);
ss>>eng;
}
void SetState(std::vector<RngStateType> & saved,int gen){
SetState(saved,_generators[gen]);
}
void SetEngine(RngEngine &Eng, int gen){
_generators[gen]=Eng;
}
void GetEngine(RngEngine &Eng, int gen){
Eng=_generators[gen];
}
template<class source> void Seed(source &src, int gen)
{
_generators[gen] = RngEngine(src);
}
};
class GridSerialRNG : public GridRNGbase {
public:
GridSerialRNG() : GridRNGbase() {
_generators.resize(1);
_uniform.resize(1,std::uniform_real_distribution<RealD>{0,1});
_gaussian.resize(1,std::normal_distribution<RealD>(0.0,1.0) );
_bernoulli.resize(1,std::discrete_distribution<int32_t>{1,1});
_uid.resize(1,std::uniform_int_distribution<uint32_t>() );
}
template <class sobj,class distribution> inline void fill(sobj &l,std::vector<distribution> &dist){
typedef typename sobj::scalar_type scalar_type;
int words = sizeof(sobj)/sizeof(scalar_type);
scalar_type *buf = (scalar_type *) & l;
dist[0].reset();
for(int idx=0;idx<words;idx++){
fillScalar(buf[idx],dist[0],_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
};
template <class distribution> inline void fill(ComplexF &l,std::vector<distribution> &dist){
dist[0].reset();
fillScalar(l,dist[0],_generators[0]);
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(ComplexD &l,std::vector<distribution> &dist){
dist[0].reset();
fillScalar(l,dist[0],_generators[0]);
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(RealF &l,std::vector<distribution> &dist){
dist[0].reset();
fillScalar(l,dist[0],_generators[0]);
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(RealD &l,std::vector<distribution> &dist){
dist[0].reset();
fillScalar(l,dist[0],_generators[0]);
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
// vector fill
template <class distribution> inline void fill(vComplexF &l,std::vector<distribution> &dist){
RealF *pointer=(RealF *)&l;
dist[0].reset();
for(int i=0;i<2*vComplexF::Nsimd();i++){
fillScalar(pointer[i],dist[0],_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(vComplexD &l,std::vector<distribution> &dist){
RealD *pointer=(RealD *)&l;
dist[0].reset();
for(int i=0;i<2*vComplexD::Nsimd();i++){
fillScalar(pointer[i],dist[0],_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(vRealF &l,std::vector<distribution> &dist){
RealF *pointer=(RealF *)&l;
dist[0].reset();
for(int i=0;i<vRealF::Nsimd();i++){
fillScalar(pointer[i],dist[0],_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(vRealD &l,std::vector<distribution> &dist){
RealD *pointer=(RealD *)&l;
dist[0].reset();
for(int i=0;i<vRealD::Nsimd();i++){
fillScalar(pointer[i],dist[0],_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
void SeedFixedIntegers(const std::vector<int> &seeds){
CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size());
std::seed_seq src(seeds.begin(),seeds.end());
Seed(src,0);
}
void SeedUniqueString(const std::string &s){
std::vector<int> seeds;
std::stringstream sha;
seeds = GridChecksum::sha256_seeds(s);
for(int i=0;i<seeds.size();i++) {
sha << std::hex << seeds[i];
}
std::cout << GridLogMessage << "Intialising serial RNG with unique string '"
<< s << "'" << std::endl;
std::cout << GridLogMessage << "Seed SHA256: " << sha.str() << std::endl;
SeedFixedIntegers(seeds);
}
};
class GridParallelRNG : public GridRNGbase {
double _time_counter;
public:
GridBase *_grid;
unsigned int _vol;
int generator_idx(int os,int is) {
return is*_grid->oSites()+os;
}
GridParallelRNG(GridBase *grid) : GridRNGbase() {
_grid = grid;
_vol =_grid->iSites()*_grid->oSites();
_generators.resize(_vol);
_uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1});
_gaussian.resize(_vol,std::normal_distribution<RealD>(0.0,1.0) );
_bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1});
_uid.resize(_vol,std::uniform_int_distribution<uint32_t>() );
}
template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,std::vector<distribution> &dist){
typedef typename vobj::scalar_object scalar_object;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
double inner_time_counter = usecond();
int multiplicity = RNGfillable_general(_grid, l._grid); // l has finer or same grid
int Nsimd = _grid->Nsimd(); // guaranteed to be the same for l._grid too
int osites = _grid->oSites(); // guaranteed to be <= l._grid->oSites() by a factor multiplicity
int words = sizeof(scalar_object) / sizeof(scalar_type);
parallel_for(int ss=0;ss<osites;ss++){
std::vector<scalar_object> buf(Nsimd);
for (int m = 0; m < multiplicity; m++) { // Draw from same generator multiplicity times
int sm = multiplicity * ss + m; // Maps the generator site to the fine site
for (int si = 0; si < Nsimd; si++) {
int gdx = generator_idx(ss, si); // index of generator state
scalar_type *pointer = (scalar_type *)&buf[si];
dist[gdx].reset();
for (int idx = 0; idx < words; idx++)
fillScalar(pointer[idx], dist[gdx], _generators[gdx]);
}
// merge into SIMD lanes, FIXME suboptimal implementation
merge(l._odata[sm], buf);
}
}
_time_counter += usecond()- inner_time_counter;
};
void SeedUniqueString(const std::string &s){
std::vector<int> seeds;
seeds = GridChecksum::sha256_seeds(s);
std::cout << GridLogMessage << "Intialising parallel RNG with unique string '"
<< s << "'" << std::endl;
std::cout << GridLogMessage << "Seed SHA256: " << GridChecksum::sha256_string(seeds) << std::endl;
SeedFixedIntegers(seeds);
}
void SeedFixedIntegers(const std::vector<int> &seeds){
// Everyone generates the same seed_seq based on input seeds
CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size());
std::seed_seq source(seeds.begin(),seeds.end());
RngEngine master_engine(source);
#ifdef RNG_FAST_DISCARD
////////////////////////////////////////////////
// Skip ahead through a single stream.
// Applicable to SITMO and other has based/crypto RNGs
// Should be applicable to Mersenne Twister, but the C++11
// MT implementation does not implement fast discard even though
// in principle this is possible
////////////////////////////////////////////////
// Everybody loops over global volume.
parallel_for(int gidx=0;gidx<_grid->_gsites;gidx++){
// Where is it?
int rank,o_idx,i_idx;
std::vector<int> gcoor;
_grid->GlobalIndexToGlobalCoor(gidx,gcoor);
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
// If this is one of mine we take it
if( rank == _grid->ThisRank() ){
int l_idx=generator_idx(o_idx,i_idx);
_generators[l_idx] = master_engine;
Skip(_generators[l_idx],gidx); // Skip to next RNG sequence
}
}
#else
////////////////////////////////////////////////////////////////
// Machine and thread decomposition dependent seeding is efficient
// and maximally parallel; but NOT reproducible from machine to machine.
// Not ideal, but fastest way to reseed all nodes.
////////////////////////////////////////////////////////////////
{
// Obtain one Reseed per processor
int Nproc = _grid->ProcessorCount();
std::vector<RngEngine> seeders(Nproc);
int me= _grid->ThisRank();
for(int p=0;p<Nproc;p++){
seeders[p] = Reseed(master_engine);
}
master_engine = seeders[me];
}
{
// Obtain one reseeded generator per thread
int Nthread = GridThread::GetThreads();
std::vector<RngEngine> seeders(Nthread);
for(int t=0;t<Nthread;t++){
seeders[t] = Reseed(master_engine);
}
parallel_for(int t=0;t<Nthread;t++) {
// set up one per local site in threaded fashion
std::vector<uint32_t> newseeds;
std::uniform_int_distribution<uint32_t> uid;
for(int l=0;l<_grid->lSites();l++) {
if ( (l%Nthread)==t ) {
_generators[l] = Reseed(seeders[t],newseeds,uid);
}
}
}
}
#endif
}
void Report(){
std::cout << GridLogMessage << "Time spent in the fill() routine by GridParallelRNG: "<< _time_counter/1e3 << " ms" << std::endl;
}
////////////////////////////////////////////////////////////////////////
// Support for rigorous test of RNG's
// Return uniform random uint32_t from requested site generator
////////////////////////////////////////////////////////////////////////
uint32_t GlobalU01(int gsite){
uint32_t the_number;
// who
std::vector<int> gcoor;
int rank,o_idx,i_idx;
_grid->GlobalIndexToGlobalCoor(gsite,gcoor);
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
// draw
int l_idx=generator_idx(o_idx,i_idx);
if( rank == _grid->ThisRank() ){
the_number = _uid[l_idx](_generators[l_idx]);
}
// share & return
_grid->Broadcast(rank,(void *)&the_number,sizeof(the_number));
return the_number;
}
};
template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l) { rng.fill(l,rng._uniform); }
template <class vobj> inline void gaussian(GridParallelRNG &rng,Lattice<vobj> &l) { rng.fill(l,rng._gaussian); }
template <class vobj> inline void bernoulli(GridParallelRNG &rng,Lattice<vobj> &l){ rng.fill(l,rng._bernoulli);}
template <class sobj> inline void random(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._uniform ); }
template <class sobj> inline void gaussian(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._gaussian ); }
template <class sobj> inline void bernoulli(GridSerialRNG &rng,sobj &l){ rng.fill(l,rng._bernoulli); }
}
#endif

View File

@@ -42,8 +42,7 @@ namespace Grid {
-> Lattice<decltype(trace(lhs._odata[0]))>
{
Lattice<decltype(trace(lhs._odata[0]))> ret(lhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = trace(lhs._odata[ss]);
}
return ret;
@@ -56,8 +55,7 @@ PARALLEL_FOR_LOOP
inline auto TraceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
{
Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = traceIndex<Index>(lhs._odata[ss]);
}
return ret;

File diff suppressed because it is too large Load Diff

View File

@@ -41,8 +41,7 @@ namespace Grid {
template<class vobj>
inline Lattice<vobj> transpose(const Lattice<vobj> &lhs){
Lattice<vobj> ret(lhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = transpose(lhs._odata[ss]);
}
return ret;
@@ -55,12 +54,10 @@ PARALLEL_FOR_LOOP
inline auto TransposeIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))>
{
Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = transposeIndex<Index>(lhs._odata[ss]);
}
return ret;
};
}
#endif

View File

@@ -37,8 +37,7 @@ namespace Grid {
Lattice<obj> ret(rhs._grid);
ret.checkerboard = rhs.checkerboard;
conformable(ret,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
ret._odata[ss]=pow(rhs._odata[ss],y);
}
return ret;
@@ -47,8 +46,7 @@ PARALLEL_FOR_LOOP
Lattice<obj> ret(rhs._grid);
ret.checkerboard = rhs.checkerboard;
conformable(ret,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
ret._odata[ss]=mod(rhs._odata[ss],y);
}
return ret;
@@ -58,22 +56,26 @@ PARALLEL_FOR_LOOP
Lattice<obj> ret(rhs._grid);
ret.checkerboard = rhs.checkerboard;
conformable(ret,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
ret._odata[ss]=div(rhs._odata[ss],y);
}
return ret;
}
template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, ComplexD alpha, Integer Nexp = DEFAULT_MAT_EXP){
template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){
Lattice<obj> ret(rhs._grid);
ret.checkerboard = rhs.checkerboard;
conformable(ret,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp);
}
return ret;
}

View File

@@ -56,8 +56,7 @@ inline void whereWolf(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<
std::vector<scalar_object> truevals (Nsimd);
std::vector<scalar_object> falsevals(Nsimd);
PARALLEL_FOR_LOOP
for(int ss=0;ss<iftrue._grid->oSites(); ss++){
parallel_for(int ss=0;ss<iftrue._grid->oSites(); ss++){
extract(iftrue._odata[ss] ,truevals);
extract(iffalse._odata[ss] ,falsevals);

View File

@@ -29,9 +29,11 @@ See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/GridCore.h>
#include <Grid/util/CompilerCompatible.h>
#include <cxxabi.h>
#include <memory>
namespace Grid {
@@ -48,7 +50,7 @@ namespace Grid {
return (status==0) ? res.get() : name ;
}
GridStopWatch Logger::StopWatch;
GridStopWatch Logger::GlobalStopWatch;
int Logger::timestamp;
std::ostream Logger::devnull(0);
@@ -57,6 +59,9 @@ void GridLogTimestamp(int on){
}
Colours GridLogColours(0);
GridLogger GridLogMG (1, "MG" , GridLogColours, "NORMAL");
GridLogger GridLogIRL (1, "IRL" , GridLogColours, "NORMAL");
GridLogger GridLogSolver (1, "Solver", GridLogColours, "NORMAL");
GridLogger GridLogError (1, "Error" , GridLogColours, "RED");
GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW");
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL");
@@ -93,7 +98,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
////////////////////////////////////////////////////////////
void Grid_quiesce_nodes(void) {
int me = 0;
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPI3L)
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPIT)
MPI_Comm_rank(MPI_COMM_WORLD, &me);
#endif
#ifdef GRID_COMMS_SHMEM

View File

@@ -85,12 +85,16 @@ class Logger {
protected:
Colours &Painter;
int active;
int timing_mode;
int topWidth{-1}, chanWidth{-1};
static int timestamp;
std::string name, topName;
std::string COLOUR;
public:
static GridStopWatch StopWatch;
static GridStopWatch GlobalStopWatch;
GridStopWatch LocalStopWatch;
GridStopWatch *StopWatch;
static std::ostream devnull;
std::string background() {return Painter.colour["NORMAL"];}
@@ -101,22 +105,52 @@ public:
name(nm),
topName(topNm),
Painter(col_class),
COLOUR(col) {} ;
timing_mode(0),
COLOUR(col)
{
StopWatch = & GlobalStopWatch;
};
void Active(int on) {active = on;};
int isActive(void) {return active;};
static void Timestamp(int on) {timestamp = on;};
void Reset(void) {
StopWatch->Reset();
StopWatch->Start();
}
void TimingMode(int on) {
timing_mode = on;
if(on) {
StopWatch = &LocalStopWatch;
Reset();
}
}
void setTopWidth(const int w) {topWidth = w;}
void setChanWidth(const int w) {chanWidth = w;}
friend std::ostream& operator<< (std::ostream& stream, Logger& log){
if ( log.active ) {
stream << log.background()<< std::setw(10) << std::left << log.topName << log.background()<< " : ";
stream << log.colour() << std::setw(14) << std::left << log.name << log.background() << " : ";
stream << log.background()<< std::left;
if (log.topWidth > 0)
{
stream << std::setw(log.topWidth);
}
stream << log.topName << log.background()<< " : ";
stream << log.colour() << std::left;
if (log.chanWidth > 0)
{
stream << std::setw(log.chanWidth);
}
stream << log.name << log.background() << " : ";
if ( log.timestamp ) {
StopWatch.Stop();
GridTime now = StopWatch.Elapsed();
StopWatch.Start();
stream << log.evidence()<< now << log.background() << " : " ;
log.StopWatch->Stop();
GridTime now = log.StopWatch->Elapsed();
if ( log.timing_mode==1 ) log.StopWatch->Reset();
log.StopWatch->Start();
stream << log.evidence()
<< now << log.background() << " : " ;
}
stream << log.colour();
return stream;
@@ -135,6 +169,9 @@ public:
void GridLogConfigure(std::vector<std::string> &logstreams);
extern GridLogger GridLogMG;
extern GridLogger GridLogIRL;
extern GridLogger GridLogSolver;
extern GridLogger GridLogError;
extern GridLogger GridLogWarning;
extern GridLogger GridLogMessage;

View File

@@ -0,0 +1,3 @@
#include <Grid/GridCore.h>
int Grid::BinaryIO::latticeWriteMaxRetry = -1;

759
Grid/parallelIO/BinaryIO.h Normal file
View File

@@ -0,0 +1,759 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/parallelIO/BinaryIO.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu<guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_BINARY_IO_H
#define GRID_BINARY_IO_H
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPIT)
#define USE_MPI_IO
#else
#undef USE_MPI_IO
#endif
#ifdef HAVE_ENDIAN_H
#include <endian.h>
#endif
#include <arpa/inet.h>
#include <algorithm>
namespace Grid {
/////////////////////////////////////////////////////////////////////////////////
// Byte reversal garbage
/////////////////////////////////////////////////////////////////////////////////
inline uint32_t byte_reverse32(uint32_t f) {
f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
return f;
}
inline uint64_t byte_reverse64(uint64_t f) {
uint64_t g;
g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
g = g << 32;
f = f >> 32;
g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
return g;
}
#if BYTE_ORDER == BIG_ENDIAN
inline uint64_t Grid_ntohll(uint64_t A) { return A; }
#else
inline uint64_t Grid_ntohll(uint64_t A) {
return byte_reverse64(A);
}
#endif
// A little helper
inline void removeWhitespace(std::string &key)
{
key.erase(std::remove_if(key.begin(), key.end(), ::isspace),key.end());
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// Static class holding the parallel IO code
// Could just use a namespace
///////////////////////////////////////////////////////////////////////////////////////////////////
class BinaryIO {
public:
static int latticeWriteMaxRetry;
/////////////////////////////////////////////////////////////////////////////
// more byte manipulation helpers
/////////////////////////////////////////////////////////////////////////////
template<class vobj> static inline void Uint32Checksum(Lattice<vobj> &lat,uint32_t &nersc_csum)
{
typedef typename vobj::scalar_object sobj;
GridBase *grid = lat._grid;
uint64_t lsites = grid->lSites();
std::vector<sobj> scalardata(lsites);
unvectorizeToLexOrdArray(scalardata,lat);
NerscChecksum(grid,scalardata,nersc_csum);
}
template <class fobj>
static inline void NerscChecksum(GridBase *grid, std::vector<fobj> &fbuf, uint32_t &nersc_csum)
{
const uint64_t size32 = sizeof(fobj) / sizeof(uint32_t);
uint64_t lsites = grid->lSites();
if (fbuf.size() == 1)
{
lsites = 1;
}
PARALLEL_REGION
{
uint32_t nersc_csum_thr = 0;
PARALLEL_FOR_LOOP_INTERN
for (uint64_t local_site = 0; local_site < lsites; local_site++)
{
uint32_t *site_buf = (uint32_t *)&fbuf[local_site];
for (uint64_t j = 0; j < size32; j++)
{
nersc_csum_thr = nersc_csum_thr + site_buf[j];
}
}
PARALLEL_CRITICAL
{
nersc_csum += nersc_csum_thr;
}
}
}
template<class fobj> static inline void ScidacChecksum(GridBase *grid,std::vector<fobj> &fbuf,uint32_t &scidac_csuma,uint32_t &scidac_csumb)
{
const uint64_t size32 = sizeof(fobj)/sizeof(uint32_t);
int nd = grid->_ndimension;
uint64_t lsites =grid->lSites();
if (fbuf.size()==1) {
lsites=1;
}
std::vector<int> local_vol =grid->LocalDimensions();
std::vector<int> local_start =grid->LocalStarts();
std::vector<int> global_vol =grid->FullDimensions();
PARALLEL_REGION
{
std::vector<int> coor(nd);
uint32_t scidac_csuma_thr=0;
uint32_t scidac_csumb_thr=0;
uint32_t site_crc=0;
PARALLEL_FOR_LOOP_INTERN
for(uint64_t local_site=0;local_site<lsites;local_site++){
uint32_t * site_buf = (uint32_t *)&fbuf[local_site];
/*
* Scidac csum is rather more heavyweight
* FIXME -- 128^3 x 256 x 16 will overflow.
*/
int global_site;
Lexicographic::CoorFromIndex(coor,local_site,local_vol);
for(int d=0;d<nd;d++) {
coor[d] = coor[d]+local_start[d];
}
Lexicographic::IndexFromCoor(coor,global_site,global_vol);
uint32_t gsite29 = global_site%29;
uint32_t gsite31 = global_site%31;
site_crc = crc32(0,(unsigned char *)site_buf,sizeof(fobj));
// std::cout << "Site "<<local_site << " crc "<<std::hex<<site_crc<<std::dec<<std::endl;
// std::cout << "Site "<<local_site << std::hex<<site_buf[0] <<site_buf[1]<<std::dec <<std::endl;
scidac_csuma_thr ^= site_crc<<gsite29 | site_crc>>(32-gsite29);
scidac_csumb_thr ^= site_crc<<gsite31 | site_crc>>(32-gsite31);
}
PARALLEL_CRITICAL
{
scidac_csuma^= scidac_csuma_thr;
scidac_csumb^= scidac_csumb_thr;
}
}
}
// Network is big endian
static inline void htobe32_v(void *file_object,uint32_t bytes){ be32toh_v(file_object,bytes);}
static inline void htobe64_v(void *file_object,uint32_t bytes){ be64toh_v(file_object,bytes);}
static inline void htole32_v(void *file_object,uint32_t bytes){ le32toh_v(file_object,bytes);}
static inline void htole64_v(void *file_object,uint32_t bytes){ le64toh_v(file_object,bytes);}
static inline void be32toh_v(void *file_object,uint64_t bytes)
{
uint32_t * f = (uint32_t *)file_object;
uint64_t count = bytes/sizeof(uint32_t);
parallel_for(uint64_t i=0;i<count;i++){
f[i] = ntohl(f[i]);
}
}
// LE must Swap and switch to host
static inline void le32toh_v(void *file_object,uint64_t bytes)
{
uint32_t *fp = (uint32_t *)file_object;
uint32_t f;
uint64_t count = bytes/sizeof(uint32_t);
parallel_for(uint64_t i=0;i<count;i++){
f = fp[i];
// got network order and the network to host
f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
fp[i] = ntohl(f);
}
}
// BE is same as network
static inline void be64toh_v(void *file_object,uint64_t bytes)
{
uint64_t * f = (uint64_t *)file_object;
uint64_t count = bytes/sizeof(uint64_t);
parallel_for(uint64_t i=0;i<count;i++){
f[i] = Grid_ntohll(f[i]);
}
}
// LE must swap and switch;
static inline void le64toh_v(void *file_object,uint64_t bytes)
{
uint64_t *fp = (uint64_t *)file_object;
uint64_t f,g;
uint64_t count = bytes/sizeof(uint64_t);
parallel_for(uint64_t i=0;i<count;i++){
f = fp[i];
// got network order and the network to host
g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
g = g << 32;
f = f >> 32;
g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
fp[i] = Grid_ntohll(g);
}
}
/////////////////////////////////////////////////////////////////////////////
// Real action:
// Read or Write distributed lexico array of ANY object to a specific location in file
//////////////////////////////////////////////////////////////////////////////////////
static const int BINARYIO_MASTER_APPEND = 0x10;
static const int BINARYIO_UNORDERED = 0x08;
static const int BINARYIO_LEXICOGRAPHIC = 0x04;
static const int BINARYIO_READ = 0x02;
static const int BINARYIO_WRITE = 0x01;
template<class word,class fobj>
static inline void IOobject(word w,
GridBase *grid,
std::vector<fobj> &iodata,
std::string file,
uint64_t& offset,
const std::string &format, int control,
uint32_t &nersc_csum,
uint32_t &scidac_csuma,
uint32_t &scidac_csumb)
{
grid->Barrier();
GridStopWatch timer;
GridStopWatch bstimer;
nersc_csum=0;
scidac_csuma=0;
scidac_csumb=0;
int ndim = grid->Dimensions();
int nrank = grid->ProcessorCount();
int myrank = grid->ThisRank();
std::vector<int> psizes = grid->ProcessorGrid();
std::vector<int> pcoor = grid->ThisProcessorCoor();
std::vector<int> gLattice= grid->GlobalDimensions();
std::vector<int> lLattice= grid->LocalDimensions();
std::vector<int> lStart(ndim);
std::vector<int> gStart(ndim);
// Flatten the file
uint64_t lsites = grid->lSites();
if ( control & BINARYIO_MASTER_APPEND ) {
assert(iodata.size()==1);
} else {
assert(lsites==iodata.size());
}
for(int d=0;d<ndim;d++){
gStart[d] = lLattice[d]*pcoor[d];
lStart[d] = 0;
}
#ifdef USE_MPI_IO
std::vector<int> distribs(ndim,MPI_DISTRIBUTE_BLOCK);
std::vector<int> dargs (ndim,MPI_DISTRIBUTE_DFLT_DARG);
MPI_Datatype mpiObject;
MPI_Datatype fileArray;
MPI_Datatype localArray;
MPI_Datatype mpiword;
MPI_Offset disp = offset;
MPI_File fh ;
MPI_Status status;
int numword;
if ( sizeof( word ) == sizeof(float ) ) {
numword = sizeof(fobj)/sizeof(float);
mpiword = MPI_FLOAT;
} else {
numword = sizeof(fobj)/sizeof(double);
mpiword = MPI_DOUBLE;
}
//////////////////////////////////////////////////////////////////////////////
// Sobj in MPI phrasing
//////////////////////////////////////////////////////////////////////////////
int ierr;
ierr = MPI_Type_contiguous(numword,mpiword,&mpiObject); assert(ierr==0);
ierr = MPI_Type_commit(&mpiObject);
//////////////////////////////////////////////////////////////////////////////
// File global array data type
//////////////////////////////////////////////////////////////////////////////
ierr=MPI_Type_create_subarray(ndim,&gLattice[0],&lLattice[0],&gStart[0],MPI_ORDER_FORTRAN, mpiObject,&fileArray); assert(ierr==0);
ierr=MPI_Type_commit(&fileArray); assert(ierr==0);
//////////////////////////////////////////////////////////////////////////////
// local lattice array
//////////////////////////////////////////////////////////////////////////////
ierr=MPI_Type_create_subarray(ndim,&lLattice[0],&lLattice[0],&lStart[0],MPI_ORDER_FORTRAN, mpiObject,&localArray); assert(ierr==0);
ierr=MPI_Type_commit(&localArray); assert(ierr==0);
#endif
//////////////////////////////////////////////////////////////////////////////
// Byte order
//////////////////////////////////////////////////////////////////////////////
int ieee32big = (format == std::string("IEEE32BIG"));
int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64"));
//////////////////////////////////////////////////////////////////////////////
// Do the I/O
//////////////////////////////////////////////////////////////////////////////
if ( control & BINARYIO_READ ) {
timer.Start();
if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) {
#ifdef USE_MPI_IO
std::cout<< GridLogMessage<<"IOobject: MPI read I/O "<< file<< std::endl;
ierr=MPI_File_open(grid->communicator,(char *) file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); assert(ierr==0);
ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); assert(ierr==0);
ierr=MPI_File_read_all(fh, &iodata[0], 1, localArray, &status); assert(ierr==0);
MPI_File_close(&fh);
MPI_Type_free(&fileArray);
MPI_Type_free(&localArray);
#else
assert(0);
#endif
} else {
std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : "
<< iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
std::ifstream fin;
fin.open(file, std::ios::binary | std::ios::in);
if (control & BINARYIO_MASTER_APPEND)
{
fin.seekg(-sizeof(fobj), fin.end);
}
else
{
fin.seekg(offset + myrank * lsites * sizeof(fobj));
}
fin.read((char *)&iodata[0], iodata.size() * sizeof(fobj));
assert(fin.fail() == 0);
fin.close();
}
timer.Stop();
grid->Barrier();
bstimer.Start();
ScidacChecksum(grid,iodata,scidac_csuma,scidac_csumb);
if (ieee32big) be32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
if (ieee32) le32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
if (ieee64big) be64toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
if (ieee64) le64toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
NerscChecksum(grid,iodata,nersc_csum);
bstimer.Stop();
}
if ( control & BINARYIO_WRITE ) {
bstimer.Start();
NerscChecksum(grid,iodata,nersc_csum);
if (ieee32big) htobe32_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
if (ieee32) htole32_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
if (ieee64big) htobe64_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
if (ieee64) htole64_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
ScidacChecksum(grid,iodata,scidac_csuma,scidac_csumb);
bstimer.Stop();
grid->Barrier();
timer.Start();
if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) {
#ifdef USE_MPI_IO
std::cout << GridLogMessage <<"IOobject: MPI write I/O " << file << std::endl;
ierr = MPI_File_open(grid->communicator, (char *)file.c_str(), MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
// std::cout << GridLogMessage << "Checking for errors" << std::endl;
if (ierr != MPI_SUCCESS)
{
char error_string[BUFSIZ];
int length_of_error_string, error_class;
MPI_Error_class(ierr, &error_class);
MPI_Error_string(error_class, error_string, &length_of_error_string);
fprintf(stderr, "%3d: %s\n", myrank, error_string);
MPI_Error_string(ierr, error_string, &length_of_error_string);
fprintf(stderr, "%3d: %s\n", myrank, error_string);
MPI_Abort(MPI_COMM_WORLD, 1); //assert(ierr == 0);
}
std::cout << GridLogDebug << "MPI write I/O set view " << file << std::endl;
ierr = MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL);
assert(ierr == 0);
std::cout << GridLogDebug << "MPI write I/O write all " << file << std::endl;
ierr = MPI_File_write_all(fh, &iodata[0], 1, localArray, &status);
assert(ierr == 0);
MPI_Offset os;
MPI_File_get_position(fh, &os);
MPI_File_get_byte_offset(fh, os, &disp);
offset = disp;
MPI_File_close(&fh);
MPI_Type_free(&fileArray);
MPI_Type_free(&localArray);
#else
assert(0);
#endif
} else {
std::cout << GridLogMessage << "IOobject: C++ write I/O " << file << " : "
<< iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
std::ofstream fout;
fout.exceptions ( std::fstream::failbit | std::fstream::badbit );
try {
if (offset) { // Must already exist and contain data
fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
} else { // Allow create
fout.open(file,std::ios::binary|std::ios::out);
}
} catch (const std::fstream::failure& exc) {
std::cout << GridLogError << "Error in opening the file " << file << " for output" <<std::endl;
std::cout << GridLogError << "Exception description: " << exc.what() << std::endl;
// std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl;
#ifdef USE_MPI_IO
MPI_Abort(MPI_COMM_WORLD,1);
#else
exit(1);
#endif
}
if ( control & BINARYIO_MASTER_APPEND ) {
try {
fout.seekp(0,fout.end);
} catch (const std::fstream::failure& exc) {
std::cout << "Exception in seeking file end " << file << std::endl;
}
} else {
try {
fout.seekp(offset+myrank*lsites*sizeof(fobj));
} catch (const std::fstream::failure& exc) {
std::cout << "Exception in seeking file " << file <<" offset "<< offset << std::endl;
}
}
try {
fout.write((char *)&iodata[0],iodata.size()*sizeof(fobj));//assert( fout.fail()==0);
}
catch (const std::fstream::failure& exc) {
std::cout << "Exception in writing file " << file << std::endl;
std::cout << GridLogError << "Exception description: "<< exc.what() << std::endl;
#ifdef USE_MPI_IO
MPI_Abort(MPI_COMM_WORLD,1);
#else
exit(1);
#endif
}
offset = fout.tellp();
fout.close();
}
timer.Stop();
}
std::cout<<GridLogMessage<<"IOobject: ";
if ( control & BINARYIO_READ) std::cout << " read ";
else std::cout << " write ";
uint64_t bytes = sizeof(fobj)*iodata.size()*nrank;
std::cout<< bytes <<" bytes in "<<timer.Elapsed() <<" "
<< (double)bytes/ (double)timer.useconds() <<" MB/s "<<std::endl;
std::cout<<GridLogMessage<<"IOobject: endian and checksum overhead "<<bstimer.Elapsed() <<std::endl;
//////////////////////////////////////////////////////////////////////////////
// Safety check
//////////////////////////////////////////////////////////////////////////////
// if the data size is 1 we do not want to sum over the MPI ranks
if (iodata.size() != 1){
grid->Barrier();
grid->GlobalSum(nersc_csum);
grid->GlobalXOR(scidac_csuma);
grid->GlobalXOR(scidac_csumb);
grid->Barrier();
}
}
/////////////////////////////////////////////////////////////////////////////
// Read a Lattice of object
//////////////////////////////////////////////////////////////////////////////////////
template<class vobj,class fobj,class munger>
static inline void readLatticeObject(Lattice<vobj> &Umu,
std::string file,
munger munge,
uint64_t offset,
const std::string &format,
uint32_t &nersc_csum,
uint32_t &scidac_csuma,
uint32_t &scidac_csumb)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::Realified::scalar_type word; word w=0;
GridBase *grid = Umu._grid;
uint64_t lsites = grid->lSites();
std::vector<sobj> scalardata(lsites);
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
nersc_csum,scidac_csuma,scidac_csumb);
GridStopWatch timer;
timer.Start();
parallel_for(uint64_t x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
vectorizeFromLexOrdArray(scalardata,Umu);
grid->Barrier();
timer.Stop();
std::cout<<GridLogMessage<<"readLatticeObject: vectorize overhead "<<timer.Elapsed() <<std::endl;
}
/////////////////////////////////////////////////////////////////////////////
// Write a Lattice of object
//////////////////////////////////////////////////////////////////////////////////////
template<class vobj,class fobj,class munger>
static inline void writeLatticeObject(Lattice<vobj> &Umu,
std::string file,
munger munge,
uint64_t offset,
const std::string &format,
uint32_t &nersc_csum,
uint32_t &scidac_csuma,
uint32_t &scidac_csumb)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::Realified::scalar_type word; word w=0;
GridBase *grid = Umu._grid;
uint64_t lsites = grid->lSites(), offsetCopy = offset;
int attemptsLeft = std::max(0, BinaryIO::latticeWriteMaxRetry);
bool checkWrite = (BinaryIO::latticeWriteMaxRetry >= 0);
std::vector<sobj> scalardata(lsites);
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
//////////////////////////////////////////////////////////////////////////////
// Munge [ .e.g 3rd row recon ]
//////////////////////////////////////////////////////////////////////////////
GridStopWatch timer; timer.Start();
unvectorizeToLexOrdArray(scalardata,Umu);
parallel_for(uint64_t x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
grid->Barrier();
timer.Stop();
while (attemptsLeft >= 0)
{
grid->Barrier();
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
nersc_csum,scidac_csuma,scidac_csumb);
if (checkWrite)
{
std::vector<fobj> ckiodata(lsites);
uint32_t cknersc_csum, ckscidac_csuma, ckscidac_csumb;
uint64_t ckoffset = offsetCopy;
std::cout << GridLogMessage << "writeLatticeObject: read back object" << std::endl;
grid->Barrier();
IOobject(w,grid,ckiodata,file,ckoffset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
cknersc_csum,ckscidac_csuma,ckscidac_csumb);
if ((cknersc_csum != nersc_csum) or (ckscidac_csuma != scidac_csuma) or (ckscidac_csumb != scidac_csumb))
{
std::cout << GridLogMessage << "writeLatticeObject: read test checksum failure, re-writing (" << attemptsLeft << " attempt(s) remaining)" << std::endl;
offset = offsetCopy;
}
else
{
std::cout << GridLogMessage << "writeLatticeObject: read test checksum correct" << std::endl;
break;
}
}
attemptsLeft--;
}
std::cout<<GridLogMessage<<"writeLatticeObject: unvectorize overhead "<<timer.Elapsed() <<std::endl;
}
/////////////////////////////////////////////////////////////////////////////
// Read a RNG; use IOobject and lexico map to an array of state
//////////////////////////////////////////////////////////////////////////////////////
static inline void readRNG(GridSerialRNG &serial,
GridParallelRNG &parallel,
std::string file,
uint64_t offset,
uint32_t &nersc_csum,
uint32_t &scidac_csuma,
uint32_t &scidac_csumb)
{
typedef typename GridSerialRNG::RngStateType RngStateType;
const int RngStateCount = GridSerialRNG::RngStateCount;
typedef std::array<RngStateType,RngStateCount> RNGstate;
typedef RngStateType word; word w=0;
std::string format = "IEEE32BIG";
GridBase *grid = parallel._grid;
uint64_t gsites = grid->gSites();
uint64_t lsites = grid->lSites();
uint32_t nersc_csum_tmp = 0;
uint32_t scidac_csuma_tmp = 0;
uint32_t scidac_csumb_tmp = 0;
GridStopWatch timer;
std::cout << GridLogMessage << "RNG read I/O on file " << file << std::endl;
std::vector<RNGstate> iodata(lsites);
IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
nersc_csum,scidac_csuma,scidac_csumb);
timer.Start();
parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
std::vector<RngStateType> tmp(RngStateCount);
std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin());
parallel.SetState(tmp,lidx);
}
timer.Stop();
iodata.resize(1);
IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_MASTER_APPEND,
nersc_csum_tmp,scidac_csuma_tmp,scidac_csumb_tmp);
{
std::vector<RngStateType> tmp(RngStateCount);
std::copy(iodata[0].begin(),iodata[0].end(),tmp.begin());
serial.SetState(tmp,0);
}
nersc_csum = nersc_csum + nersc_csum_tmp;
scidac_csuma = scidac_csuma ^ scidac_csuma_tmp;
scidac_csumb = scidac_csumb ^ scidac_csumb_tmp;
std::cout << GridLogMessage << "RNG file nersc_checksum " << std::hex << nersc_csum << std::dec << std::endl;
std::cout << GridLogMessage << "RNG file scidac_checksuma " << std::hex << scidac_csuma << std::dec << std::endl;
std::cout << GridLogMessage << "RNG file scidac_checksumb " << std::hex << scidac_csumb << std::dec << std::endl;
std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl;
}
/////////////////////////////////////////////////////////////////////////////
// Write a RNG; lexico map to an array of state and use IOobject
//////////////////////////////////////////////////////////////////////////////////////
static inline void writeRNG(GridSerialRNG &serial,
GridParallelRNG &parallel,
std::string file,
uint64_t offset,
uint32_t &nersc_csum,
uint32_t &scidac_csuma,
uint32_t &scidac_csumb)
{
typedef typename GridSerialRNG::RngStateType RngStateType;
typedef RngStateType word; word w=0;
const int RngStateCount = GridSerialRNG::RngStateCount;
typedef std::array<RngStateType,RngStateCount> RNGstate;
GridBase *grid = parallel._grid;
uint64_t gsites = grid->gSites();
uint64_t lsites = grid->lSites();
uint32_t nersc_csum_tmp;
uint32_t scidac_csuma_tmp;
uint32_t scidac_csumb_tmp;
GridStopWatch timer;
std::string format = "IEEE32BIG";
std::cout << GridLogMessage << "RNG write I/O on file " << file << std::endl;
timer.Start();
std::vector<RNGstate> iodata(lsites);
parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
std::vector<RngStateType> tmp(RngStateCount);
parallel.GetState(tmp,lidx);
std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin());
}
timer.Stop();
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
nersc_csum,scidac_csuma,scidac_csumb);
iodata.resize(1);
{
std::vector<RngStateType> tmp(RngStateCount);
serial.GetState(tmp,0);
std::copy(tmp.begin(),tmp.end(),iodata[0].begin());
}
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_MASTER_APPEND,
nersc_csum_tmp,scidac_csuma_tmp,scidac_csumb_tmp);
nersc_csum = nersc_csum + nersc_csum_tmp;
scidac_csuma = scidac_csuma ^ scidac_csuma_tmp;
scidac_csumb = scidac_csumb ^ scidac_csumb_tmp;
std::cout << GridLogMessage << "RNG file checksum " << std::hex << nersc_csum << std::dec << std::endl;
std::cout << GridLogMessage << "RNG file checksuma " << std::hex << scidac_csuma << std::dec << std::endl;
std::cout << GridLogMessage << "RNG file checksumb " << std::hex << scidac_csumb << std::dec << std::endl;
std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl;
}
};
}
#endif

876
Grid/parallelIO/IldgIO.h Normal file
View File

@@ -0,0 +1,876 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/parallelIO/IldgIO.h
Copyright (C) 2015
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_ILDG_IO_H
#define GRID_ILDG_IO_H
#ifdef HAVE_LIME
#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <pwd.h>
#include <sys/utsname.h>
#include <unistd.h>
//C-Lime is a must have for this functionality
extern "C" {
#include "lime.h"
}
namespace Grid {
namespace QCD {
/////////////////////////////////
// Encode word types as strings
/////////////////////////////////
template<class word> inline std::string ScidacWordMnemonic(void){ return std::string("unknown"); }
template<> inline std::string ScidacWordMnemonic<double> (void){ return std::string("D"); }
template<> inline std::string ScidacWordMnemonic<float> (void){ return std::string("F"); }
template<> inline std::string ScidacWordMnemonic< int32_t>(void){ return std::string("I32_t"); }
template<> inline std::string ScidacWordMnemonic<uint32_t>(void){ return std::string("U32_t"); }
template<> inline std::string ScidacWordMnemonic< int64_t>(void){ return std::string("I64_t"); }
template<> inline std::string ScidacWordMnemonic<uint64_t>(void){ return std::string("U64_t"); }
/////////////////////////////////////////
// Encode a generic tensor as a string
/////////////////////////////////////////
template<class vobj> std::string ScidacRecordTypeString(int &colors, int &spins, int & typesize,int &datacount) {
typedef typename getPrecision<vobj>::real_scalar_type stype;
int _ColourN = indexRank<ColourIndex,vobj>();
int _ColourScalar = isScalar<ColourIndex,vobj>();
int _ColourVector = isVector<ColourIndex,vobj>();
int _ColourMatrix = isMatrix<ColourIndex,vobj>();
int _SpinN = indexRank<SpinIndex,vobj>();
int _SpinScalar = isScalar<SpinIndex,vobj>();
int _SpinVector = isVector<SpinIndex,vobj>();
int _SpinMatrix = isMatrix<SpinIndex,vobj>();
int _LorentzN = indexRank<LorentzIndex,vobj>();
int _LorentzScalar = isScalar<LorentzIndex,vobj>();
int _LorentzVector = isVector<LorentzIndex,vobj>();
int _LorentzMatrix = isMatrix<LorentzIndex,vobj>();
std::stringstream stream;
stream << "GRID_";
stream << ScidacWordMnemonic<stype>();
if ( _LorentzVector ) stream << "_LorentzVector"<<_LorentzN;
if ( _LorentzMatrix ) stream << "_LorentzMatrix"<<_LorentzN;
if ( _SpinVector ) stream << "_SpinVector"<<_SpinN;
if ( _SpinMatrix ) stream << "_SpinMatrix"<<_SpinN;
if ( _ColourVector ) stream << "_ColourVector"<<_ColourN;
if ( _ColourMatrix ) stream << "_ColourMatrix"<<_ColourN;
if ( _ColourScalar && _LorentzScalar && _SpinScalar ) stream << "_Complex";
typesize = sizeof(typename vobj::scalar_type);
if ( _ColourMatrix ) typesize*= _ColourN*_ColourN;
else typesize*= _ColourN;
if ( _SpinMatrix ) typesize*= _SpinN*_SpinN;
else typesize*= _SpinN;
colors = _ColourN;
spins = _SpinN;
datacount = _LorentzN;
return stream.str();
}
template<class vobj> std::string ScidacRecordTypeString(Lattice<vobj> & lat,int &colors, int &spins, int & typesize,int &datacount) {
return ScidacRecordTypeString<vobj>(colors,spins,typesize,datacount);
};
////////////////////////////////////////////////////////////
// Helper to fill out metadata
////////////////////////////////////////////////////////////
template<class vobj> void ScidacMetaData(Lattice<vobj> & field,
FieldMetaData &header,
scidacRecord & _scidacRecord,
scidacFile & _scidacFile)
{
typedef typename getPrecision<vobj>::real_scalar_type stype;
/////////////////////////////////////
// Pull Grid's metadata
/////////////////////////////////////
PrepareMetaData(field,header);
/////////////////////////////////////
// Scidac Private File structure
/////////////////////////////////////
_scidacFile = scidacFile(field._grid);
/////////////////////////////////////
// Scidac Private Record structure
/////////////////////////////////////
scidacRecord sr;
sr.datatype = ScidacRecordTypeString(field,sr.colors,sr.spins,sr.typesize,sr.datacount);
sr.date = header.creation_date;
sr.precision = ScidacWordMnemonic<stype>();
sr.recordtype = GRID_IO_FIELD;
_scidacRecord = sr;
// std::cout << GridLogMessage << "Build SciDAC datatype " <<sr.datatype<<std::endl;
}
///////////////////////////////////////////////////////
// Scidac checksum
///////////////////////////////////////////////////////
static int scidacChecksumVerify(scidacChecksum &scidacChecksum_,uint32_t scidac_csuma,uint32_t scidac_csumb)
{
uint32_t scidac_checksuma = stoull(scidacChecksum_.suma,0,16);
uint32_t scidac_checksumb = stoull(scidacChecksum_.sumb,0,16);
if ( scidac_csuma !=scidac_checksuma) return 0;
if ( scidac_csumb !=scidac_checksumb) return 0;
return 1;
}
////////////////////////////////////////////////////////////////////////////////////
// Lime, ILDG and Scidac I/O classes
////////////////////////////////////////////////////////////////////////////////////
class GridLimeReader : public BinaryIO {
public:
///////////////////////////////////////////////////
// FIXME: format for RNG? Now just binary out instead
///////////////////////////////////////////////////
FILE *File;
LimeReader *LimeR;
std::string filename;
/////////////////////////////////////////////
// Open the file
/////////////////////////////////////////////
void open(const std::string &_filename)
{
filename= _filename;
File = fopen(filename.c_str(), "r");
if (File == nullptr)
{
std::cerr << "cannot open file '" << filename << "'" << std::endl;
abort();
}
LimeR = limeCreateReader(File);
}
/////////////////////////////////////////////
// Close the file
/////////////////////////////////////////////
void close(void){
fclose(File);
// limeDestroyReader(LimeR);
}
////////////////////////////////////////////
// Read a generic lattice field and verify checksum
////////////////////////////////////////////
template<class vobj>
void readLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
{
typedef typename vobj::scalar_object sobj;
scidacChecksum scidacChecksum_;
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
std::string format = getFormatString<vobj>();
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
uint64_t file_bytes =limeReaderBytes(LimeR);
// std::cout << GridLogMessage << limeReaderType(LimeR) << " "<< file_bytes <<" bytes "<<std::endl;
// std::cout << GridLogMessage<< " readLimeObject seeking "<< record_name <<" found record :" <<limeReaderType(LimeR) <<std::endl;
if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
// std::cout << GridLogMessage<< " readLimeLatticeBinaryObject matches ! " <<std::endl;
uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites;
// std::cout << "R sizeof(sobj)= " <<sizeof(sobj)<<std::endl;
// std::cout << "R Gsites " <<field._grid->_gsites<<std::endl;
// std::cout << "R Payload expected " <<PayloadSize<<std::endl;
// std::cout << "R file size " <<file_bytes <<std::endl;
assert(PayloadSize == file_bytes);// Must match or user error
uint64_t offset= ftello(File);
// std::cout << " ReadLatticeObject from offset "<<offset << std::endl;
BinarySimpleMunger<sobj,sobj> munge;
BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
std::cout << GridLogMessage << "SciDAC checksum A " << std::hex << scidac_csuma << std::dec << std::endl;
std::cout << GridLogMessage << "SciDAC checksum B " << std::hex << scidac_csumb << std::dec << std::endl;
/////////////////////////////////////////////
// Insist checksum is next record
/////////////////////////////////////////////
readLimeObject(scidacChecksum_,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
/////////////////////////////////////////////
// Verify checksums
/////////////////////////////////////////////
assert(scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb)==1);
return;
}
}
}
////////////////////////////////////////////
// Read a generic serialisable object
////////////////////////////////////////////
void readLimeObject(std::string &xmlstring,std::string record_name)
{
// should this be a do while; can we miss a first record??
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
// std::cout << GridLogMessage<< " readLimeObject seeking "<< record_name <<" found record :" <<limeReaderType(LimeR) <<std::endl;
uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration)
if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
// std::cout << GridLogMessage<< " readLimeObject matches ! " << record_name <<std::endl;
std::vector<char> xmlc(nbytes+1,'\0');
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
// std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl;
xmlstring = std::string(&xmlc[0]);
return;
}
}
assert(0);
}
template<class serialisable_object>
void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
{
std::string xmlstring;
readLimeObject(xmlstring, record_name);
XmlReader RD(xmlstring, true, "");
read(RD,object_name,object);
}
};
class GridLimeWriter : public BinaryIO
{
public:
///////////////////////////////////////////////////
// FIXME: format for RNG? Now just binary out instead
// FIXME: collective calls or not ?
// : must know if I am the I/O boss
///////////////////////////////////////////////////
FILE *File;
LimeWriter *LimeW;
std::string filename;
bool boss_node;
GridLimeWriter( bool isboss = true) {
boss_node = isboss;
}
void open(const std::string &_filename) {
filename= _filename;
if ( boss_node ) {
File = fopen(filename.c_str(), "w");
LimeW = limeCreateWriter(File); assert(LimeW != NULL );
}
}
/////////////////////////////////////////////
// Close the file
/////////////////////////////////////////////
void close(void) {
if ( boss_node ) {
fclose(File);
}
// limeDestroyWriter(LimeW);
}
///////////////////////////////////////////////////////
// Lime utility functions
///////////////////////////////////////////////////////
int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize)
{
if ( boss_node ) {
LimeRecordHeader *h;
h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize);
assert(limeWriteRecordHeader(h, LimeW) >= 0);
limeDestroyHeader(h);
}
return LIME_SUCCESS;
}
////////////////////////////////////////////
// Write a generic serialisable object
////////////////////////////////////////////
void writeLimeObject(int MB,int ME,XmlWriter &writer,std::string object_name,std::string record_name)
{
if ( boss_node ) {
std::string xmlstring = writer.docString();
// std::cout << "WriteLimeObject" << record_name <<std::endl;
uint64_t nbytes = xmlstring.size();
// std::cout << " xmlstring "<< nbytes<< " " << xmlstring <<std::endl;
int err;
LimeRecordHeader *h = limeCreateHeader(MB, ME,const_cast<char *>(record_name.c_str()), nbytes);
assert(h!= NULL);
err=limeWriteRecordHeader(h, LimeW); assert(err>=0);
err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0);
err=limeWriterCloseRecord(LimeW); assert(err>=0);
limeDestroyHeader(h);
}
}
template<class serialisable_object>
void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name, const unsigned int scientificPrec = 0)
{
XmlWriter WR("","");
if (scientificPrec)
{
WR.scientificFormat(true);
WR.setPrecision(scientificPrec);
}
write(WR,object_name,object);
writeLimeObject(MB, ME, WR, object_name, record_name);
}
////////////////////////////////////////////////////
// Write a generic lattice field and csum
// This routine is Collectively called by all nodes
// in communicator used by the field._grid
////////////////////////////////////////////////////
template<class vobj>
void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
{
////////////////////////////////////////////////////////////////////
// NB: FILE and iostream are jointly writing disjoint sequences in the
// the same file through different file handles (integer units).
//
// These are both buffered, so why I think this code is right is as follows.
//
// i) write record header to FILE *File, telegraphing the size; flush
// ii) ftello reads the offset from FILE *File .
// iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
// Closes iostream and flushes.
// iv) fseek on FILE * to end of this disjoint section.
// v) Continue writing scidac record.
////////////////////////////////////////////////////////////////////
GridBase *grid = field._grid;
assert(boss_node == field._grid->IsBoss() );
////////////////////////////////////////////
// Create record header
////////////////////////////////////////////
typedef typename vobj::scalar_object sobj;
int err;
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
uint64_t PayloadSize = sizeof(sobj) * grid->_gsites;
if ( boss_node ) {
createLimeRecordHeader(record_name, 0, 0, PayloadSize);
fflush(File);
}
// std::cout << "W sizeof(sobj)" <<sizeof(sobj)<<std::endl;
// std::cout << "W Gsites " <<field._grid->_gsites<<std::endl;
// std::cout << "W Payload expected " <<PayloadSize<<std::endl;
////////////////////////////////////////////////
// Check all nodes agree on file position
////////////////////////////////////////////////
uint64_t offset1;
if ( boss_node ) {
offset1 = ftello(File);
}
grid->Broadcast(0,(void *)&offset1,sizeof(offset1));
///////////////////////////////////////////
// The above is collective. Write by other means into the binary record
///////////////////////////////////////////
std::string format = getFormatString<vobj>();
BinarySimpleMunger<sobj,sobj> munge;
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb);
///////////////////////////////////////////
// Wind forward and close the record
///////////////////////////////////////////
if ( boss_node ) {
fseek(File,0,SEEK_END);
uint64_t offset2 = ftello(File); // std::cout << " now at offset "<<offset2 << std::endl;
assert( (offset2-offset1) == PayloadSize);
}
/////////////////////////////////////////////////////////////
// Check MPI-2 I/O did what we expect to file
/////////////////////////////////////////////////////////////
if ( boss_node ) {
err=limeWriterCloseRecord(LimeW); assert(err>=0);
}
////////////////////////////////////////
// Write checksum element, propagaing forward from the BinaryIO
// Always pair a checksum with a binary object, and close message
////////////////////////////////////////
scidacChecksum checksum;
std::stringstream streama; streama << std::hex << scidac_csuma;
std::stringstream streamb; streamb << std::hex << scidac_csumb;
checksum.suma= streama.str();
checksum.sumb= streamb.str();
if ( boss_node ) {
writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
}
}
};
class ScidacWriter : public GridLimeWriter {
public:
ScidacWriter(bool isboss =true ) : GridLimeWriter(isboss) { };
template<class SerialisableUserFile>
void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
{
scidacFile _scidacFile(grid);
if ( this->boss_node ) {
writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
}
}
////////////////////////////////////////////////
// Write generic lattice field in scidac format
////////////////////////////////////////////////
template <class vobj, class userRecord>
void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord,
const unsigned int recordScientificPrec = 0)
{
GridBase * grid = field._grid;
////////////////////////////////////////
// fill the Grid header
////////////////////////////////////////
FieldMetaData header;
scidacRecord _scidacRecord;
scidacFile _scidacFile;
ScidacMetaData(field,header,_scidacRecord,_scidacFile);
//////////////////////////////////////////////
// Fill the Lime file record by record
//////////////////////////////////////////////
if ( this->boss_node ) {
writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML), recordScientificPrec);
writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
}
// Collective call
writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
}
};
class ScidacReader : public GridLimeReader {
public:
template<class SerialisableUserFile>
void readScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
{
scidacFile _scidacFile(grid);
readLimeObject(_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
readLimeObject(_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
}
////////////////////////////////////////////////
// Write generic lattice field in scidac format
////////////////////////////////////////////////
template <class vobj, class userRecord>
void readScidacFieldRecord(Lattice<vobj> &field,userRecord &_userRecord)
{
typedef typename vobj::scalar_object sobj;
GridBase * grid = field._grid;
////////////////////////////////////////
// fill the Grid header
////////////////////////////////////////
FieldMetaData header;
scidacRecord _scidacRecord;
scidacFile _scidacFile;
//////////////////////////////////////////////
// Fill the Lime file record by record
//////////////////////////////////////////////
readLimeObject(header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
readLimeObject(_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
readLimeObject(_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
readLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA));
}
void skipPastBinaryRecord(void) {
std::string rec_name(ILDG_BINARY_DATA);
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) ) ) {
skipPastObjectRecord(std::string(SCIDAC_CHECKSUM));
return;
}
}
}
void skipPastObjectRecord(std::string rec_name) {
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) ) ) {
return;
}
}
}
void skipScidacFieldRecord() {
skipPastObjectRecord(std::string(GRID_FORMAT));
skipPastObjectRecord(std::string(SCIDAC_RECORD_XML));
skipPastObjectRecord(std::string(SCIDAC_PRIVATE_RECORD_XML));
skipPastBinaryRecord();
}
};
class IldgWriter : public ScidacWriter {
public:
IldgWriter(bool isboss) : ScidacWriter(isboss) {};
///////////////////////////////////
// A little helper
///////////////////////////////////
void writeLimeIldgLFN(std::string &LFN)
{
uint64_t PayloadSize = LFN.size();
int err;
createLimeRecordHeader(ILDG_DATA_LFN, 0 , 0, PayloadSize);
err=limeWriteRecordData(const_cast<char*>(LFN.c_str()), &PayloadSize,LimeW); assert(err>=0);
err=limeWriterCloseRecord(LimeW); assert(err>=0);
}
////////////////////////////////////////////////////////////////
// Special ILDG operations ; gauge configs only.
// Don't require scidac records EXCEPT checksum
// Use Grid MetaData object if present.
////////////////////////////////////////////////////////////////
template <class vsimd>
void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,int sequence,std::string LFN,std::string description)
{
GridBase * grid = Umu._grid;
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
typedef iLorentzColourMatrix<vsimd> vobj;
typedef typename vobj::scalar_object sobj;
////////////////////////////////////////
// fill the Grid header
////////////////////////////////////////
FieldMetaData header;
scidacRecord _scidacRecord;
scidacFile _scidacFile;
ScidacMetaData(Umu,header,_scidacRecord,_scidacFile);
std::string format = header.floating_point;
header.ensemble_id = description;
header.ensemble_label = description;
header.sequence_number = sequence;
header.ildg_lfn = LFN;
assert ( (format == std::string("IEEE32BIG"))
||(format == std::string("IEEE64BIG")) );
//////////////////////////////////////////////////////
// Fill ILDG header data struct
//////////////////////////////////////////////////////
ildgFormat ildgfmt ;
ildgfmt.field = std::string("su3gauge");
if ( format == std::string("IEEE32BIG") ) {
ildgfmt.precision = 32;
} else {
ildgfmt.precision = 64;
}
ildgfmt.version = 1.0;
ildgfmt.lx = header.dimension[0];
ildgfmt.ly = header.dimension[1];
ildgfmt.lz = header.dimension[2];
ildgfmt.lt = header.dimension[3];
assert(header.nd==4);
assert(header.nd==header.dimension.size());
//////////////////////////////////////////////////////////////////////////////
// Fill the USQCD info field
//////////////////////////////////////////////////////////////////////////////
usqcdInfo info;
info.version=1.0;
info.plaq = header.plaquette;
info.linktr = header.link_trace;
std::cout << GridLogMessage << " Writing config; IldgIO "<<std::endl;
//////////////////////////////////////////////
// Fill the Lime file record by record
//////////////////////////////////////////////
writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
writeLimeObject(0,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
writeLimeObject(0,1,info,info.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
writeLimeObject(1,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
writeLimeObject(0,0,info,info.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
writeLimeObject(0,0,ildgfmt,std::string("ildgFormat") ,std::string(ILDG_FORMAT)); // rec
writeLimeIldgLFN(header.ildg_lfn); // rec
writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
// limeDestroyWriter(LimeW);
}
};
class IldgReader : public GridLimeReader {
public:
////////////////////////////////////////////////////////////////
// Read either Grid/SciDAC/ILDG configuration
// Don't require scidac records EXCEPT checksum
// Use Grid MetaData object if present.
// Else use ILDG MetaData object if present.
// Else use SciDAC MetaData object if present.
////////////////////////////////////////////////////////////////
template <class vsimd>
void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, FieldMetaData &FieldMetaData_) {
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
typedef typename GaugeField::vector_object vobj;
typedef typename vobj::scalar_object sobj;
typedef LorentzColourMatrixF fobj;
typedef LorentzColourMatrixD dobj;
GridBase *grid = Umu._grid;
std::vector<int> dims = Umu._grid->FullDimensions();
assert(dims.size()==4);
// Metadata holders
ildgFormat ildgFormat_ ;
std::string ildgLFN_ ;
scidacChecksum scidacChecksum_;
usqcdInfo usqcdInfo_ ;
// track what we read from file
int found_ildgFormat =0;
int found_ildgLFN =0;
int found_scidacChecksum=0;
int found_usqcdInfo =0;
int found_ildgBinary =0;
int found_FieldMetaData =0;
uint32_t nersc_csum;
uint32_t scidac_csuma;
uint32_t scidac_csumb;
// Binary format
std::string format;
//////////////////////////////////////////////////////////////////////////
// Loop over all records
// -- Order is poorly guaranteed except ILDG header preceeds binary section.
// -- Run like an event loop.
// -- Impose trust hierarchy. Grid takes precedence & look for ILDG, and failing
// that Scidac.
// -- Insist on Scidac checksum record.
//////////////////////////////////////////////////////////////////////////
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration)
//////////////////////////////////////////////////////////////////
// If not BINARY_DATA read a string and parse
//////////////////////////////////////////////////////////////////
if ( strncmp(limeReaderType(LimeR), ILDG_BINARY_DATA,strlen(ILDG_BINARY_DATA) ) ) {
// Copy out the string
std::vector<char> xmlc(nbytes+1,'\0');
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
// std::cout << GridLogMessage<< "Non binary record :" <<limeReaderType(LimeR) <<std::endl; //<<"\n"<<(&xmlc[0])<<std::endl;
//////////////////////////////////
// ILDG format record
std::string xmlstring(&xmlc[0]);
if ( !strncmp(limeReaderType(LimeR), ILDG_FORMAT,strlen(ILDG_FORMAT)) ) {
XmlReader RD(xmlstring, true, "");
read(RD,"ildgFormat",ildgFormat_);
if ( ildgFormat_.precision == 64 ) format = std::string("IEEE64BIG");
if ( ildgFormat_.precision == 32 ) format = std::string("IEEE32BIG");
assert( ildgFormat_.lx == dims[0]);
assert( ildgFormat_.ly == dims[1]);
assert( ildgFormat_.lz == dims[2]);
assert( ildgFormat_.lt == dims[3]);
found_ildgFormat = 1;
}
if ( !strncmp(limeReaderType(LimeR), ILDG_DATA_LFN,strlen(ILDG_DATA_LFN)) ) {
FieldMetaData_.ildg_lfn = xmlstring;
found_ildgLFN = 1;
}
if ( !strncmp(limeReaderType(LimeR), GRID_FORMAT,strlen(ILDG_FORMAT)) ) {
XmlReader RD(xmlstring, true, "");
read(RD,"FieldMetaData",FieldMetaData_);
format = FieldMetaData_.floating_point;
assert(FieldMetaData_.dimension[0] == dims[0]);
assert(FieldMetaData_.dimension[1] == dims[1]);
assert(FieldMetaData_.dimension[2] == dims[2]);
assert(FieldMetaData_.dimension[3] == dims[3]);
found_FieldMetaData = 1;
}
if ( !strncmp(limeReaderType(LimeR), SCIDAC_RECORD_XML,strlen(SCIDAC_RECORD_XML)) ) {
// is it a USQCD info field
if ( xmlstring.find(std::string("usqcdInfo")) != std::string::npos ) {
// std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl;
XmlReader RD(xmlstring, true, "");
read(RD,"usqcdInfo",usqcdInfo_);
found_usqcdInfo = 1;
}
}
if ( !strncmp(limeReaderType(LimeR), SCIDAC_CHECKSUM,strlen(SCIDAC_CHECKSUM)) ) {
XmlReader RD(xmlstring, true, "");
read(RD,"scidacChecksum",scidacChecksum_);
found_scidacChecksum = 1;
}
} else {
/////////////////////////////////
// Binary data
/////////////////////////////////
std::cout << GridLogMessage << "ILDG Binary record found : " ILDG_BINARY_DATA << std::endl;
uint64_t offset= ftello(File);
if ( format == std::string("IEEE64BIG") ) {
GaugeSimpleMunger<dobj, sobj> munge;
BinaryIO::readLatticeObject< vobj, dobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
} else {
GaugeSimpleMunger<fobj, sobj> munge;
BinaryIO::readLatticeObject< vobj, fobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
}
found_ildgBinary = 1;
}
}
//////////////////////////////////////////////////////
// Minimally must find binary segment and checksum
// Since this is an ILDG reader require ILDG format
//////////////////////////////////////////////////////
assert(found_ildgBinary);
assert(found_ildgFormat);
assert(found_scidacChecksum);
// Must find something with the lattice dimensions
assert(found_FieldMetaData||found_ildgFormat);
if ( found_FieldMetaData ) {
std::cout << GridLogMessage<<"Grid MetaData was record found: configuration was probably written by Grid ! Yay ! "<<std::endl;
} else {
assert(found_ildgFormat);
assert ( ildgFormat_.field == std::string("su3gauge") );
///////////////////////////////////////////////////////////////////////////////////////
// Populate our Grid metadata as best we can
///////////////////////////////////////////////////////////////////////////////////////
std::ostringstream vers; vers << ildgFormat_.version;
FieldMetaData_.hdr_version = vers.str();
FieldMetaData_.data_type = std::string("4D_SU3_GAUGE_3X3");
FieldMetaData_.nd=4;
FieldMetaData_.dimension.resize(4);
FieldMetaData_.dimension[0] = ildgFormat_.lx ;
FieldMetaData_.dimension[1] = ildgFormat_.ly ;
FieldMetaData_.dimension[2] = ildgFormat_.lz ;
FieldMetaData_.dimension[3] = ildgFormat_.lt ;
if ( found_usqcdInfo ) {
FieldMetaData_.plaquette = usqcdInfo_.plaq;
FieldMetaData_.link_trace= usqcdInfo_.linktr;
std::cout << GridLogMessage <<"This configuration was probably written by USQCD "<<std::endl;
std::cout << GridLogMessage <<"USQCD xml record Plaquette : "<<FieldMetaData_.plaquette<<std::endl;
std::cout << GridLogMessage <<"USQCD xml record LinkTrace : "<<FieldMetaData_.link_trace<<std::endl;
} else {
FieldMetaData_.plaquette = 0.0;
FieldMetaData_.link_trace= 0.0;
std::cout << GridLogWarning << "This configuration is unsafe with no plaquette records that can verify it !!! "<<std::endl;
}
}
////////////////////////////////////////////////////////////
// Really really want to mandate a scidac checksum
////////////////////////////////////////////////////////////
if ( found_scidacChecksum ) {
FieldMetaData_.scidac_checksuma = stoull(scidacChecksum_.suma,0,16);
FieldMetaData_.scidac_checksumb = stoull(scidacChecksum_.sumb,0,16);
scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb);
assert( scidac_csuma ==FieldMetaData_.scidac_checksuma);
assert( scidac_csumb ==FieldMetaData_.scidac_checksumb);
std::cout << GridLogMessage<<"SciDAC checksums match " << std::endl;
} else {
std::cout << GridLogWarning<<"SciDAC checksums not found. This is unsafe. " << std::endl;
assert(0); // Can I insist always checksum ?
}
if ( found_FieldMetaData || found_usqcdInfo ) {
FieldMetaData checker;
GaugeStatistics(Umu,checker);
assert(fabs(checker.plaquette - FieldMetaData_.plaquette )<1.0e-5);
assert(fabs(checker.link_trace - FieldMetaData_.link_trace)<1.0e-5);
std::cout << GridLogMessage<<"Plaquette and link trace match " << std::endl;
}
}
};
}}
//HAVE_LIME
#endif
#endif

View File

@@ -0,0 +1,237 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/parallelIO/IldgIO.h
Copyright (C) 2015
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_ILDGTYPES_IO_H
#define GRID_ILDGTYPES_IO_H
#ifdef HAVE_LIME
extern "C" { // for linkage
#include "lime.h"
}
namespace Grid {
/////////////////////////////////////////////////////////////////////////////////
// Data representation of records that enter ILDG and SciDac formats
/////////////////////////////////////////////////////////////////////////////////
#define GRID_FORMAT "grid-format"
#define ILDG_FORMAT "ildg-format"
#define ILDG_BINARY_DATA "ildg-binary-data"
#define ILDG_DATA_LFN "ildg-data-lfn"
#define SCIDAC_CHECKSUM "scidac-checksum"
#define SCIDAC_PRIVATE_FILE_XML "scidac-private-file-xml"
#define SCIDAC_FILE_XML "scidac-file-xml"
#define SCIDAC_PRIVATE_RECORD_XML "scidac-private-record-xml"
#define SCIDAC_RECORD_XML "scidac-record-xml"
#define SCIDAC_BINARY_DATA "scidac-binary-data"
// Unused SCIDAC records names; could move to support this functionality
#define SCIDAC_SITELIST "scidac-sitelist"
////////////////////////////////////////////////////////////
const int GRID_IO_SINGLEFILE = 0; // hardcode lift from QIO compat
const int GRID_IO_MULTIFILE = 1; // hardcode lift from QIO compat
const int GRID_IO_FIELD = 0; // hardcode lift from QIO compat
const int GRID_IO_GLOBAL = 1; // hardcode lift from QIO compat
////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////
// QIO uses mandatory "private" records fixed format
// Private is in principle "opaque" however it can't be changed now because that would break existing
// file compatability, so should be correct to assume the undocumented but defacto file structure.
/////////////////////////////////////////////////////////////////////////////////
struct emptyUserRecord : Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(emptyUserRecord,int,dummy);
emptyUserRecord() { dummy=0; };
};
////////////////////////
// Scidac private file xml
// <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile>
////////////////////////
struct scidacFile : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(scidacFile,
double, version,
int, spacetime,
std::string, dims, // must convert to int
int, volfmt);
std::vector<int> getDimensions(void) {
std::stringstream stream(dims);
std::vector<int> dimensions;
int n;
while(stream >> n){
dimensions.push_back(n);
}
return dimensions;
}
void setDimensions(std::vector<int> dimensions) {
char delimiter = ' ';
std::stringstream stream;
for(int i=0;i<dimensions.size();i++){
stream << dimensions[i];
if ( i != dimensions.size()-1) {
stream << delimiter <<std::endl;
}
}
dims = stream.str();
}
// Constructor provides Grid
scidacFile() =default; // default constructor
scidacFile(GridBase * grid){
version = 1.0;
spacetime = grid->_ndimension;
setDimensions(grid->FullDimensions());
volfmt = GRID_IO_SINGLEFILE;
}
};
///////////////////////////////////////////////////////////////////////
// scidac-private-record-xml : example
// <scidacRecord>
// <version>1.1</version><date>Tue Jul 26 21:14:44 2011 UTC</date><recordtype>0</recordtype>
// <datatype>QDP_D3_ColorMatrix</datatype><precision>D</precision><colors>3</colors><spins>4</spins>
// <typesize>144</typesize><datacount>4</datacount>
// </scidacRecord>
///////////////////////////////////////////////////////////////////////
struct scidacRecord : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(scidacRecord,
double, version,
std::string, date,
int, recordtype,
std::string, datatype,
std::string, precision,
int, colors,
int, spins,
int, typesize,
int, datacount);
scidacRecord()
: version(1.0), recordtype(0), colors(0), spins(0), typesize(0), datacount(0)
{}
};
////////////////////////
// ILDG format
////////////////////////
struct ildgFormat : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(ildgFormat,
double, version,
std::string, field,
int, precision,
int, lx,
int, ly,
int, lz,
int, lt);
ildgFormat() { version=1.0; };
};
////////////////////////
// USQCD info
////////////////////////
struct usqcdInfo : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdInfo,
double, version,
double, plaq,
double, linktr,
std::string, info);
usqcdInfo() {
version=1.0;
};
};
////////////////////////
// Scidac Checksum
////////////////////////
struct scidacChecksum : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(scidacChecksum,
double, version,
std::string, suma,
std::string, sumb);
scidacChecksum() {
version=1.0;
};
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Type: scidac-file-xml <title>MILC ILDG archival gauge configuration</title>
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Type:
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////
// Scidac private file xml
// <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile>
////////////////////////
#if 0
////////////////////////////////////////////////////////////////////////////////////////
// From http://www.physics.utah.edu/~detar/scidac/qio_2p3.pdf
////////////////////////////////////////////////////////////////////////////////////////
struct usqcdPropFile : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropFile,
double, version,
std::string, type,
std::string, info);
usqcdPropFile() {
version=1.0;
};
};
struct usqcdSourceInfo : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdSourceInfo,
double, version,
std::string, info);
usqcdSourceInfo() {
version=1.0;
};
};
struct usqcdPropInfo : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropInfo,
double, version,
int, spin,
int, color,
std::string, info);
usqcdPropInfo() {
version=1.0;
};
};
#endif
}
#endif
#endif

327
Grid/parallelIO/MetaData.h Normal file
View File

@@ -0,0 +1,327 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/parallelIO/NerscIO.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <algorithm>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <map>
#include <unistd.h>
#include <sys/utsname.h>
#include <pwd.h>
namespace Grid {
///////////////////////////////////////////////////////
// Precision mapping
///////////////////////////////////////////////////////
template<class vobj> static std::string getFormatString (void)
{
std::string format;
typedef typename getPrecision<vobj>::real_scalar_type stype;
if ( sizeof(stype) == sizeof(float) ) {
format = std::string("IEEE32BIG");
}
if ( sizeof(stype) == sizeof(double) ) {
format = std::string("IEEE64BIG");
}
return format;
}
////////////////////////////////////////////////////////////////////////////////
// header specification/interpretation
////////////////////////////////////////////////////////////////////////////////
class FieldMetaData : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(FieldMetaData,
int, nd,
std::vector<int>, dimension,
std::vector<std::string>, boundary,
int, data_start,
std::string, hdr_version,
std::string, storage_format,
double, link_trace,
double, plaquette,
uint32_t, checksum,
uint32_t, scidac_checksuma,
uint32_t, scidac_checksumb,
unsigned int, sequence_number,
std::string, data_type,
std::string, ensemble_id,
std::string, ensemble_label,
std::string, ildg_lfn,
std::string, creator,
std::string, creator_hardware,
std::string, creation_date,
std::string, archive_date,
std::string, floating_point);
// WARNING: non-initialised values might lead to twisted parallel IO
// issues, std::string are fine because they initliase to size 0
// as per C++ standard.
FieldMetaData(void)
: nd(4), dimension(4,0), boundary(4, ""), data_start(0),
link_trace(0.), plaquette(0.), checksum(0),
scidac_checksuma(0), scidac_checksumb(0), sequence_number(0)
{}
};
namespace QCD {
using namespace Grid;
//////////////////////////////////////////////////////////////////////
// Bit and Physical Checksumming and QA of data
//////////////////////////////////////////////////////////////////////
inline void GridMetaData(GridBase *grid,FieldMetaData &header)
{
int nd = grid->_ndimension;
header.nd = nd;
header.dimension.resize(nd);
header.boundary.resize(nd);
header.data_start = 0;
for(int d=0;d<nd;d++) {
header.dimension[d] = grid->_fdimensions[d];
}
for(int d=0;d<nd;d++) {
header.boundary[d] = std::string("PERIODIC");
}
}
inline void MachineCharacteristics(FieldMetaData &header)
{
// Who
struct passwd *pw = getpwuid (getuid());
if (pw) header.creator = std::string(pw->pw_name);
// When
std::time_t t = std::time(nullptr);
std::tm tm_ = *std::localtime(&t);
std::ostringstream oss;
// oss << std::put_time(&tm_, "%c %Z");
header.creation_date = oss.str();
header.archive_date = header.creation_date;
// What
struct utsname name; uname(&name);
header.creator_hardware = std::string(name.nodename)+"-";
header.creator_hardware+= std::string(name.machine)+"-";
header.creator_hardware+= std::string(name.sysname)+"-";
header.creator_hardware+= std::string(name.release);
}
#define dump_meta_data(field, s) \
s << "BEGIN_HEADER" << std::endl; \
s << "HDR_VERSION = " << field.hdr_version << std::endl; \
s << "DATATYPE = " << field.data_type << std::endl; \
s << "STORAGE_FORMAT = " << field.storage_format << std::endl; \
for(int i=0;i<4;i++){ \
s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \
} \
s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; \
s << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl; \
for(int i=0;i<4;i++){ \
s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl; \
} \
\
s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl; \
s << "SCIDAC_CHECKSUMA = "<< std::hex << std::setw(10) << field.scidac_checksuma << std::dec<<std::endl; \
s << "SCIDAC_CHECKSUMB = "<< std::hex << std::setw(10) << field.scidac_checksumb << std::dec<<std::endl; \
s << "ENSEMBLE_ID = " << field.ensemble_id << std::endl; \
s << "ENSEMBLE_LABEL = " << field.ensemble_label << std::endl; \
s << "SEQUENCE_NUMBER = " << field.sequence_number << std::endl; \
s << "CREATOR = " << field.creator << std::endl; \
s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl; \
s << "CREATION_DATE = " << field.creation_date << std::endl; \
s << "ARCHIVE_DATE = " << field.archive_date << std::endl; \
s << "FLOATING_POINT = " << field.floating_point << std::endl; \
s << "END_HEADER" << std::endl;
template<class vobj> inline void PrepareMetaData(Lattice<vobj> & field, FieldMetaData &header)
{
GridBase *grid = field._grid;
std::string format = getFormatString<vobj>();
header.floating_point = format;
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
GridMetaData(grid,header);
MachineCharacteristics(header);
}
inline void GaugeStatistics(Lattice<vLorentzColourMatrixF> & data,FieldMetaData &header)
{
// How to convert data precision etc...
header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplF>::linkTrace(data);
header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplF>::avgPlaquette(data);
}
inline void GaugeStatistics(Lattice<vLorentzColourMatrixD> & data,FieldMetaData &header)
{
// How to convert data precision etc...
header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplD>::linkTrace(data);
header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplD>::avgPlaquette(data);
}
template<> inline void PrepareMetaData<vLorentzColourMatrixF>(Lattice<vLorentzColourMatrixF> & field, FieldMetaData &header)
{
GridBase *grid = field._grid;
std::string format = getFormatString<vLorentzColourMatrixF>();
header.floating_point = format;
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
GridMetaData(grid,header);
GaugeStatistics(field,header);
MachineCharacteristics(header);
}
template<> inline void PrepareMetaData<vLorentzColourMatrixD>(Lattice<vLorentzColourMatrixD> & field, FieldMetaData &header)
{
GridBase *grid = field._grid;
std::string format = getFormatString<vLorentzColourMatrixD>();
header.floating_point = format;
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
GridMetaData(grid,header);
GaugeStatistics(field,header);
MachineCharacteristics(header);
}
//////////////////////////////////////////////////////////////////////
// Utilities ; these are QCD aware
//////////////////////////////////////////////////////////////////////
inline void reconstruct3(LorentzColourMatrix & cm)
{
const int x=0;
const int y=1;
const int z=2;
for(int mu=0;mu<Nd;mu++){
cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
}
}
////////////////////////////////////////////////////////////////////////////////
// Some data types for intermediate storage
////////////////////////////////////////////////////////////////////////////////
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, Nd >;
typedef iLorentzColour2x3<Complex> LorentzColour2x3;
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
/////////////////////////////////////////////////////////////////////////////////
// Simple classes for precision conversion
/////////////////////////////////////////////////////////////////////////////////
template <class fobj, class sobj>
struct BinarySimpleUnmunger {
typedef typename getPrecision<fobj>::real_scalar_type fobj_stype;
typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
void operator()(sobj &in, fobj &out) {
// take word by word and transform accoding to the status
fobj_stype *out_buffer = (fobj_stype *)&out;
sobj_stype *in_buffer = (sobj_stype *)&in;
size_t fobj_words = sizeof(out) / sizeof(fobj_stype);
size_t sobj_words = sizeof(in) / sizeof(sobj_stype);
assert(fobj_words == sobj_words);
for (unsigned int word = 0; word < sobj_words; word++)
out_buffer[word] = in_buffer[word]; // type conversion on the fly
}
};
template <class fobj, class sobj>
struct BinarySimpleMunger {
typedef typename getPrecision<fobj>::real_scalar_type fobj_stype;
typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
void operator()(fobj &in, sobj &out) {
// take word by word and transform accoding to the status
fobj_stype *in_buffer = (fobj_stype *)&in;
sobj_stype *out_buffer = (sobj_stype *)&out;
size_t fobj_words = sizeof(in) / sizeof(fobj_stype);
size_t sobj_words = sizeof(out) / sizeof(sobj_stype);
assert(fobj_words == sobj_words);
for (unsigned int word = 0; word < sobj_words; word++)
out_buffer[word] = in_buffer[word]; // type conversion on the fly
}
};
template<class fobj,class sobj>
struct GaugeSimpleMunger{
void operator()(fobj &in, sobj &out) {
for (int mu = 0; mu < Nd; mu++) {
for (int i = 0; i < Nc; i++) {
for (int j = 0; j < Nc; j++) {
out(mu)()(i, j) = in(mu)()(i, j);
}}
}
};
};
template <class fobj, class sobj>
struct GaugeSimpleUnmunger {
void operator()(sobj &in, fobj &out) {
for (int mu = 0; mu < Nd; mu++) {
for (int i = 0; i < Nc; i++) {
for (int j = 0; j < Nc; j++) {
out(mu)()(i, j) = in(mu)()(i, j);
}}
}
};
};
template<class fobj,class sobj>
struct Gauge3x2munger{
void operator() (fobj &in,sobj &out){
for(int mu=0;mu<Nd;mu++){
for(int i=0;i<2;i++){
for(int j=0;j<3;j++){
out(mu)()(i,j) = in(mu)(i)(j);
}}
}
reconstruct3(out);
}
};
template<class fobj,class sobj>
struct Gauge3x2unmunger{
void operator() (sobj &in,fobj &out){
for(int mu=0;mu<Nd;mu++){
for(int i=0;i<2;i++){
for(int j=0;j<3;j++){
out(mu)(i)(j) = in(mu)()(i,j);
}}
}
}
};
}
}

363
Grid/parallelIO/NerscIO.h Normal file
View File

@@ -0,0 +1,363 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/parallelIO/NerscIO.h
Copyright (C) 2015
Author: Matt Spraggs <matthew.spraggs@gmail.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_NERSC_IO_H
#define GRID_NERSC_IO_H
namespace Grid {
namespace QCD {
using namespace Grid;
////////////////////////////////////////////////////////////////////////////////
// Write and read from fstream; comput header offset for payload
////////////////////////////////////////////////////////////////////////////////
class NerscIO : public BinaryIO {
public:
static inline void truncate(std::string file){
std::ofstream fout(file,std::ios::out);
}
static inline unsigned int writeHeader(FieldMetaData &field,std::string file)
{
std::ofstream fout(file,std::ios::out|std::ios::in);
fout.seekp(0,std::ios::beg);
dump_meta_data(field, fout);
field.data_start = fout.tellp();
return field.data_start;
}
// for the header-reader
static inline int readHeader(std::string file,GridBase *grid, FieldMetaData &field)
{
uint64_t offset=0;
std::map<std::string,std::string> header;
std::string line;
//////////////////////////////////////////////////
// read the header
//////////////////////////////////////////////////
std::ifstream fin(file);
getline(fin,line); // read one line and insist is
removeWhitespace(line);
std::cout << GridLogMessage << "* " << line << std::endl;
assert(line==std::string("BEGIN_HEADER"));
do {
getline(fin,line); // read one line
std::cout << GridLogMessage << "* "<<line<< std::endl;
int eq = line.find("=");
if(eq >0) {
std::string key=line.substr(0,eq);
std::string val=line.substr(eq+1);
removeWhitespace(key);
removeWhitespace(val);
header[key] = val;
}
} while( line.find("END_HEADER") == std::string::npos );
field.data_start = fin.tellg();
//////////////////////////////////////////////////
// chomp the values
//////////////////////////////////////////////////
field.hdr_version = header["HDR_VERSION"];
field.data_type = header["DATATYPE"];
field.storage_format = header["STORAGE_FORMAT"];
field.dimension[0] = std::stol(header["DIMENSION_1"]);
field.dimension[1] = std::stol(header["DIMENSION_2"]);
field.dimension[2] = std::stol(header["DIMENSION_3"]);
field.dimension[3] = std::stol(header["DIMENSION_4"]);
assert(grid->_ndimension == 4);
for(int d=0;d<4;d++){
assert(grid->_fdimensions[d]==field.dimension[d]);
}
field.link_trace = std::stod(header["LINK_TRACE"]);
field.plaquette = std::stod(header["PLAQUETTE"]);
field.boundary[0] = header["BOUNDARY_1"];
field.boundary[1] = header["BOUNDARY_2"];
field.boundary[2] = header["BOUNDARY_3"];
field.boundary[3] = header["BOUNDARY_4"];
field.checksum = std::stoul(header["CHECKSUM"],0,16);
field.ensemble_id = header["ENSEMBLE_ID"];
field.ensemble_label = header["ENSEMBLE_LABEL"];
field.sequence_number = std::stol(header["SEQUENCE_NUMBER"]);
field.creator = header["CREATOR"];
field.creator_hardware = header["CREATOR_HARDWARE"];
field.creation_date = header["CREATION_DATE"];
field.archive_date = header["ARCHIVE_DATE"];
field.floating_point = header["FLOATING_POINT"];
return field.data_start;
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Now the meat: the object readers
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vsimd>
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,
FieldMetaData& header,
std::string file)
{
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
GridBase *grid = Umu._grid;
uint64_t offset = readHeader(file,Umu._grid,header);
FieldMetaData clone(header);
std::string format(header.floating_point);
int ieee32big = (format == std::string("IEEE32BIG"));
int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64"));
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
// depending on datatype, set up munger;
// munger is a function of <floating point, Real, data_type>
if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
if ( ieee32 || ieee32big ) {
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
(Umu,file,Gauge3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format,
nersc_csum,scidac_csuma,scidac_csumb);
}
if ( ieee64 || ieee64big ) {
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
(Umu,file,Gauge3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format,
nersc_csum,scidac_csuma,scidac_csumb);
}
} else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) {
if ( ieee32 || ieee32big ) {
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
(Umu,file,GaugeSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format,
nersc_csum,scidac_csuma,scidac_csumb);
}
if ( ieee64 || ieee64big ) {
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
(Umu,file,GaugeSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format,
nersc_csum,scidac_csuma,scidac_csumb);
}
} else {
assert(0);
}
GaugeStatistics(Umu,clone);
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<<nersc_csum<< std::dec
<<" header "<<std::hex<<header.checksum<<std::dec <<std::endl;
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" plaquette "<<clone.plaquette
<<" header "<<header.plaquette<<std::endl;
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" link_trace "<<clone.link_trace
<<" header "<<header.link_trace<<std::endl;
if ( fabs(clone.plaquette -header.plaquette ) >= 1.0e-5 ) {
std::cout << " Plaquette mismatch "<<std::endl;
std::cout << Umu[0]<<std::endl;
std::cout << Umu[1]<<std::endl;
}
if ( nersc_csum != header.checksum ) {
std::cerr << " checksum mismatch " << std::endl;
std::cerr << " plaqs " << clone.plaquette << " " << header.plaquette << std::endl;
std::cerr << " trace " << clone.link_trace<< " " << header.link_trace<< std::endl;
std::cerr << " nersc_csum " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl;
exit(0);
}
assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
assert(nersc_csum == header.checksum );
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
}
template<class vsimd>
static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,
std::string file,
int two_row,
int bits32)
{
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
typedef iLorentzColourMatrix<vsimd> vobj;
typedef typename vobj::scalar_object sobj;
FieldMetaData header;
///////////////////////////////////////////
// Following should become arguments
///////////////////////////////////////////
header.sequence_number = 1;
header.ensemble_id = "UKQCD";
header.ensemble_label = "DWF";
typedef LorentzColourMatrixD fobj3D;
typedef LorentzColour2x3D fobj2D;
GridBase *grid = Umu._grid;
GridMetaData(grid,header);
assert(header.nd==4);
GaugeStatistics(Umu,header);
MachineCharacteristics(header);
uint64_t offset;
// Sod it -- always write 3x3 double
header.floating_point = std::string("IEEE64BIG");
header.data_type = std::string("4D_SU3_GAUGE_3x3");
GaugeSimpleUnmunger<fobj3D,sobj> munge;
if ( grid->IsBoss() ) {
truncate(file);
offset = writeHeader(header,file);
}
grid->Broadcast(0,(void *)&offset,sizeof(offset));
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
nersc_csum,scidac_csuma,scidac_csumb);
header.checksum = nersc_csum;
if ( grid->IsBoss() ) {
writeHeader(header,file);
}
std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum "
<<std::hex<<header.checksum
<<std::dec<<" plaq "<< header.plaquette <<std::endl;
}
///////////////////////////////
// RNG state
///////////////////////////////
static inline void writeRNGState(GridSerialRNG &serial,GridParallelRNG &parallel,std::string file)
{
typedef typename GridParallelRNG::RngStateType RngStateType;
// Following should become arguments
FieldMetaData header;
header.sequence_number = 1;
header.ensemble_id = "UKQCD";
header.ensemble_label = "DWF";
GridBase *grid = parallel._grid;
GridMetaData(grid,header);
assert(header.nd==4);
header.link_trace=0.0;
header.plaquette=0.0;
MachineCharacteristics(header);
uint64_t offset;
#ifdef RNG_RANLUX
header.floating_point = std::string("UINT64");
header.data_type = std::string("RANLUX48");
#endif
#ifdef RNG_MT19937
header.floating_point = std::string("UINT32");
header.data_type = std::string("MT19937");
#endif
#ifdef RNG_SITMO
header.floating_point = std::string("UINT64");
header.data_type = std::string("SITMO");
#endif
if ( grid->IsBoss() ) {
truncate(file);
offset = writeHeader(header,file);
}
grid->Broadcast(0,(void *)&offset,sizeof(offset));
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);
header.checksum = nersc_csum;
if ( grid->IsBoss() ) {
offset = writeHeader(header,file);
}
std::cout<<GridLogMessage
<<"Written NERSC RNG STATE "<<file<< " checksum "
<<std::hex<<header.checksum
<<std::dec<<std::endl;
}
static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel,FieldMetaData& header,std::string file)
{
typedef typename GridParallelRNG::RngStateType RngStateType;
GridBase *grid = parallel._grid;
uint64_t offset = readHeader(file,grid,header);
FieldMetaData clone(header);
std::string format(header.floating_point);
std::string data_type(header.data_type);
#ifdef RNG_RANLUX
assert(format == std::string("UINT64"));
assert(data_type == std::string("RANLUX48"));
#endif
#ifdef RNG_MT19937
assert(format == std::string("UINT32"));
assert(data_type == std::string("MT19937"));
#endif
#ifdef RNG_SITMO
assert(format == std::string("UINT64"));
assert(data_type == std::string("SITMO"));
#endif
// depending on datatype, set up munger;
// munger is a function of <floating point, Real, data_type>
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::readRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);
if ( nersc_csum != header.checksum ) {
std::cerr << "checksum mismatch "<<std::hex<< nersc_csum <<" "<<header.checksum<<std::dec<<std::endl;
exit(0);
}
assert(nersc_csum == header.checksum );
std::cout<<GridLogMessage <<"Read NERSC RNG file "<<file<< " format "<< data_type <<std::endl;
}
};
}}
#endif

View File

@@ -26,8 +26,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/PerfCount.h>
#include <Grid/GridCore.h>
#include <Grid/perfmon/PerfCount.h>
namespace Grid {
@@ -40,7 +40,7 @@ const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::Performan
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES , "CPUCYCLES.........." , INSTRUCTIONS},
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS , "INSTRUCTIONS......." , CPUCYCLES },
// 4
#ifdef AVX512
#ifdef KNL
{ PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", CPUCYCLES },
{ PERF_TYPE_RAW, RawConfig(0x01,0x04), "L1_MISS_LOADS......", L1D_READ_ACCESS },
{ PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", L1D_READ_ACCESS },

View File

@@ -172,7 +172,7 @@ public:
const char * name = PerformanceCounterConfigs[PCT].name;
fd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
if (fd == -1) {
fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name);
fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name);
perror("Error is");
}
int norm = PerformanceCounterConfigs[PCT].normalisation;
@@ -181,7 +181,7 @@ public:
name = PerformanceCounterConfigs[norm].name;
cyclefd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
if (cyclefd == -1) {
fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name);
fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name);
perror("Error is");
}
#endif
@@ -205,13 +205,14 @@ public:
void Stop(void) {
count=0;
cycles=0;
size_t ign;
#ifdef __linux__
ssize_t ign;
if ( fd!= -1) {
::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0);
ign=::read(fd, &count, sizeof(long long));
ign=::read(cyclefd, &cycles, sizeof(long long));
ign+=::read(cyclefd, &cycles, sizeof(long long));
assert(ign=2*sizeof(long long));
}
elapsed = cyclecount() - begin;
#else

View File

@@ -1,11 +1,9 @@
#include <Grid/Grid.h>
#include <Grid/PerfCount.h>
#include <Grid/Stat.h>
#include <Grid/GridCore.h>
#include <Grid/perfmon/PerfCount.h>
#include <Grid/perfmon/Stat.h>
namespace Grid {
bool PmuStat::pmu_initialized=false;

View File

@@ -49,14 +49,38 @@ inline double usecond(void) {
typedef std::chrono::system_clock GridClock;
typedef std::chrono::time_point<GridClock> GridTimePoint;
typedef std::chrono::milliseconds GridTime;
typedef std::chrono::microseconds GridUsecs;
inline std::ostream& operator<< (std::ostream & stream, const std::chrono::milliseconds & time)
typedef std::chrono::seconds GridSecs;
typedef std::chrono::milliseconds GridMillisecs;
typedef std::chrono::microseconds GridUsecs;
typedef std::chrono::microseconds GridTime;
inline std::ostream& operator<< (std::ostream & stream, const GridSecs & time)
{
stream << time.count()<<" ms";
stream << time.count()<<" s";
return stream;
}
inline std::ostream& operator<< (std::ostream & stream, const GridMillisecs & now)
{
GridSecs second(1);
auto secs = now/second ;
auto subseconds = now%second ;
auto fill = stream.fill();
stream << secs<<"."<<std::setw(3)<<std::setfill('0')<<subseconds.count()<<" s";
stream.fill(fill);
return stream;
}
inline std::ostream& operator<< (std::ostream & stream, const GridUsecs & now)
{
GridSecs second(1);
auto seconds = now/second ;
auto subseconds = now%second ;
auto fill = stream.fill();
stream << seconds<<"."<<std::setw(6)<<std::setfill('0')<<subseconds.count()<<" s";
stream.fill(fill);
return stream;
}
class GridStopWatch {
private:
@@ -96,6 +120,9 @@ public:
assert(running == false);
return (uint64_t) accumulator.count();
}
bool isRunning(void){
return running;
}
};
}

View File

@@ -1,7 +1,7 @@
/**
* pugixml parser - version 1.6
* pugixml parser - version 1.9
* --------------------------------------------------------
* Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
* Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
* Report bugs and download new versions at http://pugixml.org/
*
* This library is distributed under the MIT License. See notice at the end
@@ -17,6 +17,9 @@
// Uncomment this to enable wchar_t mode
// #define PUGIXML_WCHAR_MODE
// Uncomment this to enable compact mode
// #define PUGIXML_COMPACT
// Uncomment this to disable XPath
// #define PUGIXML_NO_XPATH
@@ -46,7 +49,7 @@
#endif
/**
* Copyright (c) 2006-2015 Arseny Kapoulkine
* Copyright (c) 2006-2018 Arseny Kapoulkine
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation

Some files were not shown because too many files have changed in this diff Show More