Mirror of https://github.com/paboyle/Grid.git, synced 2025-06-17 07:17:06 +01:00

Compare commits: feature/cl ... feature/re (690 commits)
Commits 49b934310b ... ef0fe2bcc1
.gitignore (vendored) — 25 changed lines
@@ -83,6 +83,7 @@ ltmain.sh
 .Trashes
 ehthumbs.db
 Thumbs.db
+.dirstamp
 
 # build directory #
 ###################
@@ -97,11 +98,8 @@ build.sh
 
 # Eigen source #
 ################
-lib/Eigen/*
-
-# FFTW source #
-################
-lib/fftw/*
+Grid/Eigen
+Eigen/*
 
 # libtool macros #
 ##################
@@ -112,20 +110,7 @@ m4/libtool.m4
 ################
 gh-pages/
 
-# Buck files #
-##############
-.buck*
-buck-out
-BUCK
-make-bin-BUCK.sh
-
 # generated sources #
 #####################
-lib/qcd/spin/gamma-gen/*.h
-lib/qcd/spin/gamma-gen/*.cc
-
-# vs code editor files #
-########################
-.vscode/
-.vscode/settings.json
-settings.json
+Grid/qcd/spin/gamma-gen/*.h
+Grid/qcd/spin/gamma-gen/*.cc
.travis.yml — 26 changed lines
@@ -9,6 +9,11 @@ matrix:
     - os: osx
       osx_image: xcode8.3
       compiler: clang
+      env: PREC=single
+    - os: osx
+      osx_image: xcode8.3
+      compiler: clang
+      env: PREC=double
 
 before_install:
     - export GRIDDIR=`pwd`
@@ -16,9 +21,11 @@ before_install:
     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi
     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
-    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
+    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc openssl; fi
 
 install:
+    - export CWD=`pwd`
+    - echo $CWD
     - export CC=$CC$VERSION
     - export CXX=$CXX$VERSION
     - echo $PATH
@@ -31,17 +38,24 @@ install:
     - which $CXX
    - $CXX --version
     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
+    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export EXTRACONF='--with-openssl=/usr/local/opt/openssl'; fi
 
 script:
     - ./bootstrap.sh
     - mkdir build
     - cd build
-    - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
+    - mkdir lime
+    - cd lime
+    - mkdir build
+    - cd build
+    - wget http://usqcd-software.github.io/downloads/c-lime/lime-1.3.2.tar.gz
+    - tar xf lime-1.3.2.tar.gz
+    - cd lime-1.3.2
+    - ./configure --prefix=$CWD/build/lime/install
     - make -j4
-    - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
-    - echo make clean
-    - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
-    - make -j4
+    - make install
+    - cd $CWD/build
+    - ../configure --enable-precision=$PREC --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF}
+    - make -j4
     - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
     - make check
@@ -48,6 +48,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #include <Grid/serialisation/Serialisation.h>
 #include <Grid/threads/Threads.h>
 #include <Grid/util/Util.h>
+#include <Grid/util/Sha.h>
 #include <Grid/communicator/Communicator.h>
 #include <Grid/cartesian/Cartesian.h>
 #include <Grid/tensors/Tensors.h>
@@ -1,4 +1,9 @@
 #pragma once
+// Force Eigen to use MKL if Grid has been configured with --enable-mkl
+#ifdef USE_MKL
+#define EIGEN_USE_MKL_ALL
+#endif
+
 #if defined __GNUC__
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
@@ -21,6 +21,32 @@ if BUILD_HDF5
 extra_headers+=serialisation/Hdf5Type.h
 endif
 
+all: version-cache
+
+version-cache:
+	@if [ `git status --porcelain | grep -v '??' | wc -l` -gt 0 ]; then\
+		a="uncommited changes";\
+	else\
+		a="clean";\
+	fi;\
+	echo "`git log -n 1 --format=format:"#define GITHASH \\"%H:%d $$a\\"%n" HEAD`" > vertmp;\
+	if [ -e version-cache ]; then\
+		d=`diff vertmp version-cache`;\
+		if [ "$${d}" != "" ]; then\
+			mv vertmp version-cache;\
+			rm -f Version.h;\
+		fi;\
+	else\
+		mv vertmp version-cache;\
+		rm -f Version.h;\
+	fi;\
+	rm -f vertmp
+
+Version.h:
+	cp version-cache Version.h
+
+.PHONY: version-cache
+
 #
 # Libraries
 #
@@ -30,8 +56,8 @@ include Eigen.inc
 lib_LIBRARIES = libGrid.a
 
 CCFILES += $(extra_sources)
-HFILES  += $(extra_headers)
+HFILES  += $(extra_headers) Config.h Version.h
 
 libGrid_a_SOURCES              = $(CCFILES)
-libGrid_adir                   = $(pkgincludedir)
-nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h
+libGrid_adir                   = $(includedir)/Grid
+nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) $(eigen_unsupp_files)
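For context, the version-cache rule above snapshots the current git state into a single preprocessor define and regenerates Version.h only when the hash or clean/dirty state changes, so incremental builds are not needlessly invalidated. A sketch of what the generated header would contain (hash and branch decoration are illustrative, not taken from this page):

    // Version.h - produced by "cp version-cache Version.h" (illustrative values)
    #define GITHASH "0123456789abcdef0123456789abcdef01234567: (HEAD -> develop) clean"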
@@ -39,6 +39,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/algorithms/approx/MultiShiftFunction.h>
 #include <Grid/algorithms/approx/Forecast.h>
 
+#include <Grid/algorithms/iterative/Deflation.h>
 #include <Grid/algorithms/iterative/ConjugateGradient.h>
 #include <Grid/algorithms/iterative/ConjugateResidual.h>
 #include <Grid/algorithms/iterative/NormalEquations.h>
@@ -51,7 +51,7 @@ namespace Grid {
 
       virtual void Op     (const Field &in, Field &out) = 0; // Abstract base
       virtual void AdjOp  (const Field &in, Field &out) = 0; // Abstract base
-      virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0;
+      virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) = 0;
       virtual void HermOp(const Field &in, Field &out)=0;
     };
 
@@ -309,36 +309,59 @@ namespace Grid {
     class SchurStaggeredOperator : public SchurOperatorBase<Field> {
     protected:
       Matrix &_Mat;
+      Field tmp;
+      RealD mass;
+      double tMpc;
+      double tIP;
+      double tMeo;
+      double taxpby_norm;
+      uint64_t ncall;
     public:
-      SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){};
+      void Report(void)
+      {
+        std::cout << GridLogMessage << " HermOpAndNorm.Mpc "<< tMpc/ncall<<" usec "<<std::endl;
+        std::cout << GridLogMessage << " HermOpAndNorm.IP  "<< tIP /ncall<<" usec "<<std::endl;
+        std::cout << GridLogMessage << " Mpc.MeoMoe        "<< tMeo/ncall<<" usec "<<std::endl;
+        std::cout << GridLogMessage << " Mpc.axpby_norm    "<< taxpby_norm/ncall<<" usec "<<std::endl;
+      }
+      SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())
+      {
+        assert( _Mat.isTrivialEE() );
+        mass = _Mat.Mass();
+        tMpc=0;
+        tIP =0;
+        tMeo=0;
+        taxpby_norm=0;
+        ncall=0;
+      }
       virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
-        GridLogIterative.TimingMode(1);
-        std::cout << GridLogIterative << " HermOpAndNorm "<<std::endl;
+        ncall++;
+        tMpc-=usecond();
         n2 = Mpc(in,out);
-        std::cout << GridLogIterative << " HermOpAndNorm.Mpc "<<std::endl;
+        tMpc+=usecond();
+        tIP-=usecond();
         ComplexD dot= innerProduct(in,out);
-        std::cout << GridLogIterative << " HermOpAndNorm.innerProduct "<<std::endl;
+        tIP+=usecond();
         n1 = real(dot);
       }
       virtual void HermOp(const Field &in, Field &out){
-        std::cout << GridLogIterative << " HermOp "<<std::endl;
-        Mpc(in,out);
+        ncall++;
+        tMpc-=usecond();
+        _Mat.Meooe(in,out);
+        _Mat.Meooe(out,tmp);
+        tMpc+=usecond();
+        taxpby_norm-=usecond();
+        axpby(out,-1.0,mass*mass,tmp,in);
+        taxpby_norm+=usecond();
       }
       virtual RealD Mpc (const Field &in, Field &out) {
-        Field tmp(in._grid);
-        Field tmp2(in._grid);
-
-        std::cout << GridLogIterative << " HermOp.Mpc "<<std::endl;
-        _Mat.Mooee(in,out);
-        _Mat.Mooee(out,tmp);
-        std::cout << GridLogIterative << " HermOp.MooeeMooee "<<std::endl;
-
+        tMeo-=usecond();
         _Mat.Meooe(in,out);
-        _Mat.Meooe(out,tmp2);
-        std::cout << GridLogIterative << " HermOp.MeooeMeooe "<<std::endl;
-
-        RealD nn=axpy_norm(out,-1.0,tmp2,tmp);
-        std::cout << GridLogIterative << " HermOp.axpy_norm "<<std::endl;
+        _Mat.Meooe(out,tmp);
+        tMeo+=usecond();
+        taxpby_norm-=usecond();
+        RealD nn=axpby_norm(out,-1.0,mass*mass,tmp,in);
+        taxpby_norm+=usecond();
         return nn;
       }
       virtual RealD MpcDag (const Field &in, Field &out){
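Reading the rewritten operator: assuming Grid's axpby(out,a,b,x,y) computes out = a*x + b*y (and axpby_norm additionally returns norm2(out)), HermOp and Mpc now apply a mass-shifted staggered Schur complement, which is legitimate because the constructor asserts isTrivialEE(), i.e. the even-even block is just the mass term:

\[
M_{pc}\,\psi \;=\; m^2\,\psi \;-\; M_{oe} M_{eo}\,\psi ,
\qquad nn \;=\; \lVert M_{pc}\,\psi \rVert^2 ,
\]

replacing the generic Mooee/Meooe combination of the old code and saving the two Mooee applications per operator call.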
@@ -357,6 +380,12 @@ namespace Grid {
     template<class Field> class OperatorFunction {
     public:
       virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
+      virtual void operator() (LinearOperatorBase<Field> &Linop, const std::vector<Field> &in,std::vector<Field> &out) {
+        assert(in.size()==out.size());
+        for(int k=0;k<in.size();k++){
+          (*this)(Linop,in[k],out[k]);
+        }
+      };
     };
 
     template<class Field> class LinearFunction {
@@ -55,6 +55,14 @@ namespace Grid {
     template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrixBase<Field> {
     public:
       virtual GridBase *RedBlackGrid(void)=0;
+
+      //////////////////////////////////////////////////////////////////////
+      // Query the even even properties to make algorithmic decisions
+      //////////////////////////////////////////////////////////////////////
+      virtual RealD  Mass(void)        { return 0.0; };
+      virtual int    ConstEE(void)     { return 0; };  // Disable assumptions unless overridden
+      virtual int    isTrivialEE(void) { return 0; };  // by a derived class that knows better
+
       // half checkerboard operaions
       virtual void Meooe    (const Field &in, Field &out)=0;
       virtual void Mooee    (const Field &in, Field &out)=0;
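To illustrate the intent of these new virtuals, here is a minimal standalone sketch of the query pattern (simplified types and illustrative names, not Grid's actual classes; the real base carries many more pure virtual methods). A derived matrix that knows its even-even block is trivial advertises the fact, and a solver may then branch on it, as SchurStaggeredOperator does above:

    // Standalone sketch: "query the even-even properties" pattern
    #include <cassert>
    #include <iostream>

    struct CheckerBoardedBase {
      virtual double Mass(void)        { return 0.0; }
      virtual int    isTrivialEE(void) { return 0; }  // safe default: assume nothing
      virtual ~CheckerBoardedBase() {}
    };

    struct StaggeredLike : CheckerBoardedBase {
      double m;
      StaggeredLike(double mass) : m(mass) {}
      double Mass(void)        override { return m; }
      int    isTrivialEE(void) override { return 1; } // Mee = m * identity
    };

    int main() {
      StaggeredLike M(0.01);
      CheckerBoardedBase &base = M;
      assert(base.isTrivialEE()); // algorithm may use the simplified Schur form
      std::cout << "mass = " << base.Mass() << "\n";
    }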
@@ -33,7 +33,7 @@ directory
 
 namespace Grid {
 
-enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS };
+enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS, BlockCGVec, BlockCGrQVec };
 
 //////////////////////////////////////////////////////////////////////////
 // Block conjugate gradient. Dimension zero should be the block direction
@@ -42,7 +42,6 @@ template <class Field>
 class BlockConjugateGradient : public OperatorFunction<Field> {
  public:
 
-
   typedef typename Field::scalar_type scomplex;
 
   int blockDim ;
@@ -54,21 +53,15 @@ class BlockConjugateGradient : public OperatorFunction<Field> {
   RealD Tolerance;
   Integer MaxIterations;
   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
+  Integer PrintInterval; //GridLogMessages or Iterative
 
   BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true)
-    : Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv)
+    : Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv),PrintInterval(100)
   {};
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Thin QR factorisation (google it)
 ////////////////////////////////////////////////////////////////////////////////////////////////////
-void ThinQRfact (Eigen::MatrixXcd &m_rr,
-                 Eigen::MatrixXcd &C,
-                 Eigen::MatrixXcd &Cinv,
-                 Field & Q,
-                 const Field & R)
-{
-  int Orthog = blockDim; // First dimension is block dim; this is an assumption
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 //Dimensions
 // R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock
@@ -85,22 +78,20 @@ void ThinQRfact (Eigen::MatrixXcd &m_rr,
 // Cdag C = Rdag R ; passes.
 // QdagQ  = 1      ; passes
 ////////////////////////////////////////////////////////////////////////////////////////////////////
+void ThinQRfact (Eigen::MatrixXcd &m_rr,
+                 Eigen::MatrixXcd &C,
+                 Eigen::MatrixXcd &Cinv,
+                 Field & Q,
+                 const Field & R)
+{
+  int Orthog = blockDim; // First dimension is block dim; this is an assumption
   sliceInnerProductMatrix(m_rr,R,R,Orthog);
 
   // Force manifest hermitian to avoid rounding related
   m_rr = 0.5*(m_rr+m_rr.adjoint());
 
-#if 0
-  std::cout << " Calling Cholesky ldlt on m_rr " << m_rr <<std::endl;
-  Eigen::MatrixXcd L_ldlt = m_rr.ldlt().matrixL();
-  std::cout << " Called Cholesky ldlt on m_rr " << L_ldlt <<std::endl;
-  auto D_ldlt = m_rr.ldlt().vectorD();
-  std::cout << " Called Cholesky ldlt on m_rr " << D_ldlt <<std::endl;
-#endif
-
-  // std::cout << " Calling Cholesky llt on m_rr " <<std::endl;
   Eigen::MatrixXcd L = m_rr.llt().matrixL();
-  // std::cout << " Called Cholesky llt on m_rr " << L <<std::endl;
+
   C    = L.adjoint();
   Cinv = C.inverse();
   ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -112,6 +103,25 @@ void ThinQRfact (Eigen::MatrixXcd &m_rr,
   ////////////////////////////////////////////////////////////////////////////////////////////////////
   sliceMulMatrix(Q,Cinv,R,Orthog);
 }
+// see comments above
+void ThinQRfact (Eigen::MatrixXcd &m_rr,
+                 Eigen::MatrixXcd &C,
+                 Eigen::MatrixXcd &Cinv,
+                 std::vector<Field> & Q,
+                 const std::vector<Field> & R)
+{
+  InnerProductMatrix(m_rr,R,R);
+
+  m_rr = 0.5*(m_rr+m_rr.adjoint());
+
+  Eigen::MatrixXcd L = m_rr.llt().matrixL();
+
+  C    = L.adjoint();
+  Cinv = C.inverse();
+
+  MulMatrix(Q,Cinv,R);
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Call one of several implementations
 ////////////////////////////////////////////////////////////////////////////////////////////////////
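For readers unfamiliar with the "thin QR factorisation" both overloads implement: given a block of Nblock vectors R, form the small Nblock x Nblock Gram matrix and Cholesky-factor it; this is the standard Cholesky-QR construction, and it matches the checks noted in the comments above (Cdag C = Rdag R, Qdag Q = 1):

\[
m_{rr} = R^\dagger R = L L^\dagger, \qquad
C = L^\dagger, \qquad
Q = R\,C^{-1}
\;\Rightarrow\;
Q^\dagger Q = C^{-\dagger}\,(R^\dagger R)\,C^{-1} = 1,
\qquad R = Q\,C .
\]

The symmetrisation m_rr = 0.5*(m_rr + m_rr.adjoint()) forces the Gram matrix to be manifestly Hermitian before the Cholesky, guarding against rounding.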
@@ -119,14 +129,20 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
 {
   if ( CGtype == BlockCGrQ ) {
     BlockCGrQsolve(Linop,Src,Psi);
-  } else if (CGtype == BlockCG ) {
-    BlockCGsolve(Linop,Src,Psi);
   } else if (CGtype == CGmultiRHS ) {
     CGmultiRHSsolve(Linop,Src,Psi);
   } else {
     assert(0);
   }
 }
+virtual void operator()(LinearOperatorBase<Field> &Linop, const std::vector<Field> &Src, std::vector<Field> &Psi)
+{
+  if ( CGtype == BlockCGrQVec ) {
+    BlockCGrQsolveVec(Linop,Src,Psi);
+  } else {
+    assert(0);
+  }
+}
 
 ////////////////////////////////////////////////////////////////////////////
 // BlockCGrQ implementation:
@@ -139,7 +155,8 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
 {
   int Orthog = blockDim; // First dimension is block dim; this is an assumption
   Nblock = B._grid->_fdimensions[Orthog];
-
+  /* FAKE */
+  Nblock=8;
   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
 
   X.checkerboard = B.checkerboard;
@@ -202,15 +219,10 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
   std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl;
 
   //1.  QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it)
 
   Linop.HermOp(X, AD);
   tmp = B - AD;
-  //std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl;
   ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
-  //std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl;
-  //std::cout << GridLogMessage << " m_rr " << m_rr<<std::endl;
-  //std::cout << GridLogMessage << " m_C " << m_C<<std::endl;
-  //std::cout << GridLogMessage << " m_Cinv " << m_Cinv<<std::endl;
   D=Q;
 
   std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl;
@@ -232,14 +244,12 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
     MatrixTimer.Start();
     Linop.HermOp(D, Z);
     MatrixTimer.Stop();
-    //std::cout << GridLogMessage << " norm2 Z " <<norm2(Z)<<std::endl;
 
     //4. M  = [D^dag Z]^{-1}
     sliceInnerTimer.Start();
     sliceInnerProductMatrix(m_DZ,D,Z,Orthog);
     sliceInnerTimer.Stop();
     m_M       = m_DZ.inverse();
-    //std::cout << GridLogMessage << " m_DZ " <<m_DZ<<std::endl;
 
     //5. X  = X + D MC
     m_tmp     = m_M * m_C;
@@ -257,6 +267,7 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
 
     //7. D  = Q + D S^dag
     m_tmp = m_S.adjoint();
+
     sliceMaddTimer.Start();
     sliceMaddMatrix(D,m_tmp,D,Q,Orthog);
     sliceMaddTimer.Stop();
@@ -317,152 +328,6 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
   IterationsToComplete = k;
 }
 //////////////////////////////////////////////////////////////////////////
-// Block conjugate gradient; Original O'Leary Dimension zero should be the block direction
-//////////////////////////////////////////////////////////////////////////
-void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
-{
-  int Orthog = blockDim; // First dimension is block dim; this is an assumption
-  Nblock = Src._grid->_fdimensions[Orthog];
-
-  std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
-
-  Psi.checkerboard = Src.checkerboard;
-  conformable(Psi, Src);
-
-  Field P(Src);
-  Field AP(Src);
-  Field R(Src);
-
-  Eigen::MatrixXcd m_pAp     = Eigen::MatrixXcd::Identity(Nblock,Nblock);
-  Eigen::MatrixXcd m_pAp_inv = Eigen::MatrixXcd::Identity(Nblock,Nblock);
-  Eigen::MatrixXcd m_rr      = Eigen::MatrixXcd::Zero(Nblock,Nblock);
-  Eigen::MatrixXcd m_rr_inv  = Eigen::MatrixXcd::Zero(Nblock,Nblock);
-
-  Eigen::MatrixXcd m_alpha   = Eigen::MatrixXcd::Zero(Nblock,Nblock);
-  Eigen::MatrixXcd m_beta    = Eigen::MatrixXcd::Zero(Nblock,Nblock);
-
-  // Initial residual computation & set up
-  std::vector<RealD> residuals(Nblock);
-  std::vector<RealD> ssq(Nblock);
-
-  sliceNorm(ssq,Src,Orthog);
-  RealD sssum=0;
-  for(int b=0;b<Nblock;b++) sssum+=ssq[b];
-
-  sliceNorm(residuals,Src,Orthog);
-  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
-
-  sliceNorm(residuals,Psi,Orthog);
-  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
-
-  // Initial search dir is guess
-  Linop.HermOp(Psi, AP);
-
-
-  /************************************************************************
-   * Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980)
-   ************************************************************************
-   * O'Leary : R = B - A X
-   * O'Leary : P = M R ; preconditioner M = 1
-   * O'Leary : alpha = PAP^{-1} RMR
-   * O'Leary : beta  = RMR^{-1}_old RMR_new
-   * O'Leary : X=X+Palpha
-   * O'Leary : R_new=R_old-AP alpha
-   * O'Leary : P=MR_new+P beta
-   */
-
-  R = Src - AP;
-  P = R;
-  sliceInnerProductMatrix(m_rr,R,R,Orthog);
-
-  GridStopWatch sliceInnerTimer;
-  GridStopWatch sliceMaddTimer;
-  GridStopWatch MatrixTimer;
-  GridStopWatch SolverTimer;
-  SolverTimer.Start();
-
-  int k;
-  for (k = 1; k <= MaxIterations; k++) {
-
-    RealD rrsum=0;
-    for(int b=0;b<Nblock;b++) rrsum+=real(m_rr(b,b));
-
-    std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
-              <<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
-
-    MatrixTimer.Start();
-    Linop.HermOp(P, AP);
-    MatrixTimer.Stop();
-
-    // Alpha
-    sliceInnerTimer.Start();
-    sliceInnerProductMatrix(m_pAp,P,AP,Orthog);
-    sliceInnerTimer.Stop();
-    m_pAp_inv = m_pAp.inverse();
-    m_alpha   = m_pAp_inv * m_rr ;
-
-    // Psi, R update
-    sliceMaddTimer.Start();
-    sliceMaddMatrix(Psi,m_alpha, P,Psi,Orthog);     // add alpha *  P to psi
-    sliceMaddMatrix(R  ,m_alpha,AP,  R,Orthog,-1.0);// sub alpha * AP to resid
-    sliceMaddTimer.Stop();
-
-    // Beta
-    m_rr_inv = m_rr.inverse();
-    sliceInnerTimer.Start();
-    sliceInnerProductMatrix(m_rr,R,R,Orthog);
-    sliceInnerTimer.Stop();
-    m_beta = m_rr_inv *m_rr;
-
-    // Search update
-    sliceMaddTimer.Start();
-    sliceMaddMatrix(AP,m_beta,P,R,Orthog);
-    sliceMaddTimer.Stop();
-    P= AP;
-
-    /*********************
-     * convergence monitor
-     *********************
-     */
-    RealD max_resid=0;
-    RealD rr;
-    for(int b=0;b<Nblock;b++){
-      rr = real(m_rr(b,b))/ssq[b];
-      if ( rr > max_resid ) max_resid = rr;
-    }
-
-    if ( max_resid < Tolerance*Tolerance ) {
-
-      SolverTimer.Stop();
-
-      std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl;
-      for(int b=0;b<Nblock;b++){
-        std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
-                  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
-      }
-      std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
-
-      Linop.HermOp(Psi, AP);
-      AP = AP-Src;
-      std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
-
-      std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
-      std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl;
-      std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl;
-      std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl;
-      std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl;
-
-      IterationsToComplete = k;
-      return;
-    }
-
-  }
-  std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl;
-
-  if (ErrorOnNoConverge) assert(0);
-  IterationsToComplete = k;
-}
-//////////////////////////////////////////////////////////////////////////
 // multiRHS conjugate gradient. Dimension zero should be the block direction
 // Use this for spread out across nodes
 //////////////////////////////////////////////////////////////////////////
@@ -600,6 +465,233 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &
   IterationsToComplete = k;
 }
 
+void InnerProductMatrix(Eigen::MatrixXcd &m , const std::vector<Field> &X, const std::vector<Field> &Y){
+  for(int b=0;b<Nblock;b++){
+  for(int bp=0;bp<Nblock;bp++) {
+    m(b,bp) = innerProduct(X[b],Y[bp]);
+  }}
+}
+void MaddMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X,const std::vector<Field> &Y,RealD scale=1.0){
+  // Should make this cache friendly with site outermost, parallel_for
+  // Deal with case AP aliases with either Y or X
+  std::vector<Field> tmp(Nblock,X[0]);
+  for(int b=0;b<Nblock;b++){
+    tmp[b] = Y[b];
+    for(int bp=0;bp<Nblock;bp++) {
+      tmp[b] = tmp[b] + (scale*m(bp,b))*X[bp];
+    }
+  }
+  for(int b=0;b<Nblock;b++){
+    AP[b] = tmp[b];
+  }
+}
+void MulMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X){
+  // Should make this cache friendly with site outermost, parallel_for
+  for(int b=0;b<Nblock;b++){
+    AP[b] = zero;
+    for(int bp=0;bp<Nblock;bp++) {
+      AP[b] += (m(bp,b))*X[bp];
+    }
+  }
+}
+double normv(const std::vector<Field> &P){
+  double nn = 0.0;
+  for(int b=0;b<Nblock;b++) {
+    nn+=norm2(P[b]);
+  }
+  return nn;
+}
+
+////////////////////////////////////////////////////////////////////////////
+// BlockCGrQvec implementation:
+//--------------------------
+// X is guess/Solution
+// B is RHS
+// Solve A X_i = B_i    ;    i refers to Nblock index
+////////////////////////////////////////////////////////////////////////////
+void BlockCGrQsolveVec(LinearOperatorBase<Field> &Linop, const std::vector<Field> &B, std::vector<Field> &X)
+{
+  Nblock = B.size();
+  assert(Nblock == X.size());
+
+  std::cout<<GridLogMessage<<" Block Conjugate Gradient Vec rQ : Nblock "<<Nblock<<std::endl;
+
+  for(int b=0;b<Nblock;b++){
+    X[b].checkerboard = B[b].checkerboard;
+    conformable(X[b], B[b]);
+    conformable(X[b], X[0]);
+  }
+
+  Field Fake(B[0]);
+
+  std::vector<Field> tmp(Nblock,Fake);
+  std::vector<Field>   Q(Nblock,Fake);
+  std::vector<Field>   D(Nblock,Fake);
+  std::vector<Field>   Z(Nblock,Fake);
+  std::vector<Field>  AD(Nblock,Fake);
+
+  Eigen::MatrixXcd m_DZ   = Eigen::MatrixXcd::Identity(Nblock,Nblock);
+  Eigen::MatrixXcd m_M    = Eigen::MatrixXcd::Identity(Nblock,Nblock);
+  Eigen::MatrixXcd m_rr   = Eigen::MatrixXcd::Zero(Nblock,Nblock);
+
+  Eigen::MatrixXcd m_C    = Eigen::MatrixXcd::Zero(Nblock,Nblock);
+  Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
+  Eigen::MatrixXcd m_S    = Eigen::MatrixXcd::Zero(Nblock,Nblock);
+  Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
+
+  Eigen::MatrixXcd m_tmp  = Eigen::MatrixXcd::Identity(Nblock,Nblock);
+  Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);
+
+  // Initial residual computation & set up
+  std::vector<RealD> residuals(Nblock);
+  std::vector<RealD> ssq(Nblock);
+
+  RealD sssum=0;
+  for(int b=0;b<Nblock;b++){ ssq[b] = norm2(B[b]);}
+  for(int b=0;b<Nblock;b++) sssum+=ssq[b];
+
+  for(int b=0;b<Nblock;b++){ residuals[b] = norm2(B[b]);}
+  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
+
+  for(int b=0;b<Nblock;b++){ residuals[b] = norm2(X[b]);}
+  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
+
+  /************************************************************************
+   * Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
+   ************************************************************************
+   * Dimensions:
+   *
+   *   X,B==(Nferm x Nblock)
+   *   A==(Nferm x Nferm)
+   *
+   * Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
+   *
+   * QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it)
+   * for k:
+   *   Z  = AD
+   *   M  = [D^dag Z]^{-1}
+   *   X  = X + D MC
+   *   QS = Q - ZM
+   *   D  = Q + D S^dag
+   *   C  = S C
+   */
+  ///////////////////////////////////////
+  // Initial block: initial search dir is guess
+  ///////////////////////////////////////
+  std::cout << GridLogMessage<<"BlockCGrQvec algorithm initialisation " <<std::endl;
+
+  //1.  QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it)
+  for(int b=0;b<Nblock;b++) {
+    Linop.HermOp(X[b], AD[b]);
+    tmp[b] = B[b] - AD[b];
+  }
+
+  ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
+
+  for(int b=0;b<Nblock;b++) D[b]=Q[b];
+
+  std::cout << GridLogMessage<<"BlockCGrQ vec computed initial residual and QR fact " <<std::endl;
+
+  ///////////////////////////////////////
+  // Timers
+  ///////////////////////////////////////
+  GridStopWatch sliceInnerTimer;
+  GridStopWatch sliceMaddTimer;
+  GridStopWatch QRTimer;
+  GridStopWatch MatrixTimer;
+  GridStopWatch SolverTimer;
+  SolverTimer.Start();
+
+  int k;
+  for (k = 1; k <= MaxIterations; k++) {
+
+    //3. Z  = AD
+    MatrixTimer.Start();
+    for(int b=0;b<Nblock;b++) Linop.HermOp(D[b], Z[b]);
+    MatrixTimer.Stop();
+
+    //4. M  = [D^dag Z]^{-1}
+    sliceInnerTimer.Start();
+    InnerProductMatrix(m_DZ,D,Z);
+    sliceInnerTimer.Stop();
+    m_M       = m_DZ.inverse();
+
+    //5. X  = X + D MC
+    m_tmp     = m_M * m_C;
+    sliceMaddTimer.Start();
+    MaddMatrix(X,m_tmp, D,X);
+    sliceMaddTimer.Stop();
+
+    //6. QS = Q - ZM
+    sliceMaddTimer.Start();
+    MaddMatrix(tmp,m_M,Z,Q,-1.0);
+    sliceMaddTimer.Stop();
+    QRTimer.Start();
+    ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
+    QRTimer.Stop();
+
+    //7. D  = Q + D S^dag
+    m_tmp = m_S.adjoint();
+    sliceMaddTimer.Start();
+    MaddMatrix(D,m_tmp,D,Q);
+    sliceMaddTimer.Stop();
+
+    //8. C  = S C
+    m_C = m_S*m_C;
+
+    /*********************
+     * convergence monitor
+     *********************
+     */
+    m_rr = m_C.adjoint() * m_C;
+
+    RealD max_resid=0;
+    RealD rrsum=0;
+    RealD rr;
+
+    for(int b=0;b<Nblock;b++) {
+      rrsum+=real(m_rr(b,b));
+      rr = real(m_rr(b,b))/ssq[b];
+      if ( rr > max_resid ) max_resid = rr;
+    }
+
+    std::cout << GridLogIterative << "\t Block Iteration "<<k<<" ave resid "<< sqrt(rrsum/sssum) << " max "<< sqrt(max_resid) <<std::endl;
+
+    if ( max_resid < Tolerance*Tolerance ) {
+
+      SolverTimer.Stop();
+
+      std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
+
+      for(int b=0;b<Nblock;b++){
+        std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
+      }
+      std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
+
+      for(int b=0;b<Nblock;b++) Linop.HermOp(X[b], AD[b]);
+      for(int b=0;b<Nblock;b++) AD[b] = AD[b]-B[b];
+      std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(normv(AD)/normv(B)) <<std::endl;
+
+      std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
+      std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl;
+      std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl;
+      std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl;
+      std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl;
+      std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed()         <<std::endl;
+
+      IterationsToComplete = k;
+      return;
+    }
+
+  }
+  std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
+
+  if (ErrorOnNoConverge) assert(0);
+  IterationsToComplete = k;
+}
+
+
+
 };
 
 }
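A note on the convergence monitor in BlockCGrQsolveVec: each iteration keeps the residual block in factored form R_k = Q_k C_k with Q_k orthonormal (from ThinQRfact), so the per-RHS residual norms are available from the small Nblock x Nblock matrix alone. This is why the loop computes m_rr = m_C.adjoint()*m_C without touching the lattice fields:

\[
R_k = Q_k C_k,\quad Q_k^\dagger Q_k = 1
\;\Rightarrow\;
\lVert r_b \rVert^2 = (R_k^\dagger R_k)_{bb} = (C_k^\dagger C_k)_{bb} .
\]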
@@ -54,6 +54,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
 
   void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
 
+
     psi.checkerboard = src.checkerboard;
     conformable(psi, src);
 
@@ -70,7 +71,6 @@ class ConjugateGradient : public OperatorFunction<Field> {
 
     Linop.HermOpAndNorm(psi, mmp, d, b);
 
-
     r = src - mmp;
     p = r;
 
@@ -96,38 +96,44 @@ class ConjugateGradient : public OperatorFunction<Field> {
               << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
 
     GridStopWatch LinalgTimer;
+    GridStopWatch InnerTimer;
+    GridStopWatch AxpyNormTimer;
+    GridStopWatch LinearCombTimer;
     GridStopWatch MatrixTimer;
     GridStopWatch SolverTimer;
 
     SolverTimer.Start();
     int k;
-    for (k = 1; k <= MaxIterations; k++) {
+    for (k = 1; k <= MaxIterations*1000; k++) {
       c = cp;
 
       MatrixTimer.Start();
-      Linop.HermOpAndNorm(p, mmp, d, qq);
+      Linop.HermOp(p, mmp);
       MatrixTimer.Stop();
 
       LinalgTimer.Start();
-      // RealD    qqck = norm2(mmp);
-      // ComplexD dck  = innerProduct(p,mmp);
 
+      InnerTimer.Start();
+      ComplexD dc  = innerProduct(p,mmp);
+      InnerTimer.Stop();
+      d = dc.real();
       a = c / d;
-      b_pred = a * (a * qq - d) / c;
 
+      AxpyNormTimer.Start();
       cp = axpy_norm(r, -a, mmp, r);
+      AxpyNormTimer.Stop();
       b = cp / c;
 
-      // Fuse these loops ; should be really easy
-      psi = a * p + psi;
-      p = p * b + r;
+      LinearCombTimer.Start();
+      parallel_for(int ss=0;ss<src._grid->oSites();ss++){
+        vstream(psi[ss], a * p[ss] + psi[ss]);
+        vstream(p  [ss], b * p[ss] + r[ss]);
+      }
+      LinearCombTimer.Stop();
       LinalgTimer.Stop();
 
       std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
-                << " residual " << cp << " target " << rsq << std::endl;
-      std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl;
-      std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl;
+                << " residual^2 " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;
 
       // Stopping condition
       if (cp <= rsq) {
@ -148,6 +154,9 @@ class ConjugateGradient : public OperatorFunction<Field> {
|
|||||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||||
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
|
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tInner " << InnerTimer.Elapsed() <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
|
||||||
|
|
||||||
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
||||||
|
|
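The hunk above replaces the two separate field updates psi = a*p + psi and p = b*p + r with a single fused site loop, so p is loaded from memory once per site instead of twice. A minimal standalone sketch of the same fusion, using plain std::vector in place of Grid lattice fields (the names here are illustrative, not Grid API):

    #include <cstddef>
    #include <vector>

    // Fused CG linear-combination update: one pass over memory per iteration.
    void fused_cg_update(std::vector<double> &psi, std::vector<double> &p,
                         const std::vector<double> &r, double a, double b) {
      for (std::size_t i = 0; i < p.size(); ++i) {
        const double pi = p[i];   // load p once
        psi[i] += a * pi;         // psi = a*p + psi
        p[i]    = b * pi + r[i];  // p   = b*p + r
      }
    }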
@@ -43,6 +43,7 @@ namespace Grid {
  public:
   RealD   Tolerance;
   Integer MaxIterations;
+  Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
   int verbose;
   MultiShiftFunction shifts;
 
@@ -164,6 +165,15 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
     axpby(psi[s],0.,-bs[s]*alpha[s],src,src);
   }
 
+  ///////////////////////////////////////
+  // Timers
+  ///////////////////////////////////////
+  GridStopWatch AXPYTimer;
+  GridStopWatch ShiftTimer;
+  GridStopWatch QRTimer;
+  GridStopWatch MatrixTimer;
+  GridStopWatch SolverTimer;
+  SolverTimer.Start();
 
   // Iteration loop
   int k;
@@ -171,7 +181,9 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
   for (k=1;k<=MaxIterations;k++){
 
     a = c /cp;
+    AXPYTimer.Start();
     axpy(p,a,p,r);
+    AXPYTimer.Stop();
 
     // Note to self - direction ps is iterated seperately
     // for each shift. Does not appear to have any scope
@@ -180,6 +192,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
     // However SAME r is used. Could load "r" and update
     // ALL ps[s]. 2/3 Bandwidth saving
     // New Kernel: Load r, vector of coeffs, vector of pointers ps
+    AXPYTimer.Start();
     for(int s=0;s<nshift;s++){
       if ( ! converged[s] ) {
         if (s==0){
@@ -190,22 +203,34 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
         }
       }
     }
+    AXPYTimer.Stop();
 
     cp=c;
-    Linop.HermOpAndNorm(p,mmp,d,qq);
+    MatrixTimer.Start();
+    //Linop.HermOpAndNorm(p,mmp,d,qq); // d is used
+    // The below is faster on KNL
+    Linop.HermOp(p,mmp);
+    d=real(innerProduct(p,mmp));
+    MatrixTimer.Stop();
 
+    AXPYTimer.Start();
     axpy(mmp,mass[0],p,mmp);
+    AXPYTimer.Stop();
     RealD rn = norm2(p);
     d += rn*mass[0];
 
     bp=b;
     b=-cp/d;
 
+    AXPYTimer.Start();
     c=axpy_norm(r,b,mmp,r);
+    AXPYTimer.Stop();
 
     // Toggle the recurrence history
     bs[0] = b;
     iz = 1-iz;
+    ShiftTimer.Start();
     for(int s=1;s<nshift;s++){
       if((!converged[s])){
         RealD z0 = z[s][1-iz];
@@ -215,6 +240,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
         bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
       }
     }
+    ShiftTimer.Stop();
 
     for(int s=0;s<nshift;s++){
       int ss = s;
@@ -257,6 +283,9 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
 
     if ( all_converged ){
 
+      SolverTimer.Stop();
 
 
       std::cout<<GridLogMessage<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
       std::cout<<GridLogMessage<< "CGMultiShift: Checking solutions"<<std::endl;
 
@@ -269,8 +298,19 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
         RealD cn = norm2(src);
         std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
       }
 
+      std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
+      std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl;
+      std::cout << GridLogMessage << "\tAXPY       " << AXPYTimer.Elapsed() <<std::endl;
+      std::cout << GridLogMessage << "\tMarix      " << MatrixTimer.Elapsed() <<std::endl;
+      std::cout << GridLogMessage << "\tShift      " << ShiftTimer.Elapsed() <<std::endl;
+
+      IterationsToComplete = k;
+
       return;
     }
 
 
   }
   // ugly hack
   std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
Grid/algorithms/iterative/Deflation.h — new file (104 lines)
@@ -0,0 +1,104 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid
+
+    Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
+
+    Copyright (C) 2015
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_DEFLATION_H
+#define GRID_DEFLATION_H
+
+namespace Grid {
+
+template<class Field>
+class ZeroGuesser: public LinearFunction<Field> {
+public:
+  virtual void operator()(const Field &src, Field &guess) { guess = zero; };
+};
+
+template<class Field>
+class SourceGuesser: public LinearFunction<Field> {
+public:
+  virtual void operator()(const Field &src, Field &guess) { guess = src; };
+};
+
+////////////////////////////////
+// Fine grid deflation
+////////////////////////////////
+template<class Field>
+class DeflatedGuesser: public LinearFunction<Field> {
+private:
+  const std::vector<Field> &evec;
+  const std::vector<RealD> &eval;
+
+public:
+
+  DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};
+
+  virtual void operator()(const Field &src,Field &guess) {
+    guess = zero;
+    assert(evec.size()==eval.size());
+    auto N = evec.size();
+    for (int i=0;i<N;i++) {
+      const Field& tmp = evec[i];
+      axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
+    }
+    guess.checkerboard = src.checkerboard;
+  }
+};
+
+template<class FineField, class CoarseField>
+class LocalCoherenceDeflatedGuesser: public LinearFunction<FineField> {
+private:
+  const std::vector<FineField>   &subspace;
+  const std::vector<CoarseField> &evec_coarse;
+  const std::vector<RealD>       &eval_coarse;
+public:
+
+  LocalCoherenceDeflatedGuesser(const std::vector<FineField>   &_subspace,
+                                const std::vector<CoarseField> &_evec_coarse,
+                                const std::vector<RealD>       &_eval_coarse)
+    : subspace(_subspace),
+      evec_coarse(_evec_coarse),
+      eval_coarse(_eval_coarse)
+  {
+  }
+
+  void operator()(const FineField &src,FineField &guess) {
+    int N = (int)evec_coarse.size();
+    CoarseField src_coarse(evec_coarse[0]._grid);
+    CoarseField guess_coarse(evec_coarse[0]._grid); guess_coarse = zero;
+    blockProject(src_coarse,src,subspace);
+    for (int i=0;i<N;i++) {
+      const CoarseField & tmp = evec_coarse[i];
+      axpy(guess_coarse,TensorRemove(innerProduct(tmp,src_coarse)) / eval_coarse[i],tmp,guess_coarse);
+    }
+    blockPromote(guess_coarse,guess,subspace);
+    guess.checkerboard = src.checkerboard;
+  };
+};
+
+
+
+}
+#endif
@@ -57,7 +57,8 @@ void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, i
 
   parallel_region
   {
-    std::vector < vobj > B(Nm); // Thread private
+    std::vector < vobj , commAllocator<vobj> > B(Nm); // Thread private
+
     parallel_for_internal(int ss=0;ss < grid->oSites();ss++){
       for(int j=j0; j<j1; ++j) B[j]=0.;
@@ -149,19 +150,6 @@ void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, boo
   basisReorderInPlace(_v,sort_vals,idx);
 }
 
-// PAB: faster to compute the inner products first then fuse loops.
-// If performance critical can improve.
-template<class Field>
-void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
-  result = zero;
-  assert(_v.size()==eval.size());
-  int N = (int)_v.size();
-  for (int i=0;i<N;i++) {
-    Field& tmp = _v[i];
-    axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
-  }
-}
-
 /////////////////////////////////////////////////////////////
 // Implicitly restarted lanczos
 /////////////////////////////////////////////////////////////
@@ -181,6 +169,7 @@ enum IRLdiagonalisation {
 template<class Field> class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester<Field>
 {
  public:
+
   LinearFunction<Field>       &_HermOp;
   ImplicitlyRestartedLanczosHermOpTester(LinearFunction<Field> &HermOp) : _HermOp(HermOp) { };
   int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox)
@@ -243,6 +232,7 @@ class ImplicitlyRestartedLanczos {
   /////////////////////////
 
 public:
+
   //////////////////////////////////////////////////////////////////
   // PAB:
   //////////////////////////////////////////////////////////////////
@@ -490,15 +480,13 @@ until convergence
           Field B(grid); B.checkerboard = evec[0].checkerboard;
 
           // power of two search pattern; not every evalue in eval2 is assessed.
+          int allconv =1;
           for(int jj = 1; jj<=Nstop; jj*=2){
             int j = Nstop-jj;
             RealD e = eval2_copy[j]; // Discard the evalue
             basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
-            if( _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
-              if ( j > Nconv ) {
-                Nconv=j+1;
-                jj=Nstop; // Terminate the scan
-              }
+            if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
+              allconv=0;
             }
           }
           // Do evec[0] for good measure
@@ -506,8 +494,10 @@ until convergence
             int j=0;
             RealD e = eval2_copy[0];
             basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
-            _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox);
+            if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) allconv=0;
           }
+          if ( allconv ) Nconv = Nstop;
 
           // test if we converged, if so, terminate
           std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl;
           // if( Nconv>=Nstop || beta_k < betastp){
@@ -28,7 +28,10 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 /* END LEGAL */
 #ifndef GRID_LOCAL_COHERENCE_IRL_H
 #define GRID_LOCAL_COHERENCE_IRL_H
+
 namespace Grid {
+
+
 struct LanczosParams : Serializable {
  public:
   GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams,
@@ -45,6 +48,7 @@ struct LanczosParams : Serializable {
 struct LocalCoherenceLanczosParams : Serializable {
  public:
   GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
+                                  bool, saveEvecs,
                                   bool, doFine,
                                   bool, doFineRead,
                                   bool, doCoarse,
@@ -70,21 +74,24 @@ public:
   typedef Lattice<Fobj>          FineField;
 
   LinearOperatorBase<FineField> &_Linop;
-  Aggregation<Fobj,CComplex,nbasis> &_Aggregate;
+  std::vector<FineField>        &subspace;
 
-  ProjectedHermOp(LinearOperatorBase<FineField>& linop, Aggregation<Fobj,CComplex,nbasis> &aggregate) :
-    _Linop(linop),
-    _Aggregate(aggregate)  { };
+  ProjectedHermOp(LinearOperatorBase<FineField>& linop, std::vector<FineField> & _subspace) :
+    _Linop(linop), subspace(_subspace)
+  {
+    assert(subspace.size() >0);
+  };
 
   void operator()(const CoarseField& in, CoarseField& out) {
+    GridBase *FineGrid = subspace[0]._grid;
+    int   checkerboard = subspace[0].checkerboard;
 
-    GridBase *FineGrid = _Aggregate.FineGrid;
-    FineField fin(FineGrid);
-    FineField fout(FineGrid);
+    FineField fin (FineGrid);   fin.checkerboard = checkerboard;
+    FineField fout(FineGrid);   fout.checkerboard = checkerboard;
 
-    _Aggregate.PromoteFromSubspace(in,fin);    std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl;
+    blockPromote(in,fin,subspace);             std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl;
     _Linop.HermOp(fin,fout);                   std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl;
-    _Aggregate.ProjectToSubspace(out,fout);    std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl;
+    blockProject(out,fout,subspace);           std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl;
   }
 };
 
@@ -99,24 +106,27 @@ public:
 
   OperatorFunction<FineField>   & _poly;
   LinearOperatorBase<FineField> &_Linop;
-  Aggregation<Fobj,CComplex,nbasis> &_Aggregate;
+  std::vector<FineField>        &subspace;
 
-  ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,LinearOperatorBase<FineField>& linop,
-                          Aggregation<Fobj,CComplex,nbasis> &aggregate) :
+  ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,
+                          LinearOperatorBase<FineField>& linop,
+                          std::vector<FineField> & _subspace) :
     _poly(poly),
     _Linop(linop),
-    _Aggregate(aggregate)  { };
+    subspace(_subspace)
+  { };
 
   void operator()(const CoarseField& in, CoarseField& out) {
 
-    GridBase *FineGrid = _Aggregate.FineGrid;
+    GridBase *FineGrid = subspace[0]._grid;
+    int   checkerboard = subspace[0].checkerboard;
 
-    FineField fin(FineGrid) ;fin.checkerboard  =_Aggregate.checkerboard;
-    FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard;
+    FineField fin (FineGrid); fin.checkerboard  =checkerboard;
+    FineField fout(FineGrid);fout.checkerboard  =checkerboard;
 
-    _Aggregate.PromoteFromSubspace(in,fin);    std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl;
+    blockPromote(in,fin,subspace);             std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl;
     _poly(_Linop,fin,fout);                    std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl;
-    _Aggregate.ProjectToSubspace(out,fout);    std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl;
+    blockProject(out,fout,subspace);           std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl;
   }
 };
 
@@ -132,19 +142,23 @@ class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanc
   LinearFunction<CoarseField> & _Poly;
   OperatorFunction<FineField>   & _smoother;
   LinearOperatorBase<FineField> &_Linop;
-  Aggregation<Fobj,CComplex,nbasis> &_Aggregate;
   RealD                             _coarse_relax_tol;
+  std::vector<FineField>        &_subspace;
 
   ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField>   &Poly,
                                            OperatorFunction<FineField>   &smoother,
                                            LinearOperatorBase<FineField> &Linop,
-                                           Aggregation<Fobj,CComplex,nbasis> &Aggregate,
+                                           std::vector<FineField>        &subspace,
                                            RealD coarse_relax_tol=5.0e3)
-    : _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly), _coarse_relax_tol(coarse_relax_tol) { };
+    : _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace),
+      _coarse_relax_tol(coarse_relax_tol)
+  { };
 
   int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
   {
     CoarseField v(B);
     RealD eval_poly = eval;
+
     // Apply operator
     _Poly(B,v);
 
@@ -168,14 +182,13 @@ class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanc
   }
   int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
   {
-    GridBase *FineGrid = _Aggregate.FineGrid;
-    int checkerboard   = _Aggregate.checkerboard;
+    GridBase *FineGrid = _subspace[0]._grid;
+    int checkerboard   = _subspace[0].checkerboard;
 
     FineField fB(FineGrid);fB.checkerboard =checkerboard;
     FineField fv(FineGrid);fv.checkerboard =checkerboard;
 
-    _Aggregate.PromoteFromSubspace(B,fv);
+    blockPromote(B,fv,_subspace);
 
     _smoother(_Linop,fv,fB);
 
     RealD eval_poly = eval;
@@ -217,27 +230,67 @@ protected:
   int _checkerboard;
   LinearOperatorBase<FineField> & _FineOp;
 
-  // FIXME replace Aggregation with vector of fine; the code reuse is too small for
-  // the hassle and complexity of cross coupling.
-  Aggregation<Fobj,CComplex,nbasis> _Aggregate;
-  std::vector<RealD>                evals_fine;
-  std::vector<RealD>                evals_coarse;
-  std::vector<CoarseField>          evec_coarse;
+  std::vector<RealD>              &evals_fine;
+  std::vector<RealD>              &evals_coarse;
+  std::vector<FineField>          &subspace;
+  std::vector<CoarseField>        &evec_coarse;
+
+private:
+  std::vector<RealD>              _evals_fine;
+  std::vector<RealD>              _evals_coarse;
+  std::vector<FineField>          _subspace;
+  std::vector<CoarseField>        _evec_coarse;
+
 public:
 
   LocalCoherenceLanczos(GridBase *FineGrid,
                         GridBase *CoarseGrid,
                         LinearOperatorBase<FineField> &FineOp,
                         int checkerboard) :
     _CoarseGrid(CoarseGrid),
     _FineGrid(FineGrid),
-    _Aggregate(CoarseGrid,FineGrid,checkerboard),
     _FineOp(FineOp),
-    _checkerboard(checkerboard)
+    _checkerboard(checkerboard),
+    evals_fine  (_evals_fine),
+    evals_coarse(_evals_coarse),
+    subspace    (_subspace),
+    evec_coarse(_evec_coarse)
   {
     evals_fine.resize(0);
     evals_coarse.resize(0);
   };
-  void Orthogonalise(void ) { _Aggregate.Orthogonalise(); }
+  //////////////////////////////////////////////////////////////////////////
+  // Alternate constructore, external storage for use by Hadrons module
+  //////////////////////////////////////////////////////////////////////////
+  LocalCoherenceLanczos(GridBase *FineGrid,
+                        GridBase *CoarseGrid,
+                        LinearOperatorBase<FineField> &FineOp,
+                        int checkerboard,
+                        std::vector<FineField>   &ext_subspace,
+                        std::vector<CoarseField> &ext_coarse,
+                        std::vector<RealD>       &ext_eval_fine,
+                        std::vector<RealD>       &ext_eval_coarse
+                        ) :
+    _CoarseGrid(CoarseGrid),
+    _FineGrid(FineGrid),
+    _FineOp(FineOp),
+    _checkerboard(checkerboard),
+    evals_fine  (ext_eval_fine),
+    evals_coarse(ext_eval_coarse),
+    subspace    (ext_subspace),
+    evec_coarse (ext_coarse)
+  {
+    evals_fine.resize(0);
+    evals_coarse.resize(0);
+  };
+
+  void Orthogonalise(void ) {
+    CoarseScalar InnerProd(_CoarseGrid);
+    std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
+    blockOrthogonalise(InnerProd,subspace);
+    std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
+    blockOrthogonalise(InnerProd,subspace);
+  };
 
   template<typename T> static RealD normalise(T& v)
   {
@@ -246,43 +299,44 @@ public:
     v = v * (1.0/nn);
     return nn;
   }
+  /*
   void fakeFine(void)
   {
     int Nk = nbasis;
-    _Aggregate.subspace.resize(Nk,_FineGrid);
-    _Aggregate.subspace[0]=1.0;
-    _Aggregate.subspace[0].checkerboard=_checkerboard;
-    normalise(_Aggregate.subspace[0]);
+    subspace.resize(Nk,_FineGrid);
+    subspace[0]=1.0;
+    subspace[0].checkerboard=_checkerboard;
+    normalise(subspace[0]);
     PlainHermOp<FineField>    Op(_FineOp);
     for(int k=1;k<Nk;k++){
-      _Aggregate.subspace[k].checkerboard=_checkerboard;
-      Op(_Aggregate.subspace[k-1],_Aggregate.subspace[k]);
-      normalise(_Aggregate.subspace[k]);
+      subspace[k].checkerboard=_checkerboard;
+      Op(subspace[k-1],subspace[k]);
+      normalise(subspace[k]);
     }
   }
+  */
 
   void testFine(RealD resid)
   {
     assert(evals_fine.size() == nbasis);
-    assert(_Aggregate.subspace.size() == nbasis);
+    assert(subspace.size() == nbasis);
     PlainHermOp<FineField>    Op(_FineOp);
     ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op);
     for(int k=0;k<nbasis;k++){
-      assert(SimpleTester.ReconstructEval(k,resid,_Aggregate.subspace[k],evals_fine[k],1.0)==1);
+      assert(SimpleTester.ReconstructEval(k,resid,subspace[k],evals_fine[k],1.0)==1);
     }
   }
 
   void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax)
   {
     assert(evals_fine.size() == nbasis);
-    assert(_Aggregate.subspace.size() == nbasis);
+    assert(subspace.size() == nbasis);
     //////////////////////////////////////////////////////////////////////////////////////////////////
     // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
     //////////////////////////////////////////////////////////////////////////////////////////////////
     Chebyshev<FineField>                          ChebySmooth(cheby_smooth);
-    ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp    (ChebySmooth,_FineOp,_Aggregate);
-    ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax);
+    ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp    (ChebySmooth,_FineOp,subspace);
+    ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
 
     for(int k=0;k<evec_coarse.size();k++){
       if ( k < nbasis ) {
@@ -302,34 +356,34 @@ public:
     PlainHermOp<FineField>    Op(_FineOp);
 
     evals_fine.resize(Nm);
-    _Aggregate.subspace.resize(Nm,_FineGrid);
+    subspace.resize(Nm,_FineGrid);
 
     ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
 
     FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard;
 
     int Nconv;
-    IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false);
+    IRL.calc(evals_fine,subspace,src,Nconv,false);
 
     // Shrink down to number saved
     assert(Nstop>=nbasis);
     assert(Nconv>=nbasis);
     evals_fine.resize(nbasis);
-    _Aggregate.subspace.resize(nbasis,_FineGrid);
+    subspace.resize(nbasis,_FineGrid);
   }
   void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax,
                   int Nstop, int Nk, int Nm,RealD resid,
                   RealD MaxIt, RealD betastp, int MinRes)
   {
     Chebyshev<FineField>                          Cheby(cheby_op);
-    ProjectedHermOp<Fobj,CComplex,nbasis>         Op(_FineOp,_Aggregate);
-    ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp    (Cheby,_FineOp,_Aggregate);
+    ProjectedHermOp<Fobj,CComplex,nbasis>         Op(_FineOp,subspace);
+    ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp    (Cheby,_FineOp,subspace);
     //////////////////////////////////////////////////////////////////////////////////////////////////
     // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
     //////////////////////////////////////////////////////////////////////////////////////////////////
 
     Chebyshev<FineField>                          ChebySmooth(cheby_smooth);
-    ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax);
+    ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
 
     evals_coarse.resize(Nm);
     evec_coarse.resize(Nm,_CoarseGrid);
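The new Orthogonalise applies blockOrthogonalise twice because a single Gram-Schmidt pass loses orthogonality to rounding error; repeating the pass (the classical "twice is enough" remedy) restores it. The idea on plain real vectors, purely illustrative and not the blocked Grid routine:

    #include <cmath>
    #include <cstddef>
    #include <vector>

    using Vec = std::vector<double>;

    // One modified Gram-Schmidt pass; call twice for a numerically clean basis.
    void mgs_pass(std::vector<Vec> &v) {
      for (std::size_t k = 0; k < v.size(); ++k) {
        for (std::size_t j = 0; j < k; ++j) {        // remove the v_j component
          double c = 0;
          for (std::size_t i = 0; i < v[k].size(); ++i) c += v[j][i] * v[k][i];
          for (std::size_t i = 0; i < v[k].size(); ++i) v[k][i] -= c * v[j][i];
        }
        double n = 0;                                 // normalise v_k
        for (double x : v[k]) n += x * x;
        n = std::sqrt(n);
        for (double &x : v[k]) x /= n;
      }
    }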
Grid/algorithms/iterative/SchurRedBlack.h — new file (473 lines)
@@ -0,0 +1,473 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid
+
+    Source file: ./lib/algorithms/iterative/SchurRedBlack.h
+
+    Copyright (C) 2015
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_SCHUR_RED_BLACK_H
+#define GRID_SCHUR_RED_BLACK_H
+
+
+  /*
+   * Red black Schur decomposition
+   *
+   *  M = (Mee Meo) =  (1           0 )  (Mee  0                )  (1 Mee^{-1} Meo)
+   *      (Moe Moo)    (Moe Mee^-1  1 )  (0    Moo-Moe Mee^-1 Meo)  (0 1           )
+   *                =         L                    D                       U
+   *
+   * L^-1 = (1             0 )
+   *        (-MoeMee^{-1}  1 )
+   * L^{dag} = ( 1   Mee^{-dag} Moe^{dag} )
+   *           ( 0   1                    )
+   * L^{-d}  = ( 1  -Mee^{-dag} Moe^{dag} )
+   *           ( 0   1                    )
+   *
+   * U^-1 = (1  -Mee^{-1} Meo)
+   *        (0   1           )
+   * U^{dag}  = ( 1                  0)
+   *            ( Meo^dag Mee^{-dag} 1)
+   * U^{-dag} = ( 1                  0)
+   *            (-Meo^dag Mee^{-dag} 1)
+   ***********************
+   *     M psi = eta
+   ***********************
+   *Odd
+   * i)                 D_oo psi_o =  L^{-1}  eta_o
+   *                        eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
+   *
+   * Wilson:
+   *      (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1}  eta_o
+   * Stag:
+   *      D_oo psi_o = L^{-1}  eta =    (eta_o - Moe Mee^{-1} eta_e)
+   *
+   * L^-1 eta_o= (1             0 ) (e
+   *             (-MoeMee^{-1}  1 )
+   *
+   *Even
+   * ii)  Mee psi_e + Meo psi_o = src_e
+   *
+   *   => sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
+   *
+   *
+   * TODO: Other options:
+   *
+   * a) change checkerboards for Schur e<->o
+   *
+   * Left precon by Moo^-1
+   * b) Doo^{dag} M_oo^-dag Moo^-1 Doo psi_0 = (D_oo)^dag M_oo^-dag Moo^-1 L^{-1} eta_o
+   *                          eta_o'         = (D_oo)^dag M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e)
+   *
+   * Right precon by Moo^-1
+   * c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1} eta_o
+   *                          eta_o'         = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
+   *                          psi_o = M_oo^-1 phi_o
+   * TODO: Deflation
+   */
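For reference, the L D U factorisation sketched in ASCII in the comment block reads, in standard notation:

$$
M \;=\; \begin{pmatrix} M_{ee} & M_{eo} \\ M_{oe} & M_{oo} \end{pmatrix}
\;=\;
\begin{pmatrix} 1 & 0 \\ M_{oe} M_{ee}^{-1} & 1 \end{pmatrix}
\begin{pmatrix} M_{ee} & 0 \\ 0 & M_{oo} - M_{oe} M_{ee}^{-1} M_{eo} \end{pmatrix}
\begin{pmatrix} 1 & M_{ee}^{-1} M_{eo} \\ 0 & 1 \end{pmatrix}
\;=\; L\,D\,U,
$$

so the odd-checkerboard system solved below is $D_{oo}\,\psi_o = \eta_o - M_{oe} M_{ee}^{-1}\eta_e$, and once $\psi_o$ is known the even solution is reconstructed as $\psi_e = M_{ee}^{-1}\!\left(\eta_e - M_{eo}\,\psi_o\right)$.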
+namespace Grid {
+
+  ///////////////////////////////////////////////////////////////////////////////////////////////////////
+  // Use base class to share code
+  ///////////////////////////////////////////////////////////////////////////////////////////////////////
+  ///////////////////////////////////////////////////////////////////////////////////////////////////////
+  // Take a matrix and form a Red Black solver calling a Herm solver
+  // Use of RB info prevents making SchurRedBlackSolve conform to standard interface
+  ///////////////////////////////////////////////////////////////////////////////////////////////////////
+  template<class Field> class SchurRedBlackBase {
+  protected:
+    typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
+    OperatorFunction<Field> & _HermitianRBSolver;
+    int CBfactorise;
+    bool subGuess;
+  public:
+
+    SchurRedBlackBase(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
+      _HermitianRBSolver(HermitianRBSolver)
+    {
+      CBfactorise = 0;
+      subtractGuess(initSubGuess);
+    };
+    void subtractGuess(const bool initSubGuess)
+    {
+      subGuess = initSubGuess;
+    }
+    bool isSubtractGuess(void)
+    {
+      return subGuess;
+    }
+
+    /////////////////////////////////////////////////////////////
+    // Shared code
+    /////////////////////////////////////////////////////////////
+    void operator() (Matrix & _Matrix,const Field &in, Field &out){
+      ZeroGuesser<Field> guess;
+      (*this)(_Matrix,in,out,guess);
+    }
+    void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out)
+    {
+      ZeroGuesser<Field> guess;
+      (*this)(_Matrix,in,out,guess);
+    }
+
+    template<class Guesser>
+    void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)
+    {
+      GridBase *grid = _Matrix.RedBlackGrid();
+      GridBase *fgrid= _Matrix.Grid();
+      int nblock = in.size();
+
+      std::vector<Field> src_o(nblock,grid);
+      std::vector<Field> sol_o(nblock,grid);
+
+      std::vector<Field> guess_save;
+
+      Field resid(fgrid);
+      Field tmp(grid);
+
+      ////////////////////////////////////////////////
+      // Prepare RedBlack source
+      ////////////////////////////////////////////////
+      for(int b=0;b<nblock;b++){
+        RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
+      }
+      ////////////////////////////////////////////////
+      // Make the guesses
+      ////////////////////////////////////////////////
+      if ( subGuess ) guess_save.resize(nblock,grid);
+
+      for(int b=0;b<nblock;b++){
+        guess(src_o[b],sol_o[b]);
+
+        if ( subGuess ) {
+          guess_save[b] = sol_o[b];
+        }
+      }
+      //////////////////////////////////////////////////////////////
+      // Call the block solver
+      //////////////////////////////////////////////////////////////
+      std::cout<<GridLogMessage << "SchurRedBlackBase calling the solver for "<<nblock<<" RHS" <<std::endl;
+      RedBlackSolve(_Matrix,src_o,sol_o);
+
+      ////////////////////////////////////////////////
+      // A2A boolean behavioural control & reconstruct other checkerboard
+      ////////////////////////////////////////////////
+      for(int b=0;b<nblock;b++) {
+
+        if (subGuess)   sol_o[b] = sol_o[b] - guess_save[b];
+
+        ///////// Needs even source //////////////
+        pickCheckerboard(Even,tmp,in[b]);
+        RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]);
+
+        /////////////////////////////////////////////////
+        // Check unprec residual if possible
+        /////////////////////////////////////////////////
+        if ( ! subGuess ) {
+          _Matrix.M(out[b],resid);
+          resid = resid-in[b];
+          RealD ns = norm2(in[b]);
+          RealD nr = norm2(resid);
+
+          std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl;
+        } else {
+          std::cout<<GridLogMessage<< "SchurRedBlackBase Guess subtracted after solve["<<b<<"] " << std::endl;
+        }
+
+      }
+    }
+    template<class Guesser>
+    void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
+
+      // FIXME CGdiagonalMee not implemented virtual function
+      // FIXME use CBfactorise to control schur decomp
+      GridBase *grid = _Matrix.RedBlackGrid();
+      GridBase *fgrid= _Matrix.Grid();
+
+      Field resid(fgrid);
+      Field src_o(grid);
+      Field src_e(grid);
+      Field sol_o(grid);
+
+      ////////////////////////////////////////////////
+      // RedBlack source
+      ////////////////////////////////////////////////
+      RedBlackSource(_Matrix,in,src_e,src_o);
+
+      ////////////////////////////////
+      // Construct the guess
+      ////////////////////////////////
+      Field tmp(grid);
+      guess(src_o,sol_o);
+
+      Field guess_save(grid);
+      guess_save = sol_o;
+
+      //////////////////////////////////////////////////////////////
+      // Call the red-black solver
+      //////////////////////////////////////////////////////////////
+      RedBlackSolve(_Matrix,src_o,sol_o);
+
+      ////////////////////////////////////////////////
+      // Fionn A2A boolean behavioural control
+      ////////////////////////////////////////////////
+      if (subGuess)  sol_o = sol_o-guess_save;
+
+      ///////////////////////////////////////////////////
+      // RedBlack solution needs the even source
+      ///////////////////////////////////////////////////
+      RedBlackSolution(_Matrix,sol_o,src_e,out);
+
+      // Verify the unprec residual
+      if ( ! subGuess ) {
+        _Matrix.M(out,resid);
+        resid = resid-in;
+        RealD ns = norm2(in);
+        RealD nr = norm2(resid);
+
+        std::cout<<GridLogMessage << "SchurRedBlackBase solver true unprec resid "<< std::sqrt(nr/ns) << std::endl;
+      } else {
+        std::cout << GridLogMessage << "SchurRedBlackBase Guess subtracted after solve." << std::endl;
+      }
+    }
+
+    /////////////////////////////////////////////////////////////
+    // Override in derived. Not virtual as template methods
+    /////////////////////////////////////////////////////////////
+    virtual void RedBlackSource  (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)                 =0;
+    virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)           =0;
+    virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)                            =0;
+    virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)  =0;
+
+  };
+
+  template<class Field> class SchurRedBlackStaggeredSolve : public SchurRedBlackBase<Field> {
+  public:
+    typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
+
+    SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
+      : SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess)
+    {
+    }
+
+    //////////////////////////////////////////////////////
+    // Override RedBlack specialisation
+    //////////////////////////////////////////////////////
+    virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
+    {
+      GridBase *grid = _Matrix.RedBlackGrid();
+      GridBase *fgrid= _Matrix.Grid();
+
+      Field tmp(grid);
+      Field Mtmp(grid);
+
+      pickCheckerboard(Even,src_e,src);
+      pickCheckerboard(Odd ,src_o,src);
+
+      /////////////////////////////////////////////////////
+      // src_o = (source_o - Moe MeeInv source_e)
+      /////////////////////////////////////////////////////
+      _Matrix.MooeeInv(src_e,tmp);     assert( tmp.checkerboard ==Even);
+      _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);
+      tmp=src_o-Mtmp;                  assert( tmp.checkerboard ==Odd);
+
+      _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm.
+    }
+    virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e_c,Field &sol)
+    {
+      GridBase *grid = _Matrix.RedBlackGrid();
+      GridBase *fgrid= _Matrix.Grid();
+
+      Field tmp(grid);
+      Field sol_e(grid);
+      Field src_e(grid);
+
+      src_e = src_e_c; // Const correctness
+
+      ///////////////////////////////////////////////////
+      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
+      ///////////////////////////////////////////////////
+      _Matrix.Meooe(sol_o,tmp);        assert( tmp.checkerboard ==Even);
+      src_e = src_e-tmp;               assert( src_e.checkerboard ==Even);
+      _Matrix.MooeeInv(src_e,sol_e);   assert( sol_e.checkerboard ==Even);
+
+      setCheckerboard(sol,sol_e);      assert( sol_e.checkerboard ==Even);
+      setCheckerboard(sol,sol_o);      assert( sol_o.checkerboard ==Odd );
+    }
+    virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)
+    {
+      SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
+      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
+    };
+    virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
+    {
+      SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
+      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
+    }
+  };
+  template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
+
+  ///////////////////////////////////////////////////////////////////////////////////////////////////////
+  // Site diagonal has Mooee on it.
+  ///////////////////////////////////////////////////////////////////////////////////////////////////////
+  template<class Field> class SchurRedBlackDiagMooeeSolve : public SchurRedBlackBase<Field> {
+  public:
+    typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
+
+    SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
+      : SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess) {};
+
+
+    //////////////////////////////////////////////////////
+    // Override RedBlack specialisation
+    //////////////////////////////////////////////////////
+    virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
+    {
+      GridBase *grid = _Matrix.RedBlackGrid();
+      GridBase *fgrid= _Matrix.Grid();
+
+      Field tmp(grid);
+      Field Mtmp(grid);
+
+      pickCheckerboard(Even,src_e,src);
+      pickCheckerboard(Odd ,src_o,src);
+
+      /////////////////////////////////////////////////////
+      // src_o = Mdag * (source_o - Moe MeeInv source_e)
+      /////////////////////////////////////////////////////
+      _Matrix.MooeeInv(src_e,tmp);     assert( tmp.checkerboard ==Even);
+      _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);
+      tmp=src_o-Mtmp;                  assert( tmp.checkerboard ==Odd);
+
+      // get the right MpcDag
+      SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
+      _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);
+
+    }
+    virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
+    {
+      GridBase *grid = _Matrix.RedBlackGrid();
+      GridBase *fgrid= _Matrix.Grid();
+
+      Field tmp(grid);
+      Field sol_e(grid);
+      Field src_e_i(grid);
+      ///////////////////////////////////////////////////
+      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
+      ///////////////////////////////////////////////////
+      _Matrix.Meooe(sol_o,tmp);          assert( tmp.checkerboard ==Even);
+      src_e_i = src_e-tmp;               assert( src_e_i.checkerboard ==Even);
+      _Matrix.MooeeInv(src_e_i,sol_e);   assert( sol_e.checkerboard ==Even);
+
+      setCheckerboard(sol,sol_e);        assert( sol_e.checkerboard ==Even);
+      setCheckerboard(sol,sol_o);        assert( sol_o.checkerboard ==Odd );
+    }
+    virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)
+    {
+      SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
+      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
+    };
+    virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
+    {
+      SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
+      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
+    }
+  };
+
+  ///////////////////////////////////////////////////////////////////////////////////////////////////////
+  // Site diagonal is identity, right preconditioned by Mee^inv
+  // ( 1 - Meo Moo^inv Moe Mee^inv  ) phi =( 1 - Meo Moo^inv Moe Mee^inv  ) Mee psi =  = eta  =  eta
+  //=> psi = MeeInv phi
+  ///////////////////////////////////////////////////////////////////////////////////////////////////////
+  template<class Field> class SchurRedBlackDiagTwoSolve : public SchurRedBlackBase<Field> {
+  public:
+    typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
+
+    /////////////////////////////////////////////////////
+    // Wrap the usual normal equations Schur trick
+    /////////////////////////////////////////////////////
+    SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
+      : SchurRedBlackBase<Field>(HermitianRBSolver,initSubGuess) {};
+
+    virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
+    {
+      GridBase *grid = _Matrix.RedBlackGrid();
+      GridBase *fgrid= _Matrix.Grid();
+
+      SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
+
+      Field tmp(grid);
+      Field Mtmp(grid);
+
+      pickCheckerboard(Even,src_e,src);
+      pickCheckerboard(Odd ,src_o,src);
+
+      /////////////////////////////////////////////////////
+      // src_o = Mdag * (source_o - Moe MeeInv source_e)
+      /////////////////////////////////////////////////////
+      _Matrix.MooeeInv(src_e,tmp);     assert( tmp.checkerboard ==Even);
+      _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);
+      tmp=src_o-Mtmp;                  assert( tmp.checkerboard ==Odd);
+
+      // get the right MpcDag
+      _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);
+    }
+
+    virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
+    {
+      GridBase *grid = _Matrix.RedBlackGrid();
+      GridBase *fgrid= _Matrix.Grid();
+
+      Field sol_o_i(grid);
+      Field tmp(grid);
+      Field sol_e(grid);
+
+      ////////////////////////////////////////////////
+      // MooeeInv due to pecond
+      ////////////////////////////////////////////////
+      _Matrix.MooeeInv(sol_o,tmp);
+      sol_o_i = tmp;
+
+      ///////////////////////////////////////////////////
+      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
+      ///////////////////////////////////////////////////
+      _Matrix.Meooe(sol_o_i,tmp);      assert( tmp.checkerboard ==Even);
+      tmp = src_e-tmp;                 assert( src_e.checkerboard ==Even);
+      _Matrix.MooeeInv(tmp,sol_e);     assert( sol_e.checkerboard ==Even);
+
+      setCheckerboard(sol,sol_e);      assert( sol_e.checkerboard ==Even);
+      setCheckerboard(sol,sol_o_i);    assert( sol_o_i.checkerboard ==Odd );
+    };
+
+    virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)
+    {
+      SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
+      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
+    };
+    virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
+    {
+      SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
+      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
+    }
+  };
+}
+#endif
@@ -277,7 +277,9 @@ public:
       uint8_t *cp = (uint8_t *)ptr;
       if ( ptr ) {
         // One touch per 4k page, static OMP loop to catch same loop order
+#ifdef GRID_OMP
 #pragma omp parallel for schedule(static)
+#endif
         for(size_type n=0;n<bytes;n+=4096){
           cp[n]=0;
         }
@@ -44,11 +44,13 @@ void CartesianCommunicator::Init(int *argc, char ***argv)
   MPI_Initialized(&flag); // needed to coexist with other libs apparently
   if ( !flag ) {
     MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
-    assert (provided == MPI_THREAD_MULTIPLE);
+    //If only 1 comms thread we require any threading mode other than SINGLE, but for multiple comms threads we need MULTIPLE
+    if( (nCommThreads == 1 && provided == MPI_THREAD_SINGLE) ||
+        (nCommThreads > 1 && provided != MPI_THREAD_MULTIPLE) )
+      assert(0);
   }
 
-  Grid_quiesce_nodes();
+  // Never clean up as done once.
   MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
 
   GlobalSharedMemory::Init(communicator_world);
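The replaced assert demanded MPI_THREAD_MULTIPLE unconditionally; the new check only requires it when more than one communication thread is in use, and otherwise accepts any level above SINGLE. A standalone illustration of the same negotiation, where nCommThreads mirrors Grid's communication-thread count:

    #include <mpi.h>
    #include <cassert>

    void init_mpi(int *argc, char ***argv, int nCommThreads) {
      int provided;
      MPI_Init_thread(argc, argv, MPI_THREAD_MULTIPLE, &provided);
      // 1 comms thread: anything above SINGLE suffices; >1 needs MULTIPLE.
      if ((nCommThreads == 1 && provided == MPI_THREAD_SINGLE) ||
          (nCommThreads > 1 && provided != MPI_THREAD_MULTIPLE))
        assert(0);
    }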
@@ -85,9 +87,17 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &c
 CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 {
   MPI_Comm optimal_comm;
-  GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm); // Remap using the shared memory optimising routine
+  ////////////////////////////////////////////////////
+  // Remap using the shared memory optimising routine
+  // The remap creates a comm which must be freed
+  ////////////////////////////////////////////////////
+  GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm);
   InitFromMPICommunicator(processors,optimal_comm);
   SetCommunicator(optimal_comm);
+  ///////////////////////////////////////////////////
+  // Free the temp communicator
+  ///////////////////////////////////////////////////
+  MPI_Comm_free(&optimal_comm);
 }

 //////////////////////////////////
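The MPI_Comm_free added at the end of the constructor implies that SetCommunicator retains its own duplicate, so the remapped communicator is only a temporary. A sketch of that ownership pattern under the same assumption; make_and_release and kept are illustrative names:

#include <mpi.h>

// Creator hands out a temporary communicator; the consumer duplicates
// what it needs to retain; the creator then frees the temporary.
// MPI_Comm_free defers actual deallocation until pending ops complete.
void make_and_release(MPI_Comm parent, MPI_Comm &kept) {
  MPI_Comm temp;
  MPI_Comm_dup(parent, &temp);  // stands in for OptimalCommunicator()
  MPI_Comm_dup(temp, &kept);    // stands in for SetCommunicator() keeping a copy
  MPI_Comm_free(&temp);         // safe: 'kept' is independent of 'temp'
}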
@@ -112,10 +122,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
   // split the communicator
   //////////////////////////////////////////////////////////////////////////////////////////////////////
   //  int Nparent = parent._processors ;
-  // std::cout << " splitting from communicator "<<parent.communicator <<std::endl;
   int Nparent;
   MPI_Comm_size(parent.communicator,&Nparent);
-  // std::cout << " Parent size "<<Nparent <<std::endl;

   int childsize=1;
   for(int d=0;d<processors.size();d++) {
@@ -124,8 +132,6 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
   int Nchild = Nparent/childsize;
   assert (childsize * Nchild == Nparent);

-  // std::cout << " child size "<<childsize <<std::endl;
-
   std::vector<int> ccoor(_ndimension); // coor within subcommunicator
   std::vector<int> scoor(_ndimension); // coor of split within parent
   std::vector<int> ssize(_ndimension); // coor of split within parent
@@ -183,8 +189,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,

   } else {
     srank = 0;
-    comm_split = parent.communicator;
-    // std::cout << " Inherited communicator " <<comm_split <<std::endl;
+    int ierr = MPI_Comm_dup (parent.communicator,&comm_split);
+    assert(ierr==0);
   }

   //////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -197,6 +203,11 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
   //////////////////////////////////////////////////////////////////////////////////////////////////////
   SetCommunicator(comm_split);

+  ///////////////////////////////////////////////
+  // Free the temp communicator
+  ///////////////////////////////////////////////
+  MPI_Comm_free(&comm_split);
+
   if(0){
     std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
     for(int d=0;d<processors.size();d++){
@@ -210,6 +221,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,

 void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
 {
+  ////////////////////////////////////////////////////
+  // Creates communicator, and the communicator_halo
+  ////////////////////////////////////////////////////
   _ndimension = processors.size();
   _processor_coor.resize(_ndimension);

@@ -133,6 +133,7 @@ class SharedMemory

 public:
   SharedMemory() {};
+  ~SharedMemory();
   ///////////////////////////////////////////////////////////////////////////////////////
   // set the buffers & sizes
   ///////////////////////////////////////////////////////////////////////////////////////
@@ -27,6 +27,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 /* END LEGAL */

 #include <Grid/GridCore.h>
+#include <pwd.h>

 namespace Grid {

@@ -113,19 +114,150 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
   assert(WorldNode!=-1);
   _ShmSetup=1;
 }

-void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
+// Gray encode support
+int BinaryToGray (int binary) {
+  int gray = (binary>>1)^binary;
+  return gray;
+}
+int Log2Size(int TwoToPower,int MAXLOG2)
 {
-  ////////////////////////////////////////////////////////////////
-  // Assert power of two shm_size.
-  ////////////////////////////////////////////////////////////////
   int log2size = -1;
-  for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){
-    if ( (0x1<<i) == WorldShmSize ) {
+  for(int i=0;i<=MAXLOG2;i++){
+    if ( (0x1<<i) == TwoToPower ) {
       log2size = i;
       break;
     }
   }
+  return log2size;
+}
+void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
+{
+#ifdef HYPERCUBE
+  ////////////////////////////////////////////////////////////////
+  // Assert power of two shm_size.
+  ////////////////////////////////////////////////////////////////
+  int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
+  assert(log2size != -1);
+
+  ////////////////////////////////////////////////////////////////
+  // Identify the hypercube coordinate of this node using hostname
+  ////////////////////////////////////////////////////////////////
+  // n runs 0...7 9...16 18...25 27...34     (8*4)  5 bits
+  // i runs 0..7                                    3 bits
+  // r runs 0..3                                    2 bits
+  // 2^10 = 1024 nodes
+  const int maxhdim = 10;
+  std::vector<int> HyperCubeCoords(maxhdim,0);
+  std::vector<int> RootHyperCubeCoords(maxhdim,0);
+  int R;
+  int I;
+  int N;
+  const int namelen = _POSIX_HOST_NAME_MAX;
+  char name[namelen];
+
+  // Parse ICE-XA hostname to get hypercube location
+  gethostname(name,namelen);
+  int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
+  assert(nscan==3);
+
+  int nlo = N%9;
+  int nhi = N/9;
+  uint32_t hypercoor = (R<<8)|(I<<5)|(nhi<<3)|nlo ;
+  uint32_t rootcoor  = hypercoor;
+
+  //////////////////////////////////////////////////////////////////
+  // Print debug info
+  //////////////////////////////////////////////////////////////////
+  for(int d=0;d<maxhdim;d++){
+    HyperCubeCoords[d] = (hypercoor>>d)&0x1;
+  }
+
+  std::string hname(name);
+  std::cout << "hostname "<<hname<<std::endl;
+  std::cout << "R " << R << " I " << I << " N "<< N
+            << " hypercoor 0x"<<std::hex<<hypercoor<<std::dec<<std::endl;
+
+  //////////////////////////////////////////////////////////////////
+  // broadcast node 0's base coordinate for this partition.
+  //////////////////////////////////////////////////////////////////
+  MPI_Bcast(&rootcoor, sizeof(rootcoor), MPI_BYTE, 0, WorldComm);
+  hypercoor=hypercoor-rootcoor;
+  assert(hypercoor<WorldSize);
+  assert(hypercoor>=0);
+
+  //////////////////////////////////////
+  // Printing
+  //////////////////////////////////////
+  for(int d=0;d<maxhdim;d++){
+    HyperCubeCoords[d] = (hypercoor>>d)&0x1;
+  }
+
+  ////////////////////////////////////////////////////////////////
+  // Identify subblock of ranks on node spreading across dims
+  // in a maximally symmetrical way
+  ////////////////////////////////////////////////////////////////
+  int ndimension              = processors.size();
+  std::vector<int> processor_coor(ndimension);
+  std::vector<int> WorldDims = processors;   std::vector<int> ShmDims  (ndimension,1);  std::vector<int> NodeDims (ndimension);
+  std::vector<int> ShmCoor  (ndimension);    std::vector<int> NodeCoor (ndimension);    std::vector<int> WorldCoor(ndimension);
+  std::vector<int> HyperCoor(ndimension);
+  int dim = 0;
+  for(int l2=0;l2<log2size;l2++){
+    while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
+    ShmDims[dim]*=2;
+    dim=(dim+1)%ndimension;
+  }
+
+  ////////////////////////////////////////////////////////////////
+  // Establish torus of processes and nodes with sub-blockings
+  ////////////////////////////////////////////////////////////////
+  for(int d=0;d<ndimension;d++){
+    NodeDims[d] = WorldDims[d]/ShmDims[d];
+  }
+  ////////////////////////////////////////////////////////////////
+  // Map Hcube according to physical lattice
+  // must partition. Loop over dims and find out who would join.
+  ////////////////////////////////////////////////////////////////
+  int hcoor = hypercoor;
+  for(int d=0;d<ndimension;d++){
+    int bits = Log2Size(NodeDims[d],MAXLOG2RANKSPERNODE);
+    int msk  = (0x1<<bits)-1;
+    HyperCoor[d]=hcoor & msk;
+    HyperCoor[d]=BinaryToGray(HyperCoor[d]); // Space filling curve magic
+    hcoor = hcoor >> bits;
+  }
+  ////////////////////////////////////////////////////////////////
+  // Check processor counts match
+  ////////////////////////////////////////////////////////////////
+  int Nprocessors=1;
+  for(int i=0;i<ndimension;i++){
+    Nprocessors*=processors[i];
+  }
+  assert(WorldSize==Nprocessors);
+
+  ////////////////////////////////////////////////////////////////
+  // Establish mapping between lexico physics coord and WorldRank
+  ////////////////////////////////////////////////////////////////
+  int rank;
+
+  Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode   ,NodeDims);
+
+  for(int d=0;d<ndimension;d++) NodeCoor[d]=HyperCoor[d];
+
+  Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
+  for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
+  Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);
+
+  /////////////////////////////////////////////////////////////////
+  // Build the new communicator
+  /////////////////////////////////////////////////////////////////
+  int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
+  assert(ierr==0);
+#else
+  ////////////////////////////////////////////////////////////////
+  // Assert power of two shm_size.
+  ////////////////////////////////////////////////////////////////
+  int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
   assert(log2size != -1);

   ////////////////////////////////////////////////////////////////
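The Gray encoding used above is what makes the hypercube mapping work: consecutive integers map to codes that differ in exactly one bit, so stepping through a lattice dimension in Gray order moves along a single hypercube link per step. A self-contained check of that property; the loop bound 16 is arbitrary:

#include <cassert>

int BinaryToGray(int binary) { return (binary >> 1) ^ binary; }

int main() {
  // 0,1,2,3,4,... -> 0,1,3,2,6,... : adjacent codes differ in one bit.
  int prev = BinaryToGray(0);
  for (int b = 1; b < 16; b++) {
    int g = BinaryToGray(b);
    int diff = g ^ prev;
    assert(diff != 0 && (diff & (diff - 1)) == 0); // exactly one bit set
    prev = g;
  }
  return 0;
}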
@@ -174,7 +306,69 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
   /////////////////////////////////////////////////////////////////
   int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
   assert(ierr==0);
+#endif
 }
+////////////////////////////////////////////////////////////////////////////////////////////
+// SHMGET
+////////////////////////////////////////////////////////////////////////////////////////////
+#ifdef GRID_MPI3_SHMGET
+void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
+{
+  std::cout << "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
+  assert(_ShmSetup==1);
+  assert(_ShmAlloc==0);
+
+  //////////////////////////////////////////////////////////////////////////////////////////////////////////
+  // allocate the shared windows for our group
+  //////////////////////////////////////////////////////////////////////////////////////////////////////////
+  MPI_Barrier(WorldShmComm);
+  WorldShmCommBufs.resize(WorldShmSize);
+  std::vector<int> shmids(WorldShmSize);
+
+  if ( WorldShmRank == 0 ) {
+    for(int r=0;r<WorldShmSize;r++){
+      size_t size = bytes;
+      key_t key   = IPC_PRIVATE;
+      int flags = IPC_CREAT | SHM_R | SHM_W;
+#ifdef SHM_HUGETLB
+      if (Hugepages) flags|=SHM_HUGETLB;
+#endif
+      if ((shmids[r]= shmget(key,size, flags)) ==-1) {
+        int errsv = errno;
+        printf("Errno %d\n",errsv);
+        printf("key   %d\n",key);
+        printf("size  %lld\n",size);
+        printf("flags %d\n",flags);
+        perror("shmget");
+        exit(1);
+      }
+    }
+  }
+  MPI_Barrier(WorldShmComm);
+  MPI_Bcast(&shmids[0],WorldShmSize*sizeof(int),MPI_BYTE,0,WorldShmComm);
+  MPI_Barrier(WorldShmComm);
+
+  for(int r=0;r<WorldShmSize;r++){
+    WorldShmCommBufs[r] = (uint64_t *)shmat(shmids[r], NULL,0);
+    if (WorldShmCommBufs[r] == (uint64_t *)-1) {
+      perror("Shared memory attach failure");
+      shmctl(shmids[r], IPC_RMID, NULL);
+      exit(2);
+    }
+  }
+  MPI_Barrier(WorldShmComm);
+  ///////////////////////////////////
+  // Mark for clean up
+  ///////////////////////////////////
+  for(int r=0;r<WorldShmSize;r++){
+    shmctl(shmids[r], IPC_RMID,(struct shmid_ds *)NULL);
+  }
+  MPI_Barrier(WorldShmComm);
+
+  _ShmAlloc=1;
+  _ShmAllocBytes = bytes;
+}
+#endif

 ////////////////////////////////////////////////////////////////////////////////////////////
 // Hugetlbfs mapping intended
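The barrier/IPC_RMID choreography in the shmget branch follows the standard System V lifecycle: create, attach everywhere, then mark for removal so the kernel reclaims the segment once the last process detaches, even after a crash. The same lifecycle in miniature, for a single process; the 1 MiB size is arbitrary:

#include <sys/ipc.h>
#include <sys/shm.h>
#include <cstdio>

int main() {
  int id = shmget(IPC_PRIVATE, 1 << 20, IPC_CREAT | SHM_R | SHM_W);
  if (id == -1) { perror("shmget"); return 1; }

  void *p = shmat(id, nullptr, 0);
  if (p == (void *)-1) { perror("shmat"); shmctl(id, IPC_RMID, nullptr); return 2; }

  shmctl(id, IPC_RMID, nullptr); // mark now; freed on last detach
  // ... use p as ordinary memory ...
  shmdt(p);
  return 0;
}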
@@ -182,6 +376,7 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
 #ifdef GRID_MPI3_SHMMMAP
 void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 {
+  std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
   assert(_ShmSetup==1);
   assert(_ShmAlloc==0);
   //////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -191,7 +386,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
   WorldShmCommBufs.resize(WorldShmSize);

   ////////////////////////////////////////////////////////////////////////////////////////////
-  // Hugetlbf and others map filesystems as mappable huge pages
+  // Hugetlbfs and others map filesystems as mappable huge pages
   ////////////////////////////////////////////////////////////////////////////////////////////
   char shm_name [NAME_MAX];
   for(int r=0;r<WorldShmSize;r++){
@@ -218,6 +413,49 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
     assert(((uint64_t)ptr&0x3F)==0);
     close(fd);
     WorldShmCommBufs[r] =ptr;
+    // std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
+  }
+  _ShmAlloc=1;
+  _ShmAllocBytes  = bytes;
+};
+#endif // MMAP
+
+#ifdef GRID_MPI3_SHM_NONE
+void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
+{
+  std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
+  assert(_ShmSetup==1);
+  assert(_ShmAlloc==0);
+  //////////////////////////////////////////////////////////////////////////////////////////////////////////
+  // allocate the shared windows for our group
+  //////////////////////////////////////////////////////////////////////////////////////////////////////////
+  MPI_Barrier(WorldShmComm);
+  WorldShmCommBufs.resize(WorldShmSize);
+
+  ////////////////////////////////////////////////////////////////////////////////////////////
+  // Hugetlbf and others map filesystems as mappable huge pages
+  ////////////////////////////////////////////////////////////////////////////////////////////
+  char shm_name [NAME_MAX];
+  assert(WorldShmSize == 1);
+  for(int r=0;r<WorldShmSize;r++){
+
+    int fd=-1;
+    int mmap_flag = MAP_SHARED |MAP_ANONYMOUS ;
+#ifdef MAP_POPULATE
+    mmap_flag|=MAP_POPULATE;
+#endif
+#ifdef MAP_HUGETLB
+    if ( flags ) mmap_flag |= MAP_HUGETLB;
+#endif
+    void *ptr = (void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);
+    if ( ptr == (void *)MAP_FAILED ) {
+      printf("mmap %s failed\n",shm_name);
+      perror("failed mmap");  assert(0);
+    }
+    assert(((uint64_t)ptr&0x3F)==0);
+    close(fd);
+    WorldShmCommBufs[r] =ptr;
+    // std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
   }
   _ShmAlloc=1;
   _ShmAllocBytes  = bytes;
@@ -232,6 +470,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 ////////////////////////////////////////////////////////////////////////////////////////////
 void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 {
+  std::cout << "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
   assert(_ShmSetup==1);
   assert(_ShmAlloc==0);
   MPI_Barrier(WorldShmComm);
@@ -243,7 +482,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)

     size_t size = bytes;

-    sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldNode,r);
+    struct passwd *pw = getpwuid (getuid());
+    sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);

     shm_unlink(shm_name);
     int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
@@ -259,7 +499,11 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 #endif
     void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);

-    if ( ptr == (void * )MAP_FAILED ) { perror("failed mmap"); assert(0); }
+    // std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl;
+    if ( ptr == (void * )MAP_FAILED ) {
+      perror("failed mmap");
+      assert(0);
+    }
     assert(((uint64_t)ptr&0x3F)==0);

     WorldShmCommBufs[r] =ptr;
@@ -274,7 +518,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)

     size_t size = bytes ;

-    sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldNode,r);
+    struct passwd *pw = getpwuid (getuid());
+    sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);

     int fd=shm_open(shm_name,O_RDWR,0666);
     if ( fd<0 ) { perror("failed shm_open"); assert(0); }
@@ -292,6 +537,9 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 }
 #endif

+
+
+
 ////////////////////////////////////////////////////////
 // Global shared functionality finished
 // Now move to per communicator functionality
@@ -318,11 +566,12 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
   heap_size = GlobalSharedMemory::ShmAllocBytes();
   for(int r=0;r<ShmSize;r++){

-    uint32_t sr = (r==ShmRank) ? GlobalSharedMemory::WorldRank : 0 ;
-    MPI_Allreduce(MPI_IN_PLACE,&sr,1,MPI_UINT32_T,MPI_SUM,comm);
-    ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[sr];
+    uint32_t wsr = (r==ShmRank) ? GlobalSharedMemory::WorldShmRank : 0 ;
+    MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,ShmComm);
+    ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[wsr];
+    // std::cout << "SetCommunicator ShmCommBufs ["<< r<< "] = "<< ShmCommBufs[r]<< " wsr = "<<wsr<<std::endl;
   }
   ShmBufferFreeAll();
@@ -391,5 +640,12 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
   return (void *) remote;
 }
 }
+SharedMemory::~SharedMemory()
+{
+  int MPI_is_finalised;  MPI_Finalized(&MPI_is_finalised);
+  if ( !MPI_is_finalised ) {
+    MPI_Comm_free(&ShmComm);
+  }
+};

 }
@@ -122,5 +122,7 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
 {
   return NULL;
 }

+SharedMemory::~SharedMemory()
+{};
 }
@@ -45,31 +45,33 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimen
   int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
   int e1=rhs._grid->_slice_nblock[dimension];
   int e2=rhs._grid->_slice_block[dimension];
+  int ent = 0;
+
+  static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
+
   int stride=rhs._grid->_slice_stride[dimension];
   if ( cbmask == 0x3 ) {
-    parallel_for_nest2(int n=0;n<e1;n++){
+    for(int n=0;n<e1;n++){
       for(int b=0;b<e2;b++){
         int o  = n*stride;
         int bo = n*e2;
-        buffer[off+bo+b]=rhs._odata[so+o+b];
+        table[ent++] = std::pair<int,int>(off+bo+b,so+o+b);
       }
     }
   } else {
     int bo=0;
-    std::vector<std::pair<int,int> > table;
     for(int n=0;n<e1;n++){
       for(int b=0;b<e2;b++){
         int o  = n*stride;
         int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
         if ( ocb &cbmask ) {
-          table.push_back(std::pair<int,int> (bo++,o+b));
+          table[ent++]=std::pair<int,int> (off+bo++,so+o+b);
         }
       }
     }
-    parallel_for(int i=0;i<table.size();i++){
-      buffer[off+table[i].first]=rhs._odata[so+table[i].second];
-    }
   }
+  parallel_for(int i=0;i<ent;i++){
+    buffer[table[i].first]=rhs._odata[table[i].second];
+  }
 }
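The restructuring in this hunk and the several that follow is one technique applied repeatedly: build a table of (destination, source) index pairs serially, then perform a flat, branch-free copy in parallel. A generic sketch of the copy phase; gather_with_table is an illustrative name, not a Grid function:

#include <vector>
#include <utility>

// 'ent' bounds the valid entries, so an oversized, reused table is fine.
template <class T>
void gather_with_table(std::vector<T> &buf, const std::vector<T> &data,
                       const std::vector<std::pair<int,int>> &table, int ent) {
#ifdef _OPENMP
#pragma omp parallel for
#endif
  for (int i = 0; i < ent; i++) {
    buf[table[i].first] = data[table[i].second];
  }
}

Note the trade-off taken in the diff: the table is function-static to avoid reallocating it on every call, which sacrifices re-entrancy if several threads ever call the gather concurrently.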
@@ -141,31 +143,35 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
   int e2=rhs._grid->_slice_block[dimension];
   int stride=rhs._grid->_slice_stride[dimension];

+  static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
+  int ent    =0;
+
   if ( cbmask ==0x3 ) {
-    parallel_for_nest2(int n=0;n<e1;n++){
+
+    for(int n=0;n<e1;n++){
       for(int b=0;b<e2;b++){
         int o   =n*rhs._grid->_slice_stride[dimension];
         int bo  =n*rhs._grid->_slice_block[dimension];
-        rhs._odata[so+o+b]=buffer[bo+b];
+        table[ent++] = std::pair<int,int>(so+o+b,bo+b);
       }
     }
+
   } else {
-    std::vector<std::pair<int,int> > table;
     int bo=0;
     for(int n=0;n<e1;n++){
      for(int b=0;b<e2;b++){
       int o   =n*rhs._grid->_slice_stride[dimension];
       int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
       if ( ocb & cbmask ) {
-        table.push_back(std::pair<int,int> (so+o+b,bo++));
+        table[ent++]=std::pair<int,int> (so+o+b,bo++);
       }
      }
     }
-    parallel_for(int i=0;i<table.size();i++){
-      // std::cout << "Rcv"<< table[i].first << " " << table[i].second << " " <<buffer[table[i].second]<<std::endl;
-      rhs._odata[table[i].first]=buffer[table[i].second];
-    }
   }
+
+  parallel_for(int i=0;i<ent;i++){
+    rhs._odata[table[i].first]=buffer[table[i].second];
+  }
 }
@@ -228,29 +234,32 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
   int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
   int e2=rhs._grid->_slice_block[dimension];
   int stride = rhs._grid->_slice_stride[dimension];
+  static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
+  int ent=0;
+
   if(cbmask == 0x3 ){
-    parallel_for_nest2(int n=0;n<e1;n++){
+    for(int n=0;n<e1;n++){
       for(int b=0;b<e2;b++){
         int o =n*stride+b;
-        //lhs._odata[lo+o]=rhs._odata[ro+o];
-        vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
+        table[ent++] = std::pair<int,int>(lo+o,ro+o);
       }
     }
   } else {
-    parallel_for_nest2(int n=0;n<e1;n++){
+    for(int n=0;n<e1;n++){
      for(int b=0;b<e2;b++){
       int o =n*stride+b;
       int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
       if ( ocb&cbmask ) {
-        //lhs._odata[lo+o]=rhs._odata[ro+o];
-        vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
+        table[ent++] = std::pair<int,int>(lo+o,ro+o);
       }
      }
    }
  }
+
+  parallel_for(int i=0;i<ent;i++){
+    lhs._odata[table[i].first]=rhs._odata[table[i].second];
+  }
 }
@@ -269,16 +278,28 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
   int e2=rhs._grid->_slice_block [dimension];
   int stride = rhs._grid->_slice_stride[dimension];

-  parallel_for_nest2(int n=0;n<e1;n++){
-    for(int b=0;b<e2;b++){
+  static std::vector<std::pair<int,int> > table;  table.resize(e1*e2);
+  int ent=0;
+
+  double t_tab,t_perm;
+  if ( cbmask == 0x3 ) {
+    for(int n=0;n<e1;n++){
+    for(int b=0;b<e2;b++){
+      int o  =n*stride;
+      table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
+    }}
+  } else {
+    for(int n=0;n<e1;n++){
+    for(int b=0;b<e2;b++){
       int o  =n*stride;
       int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
-      if ( ocb&cbmask ) {
-        permute(lhs._odata[lo+o+b],rhs._odata[ro+o+b],permute_type);
-      }
-  }}
+      if ( ocb&cbmask ) table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
+    }}
+  }
+
+  parallel_for(int i=0;i<ent;i++){
+    permute(lhs._odata[table[i].first],rhs._odata[table[i].second],permute_type);
+  }
 }
@@ -291,6 +312,8 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
   sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
   sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);

+  double t_local;
+
   if ( sshift[0] == sshift[1] ) {
     Cshift_local(ret,rhs,dimension,shift,0x3);
   } else {
@@ -299,7 +322,7 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
   }
 }

-template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
+template<class vobj> void Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
 {
   GridBase *grid = rhs._grid;
   int fd = grid->_fdimensions[dimension];
@@ -326,10 +349,6 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
     int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
     int sx     = (x+sshift)%rd;

-    // FIXME : This must change where we have a
-    // Rotate slice.
-
-    // Document how this works ; why didn't I do this when I first wrote it...
     // wrap is whether sshift > rd.
     //  num is sshift mod rd.
     //
@@ -366,9 +385,7 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
     if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
     else                 Copy_plane(ret,rhs,dimension,x,sx,cbmask);

-
   }
-  return ret;
 }
 }
 #endif
@@ -54,13 +54,13 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension

   if ( !comm_dim ) {
-    // std::cout << "Cshift_local" <<std::endl;
+    //std::cout << "CSHIFT: Cshift_local" <<std::endl;
     Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
   } else if ( splice_dim ) {
-    // std::cout << "Cshift_comms_simd" <<std::endl;
+    //std::cout << "CSHIFT: Cshift_comms_simd call - splice_dim = " << splice_dim << " shift " << shift << " dimension = " << dimension << std::endl;
     Cshift_comms_simd(ret,rhs,dimension,shift);
   } else {
-    // std::cout << "Cshift_comms" <<std::endl;
+    //std::cout << "CSHIFT: Cshift_comms" <<std::endl;
     Cshift_comms(ret,rhs,dimension,shift);
   }
   return ret;
@@ -91,9 +91,12 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,const Lattice<vob
   sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
   sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);

+  //std::cout << "Cshift_comms_simd dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
   if ( sshift[0] == sshift[1] ) {
+    //std::cout << "Single pass Cshift_comms" <<std::endl;
     Cshift_comms_simd(ret,rhs,dimension,shift,0x3);
   } else {
+    //std::cout << "Two pass Cshift_comms" <<std::endl;
     Cshift_comms_simd(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
     Cshift_comms_simd(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
   }
@@ -175,6 +178,10 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
   int simd_layout     = grid->_simd_layout[dimension];
   int comm_dim        = grid->_processors[dimension] >1 ;

+  //std::cout << "Cshift_comms_simd dim "<< dimension << " fd "<<fd<<" rd "<<rd
+  //    << " ld "<<ld<<" pd " << pd<<" simd_layout "<<simd_layout
+  //    << " comm_dim " << comm_dim << " cbmask " << cbmask <<std::endl;
+
   assert(comm_dim==1);
   assert(simd_layout==2);
   assert(shift>=0);
File diff suppressed because it is too large
@@ -244,19 +244,11 @@ namespace Grid {

   template<class sobj,class vobj> strong_inline
   RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
-    ret.checkerboard = x.checkerboard;
-    conformable(ret,x);
-    conformable(x,y);
-    axpy(ret,a,x,y);
-    return norm2(ret);
+    return axpy_norm_fast(ret,a,x,y);
   }
   template<class sobj,class vobj> strong_inline
   RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
-    ret.checkerboard = x.checkerboard;
-    conformable(ret,x);
-    conformable(x,y);
-    axpby(ret,a,b,x,y);
-    return norm2(ret); // FIXME implement parallel norm in ss loop
+    return axpby_norm_fast(ret,a,b,x,y);
   }

 }
@@ -257,7 +257,40 @@ public:
     }
   }

+  Lattice(Lattice&& r){ // move constructor
+    _grid = r._grid;
+    checkerboard = r.checkerboard;
+    _odata=std::move(r._odata);
+  }
+
+  inline Lattice<vobj> & operator = (Lattice<vobj> && r)
+  {
+    _grid        = r._grid;
+    checkerboard = r.checkerboard;
+    _odata       =std::move(r._odata);
+    return *this;
+  }
+
+  inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
+    _grid        = r._grid;
+    checkerboard = r.checkerboard;
+    _odata.resize(_grid->oSites());// essential
+
+    parallel_for(int ss=0;ss<_grid->oSites();ss++){
+      _odata[ss]=r._odata[ss];
+    }
+    return *this;
+  }
+
+  template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
+    this->checkerboard = r.checkerboard;
+    conformable(*this,r);
+
+    parallel_for(int ss=0;ss<_grid->oSites();ss++){
+      this->_odata[ss]=r._odata[ss];
+    }
+    return *this;
+  }
+
   virtual ~Lattice(void) = default;

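The move constructor and move assignment added here let a Lattice returned from a function donate its storage instead of copying every site. A reduced sketch of the same shape; Field and make_field are illustrative, not Grid types:

#include <utility>
#include <vector>

struct Field {
  std::vector<double> odata;
  int checkerboard = 0;

  Field() = default;
  Field(Field &&r) noexcept
    : odata(std::move(r.odata)), checkerboard(r.checkerboard) {}
  Field &operator=(Field &&r) noexcept {
    odata        = std::move(r.odata);
    checkerboard = r.checkerboard;
    return *this;
  }
};

Field make_field() { Field f; f.odata.resize(1 << 20); return f; }

int main() {
  Field g = make_field(); // moves the vector; no O(volume) site copy
  return g.checkerboard;
}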
@@ -277,15 +310,6 @@ public:
     return *this;
   }

-  template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
-    this->checkerboard = r.checkerboard;
-    conformable(*this,r);
-
-    parallel_for(int ss=0;ss<_grid->oSites();ss++){
-      this->_odata[ss]=r._odata[ss];
-    }
-    return *this;
-  }

   // *=,+=,-= operators inherit behvour from correspond */+/- operation
   template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
@@ -179,7 +179,7 @@ namespace Grid {
     return ret;
   }

-#define DECLARE_RELATIONAL(op,functor) \
+#define DECLARE_RELATIONAL_EQ(op,functor) \
   template<class vsimd,IfSimd<vsimd> = 0>\
   inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\
   {\
@@ -198,11 +198,6 @@ namespace Grid {
     typedef typename vsimd::scalar_type scalar;\
     return Comparison(functor<scalar,scalar>(),lhs,rhs);\
   }\
-  template<class vsimd>\
-  inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
-  { \
-    return lhs._internal op rhs._internal; \
-  } \
   template<class vsimd>\
   inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs) \
   { \
@@ -212,14 +207,21 @@ namespace Grid {
   inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
   { \
     return lhs op rhs._internal; \
-  }
+  } \
+
+#define DECLARE_RELATIONAL(op,functor) \
+  DECLARE_RELATIONAL_EQ(op,functor)    \
+  template<class vsimd>\
+  inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
+  { \
+    return lhs._internal op rhs._internal; \
+  }

 DECLARE_RELATIONAL(<,slt);
 DECLARE_RELATIONAL(<=,sle);
 DECLARE_RELATIONAL(>,sgt);
 DECLARE_RELATIONAL(>=,sge);
-DECLARE_RELATIONAL(==,seq);
+DECLARE_RELATIONAL_EQ(==,seq);
 DECLARE_RELATIONAL(!=,sne);

 #undef DECLARE_RELATIONAL
@@ -52,23 +52,5 @@ namespace Grid {
     }
   };

-  // LatticeCoordinate();
-  // FIXME for debug; deprecate this; made obscelete by
-  template<class vobj> void lex_sites(Lattice<vobj> &l){
-    Real *v_ptr = (Real *)&l._odata[0];
-    size_t o_len = l._grid->oSites();
-    size_t v_len = sizeof(vobj)/sizeof(vRealF);
-    size_t vec_len = vRealF::Nsimd();
-
-    for(int i=0;i<o_len;i++){
-      for(int j=0;j<v_len;j++){
-        for(int vv=0;vv<vec_len;vv+=2){
-          v_ptr[i*v_len*vec_len+j*vec_len+vv  ]= i+vv*500;
-          v_ptr[i*v_len*vec_len+j*vec_len+vv+1]= i+vv*500;
-        }
-      }}
-  }
-
-
 }
 #endif
@@ -33,7 +33,7 @@ namespace Grid {
   // Deterministic Reduction operations
   ////////////////////////////////////////////////////////////////////////////////////////////////////
 template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
-  ComplexD nrm = innerProduct(arg,arg);
+  auto nrm = innerProduct(arg,arg);
   return std::real(nrm);
 }

@@ -43,32 +43,85 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
 {
   typedef typename vobj::scalar_type scalar_type;
   typedef typename vobj::vector_typeD vector_type;
-  scalar_type  nrm;

   GridBase *grid = left._grid;
+  const int pad = 8;

-  std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
+  ComplexD  inner;
+  Vector<ComplexD> sumarray(grid->SumArraySize()*pad);

   parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
     int nwork, mywork, myoff;
     GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);

-    decltype(innerProductD(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation
+    decltype(innerProductD(left._odata[0],right._odata[0])) vinner=zero; // private to thread; sub summation
     for(int ss=myoff;ss<mywork+myoff; ss++){
-      vnrm = vnrm + innerProductD(left._odata[ss],right._odata[ss]);
+      vinner = vinner + innerProductD(left._odata[ss],right._odata[ss]);
     }
-    sumarray[thr]=TensorRemove(vnrm) ;
+    // All threads sum across SIMD; reduce serial work at end
+    // one write per cacheline with streaming store
+    ComplexD tmp = Reduce(TensorRemove(vinner)) ;
+    vstream(sumarray[thr*pad],tmp);
   }

-  vector_type vvnrm; vvnrm=zero;  // sum across threads
+  inner=0.0;
   for(int i=0;i<grid->SumArraySize();i++){
-    vvnrm = vvnrm+sumarray[i];
+    inner = inner+sumarray[i*pad];
   }
-  nrm = Reduce(vvnrm);// sum across simd
-  right._grid->GlobalSum(nrm);
+  right._grid->GlobalSum(inner);
+  return inner;
+}
+
+/////////////////////////
+// Fast axpby_norm
+// z = a x + b y
+// return norm z
+/////////////////////////
+template<class sobj,class vobj> strong_inline RealD
+axpy_norm_fast(Lattice<vobj> &z,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y)
+{
+  sobj one(1.0);
+  return axpby_norm_fast(z,a,one,x,y);
+}
+
+template<class sobj,class vobj> strong_inline RealD
+axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
+{
+  const int pad = 8;
+  z.checkerboard = x.checkerboard;
+  conformable(z,x);
+  conformable(x,y);
+
+  typedef typename vobj::scalar_type scalar_type;
+  typedef typename vobj::vector_typeD vector_type;
+  RealD  nrm;
+
+  GridBase *grid = x._grid;
+
+  Vector<RealD> sumarray(grid->SumArraySize()*pad);
+
+  parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
+    int nwork, mywork, myoff;
+    GridThread::GetWork(x._grid->oSites(),thr,mywork,myoff);
+
+    // private to thread; sub summation
+    decltype(innerProductD(z._odata[0],z._odata[0])) vnrm=zero;
+    for(int ss=myoff;ss<mywork+myoff; ss++){
+      vobj tmp = a*x._odata[ss]+b*y._odata[ss];
+      vnrm = vnrm + innerProductD(tmp,tmp);
+      vstream(z._odata[ss],tmp);
+    }
+    vstream(sumarray[thr*pad],real(Reduce(TensorRemove(vnrm)))) ;
+  }
+
+  nrm = 0.0; // sum across threads; linear in thread count but fast
+  for(int i=0;i<grid->SumArraySize();i++){
+    nrm = nrm+sumarray[i*pad];
+  }
+  z._grid->GlobalSum(nrm);
   return nrm;
 }

 template<class Op,class T1>
 inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
   ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
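The pad factor threaded through both reductions above is a false-sharing fix: if per-thread partials sit in adjacent elements, every write invalidates the same cache line on all cores. Spacing them pad elements apart gives each thread its own line. A hedged sketch of the idea; padded_sum and nthreads are illustrative, and 64-byte lines with 16-byte std::complex<double> are the assumed sizes:

#include <complex>
#include <cstddef>
#include <vector>

std::complex<double> padded_sum(const std::vector<std::complex<double>> &v,
                                int nthreads) {
  const int pad = 8; // 8 * 16 bytes = 128 bytes: at least one line per slot
  std::vector<std::complex<double>> partial(nthreads * pad, 0.0);
#ifdef _OPENMP
#pragma omp parallel for
#endif
  for (int t = 0; t < nthreads; t++) {
    std::complex<double> s = 0.0;
    for (std::size_t i = t; i < v.size(); i += nthreads) s += v[i];
    partial[t * pad] = s; // single write, on this thread's own cache line
  }
  std::complex<double> total = 0.0;
  for (int t = 0; t < nthreads; t++) total += partial[t * pad];
  return total;
}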
@@ -221,6 +274,115 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
   }
 }

+template<class vobj>
+static void mySliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
+{
+  // std::cout << GridLogMessage << "Start mySliceInnerProductVector" << std::endl;
+
+  typedef typename vobj::scalar_type scalar_type;
+  std::vector<scalar_type> lsSum;
+  localSliceInnerProductVector(result, lhs, rhs, lsSum, orthogdim);
+  globalSliceInnerProductVector(result, lhs, lsSum, orthogdim);
+  // std::cout << GridLogMessage << "End mySliceInnerProductVector" << std::endl;
+}
+
+template <class vobj>
+static void localSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, const Lattice<vobj> &rhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
+{
+  // std::cout << GridLogMessage << "Start prep" << std::endl;
+  typedef typename vobj::vector_type vector_type;
+  typedef typename vobj::scalar_type scalar_type;
+  GridBase  *grid = lhs._grid;
+  assert(grid!=NULL);
+  conformable(grid,rhs._grid);
+
+  const int    Nd = grid->_ndimension;
+  const int Nsimd = grid->Nsimd();
+
+  assert(orthogdim >= 0);
+  assert(orthogdim < Nd);
+
+  int fd=grid->_fdimensions[orthogdim];
+  int ld=grid->_ldimensions[orthogdim];
+  int rd=grid->_rdimensions[orthogdim];
+  // std::cout << GridLogMessage << "Start alloc" << std::endl;
+
+  std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first
+  lsSum.resize(ld,scalar_type(0.0));                  // sum across these down to scalars
+  std::vector<iScalar<scalar_type>> extracted(Nsimd); // splitting the SIMD
+  // std::cout << GridLogMessage << "End alloc" << std::endl;
+
+  result.resize(fd); // And then global sum to return the same vector to every node for IO to file
+  for(int r=0;r<rd;r++){
+    lvSum[r]=zero;
+  }
+
+  int e1=    grid->_slice_nblock[orthogdim];
+  int e2=    grid->_slice_block [orthogdim];
+  int stride=grid->_slice_stride[orthogdim];
+  // std::cout << GridLogMessage << "End prep" << std::endl;
+  // std::cout << GridLogMessage << "Start parallel inner product, _rd = " << rd << std::endl;
+  vector_type vv;
+  parallel_for(int r=0;r<rd;r++)
+  {
+    int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
+
+    for(int n=0;n<e1;n++){
+      for(int b=0;b<e2;b++){
+        int ss = so + n * stride + b;
+        vv = TensorRemove(innerProduct(lhs._odata[ss], rhs._odata[ss]));
+        lvSum[r] = lvSum[r] + vv;
+      }
+    }
+  }
+  // std::cout << GridLogMessage << "End parallel inner product" << std::endl;
+
+  // Sum across simd lanes in the plane, breaking out orthog dir.
+  std::vector<int> icoor(Nd);
+  for(int rt=0;rt<rd;rt++){
+
+    iScalar<vector_type> temp;
+    temp._internal = lvSum[rt];
+    extract(temp,extracted);
+
+    for(int idx=0;idx<Nsimd;idx++){
+
+      grid->iCoorFromIindex(icoor,idx);
+
+      int ldx =rt+icoor[orthogdim]*rd;
+
+      lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
+
+    }
+  }
+  // std::cout << GridLogMessage << "End sum over simd lanes" << std::endl;
+}
+template <class vobj>
+static void globalSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
+{
+  typedef typename vobj::scalar_type scalar_type;
+  GridBase *grid = lhs._grid;
+  int fd = result.size();
+  int ld = lsSum.size();
+  // sum over nodes.
+  std::vector<scalar_type> gsum;
+  gsum.resize(fd, scalar_type(0.0));
+  // std::cout << GridLogMessage << "Start of gsum[t] creation:"  << std::endl;
+  for(int t=0;t<fd;t++){
+    int pt = t/ld; // processor plane
+    int lt = t%ld;
+    if ( pt == grid->_processor_coor[orthogdim] ) {
+      gsum[t]=lsSum[lt];
+    }
+  }
+  // std::cout << GridLogMessage << "End of gsum[t] creation:"  << std::endl;
+  // std::cout << GridLogMessage << "Start of GlobalSumVector:"  << std::endl;
+  grid->GlobalSumVector(&gsum[0], fd);
+  // std::cout << GridLogMessage << "End of GlobalSumVector:"  << std::endl;
+
+  result = gsum;
+}
 template<class vobj>
 static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
 {
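The local/global split introduced above reduces each rank's slab to a short per-slice vector, scatters it into a full-length result at the rank's slice offset, and finishes with a single global reduction. The same pattern in miniature; global_slice_sum and my_offset are illustrative names:

#include <mpi.h>
#include <algorithm>
#include <complex>
#include <cstddef>
#include <vector>

void global_slice_sum(std::vector<std::complex<double>> &gsum,
                      const std::vector<std::complex<double>> &lsSum,
                      int my_offset, MPI_Comm comm) {
  std::fill(gsum.begin(), gsum.end(), std::complex<double>(0.0));
  for (std::size_t lt = 0; lt < lsSum.size(); lt++)
    gsum[my_offset + lt] = lsSum[lt];               // only our own slices
  MPI_Allreduce(MPI_IN_PLACE, gsum.data(),
                static_cast<int>(2 * gsum.size()),  // re+im per entry
                MPI_DOUBLE, MPI_SUM, comm);
}

One collective over the whole vector is much cheaper at scale than a separate reduction per slice.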
@@ -158,10 +158,19 @@ namespace Grid {
       // tens of seconds per trajectory so this is clean in all reasonable cases,
       // and margin of safety is orders of magnitude.
       // We could hack Sitmo to skip in the higher order words of state if necessary
+      //
+      // Replace with 2^30 ; avoid problem on large volumes
+      //
       /////////////////////////////////////////////////////////////////////////////////////
       // uint64_t skip = site+1; //   Old init Skipped then drew.  Checked compat with faster init
+      const int shift = 30;
+
       uint64_t skip = site;
-      skip = skip<<40;
+
+      skip = skip<<shift;
+
+      assert((skip >> shift)==site); // check for overflow
+
       eng.discard(skip);
       //      std::cout << " Engine  " <<site << " state " <<eng<<std::endl;
     }
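The arithmetic behind this hunk is worth spelling out: with the old shift of 40, skip = site << 40 wraps a 64-bit integer once site reaches 2^24, only sixteen million sites, after which RNG streams would silently overlap. Shift 30 is safe up to 2^34 sites, and the round-trip assert turns any future overflow into a hard failure. A self-contained check:

#include <cassert>
#include <cstdint>

int main() {
  const int shift = 30;
  uint64_t site = 1ull << 25;      // would wrap at shift 40 (25+40 > 63)
  uint64_t skip = site << shift;   // fine here: 25+30 bits fit in 64
  assert((skip >> shift) == site); // round trip detects overflow
  return 0;
}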
@@ -308,6 +317,19 @@ namespace Grid {
     std::seed_seq src(seeds.begin(),seeds.end());
     Seed(src,0);
   }
+
+  void SeedUniqueString(const std::string &s){
+    std::vector<int> seeds;
+    std::stringstream sha;
+    seeds = GridChecksum::sha256_seeds(s);
+    for(int i=0;i<seeds.size();i++) {
+      sha << std::hex << seeds[i];
+    }
+    std::cout << GridLogMessage << "Intialising serial RNG with unique string '"
+              << s << "'" << std::endl;
+    std::cout << GridLogMessage << "Seed SHA256: " << sha.str() << std::endl;
+    SeedFixedIntegers(seeds);
+  }
 };

 class GridParallelRNG : public GridRNGbase {
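Seeding from a string makes runs reproducible without hand-managed integer seeds: the string is hashed with SHA-256 and the digest words become the fixed-integer seeds. A usage sketch (the string contents are illustrative):

    GridSerialRNG   sRNG;
    GridParallelRNG pRNG(grid);  // grid: an existing GridCartesian*
    // Identical strings always reproduce identical RNG streams:
    sRNG.SeedUniqueString("ensemble A, serial RNG, traj 1200");
    pRNG.SeedUniqueString("ensemble A, parallel RNG, traj 1200");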
@@ -368,6 +390,14 @@ namespace Grid {
     _time_counter += usecond()- inner_time_counter;
   };
+
+  void SeedUniqueString(const std::string &s){
+    std::vector<int> seeds;
+    seeds = GridChecksum::sha256_seeds(s);
+    std::cout << GridLogMessage << "Intialising parallel RNG with unique string '"
+              << s << "'" << std::endl;
+    std::cout << GridLogMessage << "Seed SHA256: " << GridChecksum::sha256_string(seeds) << std::endl;
+    SeedFixedIntegers(seeds);
+  }
   void SeedFixedIntegers(const std::vector<int> &seeds){

     // Everyone generates the same seed_seq based on input seeds
@@ -464,9 +464,11 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
   assert(orthog>=0);

   for(int d=0;d<nh;d++){
+    if ( d!=orthog ) {
     assert(lg->_processors[d]  == hg->_processors[d]);
     assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
     }
+  }

   // the above should guarantee that the operations are local
   parallel_for(int idx=0;idx<lg->lSites();idx++){
@@ -485,7 +487,7 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int


 template<class vobj>
-void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
+void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
 {
   typedef typename vobj::scalar_object sobj;

@@ -499,9 +501,11 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slic
   assert(orthog>=0);

   for(int d=0;d<nh;d++){
+    if ( d!=orthog ) {
     assert(lg->_processors[d]  == hg->_processors[d]);
     assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
     }
+  }

   // the above should guarantee that the operations are local
   parallel_for(int idx=0;idx<lg->lSites();idx++){
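Usage sketch for the slice copies above: the relaxed asserts let the two lattices differ only along orthog, so a thin slab can be pulled out of a full 4d field and pushed back. Variable names are illustrative.

    const int Tdir = 3; // assumed orthogonal (time) direction
    // Pull local slice t of the 4d field into slice 0 of a thinner buffer:
    ExtractSliceLocal(slabField, fullField, /*slice_lo=*/0, /*slice_hi=*/t, Tdir);
    // ... operate on slabField ...
    InsertSliceLocal (slabField, fullField, /*slice_lo=*/0, /*slice_hi=*/t, Tdir);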
@@ -599,6 +603,51 @@ unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
     extract1(in_vobj, out_ptrs, 0);
   }
 }
+
+template<typename vobj, typename sobj>
+typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type
+unvectorizeToRevLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
+{
+  typedef typename vobj::vector_type vtype;
+
+  GridBase* in_grid = in._grid;
+  out.resize(in_grid->lSites());
+
+  int ndim     = in_grid->Nd();
+  int in_nsimd = vtype::Nsimd();
+
+  std::vector<std::vector<int> > in_icoor(in_nsimd);
+
+  for(int lane=0; lane < in_nsimd; lane++){
+    in_icoor[lane].resize(ndim);
+    in_grid->iCoorFromIindex(in_icoor[lane], lane);
+  }
+
+  parallel_for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index
+    //Assemble vector of pointers to output elements
+    std::vector<sobj*> out_ptrs(in_nsimd);
+
+    std::vector<int> in_ocoor(ndim);
+    in_grid->oCoorFromOindex(in_ocoor, in_oidx);
+
+    std::vector<int> lcoor(in_grid->Nd());
+
+    for(int lane=0; lane < in_nsimd; lane++){
+      for(int mu=0;mu<ndim;mu++)
+        lcoor[mu] = in_ocoor[mu] + in_grid->_rdimensions[mu]*in_icoor[lane][mu];
+
+      int lex;
+      Lexicographic::IndexFromCoorReversed(lcoor, lex, in_grid->_ldimensions);
+      out_ptrs[lane] = &out[lex];
+    }
+
+    //Unpack into those ptrs
+    const vobj & in_vobj = in._odata[in_oidx];
+    extract1(in_vobj, out_ptrs, 0);
+  }
+}
+
 //Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order
 template<typename vobj, typename sobj>
 typename std::enable_if<isSIMDvectorized<vobj>::value
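For orientation, a plausible rendering of the reversed lexicographic index used above; the real routine lives in Lexicographic, and this sketch assumes the conventional definition with the last coordinate innermost instead of the first.

    // forward : index = c0 + L0*(c1 + L1*(c2 + L2*c3));  c0 varies fastest
    // reversed: index = c3 + L3*(c2 + L2*(c1 + L1*c0));  c3 varies fastest
    void IndexFromCoorReversed(const std::vector<int> &coor, int &index,
                               const std::vector<int> &dims) {
      int stride = 1; index = 0;
      for (int d = dims.size()-1; d >= 0; d--) { // last dimension innermost
        index  += stride*coor[d];
        stride *= dims[d];
      }
    }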
@@ -648,10 +697,59 @@ vectorizeFromLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
   }
 }
+
+template<typename vobj, typename sobj>
+typename std::enable_if<isSIMDvectorized<vobj>::value
+                        && !isSIMDvectorized<sobj>::value, void>::type
+vectorizeFromRevLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
+{
+  typedef typename vobj::vector_type vtype;
+
+  GridBase* grid = out._grid;
+  assert(in.size()==grid->lSites());
+
+  int ndim  = grid->Nd();
+  int nsimd = vtype::Nsimd();
+
+  std::vector<std::vector<int> > icoor(nsimd);
+
+  for(int lane=0; lane < nsimd; lane++){
+    icoor[lane].resize(ndim);
+    grid->iCoorFromIindex(icoor[lane],lane);
+  }
+
+  parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index
+    //Assemble vector of pointers to output elements
+    std::vector<sobj*> ptrs(nsimd);
+
+    std::vector<int> ocoor(ndim);
+    grid->oCoorFromOindex(ocoor, oidx);
+
+    std::vector<int> lcoor(grid->Nd());
+
+    for(int lane=0; lane < nsimd; lane++){
+      for(int mu=0;mu<ndim;mu++){
+        lcoor[mu] = ocoor[mu] + grid->_rdimensions[mu]*icoor[lane][mu];
+      }
+
+      int lex;
+      Lexicographic::IndexFromCoorReversed(lcoor, lex, grid->_ldimensions);
+      ptrs[lane] = &in[lex];
+    }
+
+    //pack from those ptrs
+    vobj vecobj;
+    merge1(vecobj, ptrs, 0);
+    out._odata[oidx] = vecobj;
+  }
+}
+
 //Convert a Lattice from one precision to another
 template<class VobjOut, class VobjIn>
 void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
   assert(out._grid->Nd() == in._grid->Nd());
+  assert(out._grid->FullDimensions() == in._grid->FullDimensions());
   out.checkerboard = in.checkerboard;
   GridBase *in_grid=in._grid;
   GridBase *out_grid = out._grid;
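Round-trip usage sketch of the new pair, e.g. to hand sites to an external package that indexes with the opposite convention (field and vobj are illustrative):

    std::vector<typename vobj::scalar_object> buf;
    unvectorizeToRevLexOrdArray(buf, field);  // buf.size() == grid->lSites()
    // ... external processing in reversed lexicographic site order ...
    vectorizeFromRevLexOrdArray(buf, field);  // repack into SIMD layout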
@@ -86,7 +86,7 @@ protected:
   Colours &Painter;
   int active;
   int timing_mode;
-  int topWidth{-1};
+  int topWidth{-1}, chanWidth{-1};
   static int timestamp;
   std::string name, topName;
   std::string COLOUR;
@@ -126,6 +126,7 @@ public:
     }
   }
   void setTopWidth(const int w) {topWidth = w;}
+  void setChanWidth(const int w) {chanWidth = w;}

   friend std::ostream& operator<< (std::ostream& stream, Logger& log){

@@ -136,13 +137,20 @@ public:
       stream << std::setw(log.topWidth);
     }
     stream << log.topName << log.background()<< " : ";
-    stream << log.colour() << std::left << log.name << log.background() << " : ";
+    stream << log.colour() << std::left;
+    if (log.chanWidth > 0)
+    {
+      stream << std::setw(log.chanWidth);
+    }
+    stream << log.name << log.background() << " : ";
     if ( log.timestamp ) {
       log.StopWatch->Stop();
       GridTime now = log.StopWatch->Elapsed();

       if ( log.timing_mode==1 ) log.StopWatch->Reset();
       log.StopWatch->Start();
-      stream << log.evidence()<< std::setw(6)<<now << log.background() << " : " ;
+      stream << log.evidence()
+             << now << log.background() << " : " ;
     }
     stream << log.colour();
     return stream;
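Usage sketch for the new channel-width control, mirroring setTopWidth; it pads the channel-name column so multi-channel logs line up:

    GridLogMessage.setTopWidth(8);    // existing: pads the top-level name
    GridLogMessage.setChanWidth(12);  // new: pads the channel-name column
    std::cout << GridLogMessage << "columns now align" << std::endl;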
Grid/parallelIO/BinaryIO.cc (new file, 3 lines)
@@ -0,0 +1,3 @@
+#include <Grid/GridCore.h>
+
+int Grid::BinaryIO::latticeWriteMaxRetry = -1;
@@ -81,6 +81,7 @@ inline void removeWhitespace(std::string &key)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 class BinaryIO {
  public:
+  static int latticeWriteMaxRetry;

   /////////////////////////////////////////////////////////////////////////////
   // more byte manipulation helpers
@@ -91,7 +92,7 @@ class BinaryIO {
   typedef typename vobj::scalar_object sobj;

   GridBase *grid = lat._grid;
-  int lsites = grid->lSites();
+  uint64_t lsites = grid->lSites();

   std::vector<sobj> scalardata(lsites);
   unvectorizeToLexOrdArray(scalardata,lat);
@@ -110,11 +111,11 @@ class BinaryIO {
     lsites = 1;
   }

-#pragma omp parallel
+PARALLEL_REGION
   {
     uint32_t nersc_csum_thr = 0;

-#pragma omp for
+PARALLEL_FOR_LOOP_INTERN
     for (uint64_t local_site = 0; local_site < lsites; local_site++)
     {
       uint32_t *site_buf = (uint32_t *)&fbuf[local_site];
@@ -124,7 +125,7 @@ class BinaryIO {
       }
     }

-#pragma omp critical
+PARALLEL_CRITICAL
     {
       nersc_csum += nersc_csum_thr;
     }
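The PARALLEL_* macros replace raw OpenMP pragmas so the same code compiles thread-free. They are defined elsewhere in Grid (Threads.h); a plausible shape, assuming the usual _Pragma idiom:

    #ifdef GRID_OMP
    #define PARALLEL_REGION          _Pragma("omp parallel")
    #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
    #define PARALLEL_CRITICAL        _Pragma("omp critical")
    #else
    #define PARALLEL_REGION
    #define PARALLEL_FOR_LOOP_INTERN
    #define PARALLEL_CRITICAL
    #endif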
@@ -146,21 +147,23 @@ class BinaryIO {
   std::vector<int> local_start =grid->LocalStarts();
   std::vector<int> global_vol  =grid->FullDimensions();

-#pragma omp parallel
+PARALLEL_REGION
   {
     std::vector<int> coor(nd);
     uint32_t scidac_csuma_thr=0;
     uint32_t scidac_csumb_thr=0;
     uint32_t site_crc=0;

-#pragma omp for
+PARALLEL_FOR_LOOP_INTERN
     for(uint64_t local_site=0;local_site<lsites;local_site++){

       uint32_t * site_buf = (uint32_t *)&fbuf[local_site];

       /*
        * Scidac csum is rather more heavyweight
+       * FIXME -- 128^3 x 256 x 16 will overflow.
        */

       int global_site;

       Lexicographic::CoorFromIndex(coor,local_site,local_vol);
@@ -181,7 +184,7 @@ class BinaryIO {
       scidac_csumb_thr ^= site_crc<<gsite31 | site_crc>>(32-gsite31);
     }

-#pragma omp critical
+PARALLEL_CRITICAL
     {
       scidac_csuma^= scidac_csuma_thr;
       scidac_csumb^= scidac_csumb_thr;
@@ -261,7 +264,7 @@ class BinaryIO {
                               GridBase *grid,
                               std::vector<fobj> &iodata,
                               std::string file,
-                              Integer offset,
+                              uint64_t& offset,
                               const std::string &format, int control,
                               uint32_t &nersc_csum,
                               uint32_t &scidac_csuma,
@@ -368,7 +371,7 @@ class BinaryIO {
 #endif
     } else {
       std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : "
-                << iodata.size() * sizeof(fobj) << " bytes" << std::endl;
+                << iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
       std::ifstream fin;
       fin.open(file, std::ios::binary | std::ios::in);
       if (control & BINARYIO_MASTER_APPEND)
@@ -429,14 +432,20 @@ class BinaryIO {
         MPI_Abort(MPI_COMM_WORLD, 1); //assert(ierr == 0);
       }

-      std::cout << GridLogDebug << "MPI read I/O set view " << file << std::endl;
+      std::cout << GridLogDebug << "MPI write I/O set view " << file << std::endl;
       ierr = MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL);
       assert(ierr == 0);

-      std::cout << GridLogDebug << "MPI read I/O write all " << file << std::endl;
+      std::cout << GridLogDebug << "MPI write I/O write all " << file << std::endl;
       ierr = MPI_File_write_all(fh, &iodata[0], 1, localArray, &status);
       assert(ierr == 0);

+      MPI_Offset os;
+      MPI_File_get_position(fh, &os);
+      MPI_File_get_byte_offset(fh, os, &disp);
+      offset = disp;
+
       MPI_File_close(&fh);
       MPI_Type_free(&fileArray);
       MPI_Type_free(&localArray);
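MPI_File_get_position reports the position in etype units relative to the current file view, so it must be converted before being returned through the new uint64_t& offset parameter. The pattern in isolation (standard MPI-2 calls):

    MPI_Offset os, disp;
    MPI_File_get_position(fh, &os);           // etype units, view-relative
    MPI_File_get_byte_offset(fh, os, &disp);  // convert to absolute bytes
    offset = disp;                            // hand back to the caller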
@@ -446,16 +455,20 @@ class BinaryIO {
     } else {

       std::cout << GridLogMessage << "IOobject: C++ write I/O " << file << " : "
-                << iodata.size() * sizeof(fobj) << " bytes" << std::endl;
+                << iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;

       std::ofstream fout;
       fout.exceptions ( std::fstream::failbit | std::fstream::badbit );
       try {
+        if (offset) { // Must already exist and contain data
           fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
+        } else {      // Allow create
+          fout.open(file,std::ios::binary|std::ios::out);
+        }
       } catch (const std::fstream::failure& exc) {
         std::cout << GridLogError << "Error in opening the file " << file << " for output" <<std::endl;
         std::cout << GridLogError << "Exception description: " << exc.what() << std::endl;
-        std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl;
+        // std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl;
 #ifdef USE_MPI_IO
         MPI_Abort(MPI_COMM_WORLD,1);
 #else
@@ -489,6 +502,7 @@ class BinaryIO {
         exit(1);
 #endif
       }
+      offset = fout.tellp();
       fout.close();
     }
     timer.Stop();
@@ -523,7 +537,7 @@ class BinaryIO {
   static inline void readLatticeObject(Lattice<vobj> &Umu,
                                        std::string file,
                                        munger munge,
-                                       Integer offset,
+                                       uint64_t offset,
                                        const std::string &format,
                                        uint32_t &nersc_csum,
                                        uint32_t &scidac_csuma,
@@ -533,7 +547,7 @@ class BinaryIO {
     typedef typename vobj::Realified::scalar_type word;    word w=0;

     GridBase *grid = Umu._grid;
-    int lsites = grid->lSites();
+    uint64_t lsites = grid->lSites();

     std::vector<sobj> scalardata(lsites);
     std::vector<fobj>     iodata(lsites); // Munge, checksum, byte order in here
@@ -544,7 +558,7 @@ class BinaryIO {
     GridStopWatch timer;
     timer.Start();

-    parallel_for(int x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
+    parallel_for(uint64_t x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);

     vectorizeFromLexOrdArray(scalardata,Umu);
     grid->Barrier();
@@ -560,7 +574,7 @@ class BinaryIO {
   static inline void writeLatticeObject(Lattice<vobj> &Umu,
                                         std::string file,
                                         munger munge,
-                                        Integer offset,
+                                        uint64_t offset,
                                         const std::string &format,
                                         uint32_t &nersc_csum,
                                         uint32_t &scidac_csuma,
@@ -569,7 +583,9 @@ class BinaryIO {
     typedef typename vobj::scalar_object sobj;
     typedef typename vobj::Realified::scalar_type word;    word w=0;
     GridBase *grid = Umu._grid;
-    int lsites = grid->lSites();
+    uint64_t lsites = grid->lSites(), offsetCopy = offset;
+    int attemptsLeft = std::max(0, BinaryIO::latticeWriteMaxRetry);
+    bool checkWrite = (BinaryIO::latticeWriteMaxRetry >= 0);

     std::vector<sobj> scalardata(lsites);
     std::vector<fobj>     iodata(lsites); // Munge, checksum, byte order in here
@@ -580,13 +596,39 @@ class BinaryIO {
     GridStopWatch timer; timer.Start();
     unvectorizeToLexOrdArray(scalardata,Umu);

-    parallel_for(int x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
+    parallel_for(uint64_t x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);

     grid->Barrier();
     timer.Stop();
+    while (attemptsLeft >= 0)
+    {
+      grid->Barrier();
       IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
                nersc_csum,scidac_csuma,scidac_csumb);
+      if (checkWrite)
+      {
+        std::vector<fobj> ckiodata(lsites);
+        uint32_t cknersc_csum, ckscidac_csuma, ckscidac_csumb;
+        uint64_t ckoffset = offsetCopy;
+
+        std::cout << GridLogMessage << "writeLatticeObject: read back object" << std::endl;
+        grid->Barrier();
+        IOobject(w,grid,ckiodata,file,ckoffset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
+                 cknersc_csum,ckscidac_csuma,ckscidac_csumb);
+        if ((cknersc_csum != nersc_csum) or (ckscidac_csuma != scidac_csuma) or (ckscidac_csumb != scidac_csumb))
+        {
+          std::cout << GridLogMessage << "writeLatticeObject: read test checksum failure, re-writing (" << attemptsLeft << " attempt(s) remaining)" << std::endl;
+          offset = offsetCopy;
+        }
+        else
+        {
+          std::cout << GridLogMessage << "writeLatticeObject: read test checksum correct" << std::endl;
+          break;
+        }
+      }
+      attemptsLeft--;
+    }

     std::cout<<GridLogMessage<<"writeLatticeObject: unvectorize overhead "<<timer.Elapsed()  <<std::endl;
   }
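This loop is the heart of the resilient-IO feature: each write is read back and the NERSC and both SciDAC checksums compared, retrying from the saved offset on mismatch. It is off by default (latticeWriteMaxRetry = -1 in the new BinaryIO.cc above); enabling it is one assignment:

    // Allow up to two re-writes before giving up; every subsequent
    // writeLatticeObject (NERSC/ILDG/SciDAC paths included) is verified.
    Grid::BinaryIO::latticeWriteMaxRetry = 2;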
@@ -597,7 +639,7 @@ class BinaryIO {
   static inline void readRNG(GridSerialRNG &serial,
                              GridParallelRNG &parallel,
                              std::string file,
-                             Integer offset,
+                             uint64_t offset,
                              uint32_t &nersc_csum,
                              uint32_t &scidac_csuma,
                              uint32_t &scidac_csumb)
@@ -610,8 +652,8 @@ class BinaryIO {
     std::string format = "IEEE32BIG";

     GridBase *grid = parallel._grid;
-    int gsites = grid->gSites();
-    int lsites = grid->lSites();
+    uint64_t gsites = grid->gSites();
+    uint64_t lsites = grid->lSites();

     uint32_t nersc_csum_tmp   = 0;
     uint32_t scidac_csuma_tmp = 0;
@@ -626,7 +668,7 @@ class BinaryIO {
              nersc_csum,scidac_csuma,scidac_csumb);

     timer.Start();
-    parallel_for(int lidx=0;lidx<lsites;lidx++){
+    parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
       std::vector<RngStateType> tmp(RngStateCount);
       std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin());
       parallel.SetState(tmp,lidx);
@@ -659,7 +701,7 @@ class BinaryIO {
   static inline void writeRNG(GridSerialRNG &serial,
                               GridParallelRNG &parallel,
                               std::string file,
-                              Integer offset,
+                              uint64_t offset,
                               uint32_t &nersc_csum,
                               uint32_t &scidac_csuma,
                               uint32_t &scidac_csumb)
@@ -670,8 +712,8 @@ class BinaryIO {
     typedef std::array<RngStateType,RngStateCount> RNGstate;

     GridBase *grid = parallel._grid;
-    int gsites = grid->gSites();
-    int lsites = grid->lSites();
+    uint64_t gsites = grid->gSites();
+    uint64_t lsites = grid->lSites();

     uint32_t nersc_csum_tmp;
     uint32_t scidac_csuma_tmp;
@@ -684,7 +726,7 @@ class BinaryIO {

     timer.Start();
     std::vector<RNGstate> iodata(lsites);
-    parallel_for(int lidx=0;lidx<lsites;lidx++){
+    parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
       std::vector<RngStateType> tmp(RngStateCount);
       parallel.GetState(tmp,lidx);
       std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin());
@@ -693,7 +735,6 @@ class BinaryIO {

     IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
              nersc_csum,scidac_csuma,scidac_csumb);
-
     iodata.resize(1);
     {
       std::vector<RngStateType> tmp(RngStateCount);
@@ -713,5 +754,6 @@ class BinaryIO {
     std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl;
   }
 };
+
 }
 #endif
@@ -182,6 +182,11 @@ class GridLimeReader : public BinaryIO {
   {
     filename= _filename;
     File = fopen(filename.c_str(), "r");
+    if (File == nullptr)
+    {
+      std::cerr << "cannot open file '" << filename << "'" << std::endl;
+      abort();
+    }
     LimeR = limeCreateReader(File);
   }
   /////////////////////////////////////////////
@@ -228,7 +233,8 @@ class GridLimeReader : public BinaryIO {
     // std::cout << " ReadLatticeObject from offset "<<offset << std::endl;
     BinarySimpleMunger<sobj,sobj> munge;
     BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
+    std::cout << GridLogMessage << "SciDAC checksum A " << std::hex << scidac_csuma << std::dec << std::endl;
+    std::cout << GridLogMessage << "SciDAC checksum B " << std::hex << scidac_csumb << std::dec << std::endl;
     /////////////////////////////////////////////
     // Insist checksum is next record
     /////////////////////////////////////////////
@@ -245,10 +251,8 @@ class GridLimeReader : public BinaryIO {
   ////////////////////////////////////////////
   // Read a generic serialisable object
   ////////////////////////////////////////////
-  template<class serialisable_object>
-  void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
+  void readLimeObject(std::string &xmlstring,std::string record_name)
   {
-    std::string xmlstring;
     // should this be a do while; can we miss a first record??
     while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {

@@ -262,18 +266,29 @@ class GridLimeReader : public BinaryIO {
       limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
       // std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl;

-      XmlReader RD(&xmlc[0],"");
-      read(RD,object_name,object);
+      xmlstring = std::string(&xmlc[0]);
       return;
     }

   }
   assert(0);
   }

+  template<class serialisable_object>
+  void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
+  {
+    std::string xmlstring;
+
+    readLimeObject(xmlstring, record_name);
+    XmlReader RD(xmlstring, true, "");
+    read(RD,object_name,object);
+  }
 };

-class GridLimeWriter : public BinaryIO {
+class GridLimeWriter : public BinaryIO
+{
 public:

   ///////////////////////////////////////////////////
   // FIXME: format for RNG? Now just binary out instead
   // FIXME: collective calls or not ?
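With record location split from deserialisation, the raw record XML can now be inspected directly; the templated overload keeps the old behaviour. A sketch (file name and tags illustrative):

    GridLimeReader reader;
    reader.open("ckpoint_lat.1000.lime");
    std::string xml;
    reader.readLimeObject(xml, std::string(SCIDAC_FILE_XML)); // raw XML string
    // or, as before, straight into a Serializable object:
    // reader.readLimeObject(obj, "FieldMetaData", std::string(GRID_FORMAT));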
@@ -282,17 +297,24 @@ class GridLimeWriter : public BinaryIO {
   FILE       *File;
   LimeWriter *LimeW;
   std::string filename;
+  bool        boss_node;
+  GridLimeWriter( bool isboss = true) {
+    boss_node = isboss;
+  }
   void open(const std::string &_filename) {
     filename= _filename;
+    if ( boss_node ) {
       File = fopen(filename.c_str(), "w");
       LimeW = limeCreateWriter(File); assert(LimeW != NULL );
     }
+  }
   /////////////////////////////////////////////
   // Close the file
   /////////////////////////////////////////////
   void close(void) {
+    if ( boss_node ) {
       fclose(File);
+    }
     // limeDestroyWriter(LimeW);
   }
   ///////////////////////////////////////////////////////
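All ranks construct the writer, but only the boss touches the FILE*/Lime handles; the binary payload write remains collective. Construction sketch:

    GridLimeWriter writer(grid->IsBoss()); // true on rank 0 only
    writer.open("output.lime");            // fopen happens on the boss alone
    // ... writeLimeObject (boss) / writeLimeLatticeBinaryObject (collective) ...
    writer.close();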
@@ -300,24 +322,22 @@ class GridLimeWriter : public BinaryIO {
   ///////////////////////////////////////////////////////
   int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize)
   {
+    if ( boss_node ) {
       LimeRecordHeader *h;
       h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize);
       assert(limeWriteRecordHeader(h, LimeW) >= 0);
       limeDestroyHeader(h);
+    }
     return LIME_SUCCESS;
   }
   ////////////////////////////////////////////
   // Write a generic serialisable object
   ////////////////////////////////////////////
-  template<class serialisable_object>
-  void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name)
+  void writeLimeObject(int MB,int ME,XmlWriter &writer,std::string object_name,std::string record_name)
   {
-    std::string xmlstring;
-    {
-      XmlWriter WR("","");
-      write(WR,object_name,object);
-      xmlstring = WR.XmlString();
-    }
+    if ( boss_node ) {
+      std::string xmlstring = writer.docString();
     // std::cout << "WriteLimeObject" << record_name <<std::endl;
     uint64_t nbytes = xmlstring.size();
     // std::cout << " xmlstring "<< nbytes<< " " << xmlstring <<std::endl;
@@ -329,48 +349,95 @@ class GridLimeWriter : public BinaryIO {
     err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0);
     err=limeWriterCloseRecord(LimeW);  assert(err>=0);
     limeDestroyHeader(h);
-    // std::cout << " File offset is now"<<ftello(File) << std::endl;
   }
-  ////////////////////////////////////////////
+  }
+
+  template<class serialisable_object>
+  void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name, const unsigned int scientificPrec = 0)
+  {
+    XmlWriter WR("","");
+
+    if (scientificPrec)
+    {
+      WR.scientificFormat(true);
+      WR.setPrecision(scientificPrec);
+    }
+    write(WR,object_name,object);
+    writeLimeObject(MB, ME, WR, object_name, record_name);
+  }
+  ////////////////////////////////////////////////////
   // Write a generic lattice field and csum
-  ////////////////////////////////////////////
+  // This routine is Collectively called by all nodes
+  // in communicator used by the field._grid
+  ////////////////////////////////////////////////////
   template<class vobj>
   void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
   {
-    ////////////////////////////////////////////
-    // Create record header
-    ////////////////////////////////////////////
-    typedef typename vobj::scalar_object sobj;
-    int err;
-    uint32_t nersc_csum,scidac_csuma,scidac_csumb;
-    uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites;
-    createLimeRecordHeader(record_name, 0, 0, PayloadSize);
-
-    // std::cout << "W sizeof(sobj)"      <<sizeof(sobj)<<std::endl;
-    // std::cout << "W Gsites "           <<field._grid->_gsites<<std::endl;
-    // std::cout << "W Payload expected " <<PayloadSize<<std::endl;
-
     ////////////////////////////////////////////////////////////////////
     // NB: FILE and iostream are jointly writing disjoint sequences in the
     // the same file through different file handles (integer units).
     //
     // These are both buffered, so why I think this code is right is as follows.
     //
-    //  i)  write record header to FILE *File, telegraphing the size.
+    //  i)  write record header to FILE *File, telegraphing the size; flush
     //  ii) ftello reads the offset from FILE *File .
     // iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
     //      Closes iostream and flushes.
     //  iv) fseek on FILE * to end of this disjoint section.
     //   v) Continue writing scidac record.
     ////////////////////////////////////////////////////////////////////
-    uint64_t offset = ftello(File);
-    // std::cout << " Writing to offset "<<offset << std::endl;
+    GridBase *grid = field._grid;
+    assert(boss_node == field._grid->IsBoss() );
+
+    ////////////////////////////////////////////
+    // Create record header
+    ////////////////////////////////////////////
+    typedef typename vobj::scalar_object sobj;
+    int err;
+    uint32_t nersc_csum,scidac_csuma,scidac_csumb;
+    uint64_t PayloadSize = sizeof(sobj) * grid->_gsites;
+    if ( boss_node ) {
+      createLimeRecordHeader(record_name, 0, 0, PayloadSize);
+      fflush(File);
+    }
+
+    // std::cout << "W sizeof(sobj)"      <<sizeof(sobj)<<std::endl;
+    // std::cout << "W Gsites "           <<field._grid->_gsites<<std::endl;
+    // std::cout << "W Payload expected " <<PayloadSize<<std::endl;
+
+    ////////////////////////////////////////////////
+    // Check all nodes agree on file position
+    ////////////////////////////////////////////////
+    uint64_t offset1;
+    if ( boss_node ) {
+      offset1 = ftello(File);
+    }
+    grid->Broadcast(0,(void *)&offset1,sizeof(offset1));
+
+    ///////////////////////////////////////////
+    // The above is collective. Write by other means into the binary record
+    ///////////////////////////////////////////
     std::string format = getFormatString<vobj>();
     BinarySimpleMunger<sobj,sobj> munge;
-    BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
-    // fseek(File,0,SEEK_END); offset = ftello(File);std::cout << " offset now "<<offset << std::endl;
-    err=limeWriterCloseRecord(LimeW);  assert(err>=0);
+    BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb);
+
+    ///////////////////////////////////////////
+    // Wind forward and close the record
+    ///////////////////////////////////////////
+    if ( boss_node ) {
+      fseek(File,0,SEEK_END);
+      uint64_t offset2 = ftello(File); // std::cout << " now at offset "<<offset2 << std::endl;
+      assert( (offset2-offset1) == PayloadSize);
+    }
+
+    /////////////////////////////////////////////////////////////
+    // Check MPI-2 I/O did what we expect to file
+    /////////////////////////////////////////////////////////////
+
+    if ( boss_node ) {
+      err=limeWriterCloseRecord(LimeW);  assert(err>=0);
+    }
     ////////////////////////////////////////
     // Write checksum element, propagaing forward from the BinaryIO
     // Always pair a checksum with a binary object, and close message
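The choreography above is worth distilling: the boss flushes its buffered FILE* and reads the payload offset, every rank agrees on it before the collective binary write, and the boss then checks that exactly PayloadSize bytes landed. In outline:

    uint64_t offset1 = 0;
    if (boss_node) { fflush(File); offset1 = ftello(File); }  // header already written
    grid->Broadcast(0, (void *)&offset1, sizeof(offset1));    // all ranks agree
    // ... collective binary write of PayloadSize bytes at offset1 ...
    if (boss_node) {
      fseek(File, 0, SEEK_END);
      assert((uint64_t)ftello(File) - offset1 == PayloadSize); // nothing torn
    }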
@@ -380,26 +447,32 @@ class GridLimeWriter : public BinaryIO {
     std::stringstream streamb; streamb << std::hex << scidac_csumb;
     checksum.suma= streama.str();
     checksum.sumb= streamb.str();
-    // std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl;
+    if ( boss_node ) {
       writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
     }
+  }
 };

 class ScidacWriter : public GridLimeWriter {
  public:

+  ScidacWriter(bool isboss =true ) : GridLimeWriter(isboss) { };
+
   template<class SerialisableUserFile>
   void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
   {
     scidacFile _scidacFile(grid);
+    if ( this->boss_node ) {
       writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
       writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
     }
+  }
   ////////////////////////////////////////////////
   // Write generic lattice field in scidac format
   ////////////////////////////////////////////////
   template <class vobj, class userRecord>
-  void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord)
+  void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord,
+                              const unsigned int recordScientificPrec = 0)
   {
     GridBase * grid = field._grid;

@@ -415,9 +488,12 @@ class ScidacWriter : public GridLimeWriter {
     //////////////////////////////////////////////
     // Fill the Lime file record by record
     //////////////////////////////////////////////
+    if ( this->boss_node ) {
       writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
-      writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
+      writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML), recordScientificPrec);
       writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
+    }
+    // Collective call
     writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
   }
 };
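Usage sketch for the new optional precision argument: 17 significant digits round-trips IEEE double exactly in the XML record. The field and record names are illustrative.

    ScidacWriter writer(grid->IsBoss());
    writer.open("field.lime");
    writer.writeScidacFieldRecord(field, userRecord, /*recordScientificPrec=*/17);
    writer.close();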
@@ -485,6 +561,8 @@ class ScidacReader : public GridLimeReader {
 class IldgWriter : public ScidacWriter {
  public:

+  IldgWriter(bool isboss) : ScidacWriter(isboss) {};
+
   ///////////////////////////////////
   // A little helper
   ///////////////////////////////////
@@ -568,7 +646,6 @@ class IldgWriter : public ScidacWriter {
     writeLimeIldgLFN(header.ildg_lfn);                               // rec
     writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
     // limeDestroyWriter(LimeW);
-    fclose(File);
   }
 };

@@ -644,9 +721,11 @@ class IldgReader : public GridLimeReader {

       //////////////////////////////////
       // ILDG format record

+      std::string xmlstring(&xmlc[0]);
       if ( !strncmp(limeReaderType(LimeR), ILDG_FORMAT,strlen(ILDG_FORMAT)) ) {

-        XmlReader RD(&xmlc[0],"");
+        XmlReader RD(xmlstring, true, "");
         read(RD,"ildgFormat",ildgFormat_);

         if ( ildgFormat_.precision == 64 ) format = std::string("IEEE64BIG");
@@ -661,13 +740,13 @@ class IldgReader : public GridLimeReader {
       }

       if ( !strncmp(limeReaderType(LimeR), ILDG_DATA_LFN,strlen(ILDG_DATA_LFN)) ) {
-        FieldMetaData_.ildg_lfn = std::string(&xmlc[0]);
+        FieldMetaData_.ildg_lfn = xmlstring;
         found_ildgLFN = 1;
       }

       if ( !strncmp(limeReaderType(LimeR), GRID_FORMAT,strlen(ILDG_FORMAT)) ) {

-        XmlReader RD(&xmlc[0],"");
+        XmlReader RD(xmlstring, true, "");
         read(RD,"FieldMetaData",FieldMetaData_);

         format = FieldMetaData_.floating_point;
@@ -681,18 +760,17 @@ class IldgReader : public GridLimeReader {
       }

       if ( !strncmp(limeReaderType(LimeR), SCIDAC_RECORD_XML,strlen(SCIDAC_RECORD_XML)) ) {
-        std::string xmls(&xmlc[0]);
         // is it a USQCD info field
-        if ( xmls.find(std::string("usqcdInfo")) != std::string::npos ) {
+        if ( xmlstring.find(std::string("usqcdInfo")) != std::string::npos ) {
           // std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl;
-          XmlReader RD(&xmlc[0],"");
+          XmlReader RD(xmlstring, true, "");
           read(RD,"usqcdInfo",usqcdInfo_);
           found_usqcdInfo = 1;
         }
       }

       if ( !strncmp(limeReaderType(LimeR), SCIDAC_CHECKSUM,strlen(SCIDAC_CHECKSUM)) ) {
-        XmlReader RD(&xmlc[0],"");
+        XmlReader RD(xmlstring, true, "");
         read(RD,"scidacChecksum",scidacChecksum_);
         found_scidacChecksum = 1;
       }
@@ -136,8 +136,9 @@ struct scidacRecord : Serializable {
                                   int, typesize,
                                   int, datacount);

-  scidacRecord() { version =1.0; }
+  scidacRecord()
+  : version(1.0), recordtype(0), colors(0), spins(0), typesize(0), datacount(0)
+  {}
 };

 ////////////////////////
@@ -81,18 +81,16 @@ namespace Grid {
                                   std::string, creation_date,
                                   std::string, archive_date,
                                   std::string, floating_point);
-  FieldMetaData(void) {
-    nd=4;
-    dimension.resize(4);
-    boundary.resize(4);
-    scidac_checksuma=0;
-    scidac_checksumb=0;
-    checksum=0;
-  }
+  // WARNING: non-initialised values might lead to twisted parallel IO
+  // issues, std::string are fine because they initliase to size 0
+  // as per C++ standard.
+  FieldMetaData(void)
+  : nd(4), dimension(4,0), boundary(4, ""), data_start(0),
+    link_trace(0.), plaquette(0.), checksum(0),
+    scidac_checksuma(0), scidac_checksumb(0), sequence_number(0)
+  {}
 };

 namespace QCD {

   using namespace Grid;
@@ -57,7 +57,7 @@ namespace Grid {
   // for the header-reader
   static inline int readHeader(std::string file,GridBase *grid,  FieldMetaData &field)
   {
-    int offset=0;
+    uint64_t offset=0;
     std::map<std::string,std::string> header;
     std::string line;

@@ -139,7 +139,7 @@ namespace Grid {
     typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;

     GridBase *grid = Umu._grid;
-    int offset = readHeader(file,Umu._grid,header);
+    uint64_t offset = readHeader(file,Umu._grid,header);

     FieldMetaData clone(header);

@@ -236,21 +236,25 @@ namespace Grid {
     GaugeStatistics(Umu,header);
     MachineCharacteristics(header);

-    int offset;
+    uint64_t offset;

-    truncate(file);

     // Sod it -- always write 3x3 double
     header.floating_point = std::string("IEEE64BIG");
     header.data_type      = std::string("4D_SU3_GAUGE_3x3");
     GaugeSimpleUnmunger<fobj3D,sobj> munge;
+    if ( grid->IsBoss() ) {
+      truncate(file);
       offset = writeHeader(header,file);
+    }
+    grid->Broadcast(0,(void *)&offset,sizeof(offset));

     uint32_t nersc_csum,scidac_csuma,scidac_csumb;
     BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
                                               nersc_csum,scidac_csuma,scidac_csumb);
     header.checksum = nersc_csum;
+    if ( grid->IsBoss() ) {
       writeHeader(header,file);
+    }

     std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum "
              <<std::hex<<header.checksum
@@ -278,7 +282,7 @@ namespace Grid {
     header.plaquette=0.0;
     MachineCharacteristics(header);

-    int offset;
+    uint64_t offset;

 #ifdef RNG_RANLUX
     header.floating_point = std::string("UINT64");
@@ -293,12 +297,18 @@ namespace Grid {
     header.data_type      = std::string("SITMO");
 #endif

+    if ( grid->IsBoss() ) {
       truncate(file);
       offset = writeHeader(header,file);
+    }
+    grid->Broadcast(0,(void *)&offset,sizeof(offset));

     uint32_t nersc_csum,scidac_csuma,scidac_csumb;
     BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);
     header.checksum = nersc_csum;
+    if ( grid->IsBoss() ) {
       offset = writeHeader(header,file);
+    }

     std::cout<<GridLogMessage
              <<"Written NERSC RNG STATE "<<file<< " checksum "
@@ -313,7 +323,7 @@ namespace Grid {

     GridBase *grid = parallel._grid;

-    int offset = readHeader(file,grid,header);
+    uint64_t offset = readHeader(file,grid,header);

     FieldMetaData clone(header);

@@ -49,14 +49,38 @@ inline double usecond(void) {

 typedef std::chrono::system_clock          GridClock;
 typedef std::chrono::time_point<GridClock> GridTimePoint;
-typedef std::chrono::milliseconds          GridTime;
-typedef std::chrono::microseconds          GridUsecs;

-inline std::ostream& operator<< (std::ostream & stream, const std::chrono::milliseconds & time)
+typedef std::chrono::seconds               GridSecs;
+typedef std::chrono::milliseconds          GridMillisecs;
+typedef std::chrono::microseconds          GridUsecs;
+typedef std::chrono::microseconds          GridTime;
+
+inline std::ostream& operator<< (std::ostream & stream, const GridSecs & time)
 {
-  stream << time.count()<<" ms";
+  stream << time.count()<<" s";
   return stream;
 }
+inline std::ostream& operator<< (std::ostream & stream, const GridMillisecs & now)
+{
+  GridSecs second(1);
+  auto secs       = now/second ;
+  auto subseconds = now%second ;
+  auto fill = stream.fill();
+  stream << secs<<"."<<std::setw(3)<<std::setfill('0')<<subseconds.count()<<" s";
+  stream.fill(fill);
+  return stream;
+}
+inline std::ostream& operator<< (std::ostream & stream, const GridUsecs & now)
+{
+  GridSecs second(1);
+  auto seconds    = now/second ;
+  auto subseconds = now%second ;
+  auto fill = stream.fill();
+  stream << seconds<<"."<<std::setw(6)<<std::setfill('0')<<subseconds.count()<<" s";
+  stream.fill(fill);
+  return stream;
+}

 class GridStopWatch {
 private:
@ -96,6 +120,9 @@ public:
|
|||||||
assert(running == false);
|
assert(running == false);
|
||||||
return (uint64_t) accumulator.count();
|
return (uint64_t) accumulator.count();
|
||||||
}
|
}
|
||||||
|
bool isRunning(void){
|
||||||
|
return running;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
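To see what the new stream operators print, here is a small self-contained sketch (it re-declares the typedefs and the millisecond operator from the hunk above so it compiles outside Grid; note that dividing two `std::chrono` durations yields a plain integer count, which is why `secs` streams directly):

```cpp
#include <chrono>
#include <iomanip>
#include <iostream>

typedef std::chrono::seconds      GridSecs;
typedef std::chrono::milliseconds GridMillisecs;

// Same logic as the diff: whole seconds, then a zero-padded subsecond field.
inline std::ostream& operator<<(std::ostream& stream, const GridMillisecs& now)
{
  GridSecs second(1);
  auto secs       = now / second;   // integer count of whole seconds
  auto subseconds = now % second;   // leftover milliseconds, as a duration
  auto fill = stream.fill();
  stream << secs << "." << std::setw(3) << std::setfill('0')
         << subseconds.count() << " s";
  stream.fill(fill);
  return stream;
}

int main()
{
  std::cout << GridMillisecs(1234) << std::endl;  // prints "1.234 s"
}
```

Since `GridTime` is now a microsecond-resolution typedef identical to `GridUsecs`, existing `GridTime` values pick up the new "seconds.subseconds" formatting automatically.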
@@ -1,7 +1,7 @@
 /**
- * pugixml parser - version 1.6
+ * pugixml parser - version 1.9
  * --------------------------------------------------------
- * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+ * Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
  * Report bugs and download new versions at http://pugixml.org/
  *
  * This library is distributed under the MIT License. See notice at the end
@@ -17,6 +17,9 @@
 // Uncomment this to enable wchar_t mode
 // #define PUGIXML_WCHAR_MODE

+// Uncomment this to enable compact mode
+// #define PUGIXML_COMPACT
+
 // Uncomment this to disable XPath
 // #define PUGIXML_NO_XPATH

@@ -46,7 +49,7 @@
 #endif

 /**
- * Copyright (c) 2006-2015 Arseny Kapoulkine
+ * Copyright (c) 2006-2018 Arseny Kapoulkine
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
[File diff suppressed because it is too large]
@@ -1,7 +1,7 @@
 /**
- * pugixml parser - version 1.6
+ * pugixml parser - version 1.9
  * --------------------------------------------------------
- * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+ * Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
  * Report bugs and download new versions at http://pugixml.org/
  *
  * This library is distributed under the MIT License. See notice at the end
@@ -13,7 +13,7 @@

 #ifndef PUGIXML_VERSION
 // Define version macro; evaluates to major * 100 + minor so that it's safe to use in less-than comparisons
-# define PUGIXML_VERSION 160
+# define PUGIXML_VERSION 190
 #endif

 // Include user configuration file (this can define various configuration macros)
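Since the macro packs the version as major * 100 + minor, 1.9 becomes 190 and ordered comparisons stay meaningful; client code can gate on it like this (a generic sketch, with a hypothetical feature flag of our own):

```cpp
#include "pugixml.hpp"

// Compile a 1.9-only code path (e.g. xml_document move support) conditionally.
#if PUGIXML_VERSION >= 190
# define HAVE_PUGI_MOVE 1   // hypothetical flag for our own code, not pugixml's
#else
# define HAVE_PUGI_MOVE 0
#endif
```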
@@ -72,6 +72,44 @@
 # endif
 #endif

+// If the platform is known to have move semantics support, compile move ctor/operator implementation
+#ifndef PUGIXML_HAS_MOVE
+# if __cplusplus >= 201103
+# define PUGIXML_HAS_MOVE
+# elif defined(_MSC_VER) && _MSC_VER >= 1600
+# define PUGIXML_HAS_MOVE
+# endif
+#endif
+
+// If C++ is 2011 or higher, add 'noexcept' specifiers
+#ifndef PUGIXML_NOEXCEPT
+# if __cplusplus >= 201103
+# define PUGIXML_NOEXCEPT noexcept
+# elif defined(_MSC_VER) && _MSC_VER >= 1900
+# define PUGIXML_NOEXCEPT noexcept
+# else
+# define PUGIXML_NOEXCEPT
+# endif
+#endif
+
+// Some functions can not be noexcept in compact mode
+#ifdef PUGIXML_COMPACT
+# define PUGIXML_NOEXCEPT_IF_NOT_COMPACT
+#else
+# define PUGIXML_NOEXCEPT_IF_NOT_COMPACT PUGIXML_NOEXCEPT
+#endif
+
+// If C++ is 2011 or higher, add 'override' qualifiers
+#ifndef PUGIXML_OVERRIDE
+# if __cplusplus >= 201103
+# define PUGIXML_OVERRIDE override
+# elif defined(_MSC_VER) && _MSC_VER >= 1700
+# define PUGIXML_OVERRIDE override
+# else
+# define PUGIXML_OVERRIDE
+# endif
+#endif
+
 // Character interface macros
 #ifdef PUGIXML_WCHAR_MODE
 # define PUGIXML_TEXT(t) L ## t
@@ -158,6 +196,11 @@ namespace pugi
 // is a valid document. This flag is off by default.
 const unsigned int parse_fragment = 0x1000;

+// This flag determines if plain character data is to be stored in the parent element's value. This significantly changes the structure of
+// the document; this flag is only recommended for parsing documents with many PCDATA nodes in memory-constrained environments.
+// This flag is off by default.
+const unsigned int parse_embed_pcdata = 0x2000;
+
 // The default parsing mode.
 // Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
 // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
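A short usage sketch for the new flag (the behavioral comment reflects the flag's documented intent; the exact node layout is pugixml's business):

```cpp
#include "pugixml.hpp"

int main()
{
    pugi::xml_document doc;
    // Opt in to embedded PCDATA on top of the default parse flags.
    pugi::xml_parse_result result =
        doc.load_string("<node>text</node>",
                        pugi::parse_default | pugi::parse_embed_pcdata);
    // With the flag set, "text" lives in <node>'s value instead of a separate
    // PCDATA child node, trading DOM fidelity for a smaller tree.
    return result ? 0 : 1;
}
```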
@@ -206,6 +249,9 @@ namespace pugi
 // Write every attribute on a new line with appropriate indentation. This flag is off by default.
 const unsigned int format_indent_attributes = 0x40;

+// Don't output empty element tags, instead writing an explicit start and end tag even if there are no children. This flag is off by default.
+const unsigned int format_no_empty_element_tags = 0x80;
+
 // The default set of formatting flags.
 // Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none.
 const unsigned int format_default = format_indent;
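And the matching output-side flag in action (sketch):

```cpp
#include "pugixml.hpp"
#include <iostream>

int main()
{
    pugi::xml_document doc;
    doc.append_child("empty");

    doc.save(std::cout);  // default: serializes the element as <empty />

    // With the new flag: serializes as <empty></empty> instead.
    doc.save(std::cout, "\t",
             pugi::format_default | pugi::format_no_empty_element_tags);
}
```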
@@ -268,7 +314,7 @@ namespace pugi
 // Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio
 xml_writer_file(void* file);

-virtual void write(const void* data, size_t size);
+virtual void write(const void* data, size_t size) PUGIXML_OVERRIDE;

 private:
 void* file;
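`PUGIXML_OVERRIDE` only decorates pugixml's own writers; user code keeps deriving from `pugi::xml_writer` exactly as before. A minimal custom writer for reference:

```cpp
#include "pugixml.hpp"
#include <cstddef>
#include <string>

// Collects serialized XML into a std::string via the xml_writer interface.
struct string_writer : pugi::xml_writer
{
    std::string result;
    void write(const void* data, size_t size) override  // plain C++11 'override' is fine here
    {
        result.append(static_cast<const char*>(data), size);
    }
};
```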
@@ -283,7 +329,7 @@ namespace pugi
 xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream);
 xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream);

-virtual void write(const void* data, size_t size);
+virtual void write(const void* data, size_t size) PUGIXML_OVERRIDE;

 private:
 std::basic_ostream<char, std::char_traits<char> >* narrow_stream;
@@ -354,6 +400,8 @@ namespace pugi
 // Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
 bool set_value(int rhs);
 bool set_value(unsigned int rhs);
+bool set_value(long rhs);
+bool set_value(unsigned long rhs);
 bool set_value(double rhs);
 bool set_value(float rhs);
 bool set_value(bool rhs);
@@ -367,6 +415,8 @@ namespace pugi
 xml_attribute& operator=(const char_t* rhs);
 xml_attribute& operator=(int rhs);
 xml_attribute& operator=(unsigned int rhs);
+xml_attribute& operator=(long rhs);
+xml_attribute& operator=(unsigned long rhs);
 xml_attribute& operator=(double rhs);
 xml_attribute& operator=(float rhs);
 xml_attribute& operator=(bool rhs);
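The new `long`/`unsigned long` overloads close a gap where such values previously had to squeeze through the `int` or `double` overloads; usage is unchanged (sketch, with a hypothetical helper name):

```cpp
#include "pugixml.hpp"

void tag_with_id(pugi::xml_node node, unsigned long id)
{
    // Resolves to the new set_value(unsigned long) overload directly,
    // with no narrowing to int on LP64 platforms.
    node.append_attribute("id").set_value(id);
}
```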
@@ -601,8 +651,8 @@ namespace pugi
 xpath_node_set select_nodes(const xpath_query& query) const;

 // (deprecated: use select_node instead) Select single node by evaluating XPath query.
-xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
-xpath_node select_single_node(const xpath_query& query) const;
+PUGIXML_DEPRECATED xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
+PUGIXML_DEPRECATED xpath_node select_single_node(const xpath_query& query) const;

 #endif

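Code still using the deprecated spelling migrates one-for-one (sketch):

```cpp
#include "pugixml.hpp"

pugi::xml_node first_item(const pugi::xml_document& doc)
{
    // select_single_node() is now marked PUGIXML_DEPRECATED;
    // select_node() is the drop-in replacement with the same signature.
    return doc.select_node("//item").node();
}
```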
@@ -701,6 +751,8 @@ namespace pugi
 // Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
 bool set(int rhs);
 bool set(unsigned int rhs);
+bool set(long rhs);
+bool set(unsigned long rhs);
 bool set(double rhs);
 bool set(float rhs);
 bool set(bool rhs);
@@ -714,6 +766,8 @@ namespace pugi
 xml_text& operator=(const char_t* rhs);
 xml_text& operator=(int rhs);
 xml_text& operator=(unsigned int rhs);
+xml_text& operator=(long rhs);
+xml_text& operator=(unsigned long rhs);
 xml_text& operator=(double rhs);
 xml_text& operator=(float rhs);
 xml_text& operator=(bool rhs);
@@ -945,10 +999,11 @@ namespace pugi

 // Non-copyable semantics
 xml_document(const xml_document&);
-const xml_document& operator=(const xml_document&);
+xml_document& operator=(const xml_document&);

-void create();
-void destroy();
+void _create();
+void _destroy();
+void _move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;

 public:
 // Default constructor, makes empty document
@@ -957,6 +1012,12 @@ namespace pugi
 // Destructor, invalidates all node/attribute handles to this document
 ~xml_document();

+#ifdef PUGIXML_HAS_MOVE
+// Move semantics support
+xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
+xml_document& operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
+#endif
+
 // Removes all nodes, leaving the empty document
 void reset();

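With `PUGIXML_HAS_MOVE` defined (any C++11 compiler, per the macro block earlier), documents can now be transferred without reparsing; a sketch:

```cpp
#include "pugixml.hpp"
#include <utility>
#include <vector>

int main()
{
    pugi::xml_document doc;
    doc.load_string("<root/>");

    std::vector<pugi::xml_document> docs;
    // Legal as of this change: the move ctor transfers ownership of the DOM.
    // (Copying an xml_document remains forbidden.)
    docs.push_back(std::move(doc));
}
```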
@@ -970,7 +1031,7 @@ namespace pugi
 #endif

 // (deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied.
-xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
+PUGIXML_DEPRECATED xml_parse_result load(const char_t* contents, unsigned int options = parse_default);

 // Load document from zero-terminated string. No encoding conversions are applied.
 xml_parse_result load_string(const char_t* contents, unsigned int options = parse_default);
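Same story for `load`: the zero-terminated-string overload is deprecated in favor of the explicit name (sketch, with a hypothetical wrapper):

```cpp
#include "pugixml.hpp"

bool parse_config(pugi::xml_document& doc, const char* xml)
{
    // was: doc.load(xml);  -- now flagged PUGIXML_DEPRECATED
    return static_cast<bool>(doc.load_string(xml));
}
```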
@@ -1095,10 +1156,10 @@ namespace pugi
 xpath_variable_set(const xpath_variable_set& rhs);
 xpath_variable_set& operator=(const xpath_variable_set& rhs);

-#if __cplusplus >= 201103
+#ifdef PUGIXML_HAS_MOVE
 // Move semantics support
-xpath_variable_set(xpath_variable_set&& rhs);
-xpath_variable_set& operator=(xpath_variable_set&& rhs);
+xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT;
+xpath_variable_set& operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT;
 #endif

 // Add a new variable or get the existing one, if the types match
@@ -1139,10 +1200,10 @@ namespace pugi
 // Destructor
 ~xpath_query();

-#if __cplusplus >= 201103
+#ifdef PUGIXML_HAS_MOVE
 // Move semantics support
-xpath_query(xpath_query&& rhs);
-xpath_query& operator=(xpath_query&& rhs);
+xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT;
+xpath_query& operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT;
 #endif

 // Get query expression return type
@@ -1201,7 +1262,7 @@ namespace pugi
 explicit xpath_exception(const xpath_parse_result& result);

 // Get error message
-virtual const char* what() const throw();
+virtual const char* what() const throw() PUGIXML_OVERRIDE;

 // Get parse result
 const xpath_parse_result& result() const;
@@ -1280,10 +1341,10 @@ namespace pugi
 xpath_node_set(const xpath_node_set& ns);
 xpath_node_set& operator=(const xpath_node_set& ns);

-#if __cplusplus >= 201103
+#ifdef PUGIXML_HAS_MOVE
 // Move semantics support
-xpath_node_set(xpath_node_set&& rhs);
-xpath_node_set& operator=(xpath_node_set&& rhs);
+xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT;
+xpath_node_set& operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT;
 #endif

 // Get collection type
@@ -1317,7 +1378,7 @@ namespace pugi
 xpath_node* _end;

 void _assign(const_iterator begin, const_iterator end, type_t type);
-void _move(xpath_node_set& rhs);
+void _move(xpath_node_set& rhs) PUGIXML_NOEXCEPT;
 };
 #endif

@@ -1375,7 +1436,7 @@ namespace std
 #endif

 /**
- * Copyright (c) 2006-2015 Arseny Kapoulkine
+ * Copyright (c) 2006-2018 Arseny Kapoulkine
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
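The XPath types follow the same pattern, now keyed off `PUGIXML_HAS_MOVE` rather than a raw `__cplusplus` test (so MSVC's partial C++11 support is caught too), and the move operations gain `PUGIXML_NOEXCEPT`. Compiled queries can therefore be built once and moved into longer-lived storage (sketch):

```cpp
#include "pugixml.hpp"
#include <utility>
#include <vector>

int main()
{
    std::vector<pugi::xpath_query> queries;
    pugi::xpath_query q("//item[@price > 100]");  // compile the expression once
    queries.push_back(std::move(q));              // noexcept move, no recompilation
}
```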
@@ -1,6 +1,6 @@
-pugixml 1.6 - an XML processing library
+pugixml 1.9 - an XML processing library

-Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
 Report bugs and download new versions at http://pugixml.org/

 This is the distribution of pugixml, which is a C++ XML processing library,
@@ -28,7 +28,7 @@ The distribution contains the following folders:

 This library is distributed under the MIT License:

-Copyright (c) 2006-2015 Arseny Kapoulkine
+Copyright (c) 2006-2018 Arseny Kapoulkine

 Permission is hereby granted, free of charge, to any person
 obtaining a copy of this software and associated documentation
@@ -90,6 +90,7 @@ namespace QCD {
 // That probably makes for GridRedBlack4dCartesian grid.

 // s,sp,c,spc,lc
+
 template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
 template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
 template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
@@ -101,6 +102,8 @@ namespace QCD {
 template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
 template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
 template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
+template<typename vtype> using iSpinColourSpinColourMatrix = iScalar<iMatrix<iMatrix<iMatrix<iMatrix<vtype, Nc>, Ns>, Nc>, Ns> >;
+
 template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >;
 template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
@@ -132,6 +135,24 @@ namespace QCD {
 typedef iSpinColourMatrix<vComplexF> vSpinColourMatrixF;
 typedef iSpinColourMatrix<vComplexD> vSpinColourMatrixD;

+// SpinColourSpinColour matrix
+typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
+typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
+typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
+
+typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
+typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
+typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
+
+// SpinColourSpinColour matrix
+typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
+typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
+typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
+
+typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
+typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
+typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
+
 // LorentzColour
 typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
 typedef iLorentzColourMatrix<ComplexF > LorentzColourMatrixF;
@@ -229,6 +250,9 @@ namespace QCD {
 typedef Lattice<vSpinColourMatrixF> LatticeSpinColourMatrixF;
 typedef Lattice<vSpinColourMatrixD> LatticeSpinColourMatrixD;

+typedef Lattice<vSpinColourSpinColourMatrix> LatticeSpinColourSpinColourMatrix;
+typedef Lattice<vSpinColourSpinColourMatrixF> LatticeSpinColourSpinColourMatrixF;
+typedef Lattice<vSpinColourSpinColourMatrixD> LatticeSpinColourSpinColourMatrixD;

 typedef Lattice<vLorentzColourMatrix> LatticeLorentzColourMatrix;
 typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;
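The new `iSpinColourSpinColourMatrix` nests spin and colour indices twice (an Ns x Ns matrix of Nc x Nc matrices, two levels deep), the natural container for four-point-function-style objects; with the `Lattice` typedefs it is used like any other Grid field. A hedged sketch (grid construction and the zero-assignment idiom vary across Grid versions):

```cpp
// Sketch only: assumes the usual Grid headers and an already-initialized 4-d grid.
#include <Grid/Grid.h>

using namespace Grid;
using namespace Grid::QCD;

void demo(GridCartesian* UGrid)
{
    LatticeSpinColourSpinColourMatrix field(UGrid); // new lattice type from this diff
    field = zero;  // 'zero' is Grid's polymorphic zero in this era of the code base
}
```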
[Some files were not shown because too many files have changed in this diff]