mirror of
https://github.com/paboyle/Grid.git
synced 2025-10-24 01:34:47 +01:00
Compare commits
690 Commits
feature/cl
...
feature/re
Author | SHA1 | Date | |
---|---|---|---|
49b934310b | |||
01e8cf5017 | |||
12f4499502 | |||
05aec72887 | |||
136d3802cb | |||
a4c55406ed | |||
c7f33ca2a8 | |||
0e3035c51d | |||
10fc263675 | |||
bccfd4cbb3 | |||
0b50d4a328 | |||
e232257cb6 | |||
09451b5e48 | |||
6364aa8acf | |||
b9e84ecab7 | |||
41032fef44 | |||
d77bc88170 | |||
494b3c9e57 | |||
2ba19a9e07 | |||
5d7cc29eaf | |||
f22a27d7f9 | |||
|
33a0bbb17b | ||
f592ec8baa | |||
8b007b5c24 | |||
9bb170576d | |||
|
a7e3977b75 | ||
|
995f20e45d | ||
|
d058b4e681 | ||
8e0d2f3402 | |||
2ac57370f1 | |||
344e832a4e | |||
cfe281f1a4 | |||
f5422c7334 | |||
68c76a410d | |||
69b6ba0a73 | |||
65349b07a7 | |||
7cd9914f0e | |||
|
f3f24b3017 | ||
|
8ef4657805 | ||
|
78c1086f8b | ||
|
68c13045d6 | ||
|
e9b6f58fdc | ||
|
839605c45c | ||
1ff1422e07 | |||
32376f0437 | |||
0c6e581336 | |||
|
e0a79a5bbf | ||
|
4c016cc1a4 | ||
|
2205b1e63e | ||
|
6f421c7a6f | ||
|
b62b9ac214 | ||
88d9922e4f | |||
9734e3ee58 | |||
|
8c3a599148 | ||
|
4a47b11876 | ||
|
f1382cf81d | ||
|
85699daef2 | ||
1651111d18 | |||
1ed4ea344d | |||
8f514ae550 | |||
4a7415e83c | |||
0ffcfea724 | |||
febe41cc1d | |||
62173395b8 | |||
b48611b80f | |||
6b559d68aa | |||
1982cc58dd | |||
2e2e5ce596 | |||
7d84dca8e9 | |||
2d3916418e | |||
21304e2139 | |||
7b850eb48b | |||
a3ace57e01 | |||
b1c3cbe35e | |||
f31d6bfec2 | |||
a7cfa26901 | |||
f333f3e575 | |||
2b4e253473 | |||
0ba3d469c7 | |||
f709329d96 | |||
f05b25dae4 | |||
3e1d268fa3 | |||
|
109c74bed8 | ||
3023287fd9 | |||
b3d6805638 | |||
291bc2a1f0 | |||
2f368c33fc | |||
9592115341 | |||
|
24c07694bc | ||
|
f0229025e2 | ||
|
6de9a45a09 | ||
|
03c3d495a2 | ||
|
49f25e08e8 | ||
efc0c65056 | |||
936eaac8e1 | |||
fe6a372f75 | |||
148fc052bd | |||
c073341a10 | |||
78299daaac | |||
866449c804 | |||
d69a52079f | |||
9f4f8a14a3 | |||
f6593dc881 | |||
|
b46d31d4b6 | ||
58567fc650 | |||
|
7c57cac670 | ||
d0b21bf1ff | |||
a1825d1f59 | |||
5a3e83ff7b | |||
52569d98d8 | |||
b351103c29 | |||
118cca4681 | |||
44de727cd2 | |||
888ebc3cf9 | |||
6c031a1b81 | |||
02aa4bd762 | |||
9aafa8ee60 | |||
430b98b354 | |||
84189867ef | |||
4ab8cfbe2a | |||
aadd9f4468 | |||
8fbb27ce13 | |||
21bba95909 | |||
6448fe7121 | |||
2458a11d1d | |||
d0ca7c3fe6 | |||
57f899d79c | |||
e881a0c157 | |||
f411657118 | |||
|
7458c6174b | ||
|
21b269d0f9 | ||
|
083af92ac2 | ||
|
2c162577b5 | ||
|
b1c4e96382 | ||
|
a55c6f34f3 | ||
|
beed527ea3 | ||
eaa633cf69 | |||
c632455129 | |||
c012899ed5 | |||
|
8bab544c2f | ||
|
76fc06a5dc | ||
4af6c7e7aa | |||
f60fbcfc4d | |||
464c81706e | |||
408130b808 | |||
375edd1370 | |||
6d912f6c67 | |||
6d1d28955e | |||
920b471761 | |||
63c21767ba | |||
7b6b712565 | |||
35abd05ee9 | |||
dd36e60f6a | |||
cb6c548e21 | |||
02c4ccf621 | |||
fd24588212 | |||
b800bb3ecb | |||
f8abd0978b | |||
12c7c493bf | |||
|
c7c9072313 | ||
2bf3be5fae | |||
3a40e4fc69 | |||
2e69e03f6f | |||
a09f9bb528 | |||
f0e341d726 | |||
6f09df0daf | |||
26cee605b8 | |||
b3fa18c229 | |||
2940c9bcfd | |||
0bb532f72b | |||
fada2aa0f7 | |||
c193e4e675 | |||
3ee682f676 | |||
d85ec3bac2 | |||
b52d8eb1e3 | |||
ee630d2e8b | |||
2f0af79869 | |||
1b7fb79ec0 | |||
2db1a4628c | |||
6aa047d842 | |||
8779c32ae1 | |||
c527dc3358 | |||
6b42577b6b | |||
fb3596f968 | |||
f3a0158213 | |||
0250aa9347 | |||
3df6743396 | |||
fb7d021b9d | |||
5f206df775 | |||
7727e81113 | |||
c4115544a5 | |||
08c47328ba | |||
09001aedca | |||
2c67304716 | |||
dc6d8686de | |||
cc2780bea3 | |||
6e5a2b7922 | |||
f4878d3a13 | |||
89d2fac92e | |||
f2d3e41cf2 | |||
3c27bb36d4 | |||
603d59f389 | |||
07a0ef3f95 | |||
503259f9c9 | |||
5be6a51044 | |||
ac69f042b1 | |||
133d5c2e34 | |||
2a94244890 | |||
a15a2dfd29 | |||
093bb02633 | |||
99a85116f8 | |||
|
27cdb79063 | ||
f4cbfd63ff | |||
2b794b6aa7 | |||
d0244a059f | |||
dcdd891d7d | |||
6d2df9de79 | |||
41d4e37bae | |||
ee5c0cc9b6 | |||
0a4020eb4d | |||
b2de26589b | |||
0677adb4dd | |||
231cc95be6 | |||
639f9cab82 | |||
4eac4e575e | |||
3f0f92cda6 | |||
d2650e89bd | |||
2962123cba | |||
830168ec37 | |||
584c921ca0 | |||
81347b4d16 | |||
2cfa0b0e6b | |||
|
fa5dee76b1 | ||
|
8d1679c6b8 | ||
|
3791a38f7c | ||
|
142f7b0c86 | ||
|
891ad66eab | ||
|
60c43151c5 | ||
|
e036800261 | ||
|
62900def36 | ||
|
e3a309a73f | ||
|
ad6c1c0c4e | ||
|
00b92a91b5 | ||
|
65533741f7 | ||
|
dc0259fbda | ||
|
131a6785d4 | ||
|
44f4f5c8e2 | ||
|
2679df034f | ||
bf71162b97 | |||
299e828d83 | |||
ef5452cddf | |||
80de748737 | |||
|
71e1006ba8 | ||
00f31ae83f | |||
cce339deaf | |||
|
24128ff109 | ||
|
34e9d3f0ca | ||
|
c995788259 | ||
|
94c7198001 | ||
|
04d86fe9f3 | ||
|
b78074b6a0 | ||
|
7dfd3cdae8 | ||
|
cecee1ef2c | ||
|
355d4b58be | ||
|
2c54a536f3 | ||
|
d868a45120 | ||
|
9deae8c962 | ||
|
db86cdd7bd | ||
|
ec9939c1ba | ||
|
f74617c124 | ||
|
8c6a3921ed | ||
a8a15dd9d0 | |||
3ce68a751a | |||
|
daa0977d01 | ||
|
a2929f4384 | ||
|
7fe3974c0a | ||
|
f7e86f81a0 | ||
|
fecec803d9 | ||
|
8fe9a13cdd | ||
d2c42e6f42 | |||
049cc518f4 | |||
2e1c66897f | |||
adcef36189 | |||
|
2f121c41c9 | ||
e0ed7e300f | |||
485207901b | |||
c760f0a4c3 | |||
c84eeedec3 | |||
|
1ac3526f33 | ||
|
0de090ee74 | ||
91405de3f7 | |||
|
8fccda301a | ||
|
7a0abfac89 | ||
|
ae37fda699 | ||
|
b5fc5e2030 | ||
8db0ef9736 | |||
|
95d4b46446 | ||
|
5dfd216a34 | ||
|
c2e8d0aa88 | ||
|
0fe5aeffbb | ||
|
7fbc469046 | ||
|
bf96a4bdbf | ||
|
84685c9bc3 | ||
|
a8d4156997 | ||
|
c18074869b | ||
|
f4c6d39238 | ||
200d35b38a | |||
eb52e84d09 | |||
72abc34764 | |||
e3164d4c7b | |||
|
f5db386c55 | ||
|
294ee70a7a | ||
|
013ea4e8d1 | ||
|
7fbbb31a50 | ||
|
0e127b1fc7 | ||
|
68c028b0a6 | ||
255d4992e1 | |||
a0d399e5ce | |||
fd3b2e945a | |||
b999984501 | |||
|
7836cc2d74 | ||
a61e0df54b | |||
9d835afa35 | |||
5e3be47117 | |||
48de706dd5 | |||
f871fb0c6d | |||
93771f3099 | |||
8cb205725b | |||
9ad580d82f | |||
899f961d0d | |||
54d789204f | |||
25828746f3 | |||
f362c00739 | |||
|
25d1cadd3b | ||
|
c24d53bbd1 | ||
2017e4e3b4 | |||
27a4d4c951 | |||
2f92721249 | |||
3c7a4106ed | |||
3252059daf | |||
|
6eed167f0c | ||
|
4ad0df6fde | ||
661381e881 | |||
|
68a5079f33 | ||
|
8634e19f1b | ||
|
9ada378e38 | ||
|
9d9692d439 | ||
0659ae4014 | |||
bfbf2f1fa0 | |||
dd6b796a01 | |||
|
52a856b4a8 | ||
|
04190ee7f3 | ||
|
587bfcc0f4 | ||
|
2700992ef5 | ||
|
8c658de179 | ||
|
ba37d51ee9 | ||
|
4f4181c54a | ||
|
4d4ac2517b | ||
|
e568c24d1d | ||
|
b458326744 | ||
|
6e7d5e2243 | ||
|
b35169f1dd | ||
|
441ad7498d | ||
|
6f6c5c549a | ||
|
1584e17b54 | ||
|
12982a4455 | ||
|
172f412102 | ||
|
a64497265d | ||
ca639c195f | |||
edc28dcfbf | |||
|
c45f24a1b5 | ||
|
aaf37ee4d7 | ||
|
1dddd17e3c | ||
|
661f1d3e8e | ||
|
edcf9b9293 | ||
|
fe6860b4dd | ||
|
d6406b13e1 | ||
|
e369d7306d | ||
|
9f8d63e104 | ||
|
9b0240d101 | ||
|
b27f0e5a53 | ||
|
75e4483407 | ||
|
0734e9ddd4 | ||
|
809b1cdd58 | ||
|
1be8089604 | ||
|
3e0eff6468 | ||
|
7ecc47ac89 | ||
|
e9f1ac09de | ||
|
fa0d8feff4 | ||
49b8501fd4 | |||
d47484717e | |||
|
05b44aef6b | ||
|
03e9832efa | ||
|
28a375d35d | ||
|
3b06381745 | ||
|
91a0a3f820 | ||
|
8f44c799a6 | ||
|
96272f3841 | ||
|
5c936d88a0 | ||
|
1c64ee926e | ||
|
2cbb72a81c | ||
|
31d83ee046 | ||
|
a9e8758a01 | ||
|
3e125c5b61 | ||
|
eac6ec4b5e | ||
|
213f8db6a2 | ||
|
6358f35b7e | ||
|
43f5a0df50 | ||
|
c897878776 | ||
cc6eb51e3e | |||
|
507009089b | ||
|
2baf193031 | ||
|
362ba0443a | ||
|
276a2353df | ||
b234784c8e | |||
6ea2a8b7ca | |||
c1d0359aaa | |||
047ee4ad0b | |||
a13106da0c | |||
75113e6523 | |||
325c73d051 | |||
b25a59e95e | |||
|
c5b9147b53 | ||
|
64ac815fd9 | ||
|
a1be533329 | ||
7c4533797f | |||
af84fd65bb | |||
|
1a2613086a | ||
|
4f110c09a5 | ||
6764362237 | |||
2fa2b0e0b1 | |||
b61292f735 | |||
ce7720e221 | |||
853a5528dc | |||
169f405c9c | |||
c6125b01ce | |||
b0b5b34bff | |||
1c9722357d | |||
141da3ae71 | |||
94edf9cf8b | |||
c11a3ca0a7 | |||
|
870b1a85ae | ||
|
b5510427f9 | ||
|
26ed65c8f8 | ||
|
f7f043d8cf | ||
|
ddcaa6ad29 | ||
334da7f452 | |||
4669ecd4ba | |||
4573b34cac | |||
17f57e85d1 | |||
c8d4d184ee | |||
17f27b1ebd | |||
a16bbecb8a | |||
7c9b0dd842 | |||
6b7228b3e6 | |||
f117552334 | |||
a21a160029 | |||
1569a374a9 | |||
eddf023b8a | |||
6b8ffbe735 | |||
81050535a5 | |||
7dcf5c90e3 | |||
9ce00f26f9 | |||
85c253ed4a | |||
ccfc0a5a89 | |||
d3f857b1c9 | |||
fb62035aa0 | |||
0260bc7705 | |||
68e6a58f12 | |||
640515e3d8 | |||
|
f089bf5629 | ||
|
276f113f28 | ||
97c579f637 | |||
a13c109111 | |||
|
ab6afd18ac | ||
|
5bde64d48b | ||
|
2f5add4d5f | ||
c5a885dcd6 | |||
a4d8512fb8 | |||
5ec903044d | |||
8a0cf0194f | |||
1c680d4b7a | |||
|
c9c073eee4 | ||
|
f290b2e908 | ||
|
5f8225461b | ||
e9323460c7 | |||
20e186a1e0 | |||
|
6ef4af989b | ||
|
ccde8b817f | ||
|
68168bf72d | ||
|
e93d0feaa7 | ||
|
8f601d9b39 | ||
|
5436308e4a | ||
|
07fe7d0cbe | ||
|
60b57706c4 | ||
|
58c2f60b69 | ||
|
bfa3a7b3b0 | ||
|
954e38bebe | ||
|
b1a38bde7a | ||
|
2581875edc | ||
|
f212b0a963 | ||
|
62702dbcb8 | ||
41d6cab033 | |||
5a31e747c9 | |||
cbc73a3fd1 | |||
|
6c6d43eb4e | ||
|
e1dcfd3553 | ||
|
888838473a | ||
|
01568b0e62 | ||
|
d5ce66f6ab | ||
|
d86936a3de | ||
d516938707 | |||
72344d1418 | |||
7ecf6ab38b | |||
2d4d70d3ec | |||
78f8d47528 | |||
b85f987b0b | |||
f57afe2079 | |||
|
0fb84fa34b | ||
|
8462bbfe63 | ||
229977c955 | |||
e485a07133 | |||
|
0880747edb | ||
|
b801e1fcd6 | ||
70ec2faa98 | |||
2f849ee252 | |||
bb6ed44339 | |||
360cface33 | |||
|
80302e95a8 | ||
caf2f6b274 | |||
c49be8988b | |||
971c2379bd | |||
|
94b0d66e4c | ||
|
5e8af396fd | ||
9942723189 | |||
a7d19dbb64 | |||
90dbe03e17 | |||
8b14096990 | |||
|
b938202081 | ||
e79ef469ac | |||
485c5db0fe | |||
|
c793947209 | ||
3e9ee053a1 | |||
dda6c69d5b | |||
cd51b9af99 | |||
|
c399c2b44d | ||
|
af7de7a294 | ||
|
1dc86efd26 | ||
f32555dcc5 | |||
30391cb2eb | |||
e93c883470 | |||
|
2e88408f5c | ||
fcac5c0772 | |||
90f4000935 | |||
480708b9a0 | |||
c4baf876d4 | |||
2f4dac3531 | |||
3ec6890850 | |||
018801d973 | |||
1d83521daa | |||
fc5670c6a4 | |||
d9c435e282 | |||
614a0e8277 | |||
|
aaf39222c3 | ||
550142bd6a | |||
c0a929aef7 | |||
37fe944224 | |||
|
315a42843f | ||
83a101db83 | |||
c4274e1660 | |||
ba6db55cb0 | |||
e5ea84d531 | |||
15767a1491 | |||
4d2a32ae7a | |||
5b937e3644 | |||
e418b044f7 | |||
b8b05f143f | |||
6ec42b4b82 | |||
abb7d4d2f5 | |||
16ebbfff29 | |||
4828226095 | |||
8a049f27b8 | |||
43578a3eb4 | |||
fdbd42e542 | |||
e7e4cee4f3 | |||
|
ec3954ff5f | ||
|
0f468e2179 | ||
|
8e61286741 | ||
|
4790e99817 | ||
|
2dd63aa7a4 | ||
|
559a501140 | ||
|
945684c470 | ||
|
e30a80a234 | ||
|
69e4ecc1d2 | ||
|
5f483df16b | ||
|
4680a977c3 | ||
|
de42456171 | ||
|
d55212c998 | ||
|
c96483e3bd | ||
|
c6e1f64573 | ||
|
ae31a6a760 | ||
|
dd8f2a64fe | ||
|
724cf02d4a | ||
|
7b8b2731e7 | ||
|
237a8ec918 | ||
|
49a0ae73eb | ||
|
315f1146cd | ||
|
9f202782c5 | ||
|
594a262dcc | ||
|
7f8ca54285 | ||
|
c5b23c367e | ||
|
b6fe03eb26 | ||
|
f37ed4958b | ||
|
896f3a8002 | ||
|
5f85473d6b | ||
|
ac3b0ebc58 | ||
|
f0fcdf75b5 | ||
|
53bffb83d4 | ||
|
cd44e851f1 | ||
|
fb24e3a7d2 | ||
|
655a69259a | ||
|
4e0cf0cc28 | ||
|
507c4e9efc | ||
|
cdf550845f | ||
|
3db7a5387b | ||
|
f8a5194c70 | ||
|
cff3bae155 | ||
|
90dffc73c8 | ||
a1151fc734 | |||
|
ab3baeb38f | ||
|
389731d373 | ||
6e3ce7423e | |||
15f15a7cfd | |||
0e5f626226 | |||
|
97b9c6f03d | ||
|
63982819c6 | ||
|
6fec507bef | ||
|
219b3bd34f | ||
|
b00d2d2c39 | ||
|
f1b3e21830 | ||
|
24162c9ead | ||
|
935cd1e173 | ||
|
55e39df30f | ||
|
581be32ed2 | ||
|
6bc136b1d0 | ||
|
0c668bf46a | ||
|
840814c776 | ||
|
95af55128e | ||
|
9f2a57e334 | ||
|
c645d33db5 | ||
|
e0f1349524 | ||
|
79b761f923 | ||
|
0d4e31ca58 | ||
|
b07a354a33 | ||
|
c433939795 | ||
|
b6a4c31b48 | ||
|
98b1439ff9 | ||
|
564738b1ff | ||
|
a80e43dbcf | ||
|
b99622d9fb | ||
937c77ead2 | |||
95e5a2ade3 | |||
|
91676d1dda | ||
|
ac3611bb19 | ||
|
cc4afb978d | ||
|
20e92a7009 | ||
|
42f0afcbfa | ||
|
20ac13fdf3 | ||
|
e38612e6fa | ||
|
c2b2b71c5d | ||
|
009f48a904 | ||
|
5cfc0180aa | ||
|
914f180fa3 | ||
|
6cb563a40c | ||
|
db3837be22 | ||
|
2f0dd83016 | ||
|
3ac27e5596 | ||
|
bd466a55a8 | ||
|
c8e6f58e24 | ||
|
888988ad37 | ||
|
e4a105a30b | ||
|
26ebe41fef | ||
1e496fee74 | |||
|
9f755e0379 | ||
|
4512dbdf58 | ||
|
483fd3cfa1 | ||
|
85516e9c7c | ||
|
0c006fbfaa | ||
|
54c10a42cc | ||
|
ef0fe2bcc1 |
25
.gitignore
vendored
25
.gitignore
vendored
@@ -83,6 +83,7 @@ ltmain.sh
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
.dirstamp
|
||||
|
||||
# build directory #
|
||||
###################
|
||||
@@ -97,11 +98,8 @@ build.sh
|
||||
|
||||
# Eigen source #
|
||||
################
|
||||
lib/Eigen/*
|
||||
|
||||
# FFTW source #
|
||||
################
|
||||
lib/fftw/*
|
||||
Grid/Eigen
|
||||
Eigen/*
|
||||
|
||||
# libtool macros #
|
||||
##################
|
||||
@@ -112,20 +110,7 @@ m4/libtool.m4
|
||||
################
|
||||
gh-pages/
|
||||
|
||||
# Buck files #
|
||||
##############
|
||||
.buck*
|
||||
buck-out
|
||||
BUCK
|
||||
make-bin-BUCK.sh
|
||||
|
||||
# generated sources #
|
||||
#####################
|
||||
lib/qcd/spin/gamma-gen/*.h
|
||||
lib/qcd/spin/gamma-gen/*.cc
|
||||
|
||||
# vs code editor files #
|
||||
########################
|
||||
.vscode/
|
||||
.vscode/settings.json
|
||||
settings.json
|
||||
Grid/qcd/spin/gamma-gen/*.h
|
||||
Grid/qcd/spin/gamma-gen/*.cc
|
||||
|
26
.travis.yml
26
.travis.yml
@@ -9,6 +9,11 @@ matrix:
|
||||
- os: osx
|
||||
osx_image: xcode8.3
|
||||
compiler: clang
|
||||
env: PREC=single
|
||||
- os: osx
|
||||
osx_image: xcode8.3
|
||||
compiler: clang
|
||||
env: PREC=double
|
||||
|
||||
before_install:
|
||||
- export GRIDDIR=`pwd`
|
||||
@@ -16,9 +21,11 @@ before_install:
|
||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc openssl; fi
|
||||
|
||||
install:
|
||||
- export CWD=`pwd`
|
||||
- echo $CWD
|
||||
- export CC=$CC$VERSION
|
||||
- export CXX=$CXX$VERSION
|
||||
- echo $PATH
|
||||
@@ -31,17 +38,24 @@ install:
|
||||
- which $CXX
|
||||
- $CXX --version
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export EXTRACONF='--with-openssl=/usr/local/opt/openssl'; fi
|
||||
|
||||
script:
|
||||
- ./bootstrap.sh
|
||||
- mkdir build
|
||||
- cd build
|
||||
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
|
||||
- mkdir lime
|
||||
- cd lime
|
||||
- mkdir build
|
||||
- cd build
|
||||
- wget http://usqcd-software.github.io/downloads/c-lime/lime-1.3.2.tar.gz
|
||||
- tar xf lime-1.3.2.tar.gz
|
||||
- cd lime-1.3.2
|
||||
- ./configure --prefix=$CWD/build/lime/install
|
||||
- make -j4
|
||||
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
|
||||
- echo make clean
|
||||
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
|
||||
- make install
|
||||
- cd $CWD/build
|
||||
- ../configure --enable-precision=$PREC --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF}
|
||||
- make -j4
|
||||
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
|
||||
- make check
|
||||
|
||||
|
@@ -48,6 +48,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/serialisation/Serialisation.h>
|
||||
#include <Grid/threads/Threads.h>
|
||||
#include <Grid/util/Util.h>
|
||||
#include <Grid/util/Sha.h>
|
||||
#include <Grid/communicator/Communicator.h>
|
||||
#include <Grid/cartesian/Cartesian.h>
|
||||
#include <Grid/tensors/Tensors.h>
|
@@ -1,4 +1,9 @@
|
||||
#pragma once
|
||||
// Force Eigen to use MKL if Grid has been configured with --enable-mkl
|
||||
#ifdef USE_MKL
|
||||
#define EIGEN_USE_MKL_ALL
|
||||
#endif
|
||||
|
||||
#if defined __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
@@ -21,6 +21,32 @@ if BUILD_HDF5
|
||||
extra_headers+=serialisation/Hdf5Type.h
|
||||
endif
|
||||
|
||||
all: version-cache
|
||||
|
||||
version-cache:
|
||||
@if [ `git status --porcelain | grep -v '??' | wc -l` -gt 0 ]; then\
|
||||
a="uncommited changes";\
|
||||
else\
|
||||
a="clean";\
|
||||
fi;\
|
||||
echo "`git log -n 1 --format=format:"#define GITHASH \\"%H:%d $$a\\"%n" HEAD`" > vertmp;\
|
||||
if [ -e version-cache ]; then\
|
||||
d=`diff vertmp version-cache`;\
|
||||
if [ "$${d}" != "" ]; then\
|
||||
mv vertmp version-cache;\
|
||||
rm -f Version.h;\
|
||||
fi;\
|
||||
else\
|
||||
mv vertmp version-cache;\
|
||||
rm -f Version.h;\
|
||||
fi;\
|
||||
rm -f vertmp
|
||||
|
||||
Version.h:
|
||||
cp version-cache Version.h
|
||||
|
||||
.PHONY: version-cache
|
||||
|
||||
#
|
||||
# Libraries
|
||||
#
|
||||
@@ -30,8 +56,8 @@ include Eigen.inc
|
||||
lib_LIBRARIES = libGrid.a
|
||||
|
||||
CCFILES += $(extra_sources)
|
||||
HFILES += $(extra_headers)
|
||||
HFILES += $(extra_headers) Config.h Version.h
|
||||
|
||||
libGrid_a_SOURCES = $(CCFILES)
|
||||
libGrid_adir = $(pkgincludedir)
|
||||
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h
|
||||
libGrid_adir = $(includedir)/Grid
|
||||
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) $(eigen_unsupp_files)
|
@@ -39,6 +39,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/algorithms/approx/MultiShiftFunction.h>
|
||||
#include <Grid/algorithms/approx/Forecast.h>
|
||||
|
||||
#include <Grid/algorithms/iterative/Deflation.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradient.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateResidual.h>
|
||||
#include <Grid/algorithms/iterative/NormalEquations.h>
|
@@ -51,7 +51,7 @@ namespace Grid {
|
||||
|
||||
virtual void Op (const Field &in, Field &out) = 0; // Abstract base
|
||||
virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0;
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) = 0;
|
||||
virtual void HermOp(const Field &in, Field &out)=0;
|
||||
};
|
||||
|
||||
@@ -309,36 +309,59 @@ namespace Grid {
|
||||
class SchurStaggeredOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
Field tmp;
|
||||
RealD mass;
|
||||
double tMpc;
|
||||
double tIP;
|
||||
double tMeo;
|
||||
double taxpby_norm;
|
||||
uint64_t ncall;
|
||||
public:
|
||||
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){};
|
||||
void Report(void)
|
||||
{
|
||||
std::cout << GridLogMessage << " HermOpAndNorm.Mpc "<< tMpc/ncall<<" usec "<<std::endl;
|
||||
std::cout << GridLogMessage << " HermOpAndNorm.IP "<< tIP /ncall<<" usec "<<std::endl;
|
||||
std::cout << GridLogMessage << " Mpc.MeoMoe "<< tMeo/ncall<<" usec "<<std::endl;
|
||||
std::cout << GridLogMessage << " Mpc.axpby_norm "<< taxpby_norm/ncall<<" usec "<<std::endl;
|
||||
}
|
||||
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())
|
||||
{
|
||||
assert( _Mat.isTrivialEE() );
|
||||
mass = _Mat.Mass();
|
||||
tMpc=0;
|
||||
tIP =0;
|
||||
tMeo=0;
|
||||
taxpby_norm=0;
|
||||
ncall=0;
|
||||
}
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
GridLogIterative.TimingMode(1);
|
||||
std::cout << GridLogIterative << " HermOpAndNorm "<<std::endl;
|
||||
ncall++;
|
||||
tMpc-=usecond();
|
||||
n2 = Mpc(in,out);
|
||||
std::cout << GridLogIterative << " HermOpAndNorm.Mpc "<<std::endl;
|
||||
tMpc+=usecond();
|
||||
tIP-=usecond();
|
||||
ComplexD dot= innerProduct(in,out);
|
||||
std::cout << GridLogIterative << " HermOpAndNorm.innerProduct "<<std::endl;
|
||||
tIP+=usecond();
|
||||
n1 = real(dot);
|
||||
}
|
||||
virtual void HermOp(const Field &in, Field &out){
|
||||
std::cout << GridLogIterative << " HermOp "<<std::endl;
|
||||
Mpc(in,out);
|
||||
ncall++;
|
||||
tMpc-=usecond();
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
tMpc+=usecond();
|
||||
taxpby_norm-=usecond();
|
||||
axpby(out,-1.0,mass*mass,tmp,in);
|
||||
taxpby_norm+=usecond();
|
||||
}
|
||||
virtual RealD Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in._grid);
|
||||
Field tmp2(in._grid);
|
||||
|
||||
std::cout << GridLogIterative << " HermOp.Mpc "<<std::endl;
|
||||
_Mat.Mooee(in,out);
|
||||
_Mat.Mooee(out,tmp);
|
||||
std::cout << GridLogIterative << " HermOp.MooeeMooee "<<std::endl;
|
||||
|
||||
tMeo-=usecond();
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.Meooe(out,tmp2);
|
||||
std::cout << GridLogIterative << " HermOp.MeooeMeooe "<<std::endl;
|
||||
|
||||
RealD nn=axpy_norm(out,-1.0,tmp2,tmp);
|
||||
std::cout << GridLogIterative << " HermOp.axpy_norm "<<std::endl;
|
||||
_Mat.Meooe(out,tmp);
|
||||
tMeo+=usecond();
|
||||
taxpby_norm-=usecond();
|
||||
RealD nn=axpby_norm(out,-1.0,mass*mass,tmp,in);
|
||||
taxpby_norm+=usecond();
|
||||
return nn;
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
@@ -357,6 +380,12 @@ namespace Grid {
|
||||
template<class Field> class OperatorFunction {
|
||||
public:
|
||||
virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
|
||||
virtual void operator() (LinearOperatorBase<Field> &Linop, const std::vector<Field> &in,std::vector<Field> &out) {
|
||||
assert(in.size()==out.size());
|
||||
for(int k=0;k<in.size();k++){
|
||||
(*this)(Linop,in[k],out[k]);
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
template<class Field> class LinearFunction {
|
@@ -55,6 +55,14 @@ namespace Grid {
|
||||
template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrixBase<Field> {
|
||||
public:
|
||||
virtual GridBase *RedBlackGrid(void)=0;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Query the even even properties to make algorithmic decisions
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
virtual RealD Mass(void) { return 0.0; };
|
||||
virtual int ConstEE(void) { return 0; }; // Disable assumptions unless overridden
|
||||
virtual int isTrivialEE(void) { return 0; }; // by a derived class that knows better
|
||||
|
||||
// half checkerboard operaions
|
||||
virtual void Meooe (const Field &in, Field &out)=0;
|
||||
virtual void Mooee (const Field &in, Field &out)=0;
|
@@ -33,7 +33,7 @@ directory
|
||||
|
||||
namespace Grid {
|
||||
|
||||
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS };
|
||||
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS, BlockCGVec, BlockCGrQVec };
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Block conjugate gradient. Dimension zero should be the block direction
|
||||
@@ -42,7 +42,6 @@ template <class Field>
|
||||
class BlockConjugateGradient : public OperatorFunction<Field> {
|
||||
public:
|
||||
|
||||
|
||||
typedef typename Field::scalar_type scomplex;
|
||||
|
||||
int blockDim ;
|
||||
@@ -54,21 +53,15 @@ class BlockConjugateGradient : public OperatorFunction<Field> {
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
Integer PrintInterval; //GridLogMessages or Iterative
|
||||
|
||||
BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true)
|
||||
: Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv)
|
||||
: Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv),PrintInterval(100)
|
||||
{};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Thin QR factorisation (google it)
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void ThinQRfact (Eigen::MatrixXcd &m_rr,
|
||||
Eigen::MatrixXcd &C,
|
||||
Eigen::MatrixXcd &Cinv,
|
||||
Field & Q,
|
||||
const Field & R)
|
||||
{
|
||||
int Orthog = blockDim; // First dimension is block dim; this is an assumption
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//Dimensions
|
||||
// R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock
|
||||
@@ -85,22 +78,20 @@ void ThinQRfact (Eigen::MatrixXcd &m_rr,
|
||||
// Cdag C = Rdag R ; passes.
|
||||
// QdagQ = 1 ; passes
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void ThinQRfact (Eigen::MatrixXcd &m_rr,
|
||||
Eigen::MatrixXcd &C,
|
||||
Eigen::MatrixXcd &Cinv,
|
||||
Field & Q,
|
||||
const Field & R)
|
||||
{
|
||||
int Orthog = blockDim; // First dimension is block dim; this is an assumption
|
||||
sliceInnerProductMatrix(m_rr,R,R,Orthog);
|
||||
|
||||
// Force manifest hermitian to avoid rounding related
|
||||
m_rr = 0.5*(m_rr+m_rr.adjoint());
|
||||
|
||||
#if 0
|
||||
std::cout << " Calling Cholesky ldlt on m_rr " << m_rr <<std::endl;
|
||||
Eigen::MatrixXcd L_ldlt = m_rr.ldlt().matrixL();
|
||||
std::cout << " Called Cholesky ldlt on m_rr " << L_ldlt <<std::endl;
|
||||
auto D_ldlt = m_rr.ldlt().vectorD();
|
||||
std::cout << " Called Cholesky ldlt on m_rr " << D_ldlt <<std::endl;
|
||||
#endif
|
||||
|
||||
// std::cout << " Calling Cholesky llt on m_rr " <<std::endl;
|
||||
Eigen::MatrixXcd L = m_rr.llt().matrixL();
|
||||
// std::cout << " Called Cholesky llt on m_rr " << L <<std::endl;
|
||||
|
||||
C = L.adjoint();
|
||||
Cinv = C.inverse();
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -112,6 +103,25 @@ void ThinQRfact (Eigen::MatrixXcd &m_rr,
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
sliceMulMatrix(Q,Cinv,R,Orthog);
|
||||
}
|
||||
// see comments above
|
||||
void ThinQRfact (Eigen::MatrixXcd &m_rr,
|
||||
Eigen::MatrixXcd &C,
|
||||
Eigen::MatrixXcd &Cinv,
|
||||
std::vector<Field> & Q,
|
||||
const std::vector<Field> & R)
|
||||
{
|
||||
InnerProductMatrix(m_rr,R,R);
|
||||
|
||||
m_rr = 0.5*(m_rr+m_rr.adjoint());
|
||||
|
||||
Eigen::MatrixXcd L = m_rr.llt().matrixL();
|
||||
|
||||
C = L.adjoint();
|
||||
Cinv = C.inverse();
|
||||
|
||||
MulMatrix(Q,Cinv,R);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Call one of several implementations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -119,14 +129,20 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
|
||||
{
|
||||
if ( CGtype == BlockCGrQ ) {
|
||||
BlockCGrQsolve(Linop,Src,Psi);
|
||||
} else if (CGtype == BlockCG ) {
|
||||
BlockCGsolve(Linop,Src,Psi);
|
||||
} else if (CGtype == CGmultiRHS ) {
|
||||
CGmultiRHSsolve(Linop,Src,Psi);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
virtual void operator()(LinearOperatorBase<Field> &Linop, const std::vector<Field> &Src, std::vector<Field> &Psi)
|
||||
{
|
||||
if ( CGtype == BlockCGrQVec ) {
|
||||
BlockCGrQsolveVec(Linop,Src,Psi);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// BlockCGrQ implementation:
|
||||
@@ -139,7 +155,8 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
|
||||
{
|
||||
int Orthog = blockDim; // First dimension is block dim; this is an assumption
|
||||
Nblock = B._grid->_fdimensions[Orthog];
|
||||
|
||||
/* FAKE */
|
||||
Nblock=8;
|
||||
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
|
||||
|
||||
X.checkerboard = B.checkerboard;
|
||||
@@ -202,15 +219,10 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
|
||||
std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl;
|
||||
|
||||
//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
|
||||
|
||||
Linop.HermOp(X, AD);
|
||||
tmp = B - AD;
|
||||
//std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl;
|
||||
|
||||
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
|
||||
//std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl;
|
||||
//std::cout << GridLogMessage << " m_rr " << m_rr<<std::endl;
|
||||
//std::cout << GridLogMessage << " m_C " << m_C<<std::endl;
|
||||
//std::cout << GridLogMessage << " m_Cinv " << m_Cinv<<std::endl;
|
||||
D=Q;
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl;
|
||||
@@ -232,14 +244,12 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
|
||||
MatrixTimer.Start();
|
||||
Linop.HermOp(D, Z);
|
||||
MatrixTimer.Stop();
|
||||
//std::cout << GridLogMessage << " norm2 Z " <<norm2(Z)<<std::endl;
|
||||
|
||||
//4. M = [D^dag Z]^{-1}
|
||||
sliceInnerTimer.Start();
|
||||
sliceInnerProductMatrix(m_DZ,D,Z,Orthog);
|
||||
sliceInnerTimer.Stop();
|
||||
m_M = m_DZ.inverse();
|
||||
//std::cout << GridLogMessage << " m_DZ " <<m_DZ<<std::endl;
|
||||
|
||||
//5. X = X + D MC
|
||||
m_tmp = m_M * m_C;
|
||||
@@ -257,6 +267,7 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
|
||||
|
||||
//7. D = Q + D S^dag
|
||||
m_tmp = m_S.adjoint();
|
||||
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddMatrix(D,m_tmp,D,Q,Orthog);
|
||||
sliceMaddTimer.Stop();
|
||||
@@ -317,152 +328,6 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Block conjugate gradient; Original O'Leary Dimension zero should be the block direction
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
|
||||
{
|
||||
int Orthog = blockDim; // First dimension is block dim; this is an assumption
|
||||
Nblock = Src._grid->_fdimensions[Orthog];
|
||||
|
||||
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
|
||||
|
||||
Psi.checkerboard = Src.checkerboard;
|
||||
conformable(Psi, Src);
|
||||
|
||||
Field P(Src);
|
||||
Field AP(Src);
|
||||
Field R(Src);
|
||||
|
||||
Eigen::MatrixXcd m_pAp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_pAp_inv= Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_rr_inv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
Eigen::MatrixXcd m_alpha = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_beta = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
// Initial residual computation & set up
|
||||
std::vector<RealD> residuals(Nblock);
|
||||
std::vector<RealD> ssq(Nblock);
|
||||
|
||||
sliceNorm(ssq,Src,Orthog);
|
||||
RealD sssum=0;
|
||||
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
|
||||
|
||||
sliceNorm(residuals,Src,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
sliceNorm(residuals,Psi,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
// Initial search dir is guess
|
||||
Linop.HermOp(Psi, AP);
|
||||
|
||||
|
||||
/************************************************************************
|
||||
* Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980)
|
||||
************************************************************************
|
||||
* O'Leary : R = B - A X
|
||||
* O'Leary : P = M R ; preconditioner M = 1
|
||||
* O'Leary : alpha = PAP^{-1} RMR
|
||||
* O'Leary : beta = RMR^{-1}_old RMR_new
|
||||
* O'Leary : X=X+Palpha
|
||||
* O'Leary : R_new=R_old-AP alpha
|
||||
* O'Leary : P=MR_new+P beta
|
||||
*/
|
||||
|
||||
R = Src - AP;
|
||||
P = R;
|
||||
sliceInnerProductMatrix(m_rr,R,R,Orthog);
|
||||
|
||||
GridStopWatch sliceInnerTimer;
|
||||
GridStopWatch sliceMaddTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
|
||||
RealD rrsum=0;
|
||||
for(int b=0;b<Nblock;b++) rrsum+=real(m_rr(b,b));
|
||||
|
||||
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
|
||||
<<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop.HermOp(P, AP);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
// Alpha
|
||||
sliceInnerTimer.Start();
|
||||
sliceInnerProductMatrix(m_pAp,P,AP,Orthog);
|
||||
sliceInnerTimer.Stop();
|
||||
m_pAp_inv = m_pAp.inverse();
|
||||
m_alpha = m_pAp_inv * m_rr ;
|
||||
|
||||
// Psi, R update
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddMatrix(Psi,m_alpha, P,Psi,Orthog); // add alpha * P to psi
|
||||
sliceMaddMatrix(R ,m_alpha,AP, R,Orthog,-1.0);// sub alpha * AP to resid
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
// Beta
|
||||
m_rr_inv = m_rr.inverse();
|
||||
sliceInnerTimer.Start();
|
||||
sliceInnerProductMatrix(m_rr,R,R,Orthog);
|
||||
sliceInnerTimer.Stop();
|
||||
m_beta = m_rr_inv *m_rr;
|
||||
|
||||
// Search update
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddMatrix(AP,m_beta,P,R,Orthog);
|
||||
sliceMaddTimer.Stop();
|
||||
P= AP;
|
||||
|
||||
/*********************
|
||||
* convergence monitor
|
||||
*********************
|
||||
*/
|
||||
RealD max_resid=0;
|
||||
RealD rr;
|
||||
for(int b=0;b<Nblock;b++){
|
||||
rr = real(m_rr(b,b))/ssq[b];
|
||||
if ( rr > max_resid ) max_resid = rr;
|
||||
}
|
||||
|
||||
if ( max_resid < Tolerance*Tolerance ) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl;
|
||||
for(int b=0;b<Nblock;b++){
|
||||
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
|
||||
<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
|
||||
|
||||
Linop.HermOp(Psi, AP);
|
||||
AP = AP-Src;
|
||||
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// multiRHS conjugate gradient. Dimension zero should be the block direction
|
||||
// Use this for spread out across nodes
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
@@ -600,6 +465,233 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
|
||||
void InnerProductMatrix(Eigen::MatrixXcd &m , const std::vector<Field> &X, const std::vector<Field> &Y){
|
||||
for(int b=0;b<Nblock;b++){
|
||||
for(int bp=0;bp<Nblock;bp++) {
|
||||
m(b,bp) = innerProduct(X[b],Y[bp]);
|
||||
}}
|
||||
}
|
||||
void MaddMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X,const std::vector<Field> &Y,RealD scale=1.0){
|
||||
// Should make this cache friendly with site outermost, parallel_for
|
||||
// Deal with case AP aliases with either Y or X
|
||||
std::vector<Field> tmp(Nblock,X[0]);
|
||||
for(int b=0;b<Nblock;b++){
|
||||
tmp[b] = Y[b];
|
||||
for(int bp=0;bp<Nblock;bp++) {
|
||||
tmp[b] = tmp[b] + (scale*m(bp,b))*X[bp];
|
||||
}
|
||||
}
|
||||
for(int b=0;b<Nblock;b++){
|
||||
AP[b] = tmp[b];
|
||||
}
|
||||
}
|
||||
void MulMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X){
|
||||
// Should make this cache friendly with site outermost, parallel_for
|
||||
for(int b=0;b<Nblock;b++){
|
||||
AP[b] = zero;
|
||||
for(int bp=0;bp<Nblock;bp++) {
|
||||
AP[b] += (m(bp,b))*X[bp];
|
||||
}
|
||||
}
|
||||
}
|
||||
double normv(const std::vector<Field> &P){
|
||||
double nn = 0.0;
|
||||
for(int b=0;b<Nblock;b++) {
|
||||
nn+=norm2(P[b]);
|
||||
}
|
||||
return nn;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// BlockCGrQvec implementation:
|
||||
//--------------------------
|
||||
// X is guess/Solution
|
||||
// B is RHS
|
||||
// Solve A X_i = B_i ; i refers to Nblock index
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
void BlockCGrQsolveVec(LinearOperatorBase<Field> &Linop, const std::vector<Field> &B, std::vector<Field> &X)
|
||||
{
|
||||
Nblock = B.size();
|
||||
assert(Nblock == X.size());
|
||||
|
||||
std::cout<<GridLogMessage<<" Block Conjugate Gradient Vec rQ : Nblock "<<Nblock<<std::endl;
|
||||
|
||||
for(int b=0;b<Nblock;b++){
|
||||
X[b].checkerboard = B[b].checkerboard;
|
||||
conformable(X[b], B[b]);
|
||||
conformable(X[b], X[0]);
|
||||
}
|
||||
|
||||
Field Fake(B[0]);
|
||||
|
||||
std::vector<Field> tmp(Nblock,Fake);
|
||||
std::vector<Field> Q(Nblock,Fake);
|
||||
std::vector<Field> D(Nblock,Fake);
|
||||
std::vector<Field> Z(Nblock,Fake);
|
||||
std::vector<Field> AD(Nblock,Fake);
|
||||
|
||||
Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
|
||||
// Initial residual computation & set up
|
||||
std::vector<RealD> residuals(Nblock);
|
||||
std::vector<RealD> ssq(Nblock);
|
||||
|
||||
RealD sssum=0;
|
||||
for(int b=0;b<Nblock;b++){ ssq[b] = norm2(B[b]);}
|
||||
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
|
||||
|
||||
for(int b=0;b<Nblock;b++){ residuals[b] = norm2(B[b]);}
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
for(int b=0;b<Nblock;b++){ residuals[b] = norm2(X[b]);}
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
/************************************************************************
|
||||
* Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
|
||||
************************************************************************
|
||||
* Dimensions:
|
||||
*
|
||||
* X,B==(Nferm x Nblock)
|
||||
* A==(Nferm x Nferm)
|
||||
*
|
||||
* Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
|
||||
*
|
||||
* QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
|
||||
* for k:
|
||||
* Z = AD
|
||||
* M = [D^dag Z]^{-1}
|
||||
* X = X + D MC
|
||||
* QS = Q - ZM
|
||||
* D = Q + D S^dag
|
||||
* C = S C
|
||||
*/
|
||||
///////////////////////////////////////
|
||||
// Initial block: initial search dir is guess
|
||||
///////////////////////////////////////
|
||||
std::cout << GridLogMessage<<"BlockCGrQvec algorithm initialisation " <<std::endl;
|
||||
|
||||
//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
|
||||
for(int b=0;b<Nblock;b++) {
|
||||
Linop.HermOp(X[b], AD[b]);
|
||||
tmp[b] = B[b] - AD[b];
|
||||
}
|
||||
|
||||
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
|
||||
|
||||
for(int b=0;b<Nblock;b++) D[b]=Q[b];
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCGrQ vec computed initial residual and QR fact " <<std::endl;
|
||||
|
||||
///////////////////////////////////////
|
||||
// Timers
|
||||
///////////////////////////////////////
|
||||
GridStopWatch sliceInnerTimer;
|
||||
GridStopWatch sliceMaddTimer;
|
||||
GridStopWatch QRTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
|
||||
//3. Z = AD
|
||||
MatrixTimer.Start();
|
||||
for(int b=0;b<Nblock;b++) Linop.HermOp(D[b], Z[b]);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
//4. M = [D^dag Z]^{-1}
|
||||
sliceInnerTimer.Start();
|
||||
InnerProductMatrix(m_DZ,D,Z);
|
||||
sliceInnerTimer.Stop();
|
||||
m_M = m_DZ.inverse();
|
||||
|
||||
//5. X = X + D MC
|
||||
m_tmp = m_M * m_C;
|
||||
sliceMaddTimer.Start();
|
||||
MaddMatrix(X,m_tmp, D,X);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
//6. QS = Q - ZM
|
||||
sliceMaddTimer.Start();
|
||||
MaddMatrix(tmp,m_M,Z,Q,-1.0);
|
||||
sliceMaddTimer.Stop();
|
||||
QRTimer.Start();
|
||||
ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
|
||||
QRTimer.Stop();
|
||||
|
||||
//7. D = Q + D S^dag
|
||||
m_tmp = m_S.adjoint();
|
||||
sliceMaddTimer.Start();
|
||||
MaddMatrix(D,m_tmp,D,Q);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
//8. C = S C
|
||||
m_C = m_S*m_C;
|
||||
|
||||
/*********************
|
||||
* convergence monitor
|
||||
*********************
|
||||
*/
|
||||
m_rr = m_C.adjoint() * m_C;
|
||||
|
||||
RealD max_resid=0;
|
||||
RealD rrsum=0;
|
||||
RealD rr;
|
||||
|
||||
for(int b=0;b<Nblock;b++) {
|
||||
rrsum+=real(m_rr(b,b));
|
||||
rr = real(m_rr(b,b))/ssq[b];
|
||||
if ( rr > max_resid ) max_resid = rr;
|
||||
}
|
||||
|
||||
std::cout << GridLogIterative << "\t Block Iteration "<<k<<" ave resid "<< sqrt(rrsum/sssum) << " max "<< sqrt(max_resid) <<std::endl;
|
||||
|
||||
if ( max_resid < Tolerance*Tolerance ) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
|
||||
|
||||
for(int b=0;b<Nblock;b++){
|
||||
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
|
||||
|
||||
for(int b=0;b<Nblock;b++) Linop.HermOp(X[b], AD[b]);
|
||||
for(int b=0;b<Nblock;b++) AD[b] = AD[b]-B[b];
|
||||
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(normv(AD)/normv(B)) <<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
}
|
@@ -54,6 +54,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
|
||||
|
||||
void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
|
||||
|
||||
|
||||
psi.checkerboard = src.checkerboard;
|
||||
conformable(psi, src);
|
||||
|
||||
@@ -70,7 +71,6 @@ class ConjugateGradient : public OperatorFunction<Field> {
|
||||
|
||||
Linop.HermOpAndNorm(psi, mmp, d, b);
|
||||
|
||||
|
||||
r = src - mmp;
|
||||
p = r;
|
||||
|
||||
@@ -96,38 +96,44 @@ class ConjugateGradient : public OperatorFunction<Field> {
|
||||
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
|
||||
|
||||
GridStopWatch LinalgTimer;
|
||||
GridStopWatch InnerTimer;
|
||||
GridStopWatch AxpyNormTimer;
|
||||
GridStopWatch LinearCombTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
for (k = 1; k <= MaxIterations*1000; k++) {
|
||||
c = cp;
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop.HermOpAndNorm(p, mmp, d, qq);
|
||||
Linop.HermOp(p, mmp);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
// RealD qqck = norm2(mmp);
|
||||
// ComplexD dck = innerProduct(p,mmp);
|
||||
|
||||
InnerTimer.Start();
|
||||
ComplexD dc = innerProduct(p,mmp);
|
||||
InnerTimer.Stop();
|
||||
d = dc.real();
|
||||
a = c / d;
|
||||
b_pred = a * (a * qq - d) / c;
|
||||
|
||||
AxpyNormTimer.Start();
|
||||
cp = axpy_norm(r, -a, mmp, r);
|
||||
AxpyNormTimer.Stop();
|
||||
b = cp / c;
|
||||
|
||||
// Fuse these loops ; should be really easy
|
||||
psi = a * p + psi;
|
||||
p = p * b + r;
|
||||
|
||||
LinearCombTimer.Start();
|
||||
parallel_for(int ss=0;ss<src._grid->oSites();ss++){
|
||||
vstream(psi[ss], a * p[ss] + psi[ss]);
|
||||
vstream(p [ss], b * p[ss] + r[ss]);
|
||||
}
|
||||
LinearCombTimer.Stop();
|
||||
LinalgTimer.Stop();
|
||||
|
||||
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
|
||||
<< " residual " << cp << " target " << rsq << std::endl;
|
||||
std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl;
|
||||
std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl;
|
||||
<< " residual^2 " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;
|
||||
|
||||
// Stopping condition
|
||||
if (cp <= rsq) {
|
||||
@@ -148,6 +154,9 @@ class ConjugateGradient : public OperatorFunction<Field> {
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tInner " << InnerTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
||||
|
@@ -43,6 +43,7 @@ namespace Grid {
|
||||
public:
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
int verbose;
|
||||
MultiShiftFunction shifts;
|
||||
|
||||
@@ -164,6 +165,15 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
||||
axpby(psi[s],0.,-bs[s]*alpha[s],src,src);
|
||||
}
|
||||
|
||||
///////////////////////////////////////
|
||||
// Timers
|
||||
///////////////////////////////////////
|
||||
GridStopWatch AXPYTimer;
|
||||
GridStopWatch ShiftTimer;
|
||||
GridStopWatch QRTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
// Iteration loop
|
||||
int k;
|
||||
@@ -171,7 +181,9 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
||||
for (k=1;k<=MaxIterations;k++){
|
||||
|
||||
a = c /cp;
|
||||
AXPYTimer.Start();
|
||||
axpy(p,a,p,r);
|
||||
AXPYTimer.Stop();
|
||||
|
||||
// Note to self - direction ps is iterated seperately
|
||||
// for each shift. Does not appear to have any scope
|
||||
@@ -180,6 +192,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
||||
// However SAME r is used. Could load "r" and update
|
||||
// ALL ps[s]. 2/3 Bandwidth saving
|
||||
// New Kernel: Load r, vector of coeffs, vector of pointers ps
|
||||
AXPYTimer.Start();
|
||||
for(int s=0;s<nshift;s++){
|
||||
if ( ! converged[s] ) {
|
||||
if (s==0){
|
||||
@@ -190,22 +203,34 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
||||
}
|
||||
}
|
||||
}
|
||||
AXPYTimer.Stop();
|
||||
|
||||
cp=c;
|
||||
MatrixTimer.Start();
|
||||
//Linop.HermOpAndNorm(p,mmp,d,qq); // d is used
|
||||
// The below is faster on KNL
|
||||
Linop.HermOp(p,mmp);
|
||||
d=real(innerProduct(p,mmp));
|
||||
|
||||
Linop.HermOpAndNorm(p,mmp,d,qq);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
AXPYTimer.Start();
|
||||
axpy(mmp,mass[0],p,mmp);
|
||||
AXPYTimer.Stop();
|
||||
RealD rn = norm2(p);
|
||||
d += rn*mass[0];
|
||||
|
||||
bp=b;
|
||||
b=-cp/d;
|
||||
|
||||
AXPYTimer.Start();
|
||||
c=axpy_norm(r,b,mmp,r);
|
||||
AXPYTimer.Stop();
|
||||
|
||||
// Toggle the recurrence history
|
||||
bs[0] = b;
|
||||
iz = 1-iz;
|
||||
ShiftTimer.Start();
|
||||
for(int s=1;s<nshift;s++){
|
||||
if((!converged[s])){
|
||||
RealD z0 = z[s][1-iz];
|
||||
@@ -215,6 +240,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
||||
bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
|
||||
}
|
||||
}
|
||||
ShiftTimer.Stop();
|
||||
|
||||
for(int s=0;s<nshift;s++){
|
||||
int ss = s;
|
||||
@@ -257,6 +283,9 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
||||
|
||||
if ( all_converged ){
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
|
||||
std::cout<<GridLogMessage<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
|
||||
std::cout<<GridLogMessage<< "CGMultiShift: Checking solutions"<<std::endl;
|
||||
|
||||
@@ -269,8 +298,19 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
||||
RealD cn = norm2(src);
|
||||
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tAXPY " << AXPYTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMarix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tShift " << ShiftTimer.Elapsed() <<std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
// ugly hack
|
||||
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
|
104
Grid/algorithms/iterative/Deflation.h
Normal file
104
Grid/algorithms/iterative/Deflation.h
Normal file
@@ -0,0 +1,104 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_DEFLATION_H
|
||||
#define GRID_DEFLATION_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class Field>
|
||||
class ZeroGuesser: public LinearFunction<Field> {
|
||||
public:
|
||||
virtual void operator()(const Field &src, Field &guess) { guess = zero; };
|
||||
};
|
||||
|
||||
template<class Field>
|
||||
class SourceGuesser: public LinearFunction<Field> {
|
||||
public:
|
||||
virtual void operator()(const Field &src, Field &guess) { guess = src; };
|
||||
};
|
||||
|
||||
////////////////////////////////
|
||||
// Fine grid deflation
|
||||
////////////////////////////////
|
||||
template<class Field>
|
||||
class DeflatedGuesser: public LinearFunction<Field> {
|
||||
private:
|
||||
const std::vector<Field> &evec;
|
||||
const std::vector<RealD> &eval;
|
||||
|
||||
public:
|
||||
|
||||
DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};
|
||||
|
||||
virtual void operator()(const Field &src,Field &guess) {
|
||||
guess = zero;
|
||||
assert(evec.size()==eval.size());
|
||||
auto N = evec.size();
|
||||
for (int i=0;i<N;i++) {
|
||||
const Field& tmp = evec[i];
|
||||
axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
|
||||
}
|
||||
guess.checkerboard = src.checkerboard;
|
||||
}
|
||||
};
|
||||
|
||||
template<class FineField, class CoarseField>
|
||||
class LocalCoherenceDeflatedGuesser: public LinearFunction<FineField> {
|
||||
private:
|
||||
const std::vector<FineField> &subspace;
|
||||
const std::vector<CoarseField> &evec_coarse;
|
||||
const std::vector<RealD> &eval_coarse;
|
||||
public:
|
||||
|
||||
LocalCoherenceDeflatedGuesser(const std::vector<FineField> &_subspace,
|
||||
const std::vector<CoarseField> &_evec_coarse,
|
||||
const std::vector<RealD> &_eval_coarse)
|
||||
: subspace(_subspace),
|
||||
evec_coarse(_evec_coarse),
|
||||
eval_coarse(_eval_coarse)
|
||||
{
|
||||
}
|
||||
|
||||
void operator()(const FineField &src,FineField &guess) {
|
||||
int N = (int)evec_coarse.size();
|
||||
CoarseField src_coarse(evec_coarse[0]._grid);
|
||||
CoarseField guess_coarse(evec_coarse[0]._grid); guess_coarse = zero;
|
||||
blockProject(src_coarse,src,subspace);
|
||||
for (int i=0;i<N;i++) {
|
||||
const CoarseField & tmp = evec_coarse[i];
|
||||
axpy(guess_coarse,TensorRemove(innerProduct(tmp,src_coarse)) / eval_coarse[i],tmp,guess_coarse);
|
||||
}
|
||||
blockPromote(guess_coarse,guess,subspace);
|
||||
guess.checkerboard = src.checkerboard;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
}
|
||||
#endif
|
@@ -57,7 +57,8 @@ void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, i
|
||||
|
||||
parallel_region
|
||||
{
|
||||
std::vector < vobj > B(Nm); // Thread private
|
||||
|
||||
std::vector < vobj , commAllocator<vobj> > B(Nm); // Thread private
|
||||
|
||||
parallel_for_internal(int ss=0;ss < grid->oSites();ss++){
|
||||
for(int j=j0; j<j1; ++j) B[j]=0.;
|
||||
@@ -149,19 +150,6 @@ void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, boo
|
||||
basisReorderInPlace(_v,sort_vals,idx);
|
||||
}
|
||||
|
||||
// PAB: faster to compute the inner products first then fuse loops.
|
||||
// If performance critical can improve.
|
||||
template<class Field>
|
||||
void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
|
||||
result = zero;
|
||||
assert(_v.size()==eval.size());
|
||||
int N = (int)_v.size();
|
||||
for (int i=0;i<N;i++) {
|
||||
Field& tmp = _v[i];
|
||||
axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Implicitly restarted lanczos
|
||||
/////////////////////////////////////////////////////////////
|
||||
@@ -181,6 +169,7 @@ enum IRLdiagonalisation {
|
||||
template<class Field> class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester<Field>
|
||||
{
|
||||
public:
|
||||
|
||||
LinearFunction<Field> &_HermOp;
|
||||
ImplicitlyRestartedLanczosHermOpTester(LinearFunction<Field> &HermOp) : _HermOp(HermOp) { };
|
||||
int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox)
|
||||
@@ -243,6 +232,7 @@ class ImplicitlyRestartedLanczos {
|
||||
/////////////////////////
|
||||
|
||||
public:
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// PAB:
|
||||
//////////////////////////////////////////////////////////////////
|
||||
@@ -490,15 +480,13 @@ until convergence
|
||||
Field B(grid); B.checkerboard = evec[0].checkerboard;
|
||||
|
||||
// power of two search pattern; not every evalue in eval2 is assessed.
|
||||
int allconv =1;
|
||||
for(int jj = 1; jj<=Nstop; jj*=2){
|
||||
int j = Nstop-jj;
|
||||
RealD e = eval2_copy[j]; // Discard the evalue
|
||||
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
|
||||
if( _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
|
||||
if ( j > Nconv ) {
|
||||
Nconv=j+1;
|
||||
jj=Nstop; // Terminate the scan
|
||||
}
|
||||
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
|
||||
allconv=0;
|
||||
}
|
||||
}
|
||||
// Do evec[0] for good measure
|
||||
@@ -506,8 +494,10 @@ until convergence
|
||||
int j=0;
|
||||
RealD e = eval2_copy[0];
|
||||
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
|
||||
_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox);
|
||||
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) allconv=0;
|
||||
}
|
||||
if ( allconv ) Nconv = Nstop;
|
||||
|
||||
// test if we converged, if so, terminate
|
||||
std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl;
|
||||
// if( Nconv>=Nstop || beta_k < betastp){
|
@@ -28,7 +28,10 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LOCAL_COHERENCE_IRL_H
|
||||
#define GRID_LOCAL_COHERENCE_IRL_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
|
||||
struct LanczosParams : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams,
|
||||
@@ -45,6 +48,7 @@ struct LanczosParams : Serializable {
|
||||
struct LocalCoherenceLanczosParams : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
|
||||
bool, saveEvecs,
|
||||
bool, doFine,
|
||||
bool, doFineRead,
|
||||
bool, doCoarse,
|
||||
@@ -70,21 +74,24 @@ public:
|
||||
typedef Lattice<Fobj> FineField;
|
||||
|
||||
LinearOperatorBase<FineField> &_Linop;
|
||||
Aggregation<Fobj,CComplex,nbasis> &_Aggregate;
|
||||
std::vector<FineField> &subspace;
|
||||
|
||||
ProjectedHermOp(LinearOperatorBase<FineField>& linop, Aggregation<Fobj,CComplex,nbasis> &aggregate) :
|
||||
_Linop(linop),
|
||||
_Aggregate(aggregate) { };
|
||||
ProjectedHermOp(LinearOperatorBase<FineField>& linop, std::vector<FineField> & _subspace) :
|
||||
_Linop(linop), subspace(_subspace)
|
||||
{
|
||||
assert(subspace.size() >0);
|
||||
};
|
||||
|
||||
void operator()(const CoarseField& in, CoarseField& out) {
|
||||
GridBase *FineGrid = subspace[0]._grid;
|
||||
int checkerboard = subspace[0].checkerboard;
|
||||
|
||||
GridBase *FineGrid = _Aggregate.FineGrid;
|
||||
FineField fin(FineGrid);
|
||||
FineField fout(FineGrid);
|
||||
FineField fin (FineGrid); fin.checkerboard= checkerboard;
|
||||
FineField fout(FineGrid); fout.checkerboard = checkerboard;
|
||||
|
||||
_Aggregate.PromoteFromSubspace(in,fin); std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl;
|
||||
blockPromote(in,fin,subspace); std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl;
|
||||
_Linop.HermOp(fin,fout); std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl;
|
||||
_Aggregate.ProjectToSubspace(out,fout); std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl;
|
||||
blockProject(out,fout,subspace); std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -99,24 +106,27 @@ public:
|
||||
|
||||
OperatorFunction<FineField> & _poly;
|
||||
LinearOperatorBase<FineField> &_Linop;
|
||||
Aggregation<Fobj,CComplex,nbasis> &_Aggregate;
|
||||
std::vector<FineField> &subspace;
|
||||
|
||||
ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,LinearOperatorBase<FineField>& linop,
|
||||
Aggregation<Fobj,CComplex,nbasis> &aggregate) :
|
||||
ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,
|
||||
LinearOperatorBase<FineField>& linop,
|
||||
std::vector<FineField> & _subspace) :
|
||||
_poly(poly),
|
||||
_Linop(linop),
|
||||
_Aggregate(aggregate) { };
|
||||
subspace(_subspace)
|
||||
{ };
|
||||
|
||||
void operator()(const CoarseField& in, CoarseField& out) {
|
||||
|
||||
GridBase *FineGrid = _Aggregate.FineGrid;
|
||||
GridBase *FineGrid = subspace[0]._grid;
|
||||
int checkerboard = subspace[0].checkerboard;
|
||||
|
||||
FineField fin(FineGrid) ;fin.checkerboard =_Aggregate.checkerboard;
|
||||
FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard;
|
||||
FineField fin (FineGrid); fin.checkerboard =checkerboard;
|
||||
FineField fout(FineGrid);fout.checkerboard =checkerboard;
|
||||
|
||||
_Aggregate.PromoteFromSubspace(in,fin); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl;
|
||||
blockPromote(in,fin,subspace); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl;
|
||||
_poly(_Linop,fin,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl;
|
||||
_Aggregate.ProjectToSubspace(out,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl;
|
||||
blockProject(out,fout,subspace); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -132,19 +142,23 @@ class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanc
|
||||
LinearFunction<CoarseField> & _Poly;
|
||||
OperatorFunction<FineField> & _smoother;
|
||||
LinearOperatorBase<FineField> &_Linop;
|
||||
Aggregation<Fobj,CComplex,nbasis> &_Aggregate;
|
||||
RealD _coarse_relax_tol;
|
||||
std::vector<FineField> &_subspace;
|
||||
|
||||
ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField> &Poly,
|
||||
OperatorFunction<FineField> &smoother,
|
||||
LinearOperatorBase<FineField> &Linop,
|
||||
Aggregation<Fobj,CComplex,nbasis> &Aggregate,
|
||||
std::vector<FineField> &subspace,
|
||||
RealD coarse_relax_tol=5.0e3)
|
||||
: _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly), _coarse_relax_tol(coarse_relax_tol) { };
|
||||
: _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace),
|
||||
_coarse_relax_tol(coarse_relax_tol)
|
||||
{ };
|
||||
|
||||
int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
CoarseField v(B);
|
||||
RealD eval_poly = eval;
|
||||
|
||||
// Apply operator
|
||||
_Poly(B,v);
|
||||
|
||||
@@ -168,14 +182,13 @@ class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanc
|
||||
}
|
||||
int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
GridBase *FineGrid = _Aggregate.FineGrid;
|
||||
|
||||
int checkerboard = _Aggregate.checkerboard;
|
||||
|
||||
GridBase *FineGrid = _subspace[0]._grid;
|
||||
int checkerboard = _subspace[0].checkerboard;
|
||||
FineField fB(FineGrid);fB.checkerboard =checkerboard;
|
||||
FineField fv(FineGrid);fv.checkerboard =checkerboard;
|
||||
|
||||
_Aggregate.PromoteFromSubspace(B,fv);
|
||||
blockPromote(B,fv,_subspace);
|
||||
|
||||
_smoother(_Linop,fv,fB);
|
||||
|
||||
RealD eval_poly = eval;
|
||||
@@ -217,27 +230,67 @@ protected:
|
||||
int _checkerboard;
|
||||
LinearOperatorBase<FineField> & _FineOp;
|
||||
|
||||
// FIXME replace Aggregation with vector of fine; the code reuse is too small for
|
||||
// the hassle and complexity of cross coupling.
|
||||
Aggregation<Fobj,CComplex,nbasis> _Aggregate;
|
||||
std::vector<RealD> evals_fine;
|
||||
std::vector<RealD> evals_coarse;
|
||||
std::vector<CoarseField> evec_coarse;
|
||||
std::vector<RealD> &evals_fine;
|
||||
std::vector<RealD> &evals_coarse;
|
||||
std::vector<FineField> &subspace;
|
||||
std::vector<CoarseField> &evec_coarse;
|
||||
|
||||
private:
|
||||
std::vector<RealD> _evals_fine;
|
||||
std::vector<RealD> _evals_coarse;
|
||||
std::vector<FineField> _subspace;
|
||||
std::vector<CoarseField> _evec_coarse;
|
||||
|
||||
public:
|
||||
|
||||
LocalCoherenceLanczos(GridBase *FineGrid,
|
||||
GridBase *CoarseGrid,
|
||||
LinearOperatorBase<FineField> &FineOp,
|
||||
int checkerboard) :
|
||||
_CoarseGrid(CoarseGrid),
|
||||
_FineGrid(FineGrid),
|
||||
_Aggregate(CoarseGrid,FineGrid,checkerboard),
|
||||
_FineOp(FineOp),
|
||||
_checkerboard(checkerboard)
|
||||
_checkerboard(checkerboard),
|
||||
evals_fine (_evals_fine),
|
||||
evals_coarse(_evals_coarse),
|
||||
subspace (_subspace),
|
||||
evec_coarse(_evec_coarse)
|
||||
{
|
||||
evals_fine.resize(0);
|
||||
evals_coarse.resize(0);
|
||||
};
|
||||
void Orthogonalise(void ) { _Aggregate.Orthogonalise(); }
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Alternate constructore, external storage for use by Hadrons module
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
LocalCoherenceLanczos(GridBase *FineGrid,
|
||||
GridBase *CoarseGrid,
|
||||
LinearOperatorBase<FineField> &FineOp,
|
||||
int checkerboard,
|
||||
std::vector<FineField> &ext_subspace,
|
||||
std::vector<CoarseField> &ext_coarse,
|
||||
std::vector<RealD> &ext_eval_fine,
|
||||
std::vector<RealD> &ext_eval_coarse
|
||||
) :
|
||||
_CoarseGrid(CoarseGrid),
|
||||
_FineGrid(FineGrid),
|
||||
_FineOp(FineOp),
|
||||
_checkerboard(checkerboard),
|
||||
evals_fine (ext_eval_fine),
|
||||
evals_coarse(ext_eval_coarse),
|
||||
subspace (ext_subspace),
|
||||
evec_coarse (ext_coarse)
|
||||
{
|
||||
evals_fine.resize(0);
|
||||
evals_coarse.resize(0);
|
||||
};
|
||||
|
||||
void Orthogonalise(void ) {
|
||||
CoarseScalar InnerProd(_CoarseGrid);
|
||||
std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
|
||||
blockOrthogonalise(InnerProd,subspace);
|
||||
std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
|
||||
blockOrthogonalise(InnerProd,subspace);
|
||||
};
|
||||
|
||||
template<typename T> static RealD normalise(T& v)
|
||||
{
|
||||
@@ -246,43 +299,44 @@ public:
|
||||
v = v * (1.0/nn);
|
||||
return nn;
|
||||
}
|
||||
|
||||
/*
|
||||
void fakeFine(void)
|
||||
{
|
||||
int Nk = nbasis;
|
||||
_Aggregate.subspace.resize(Nk,_FineGrid);
|
||||
_Aggregate.subspace[0]=1.0;
|
||||
_Aggregate.subspace[0].checkerboard=_checkerboard;
|
||||
normalise(_Aggregate.subspace[0]);
|
||||
subspace.resize(Nk,_FineGrid);
|
||||
subspace[0]=1.0;
|
||||
subspace[0].checkerboard=_checkerboard;
|
||||
normalise(subspace[0]);
|
||||
PlainHermOp<FineField> Op(_FineOp);
|
||||
for(int k=1;k<Nk;k++){
|
||||
_Aggregate.subspace[k].checkerboard=_checkerboard;
|
||||
Op(_Aggregate.subspace[k-1],_Aggregate.subspace[k]);
|
||||
normalise(_Aggregate.subspace[k]);
|
||||
subspace[k].checkerboard=_checkerboard;
|
||||
Op(subspace[k-1],subspace[k]);
|
||||
normalise(subspace[k]);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
void testFine(RealD resid)
|
||||
{
|
||||
assert(evals_fine.size() == nbasis);
|
||||
assert(_Aggregate.subspace.size() == nbasis);
|
||||
assert(subspace.size() == nbasis);
|
||||
PlainHermOp<FineField> Op(_FineOp);
|
||||
ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op);
|
||||
for(int k=0;k<nbasis;k++){
|
||||
assert(SimpleTester.ReconstructEval(k,resid,_Aggregate.subspace[k],evals_fine[k],1.0)==1);
|
||||
assert(SimpleTester.ReconstructEval(k,resid,subspace[k],evals_fine[k],1.0)==1);
|
||||
}
|
||||
}
|
||||
|
||||
void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax)
|
||||
{
|
||||
assert(evals_fine.size() == nbasis);
|
||||
assert(_Aggregate.subspace.size() == nbasis);
|
||||
assert(subspace.size() == nbasis);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
Chebyshev<FineField> ChebySmooth(cheby_smooth);
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,_Aggregate);
|
||||
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax);
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,subspace);
|
||||
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
|
||||
|
||||
for(int k=0;k<evec_coarse.size();k++){
|
||||
if ( k < nbasis ) {
|
||||
@@ -302,34 +356,34 @@ public:
|
||||
PlainHermOp<FineField> Op(_FineOp);
|
||||
|
||||
evals_fine.resize(Nm);
|
||||
_Aggregate.subspace.resize(Nm,_FineGrid);
|
||||
subspace.resize(Nm,_FineGrid);
|
||||
|
||||
ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
|
||||
|
||||
FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard;
|
||||
|
||||
int Nconv;
|
||||
IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false);
|
||||
IRL.calc(evals_fine,subspace,src,Nconv,false);
|
||||
|
||||
// Shrink down to number saved
|
||||
assert(Nstop>=nbasis);
|
||||
assert(Nconv>=nbasis);
|
||||
evals_fine.resize(nbasis);
|
||||
_Aggregate.subspace.resize(nbasis,_FineGrid);
|
||||
subspace.resize(nbasis,_FineGrid);
|
||||
}
|
||||
void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax,
|
||||
int Nstop, int Nk, int Nm,RealD resid,
|
||||
RealD MaxIt, RealD betastp, int MinRes)
|
||||
{
|
||||
Chebyshev<FineField> Cheby(cheby_op);
|
||||
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,_Aggregate);
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,_Aggregate);
|
||||
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,subspace);
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Chebyshev<FineField> ChebySmooth(cheby_smooth);
|
||||
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax);
|
||||
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
|
||||
|
||||
evals_coarse.resize(Nm);
|
||||
evec_coarse.resize(Nm,_CoarseGrid);
|
473
Grid/algorithms/iterative/SchurRedBlack.h
Normal file
473
Grid/algorithms/iterative/SchurRedBlack.h
Normal file
@@ -0,0 +1,473 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/SchurRedBlack.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_SCHUR_RED_BLACK_H
|
||||
#define GRID_SCHUR_RED_BLACK_H
|
||||
|
||||
|
||||
/*
|
||||
* Red black Schur decomposition
|
||||
*
|
||||
* M = (Mee Meo) = (1 0 ) (Mee 0 ) (1 Mee^{-1} Meo)
|
||||
* (Moe Moo) (Moe Mee^-1 1 ) (0 Moo-Moe Mee^-1 Meo) (0 1 )
|
||||
* = L D U
|
||||
*
|
||||
* L^-1 = (1 0 )
|
||||
* (-MoeMee^{-1} 1 )
|
||||
* L^{dag} = ( 1 Mee^{-dag} Moe^{dag} )
|
||||
* ( 0 1 )
|
||||
* L^{-d} = ( 1 -Mee^{-dag} Moe^{dag} )
|
||||
* ( 0 1 )
|
||||
*
|
||||
* U^-1 = (1 -Mee^{-1} Meo)
|
||||
* (0 1 )
|
||||
* U^{dag} = ( 1 0)
|
||||
* (Meo^dag Mee^{-dag} 1)
|
||||
* U^{-dag} = ( 1 0)
|
||||
* (-Meo^dag Mee^{-dag} 1)
|
||||
***********************
|
||||
* M psi = eta
|
||||
***********************
|
||||
*Odd
|
||||
* i) D_oo psi_o = L^{-1} eta_o
|
||||
* eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
|
||||
*
|
||||
* Wilson:
|
||||
* (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o
|
||||
* Stag:
|
||||
* D_oo psi_o = L^{-1} eta = (eta_o - Moe Mee^{-1} eta_e)
|
||||
*
|
||||
* L^-1 eta_o= (1 0 ) (e
|
||||
* (-MoeMee^{-1} 1 )
|
||||
*
|
||||
*Even
|
||||
* ii) Mee psi_e + Meo psi_o = src_e
|
||||
*
|
||||
* => sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
*
|
||||
*
|
||||
* TODO: Other options:
|
||||
*
|
||||
* a) change checkerboards for Schur e<->o
|
||||
*
|
||||
* Left precon by Moo^-1
|
||||
* b) Doo^{dag} M_oo^-dag Moo^-1 Doo psi_0 = (D_oo)^dag M_oo^-dag Moo^-1 L^{-1} eta_o
|
||||
* eta_o' = (D_oo)^dag M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e)
|
||||
*
|
||||
* Right precon by Moo^-1
|
||||
* c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1} eta_o
|
||||
* eta_o' = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
|
||||
* psi_o = M_oo^-1 phi_o
|
||||
* TODO: Deflation
|
||||
*/
|
||||
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Use base class to share code
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Take a matrix and form a Red Black solver calling a Herm solver
|
||||
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackBase {
|
||||
protected:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
OperatorFunction<Field> & _HermitianRBSolver;
|
||||
int CBfactorise;
|
||||
bool subGuess;
|
||||
public:
|
||||
|
||||
SchurRedBlackBase(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
|
||||
_HermitianRBSolver(HermitianRBSolver)
|
||||
{
|
||||
CBfactorise = 0;
|
||||
subtractGuess(initSubGuess);
|
||||
};
|
||||
void subtractGuess(const bool initSubGuess)
|
||||
{
|
||||
subGuess = initSubGuess;
|
||||
}
|
||||
bool isSubtractGuess(void)
|
||||
{
|
||||
return subGuess;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Shared code
|
||||
/////////////////////////////////////////////////////////////
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
ZeroGuesser<Field> guess;
|
||||
(*this)(_Matrix,in,out,guess);
|
||||
}
|
||||
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out)
|
||||
{
|
||||
ZeroGuesser<Field> guess;
|
||||
(*this)(_Matrix,in,out,guess);
|
||||
}
|
||||
|
||||
template<class Guesser>
|
||||
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
int nblock = in.size();
|
||||
|
||||
std::vector<Field> src_o(nblock,grid);
|
||||
std::vector<Field> sol_o(nblock,grid);
|
||||
|
||||
std::vector<Field> guess_save;
|
||||
|
||||
Field resid(fgrid);
|
||||
Field tmp(grid);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Prepare RedBlack source
|
||||
////////////////////////////////////////////////
|
||||
for(int b=0;b<nblock;b++){
|
||||
RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
|
||||
}
|
||||
////////////////////////////////////////////////
|
||||
// Make the guesses
|
||||
////////////////////////////////////////////////
|
||||
if ( subGuess ) guess_save.resize(nblock,grid);
|
||||
|
||||
for(int b=0;b<nblock;b++){
|
||||
guess(src_o[b],sol_o[b]);
|
||||
|
||||
if ( subGuess ) {
|
||||
guess_save[b] = sol_o[b];
|
||||
}
|
||||
}
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Call the block solver
|
||||
//////////////////////////////////////////////////////////////
|
||||
std::cout<<GridLogMessage << "SchurRedBlackBase calling the solver for "<<nblock<<" RHS" <<std::endl;
|
||||
RedBlackSolve(_Matrix,src_o,sol_o);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// A2A boolean behavioural control & reconstruct other checkerboard
|
||||
////////////////////////////////////////////////
|
||||
for(int b=0;b<nblock;b++) {
|
||||
|
||||
if (subGuess) sol_o[b] = sol_o[b] - guess_save[b];
|
||||
|
||||
///////// Needs even source //////////////
|
||||
pickCheckerboard(Even,tmp,in[b]);
|
||||
RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]);
|
||||
|
||||
/////////////////////////////////////////////////
|
||||
// Check unprec residual if possible
|
||||
/////////////////////////////////////////////////
|
||||
if ( ! subGuess ) {
|
||||
_Matrix.M(out[b],resid);
|
||||
resid = resid-in[b];
|
||||
RealD ns = norm2(in[b]);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl;
|
||||
} else {
|
||||
std::cout<<GridLogMessage<< "SchurRedBlackBase Guess subtracted after solve["<<b<<"] " << std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
template<class Guesser>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
|
||||
|
||||
// FIXME CGdiagonalMee not implemented virtual function
|
||||
// FIXME use CBfactorise to control schur decomp
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field resid(fgrid);
|
||||
Field src_o(grid);
|
||||
Field src_e(grid);
|
||||
Field sol_o(grid);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// RedBlack source
|
||||
////////////////////////////////////////////////
|
||||
RedBlackSource(_Matrix,in,src_e,src_o);
|
||||
|
||||
////////////////////////////////
|
||||
// Construct the guess
|
||||
////////////////////////////////
|
||||
Field tmp(grid);
|
||||
guess(src_o,sol_o);
|
||||
|
||||
Field guess_save(grid);
|
||||
guess_save = sol_o;
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Call the red-black solver
|
||||
//////////////////////////////////////////////////////////////
|
||||
RedBlackSolve(_Matrix,src_o,sol_o);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Fionn A2A boolean behavioural control
|
||||
////////////////////////////////////////////////
|
||||
if (subGuess) sol_o= sol_o-guess_save;
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// RedBlack solution needs the even source
|
||||
///////////////////////////////////////////////////
|
||||
RedBlackSolution(_Matrix,sol_o,src_e,out);
|
||||
|
||||
// Verify the unprec residual
|
||||
if ( ! subGuess ) {
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout<<GridLogMessage << "SchurRedBlackBase solver true unprec resid "<< std::sqrt(nr/ns) << std::endl;
|
||||
} else {
|
||||
std::cout << GridLogMessage << "SchurRedBlackBase Guess subtracted after solve." << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Override in derived. Not virtual as template methods
|
||||
/////////////////////////////////////////////////////////////
|
||||
virtual void RedBlackSource (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) =0;
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol) =0;
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o) =0;
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)=0;
|
||||
|
||||
};
|
||||
|
||||
template<class Field> class SchurRedBlackStaggeredSolve : public SchurRedBlackBase<Field> {
|
||||
public:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
|
||||
SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
|
||||
: SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess)
|
||||
{
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Override RedBlack specialisation
|
||||
//////////////////////////////////////////////////////
|
||||
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd ,src_o,src);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
|
||||
|
||||
_Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm.
|
||||
}
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e_c,Field &sol)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field sol_e(grid);
|
||||
Field src_e(grid);
|
||||
|
||||
src_e = src_e_c; // Const correctness
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
|
||||
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
|
||||
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
|
||||
setCheckerboard(sol,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
setCheckerboard(sol,sol_o); assert( sol_o.checkerboard ==Odd );
|
||||
}
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
|
||||
{
|
||||
SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
|
||||
};
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
|
||||
{
|
||||
SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
}
|
||||
};
|
||||
template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Site diagonal has Mooee on it.
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackDiagMooeeSolve : public SchurRedBlackBase<Field> {
|
||||
public:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
|
||||
SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
|
||||
: SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess) {};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Override RedBlack specialisation
|
||||
//////////////////////////////////////////////////////
|
||||
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd ,src_o,src);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
|
||||
|
||||
// get the right MpcDag
|
||||
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
|
||||
|
||||
}
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field sol_e(grid);
|
||||
Field src_e_i(grid);
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
|
||||
src_e_i = src_e-tmp; assert( src_e_i.checkerboard ==Even);
|
||||
_Matrix.MooeeInv(src_e_i,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
|
||||
setCheckerboard(sol,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
setCheckerboard(sol,sol_o); assert( sol_o.checkerboard ==Odd );
|
||||
}
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
|
||||
{
|
||||
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
|
||||
};
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
|
||||
{
|
||||
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Site diagonal is identity, right preconditioned by Mee^inv
|
||||
// ( 1 - Meo Moo^inv Moe Mee^inv ) phi =( 1 - Meo Moo^inv Moe Mee^inv ) Mee psi = = eta = eta
|
||||
//=> psi = MeeInv phi
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackDiagTwoSolve : public SchurRedBlackBase<Field> {
|
||||
public:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations Schur trick
|
||||
/////////////////////////////////////////////////////
|
||||
SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
|
||||
: SchurRedBlackBase<Field>(HermitianRBSolver,initSubGuess) {};
|
||||
|
||||
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd ,src_o,src);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
|
||||
|
||||
// get the right MpcDag
|
||||
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
|
||||
}
|
||||
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field sol_o_i(grid);
|
||||
Field tmp(grid);
|
||||
Field sol_e(grid);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// MooeeInv due to pecond
|
||||
////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(sol_o,tmp);
|
||||
sol_o_i = tmp;
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o_i,tmp); assert( tmp.checkerboard ==Even);
|
||||
tmp = src_e-tmp; assert( src_e.checkerboard ==Even);
|
||||
_Matrix.MooeeInv(tmp,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
|
||||
setCheckerboard(sol,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
setCheckerboard(sol,sol_o_i); assert( sol_o_i.checkerboard ==Odd );
|
||||
};
|
||||
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
|
||||
{
|
||||
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
};
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
|
||||
{
|
||||
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -277,7 +277,9 @@ public:
|
||||
uint8_t *cp = (uint8_t *)ptr;
|
||||
if ( ptr ) {
|
||||
// One touch per 4k page, static OMP loop to catch same loop order
|
||||
#ifdef GRID_OMP
|
||||
#pragma omp parallel for schedule(static)
|
||||
#endif
|
||||
for(size_type n=0;n<bytes;n+=4096){
|
||||
cp[n]=0;
|
||||
}
|
@@ -44,11 +44,13 @@ void CartesianCommunicator::Init(int *argc, char ***argv)
|
||||
MPI_Initialized(&flag); // needed to coexist with other libs apparently
|
||||
if ( !flag ) {
|
||||
MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
|
||||
assert (provided == MPI_THREAD_MULTIPLE);
|
||||
//If only 1 comms thread we require any threading mode other than SINGLE, but for multiple comms threads we need MULTIPLE
|
||||
if( (nCommThreads == 1 && provided == MPI_THREAD_SINGLE) ||
|
||||
(nCommThreads > 1 && provided != MPI_THREAD_MULTIPLE) )
|
||||
assert(0);
|
||||
}
|
||||
|
||||
Grid_quiesce_nodes();
|
||||
|
||||
// Never clean up as done once.
|
||||
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
|
||||
|
||||
GlobalSharedMemory::Init(communicator_world);
|
||||
@@ -85,9 +87,17 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &c
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
{
|
||||
MPI_Comm optimal_comm;
|
||||
GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm); // Remap using the shared memory optimising routine
|
||||
////////////////////////////////////////////////////
|
||||
// Remap using the shared memory optimising routine
|
||||
// The remap creates a comm which must be freed
|
||||
////////////////////////////////////////////////////
|
||||
GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm);
|
||||
InitFromMPICommunicator(processors,optimal_comm);
|
||||
SetCommunicator(optimal_comm);
|
||||
///////////////////////////////////////////////////
|
||||
// Free the temp communicator
|
||||
///////////////////////////////////////////////////
|
||||
MPI_Comm_free(&optimal_comm);
|
||||
}
|
||||
|
||||
//////////////////////////////////
|
||||
@@ -112,10 +122,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
|
||||
// split the communicator
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// int Nparent = parent._processors ;
|
||||
// std::cout << " splitting from communicator "<<parent.communicator <<std::endl;
|
||||
int Nparent;
|
||||
MPI_Comm_size(parent.communicator,&Nparent);
|
||||
// std::cout << " Parent size "<<Nparent <<std::endl;
|
||||
|
||||
int childsize=1;
|
||||
for(int d=0;d<processors.size();d++) {
|
||||
@@ -124,8 +132,6 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
|
||||
int Nchild = Nparent/childsize;
|
||||
assert (childsize * Nchild == Nparent);
|
||||
|
||||
// std::cout << " child size "<<childsize <<std::endl;
|
||||
|
||||
std::vector<int> ccoor(_ndimension); // coor within subcommunicator
|
||||
std::vector<int> scoor(_ndimension); // coor of split within parent
|
||||
std::vector<int> ssize(_ndimension); // coor of split within parent
|
||||
@@ -183,8 +189,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
|
||||
|
||||
} else {
|
||||
srank = 0;
|
||||
comm_split = parent.communicator;
|
||||
// std::cout << " Inherited communicator " <<comm_split <<std::endl;
|
||||
int ierr = MPI_Comm_dup (parent.communicator,&comm_split);
|
||||
assert(ierr==0);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -197,6 +203,11 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
SetCommunicator(comm_split);
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Free the temp communicator
|
||||
///////////////////////////////////////////////
|
||||
MPI_Comm_free(&comm_split);
|
||||
|
||||
if(0){
|
||||
std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
|
||||
for(int d=0;d<processors.size();d++){
|
||||
@@ -210,6 +221,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
|
||||
|
||||
void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
|
||||
{
|
||||
////////////////////////////////////////////////////
|
||||
// Creates communicator, and the communicator_halo
|
||||
////////////////////////////////////////////////////
|
||||
_ndimension = processors.size();
|
||||
_processor_coor.resize(_ndimension);
|
||||
|
@@ -133,6 +133,7 @@ class SharedMemory
|
||||
|
||||
public:
|
||||
SharedMemory() {};
|
||||
~SharedMemory();
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// set the buffers & sizes
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
@@ -27,6 +27,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/GridCore.h>
|
||||
#include <pwd.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
@@ -113,19 +114,150 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
|
||||
assert(WorldNode!=-1);
|
||||
_ShmSetup=1;
|
||||
}
|
||||
|
||||
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
|
||||
// Gray encode support
|
||||
int BinaryToGray (int binary) {
|
||||
int gray = (binary>>1)^binary;
|
||||
return gray;
|
||||
}
|
||||
int Log2Size(int TwoToPower,int MAXLOG2)
|
||||
{
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Assert power of two shm_size.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int log2size = -1;
|
||||
for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){
|
||||
if ( (0x1<<i) == WorldShmSize ) {
|
||||
for(int i=0;i<=MAXLOG2;i++){
|
||||
if ( (0x1<<i) == TwoToPower ) {
|
||||
log2size = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return log2size;
|
||||
}
|
||||
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
|
||||
{
|
||||
#ifdef HYPERCUBE
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Assert power of two shm_size.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
|
||||
assert(log2size != -1);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Identify the hypercube coordinate of this node using hostname
|
||||
////////////////////////////////////////////////////////////////
|
||||
// n runs 0...7 9...16 18...25 27...34 (8*4) 5 bits
|
||||
// i runs 0..7 3 bits
|
||||
// r runs 0..3 2 bits
|
||||
// 2^10 = 1024 nodes
|
||||
const int maxhdim = 10;
|
||||
std::vector<int> HyperCubeCoords(maxhdim,0);
|
||||
std::vector<int> RootHyperCubeCoords(maxhdim,0);
|
||||
int R;
|
||||
int I;
|
||||
int N;
|
||||
const int namelen = _POSIX_HOST_NAME_MAX;
|
||||
char name[namelen];
|
||||
|
||||
// Parse ICE-XA hostname to get hypercube location
|
||||
gethostname(name,namelen);
|
||||
int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
|
||||
assert(nscan==3);
|
||||
|
||||
int nlo = N%9;
|
||||
int nhi = N/9;
|
||||
uint32_t hypercoor = (R<<8)|(I<<5)|(nhi<<3)|nlo ;
|
||||
uint32_t rootcoor = hypercoor;
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// Print debug info
|
||||
//////////////////////////////////////////////////////////////////
|
||||
for(int d=0;d<maxhdim;d++){
|
||||
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
|
||||
}
|
||||
|
||||
std::string hname(name);
|
||||
std::cout << "hostname "<<hname<<std::endl;
|
||||
std::cout << "R " << R << " I " << I << " N "<< N
|
||||
<< " hypercoor 0x"<<std::hex<<hypercoor<<std::dec<<std::endl;
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// broadcast node 0's base coordinate for this partition.
|
||||
//////////////////////////////////////////////////////////////////
|
||||
MPI_Bcast(&rootcoor, sizeof(rootcoor), MPI_BYTE, 0, WorldComm);
|
||||
hypercoor=hypercoor-rootcoor;
|
||||
assert(hypercoor<WorldSize);
|
||||
assert(hypercoor>=0);
|
||||
|
||||
//////////////////////////////////////
|
||||
// Printing
|
||||
//////////////////////////////////////
|
||||
for(int d=0;d<maxhdim;d++){
|
||||
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Identify subblock of ranks on node spreading across dims
|
||||
// in a maximally symmetrical way
|
||||
////////////////////////////////////////////////////////////////
|
||||
int ndimension = processors.size();
|
||||
std::vector<int> processor_coor(ndimension);
|
||||
std::vector<int> WorldDims = processors; std::vector<int> ShmDims (ndimension,1); std::vector<int> NodeDims (ndimension);
|
||||
std::vector<int> ShmCoor (ndimension); std::vector<int> NodeCoor (ndimension); std::vector<int> WorldCoor(ndimension);
|
||||
std::vector<int> HyperCoor(ndimension);
|
||||
int dim = 0;
|
||||
for(int l2=0;l2<log2size;l2++){
|
||||
while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
|
||||
ShmDims[dim]*=2;
|
||||
dim=(dim+1)%ndimension;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Establish torus of processes and nodes with sub-blockings
|
||||
////////////////////////////////////////////////////////////////
|
||||
for(int d=0;d<ndimension;d++){
|
||||
NodeDims[d] = WorldDims[d]/ShmDims[d];
|
||||
}
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Map Hcube according to physical lattice
|
||||
// must partition. Loop over dims and find out who would join.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int hcoor = hypercoor;
|
||||
for(int d=0;d<ndimension;d++){
|
||||
int bits = Log2Size(NodeDims[d],MAXLOG2RANKSPERNODE);
|
||||
int msk = (0x1<<bits)-1;
|
||||
HyperCoor[d]=hcoor & msk;
|
||||
HyperCoor[d]=BinaryToGray(HyperCoor[d]); // Space filling curve magic
|
||||
hcoor = hcoor >> bits;
|
||||
}
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Check processor counts match
|
||||
////////////////////////////////////////////////////////////////
|
||||
int Nprocessors=1;
|
||||
for(int i=0;i<ndimension;i++){
|
||||
Nprocessors*=processors[i];
|
||||
}
|
||||
assert(WorldSize==Nprocessors);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Establish mapping between lexico physics coord and WorldRank
|
||||
////////////////////////////////////////////////////////////////
|
||||
int rank;
|
||||
|
||||
Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode ,NodeDims);
|
||||
|
||||
for(int d=0;d<ndimension;d++) NodeCoor[d]=HyperCoor[d];
|
||||
|
||||
Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
|
||||
for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
|
||||
Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Build the new communicator
|
||||
/////////////////////////////////////////////////////////////////
|
||||
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
|
||||
assert(ierr==0);
|
||||
#else
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Assert power of two shm_size.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
|
||||
assert(log2size != -1);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
@@ -174,7 +306,69 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
|
||||
/////////////////////////////////////////////////////////////////
|
||||
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
|
||||
assert(ierr==0);
|
||||
#endif
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SHMGET
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef GRID_MPI3_SHMGET
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// allocate the shared windows for our group
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
MPI_Barrier(WorldShmComm);
|
||||
WorldShmCommBufs.resize(WorldShmSize);
|
||||
std::vector<int> shmids(WorldShmSize);
|
||||
|
||||
if ( WorldShmRank == 0 ) {
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
size_t size = bytes;
|
||||
key_t key = IPC_PRIVATE;
|
||||
int flags = IPC_CREAT | SHM_R | SHM_W;
|
||||
#ifdef SHM_HUGETLB
|
||||
if (Hugepages) flags|=SHM_HUGETLB;
|
||||
#endif
|
||||
if ((shmids[r]= shmget(key,size, flags)) ==-1) {
|
||||
int errsv = errno;
|
||||
printf("Errno %d\n",errsv);
|
||||
printf("key %d\n",key);
|
||||
printf("size %lld\n",size);
|
||||
printf("flags %d\n",flags);
|
||||
perror("shmget");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
MPI_Barrier(WorldShmComm);
|
||||
MPI_Bcast(&shmids[0],WorldShmSize*sizeof(int),MPI_BYTE,0,WorldShmComm);
|
||||
MPI_Barrier(WorldShmComm);
|
||||
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
WorldShmCommBufs[r] = (uint64_t *)shmat(shmids[r], NULL,0);
|
||||
if (WorldShmCommBufs[r] == (uint64_t *)-1) {
|
||||
perror("Shared memory attach failure");
|
||||
shmctl(shmids[r], IPC_RMID, NULL);
|
||||
exit(2);
|
||||
}
|
||||
}
|
||||
MPI_Barrier(WorldShmComm);
|
||||
///////////////////////////////////
|
||||
// Mark for clean up
|
||||
///////////////////////////////////
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
shmctl(shmids[r], IPC_RMID,(struct shmid_ds *)NULL);
|
||||
}
|
||||
MPI_Barrier(WorldShmComm);
|
||||
|
||||
_ShmAlloc=1;
|
||||
_ShmAllocBytes = bytes;
|
||||
}
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbfs mapping intended
|
||||
@@ -182,6 +376,7 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
|
||||
#ifdef GRID_MPI3_SHMMMAP
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -191,7 +386,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
WorldShmCommBufs.resize(WorldShmSize);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbf and others map filesystems as mappable huge pages
|
||||
// Hugetlbfs and others map filesystems as mappable huge pages
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
char shm_name [NAME_MAX];
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
@@ -218,6 +413,49 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
assert(((uint64_t)ptr&0x3F)==0);
|
||||
close(fd);
|
||||
WorldShmCommBufs[r] =ptr;
|
||||
// std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
|
||||
}
|
||||
_ShmAlloc=1;
|
||||
_ShmAllocBytes = bytes;
|
||||
};
|
||||
#endif // MMAP
|
||||
|
||||
#ifdef GRID_MPI3_SHM_NONE
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// allocate the shared windows for our group
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
MPI_Barrier(WorldShmComm);
|
||||
WorldShmCommBufs.resize(WorldShmSize);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbf and others map filesystems as mappable huge pages
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
char shm_name [NAME_MAX];
|
||||
assert(WorldShmSize == 1);
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
|
||||
int fd=-1;
|
||||
int mmap_flag = MAP_SHARED |MAP_ANONYMOUS ;
|
||||
#ifdef MAP_POPULATE
|
||||
mmap_flag|=MAP_POPULATE;
|
||||
#endif
|
||||
#ifdef MAP_HUGETLB
|
||||
if ( flags ) mmap_flag |= MAP_HUGETLB;
|
||||
#endif
|
||||
void *ptr = (void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);
|
||||
if ( ptr == (void *)MAP_FAILED ) {
|
||||
printf("mmap %s failed\n",shm_name);
|
||||
perror("failed mmap"); assert(0);
|
||||
}
|
||||
assert(((uint64_t)ptr&0x3F)==0);
|
||||
close(fd);
|
||||
WorldShmCommBufs[r] =ptr;
|
||||
// std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
|
||||
}
|
||||
_ShmAlloc=1;
|
||||
_ShmAllocBytes = bytes;
|
||||
@@ -232,6 +470,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
MPI_Barrier(WorldShmComm);
|
||||
@@ -243,7 +482,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
|
||||
size_t size = bytes;
|
||||
|
||||
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldNode,r);
|
||||
struct passwd *pw = getpwuid (getuid());
|
||||
sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
|
||||
|
||||
shm_unlink(shm_name);
|
||||
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
|
||||
@@ -259,7 +499,11 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
#endif
|
||||
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
|
||||
|
||||
if ( ptr == (void * )MAP_FAILED ) { perror("failed mmap"); assert(0); }
|
||||
// std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl;
|
||||
if ( ptr == (void * )MAP_FAILED ) {
|
||||
perror("failed mmap");
|
||||
assert(0);
|
||||
}
|
||||
assert(((uint64_t)ptr&0x3F)==0);
|
||||
|
||||
WorldShmCommBufs[r] =ptr;
|
||||
@@ -274,7 +518,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
|
||||
size_t size = bytes ;
|
||||
|
||||
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldNode,r);
|
||||
struct passwd *pw = getpwuid (getuid());
|
||||
sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
|
||||
|
||||
int fd=shm_open(shm_name,O_RDWR,0666);
|
||||
if ( fd<0 ) { perror("failed shm_open"); assert(0); }
|
||||
@@ -292,6 +537,9 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Global shared functionality finished
|
||||
// Now move to per communicator functionality
|
||||
@@ -318,11 +566,12 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
|
||||
heap_size = GlobalSharedMemory::ShmAllocBytes();
|
||||
for(int r=0;r<ShmSize;r++){
|
||||
|
||||
uint32_t sr = (r==ShmRank) ? GlobalSharedMemory::WorldRank : 0 ;
|
||||
uint32_t wsr = (r==ShmRank) ? GlobalSharedMemory::WorldShmRank : 0 ;
|
||||
|
||||
MPI_Allreduce(MPI_IN_PLACE,&sr,1,MPI_UINT32_T,MPI_SUM,comm);
|
||||
MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,ShmComm);
|
||||
|
||||
ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[sr];
|
||||
ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[wsr];
|
||||
// std::cout << "SetCommunicator ShmCommBufs ["<< r<< "] = "<< ShmCommBufs[r]<< " wsr = "<<wsr<<std::endl;
|
||||
}
|
||||
ShmBufferFreeAll();
|
||||
|
||||
@@ -391,5 +640,12 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
|
||||
return (void *) remote;
|
||||
}
|
||||
}
|
||||
SharedMemory::~SharedMemory()
|
||||
{
|
||||
int MPI_is_finalised; MPI_Finalized(&MPI_is_finalised);
|
||||
if ( !MPI_is_finalised ) {
|
||||
MPI_Comm_free(&ShmComm);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@@ -122,5 +122,7 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
SharedMemory::~SharedMemory()
|
||||
{};
|
||||
|
||||
}
|
@@ -45,31 +45,33 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimen
|
||||
int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
int e1=rhs._grid->_slice_nblock[dimension];
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
int ent = 0;
|
||||
|
||||
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
|
||||
int stride=rhs._grid->_slice_stride[dimension];
|
||||
if ( cbmask == 0x3 ) {
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*stride;
|
||||
int bo = n*e2;
|
||||
buffer[off+bo+b]=rhs._odata[so+o+b];
|
||||
table[ent++] = std::pair<int,int>(off+bo+b,so+o+b);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int bo=0;
|
||||
std::vector<std::pair<int,int> > table;
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*stride;
|
||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
|
||||
if ( ocb &cbmask ) {
|
||||
table.push_back(std::pair<int,int> (bo++,o+b));
|
||||
table[ent++]=std::pair<int,int> (off+bo++,so+o+b);
|
||||
}
|
||||
}
|
||||
}
|
||||
parallel_for(int i=0;i<table.size();i++){
|
||||
buffer[off+table[i].first]=rhs._odata[so+table[i].second];
|
||||
}
|
||||
parallel_for(int i=0;i<ent;i++){
|
||||
buffer[table[i].first]=rhs._odata[table[i].second];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -141,31 +143,35 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
int stride=rhs._grid->_slice_stride[dimension];
|
||||
|
||||
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
int ent =0;
|
||||
|
||||
if ( cbmask ==0x3 ) {
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*rhs._grid->_slice_stride[dimension];
|
||||
int bo =n*rhs._grid->_slice_block[dimension];
|
||||
rhs._odata[so+o+b]=buffer[bo+b];
|
||||
table[ent++] = std::pair<int,int>(so+o+b,bo+b);
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
std::vector<std::pair<int,int> > table;
|
||||
int bo=0;
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*rhs._grid->_slice_stride[dimension];
|
||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
||||
if ( ocb & cbmask ) {
|
||||
table.push_back(std::pair<int,int> (so+o+b,bo++));
|
||||
table[ent++]=std::pair<int,int> (so+o+b,bo++);
|
||||
}
|
||||
}
|
||||
}
|
||||
parallel_for(int i=0;i<table.size();i++){
|
||||
// std::cout << "Rcv"<< table[i].first << " " << table[i].second << " " <<buffer[table[i].second]<<std::endl;
|
||||
}
|
||||
|
||||
parallel_for(int i=0;i<ent;i++){
|
||||
rhs._odata[table[i].first]=buffer[table[i].second];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
@@ -228,29 +234,32 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
|
||||
int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
int stride = rhs._grid->_slice_stride[dimension];
|
||||
if(cbmask == 0x3 ){
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
int ent=0;
|
||||
|
||||
if(cbmask == 0x3 ){
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*stride+b;
|
||||
//lhs._odata[lo+o]=rhs._odata[ro+o];
|
||||
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
|
||||
table[ent++] = std::pair<int,int>(lo+o,ro+o);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
|
||||
int o =n*stride+b;
|
||||
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
|
||||
if ( ocb&cbmask ) {
|
||||
//lhs._odata[lo+o]=rhs._odata[ro+o];
|
||||
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
|
||||
table[ent++] = std::pair<int,int>(lo+o,ro+o);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
parallel_for(int i=0;i<ent;i++){
|
||||
lhs._odata[table[i].first]=rhs._odata[table[i].second];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
|
||||
@@ -269,16 +278,28 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
|
||||
int e2=rhs._grid->_slice_block [dimension];
|
||||
int stride = rhs._grid->_slice_stride[dimension];
|
||||
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
int ent=0;
|
||||
|
||||
double t_tab,t_perm;
|
||||
if ( cbmask == 0x3 ) {
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*stride;
|
||||
table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
|
||||
}}
|
||||
} else {
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*stride;
|
||||
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
|
||||
if ( ocb&cbmask ) {
|
||||
permute(lhs._odata[lo+o+b],rhs._odata[ro+o+b],permute_type);
|
||||
if ( ocb&cbmask ) table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
|
||||
}}
|
||||
}
|
||||
|
||||
}}
|
||||
parallel_for(int i=0;i<ent;i++){
|
||||
permute(lhs._odata[table[i].first],rhs._odata[table[i].second],permute_type);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
@@ -291,6 +312,8 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
|
||||
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
|
||||
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
|
||||
|
||||
double t_local;
|
||||
|
||||
if ( sshift[0] == sshift[1] ) {
|
||||
Cshift_local(ret,rhs,dimension,shift,0x3);
|
||||
} else {
|
||||
@@ -299,7 +322,7 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
template<class vobj> void Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
{
|
||||
GridBase *grid = rhs._grid;
|
||||
int fd = grid->_fdimensions[dimension];
|
||||
@@ -326,10 +349,6 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
|
||||
int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
|
||||
int sx = (x+sshift)%rd;
|
||||
|
||||
// FIXME : This must change where we have a
|
||||
// Rotate slice.
|
||||
|
||||
// Document how this works ; why didn't I do this when I first wrote it...
|
||||
// wrap is whether sshift > rd.
|
||||
// num is sshift mod rd.
|
||||
//
|
||||
@@ -366,9 +385,7 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
|
||||
if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
|
||||
else Copy_plane(ret,rhs,dimension,x,sx,cbmask);
|
||||
|
||||
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#endif
|
@@ -54,13 +54,13 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
|
||||
|
||||
|
||||
if ( !comm_dim ) {
|
||||
// std::cout << "Cshift_local" <<std::endl;
|
||||
//std::cout << "CSHIFT: Cshift_local" <<std::endl;
|
||||
Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
|
||||
} else if ( splice_dim ) {
|
||||
// std::cout << "Cshift_comms_simd" <<std::endl;
|
||||
//std::cout << "CSHIFT: Cshift_comms_simd call - splice_dim = " << splice_dim << " shift " << shift << " dimension = " << dimension << std::endl;
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift);
|
||||
} else {
|
||||
// std::cout << "Cshift_comms" <<std::endl;
|
||||
//std::cout << "CSHIFT: Cshift_comms" <<std::endl;
|
||||
Cshift_comms(ret,rhs,dimension,shift);
|
||||
}
|
||||
return ret;
|
||||
@@ -91,9 +91,12 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,const Lattice<vob
|
||||
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
|
||||
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
|
||||
|
||||
//std::cout << "Cshift_comms_simd dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
|
||||
if ( sshift[0] == sshift[1] ) {
|
||||
//std::cout << "Single pass Cshift_comms" <<std::endl;
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x3);
|
||||
} else {
|
||||
//std::cout << "Two pass Cshift_comms" <<std::endl;
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
|
||||
}
|
||||
@@ -175,6 +178,10 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
||||
int simd_layout = grid->_simd_layout[dimension];
|
||||
int comm_dim = grid->_processors[dimension] >1 ;
|
||||
|
||||
//std::cout << "Cshift_comms_simd dim "<< dimension << " fd "<<fd<<" rd "<<rd
|
||||
// << " ld "<<ld<<" pd " << pd<<" simd_layout "<<simd_layout
|
||||
// << " comm_dim " << comm_dim << " cbmask " << cbmask <<std::endl;
|
||||
|
||||
assert(comm_dim==1);
|
||||
assert(simd_layout==2);
|
||||
assert(shift>=0);
|
File diff suppressed because it is too large
Load Diff
@@ -244,19 +244,11 @@ namespace Grid {
|
||||
|
||||
template<class sobj,class vobj> strong_inline
|
||||
RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
ret.checkerboard = x.checkerboard;
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
axpy(ret,a,x,y);
|
||||
return norm2(ret);
|
||||
return axpy_norm_fast(ret,a,x,y);
|
||||
}
|
||||
template<class sobj,class vobj> strong_inline
|
||||
RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
ret.checkerboard = x.checkerboard;
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
axpby(ret,a,b,x,y);
|
||||
return norm2(ret); // FIXME implement parallel norm in ss loop
|
||||
return axpby_norm_fast(ret,a,b,x,y);
|
||||
}
|
||||
|
||||
}
|
@@ -257,7 +257,40 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
Lattice(Lattice&& r){ // move constructor
|
||||
_grid = r._grid;
|
||||
checkerboard = r.checkerboard;
|
||||
_odata=std::move(r._odata);
|
||||
}
|
||||
|
||||
inline Lattice<vobj> & operator = (Lattice<vobj> && r)
|
||||
{
|
||||
_grid = r._grid;
|
||||
checkerboard = r.checkerboard;
|
||||
_odata =std::move(r._odata);
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
|
||||
_grid = r._grid;
|
||||
checkerboard = r.checkerboard;
|
||||
_odata.resize(_grid->oSites());// essential
|
||||
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
_odata[ss]=r._odata[ss];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
||||
this->checkerboard = r.checkerboard;
|
||||
conformable(*this,r);
|
||||
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
this->_odata[ss]=r._odata[ss];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
virtual ~Lattice(void) = default;
|
||||
|
||||
@@ -277,15 +310,6 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
||||
this->checkerboard = r.checkerboard;
|
||||
conformable(*this,r);
|
||||
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
this->_odata[ss]=r._odata[ss];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// *=,+=,-= operators inherit behvour from correspond */+/- operation
|
||||
template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
|
@@ -179,7 +179,7 @@ namespace Grid {
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define DECLARE_RELATIONAL(op,functor) \
|
||||
#define DECLARE_RELATIONAL_EQ(op,functor) \
|
||||
template<class vsimd,IfSimd<vsimd> = 0>\
|
||||
inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\
|
||||
{\
|
||||
@@ -198,11 +198,6 @@ namespace Grid {
|
||||
typedef typename vsimd::scalar_type scalar;\
|
||||
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
|
||||
}\
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
|
||||
{ \
|
||||
return lhs._internal op rhs._internal; \
|
||||
} \
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs) \
|
||||
{ \
|
||||
@@ -212,14 +207,21 @@ namespace Grid {
|
||||
inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
|
||||
{ \
|
||||
return lhs op rhs._internal; \
|
||||
}
|
||||
} \
|
||||
|
||||
#define DECLARE_RELATIONAL(op,functor) \
|
||||
DECLARE_RELATIONAL_EQ(op,functor) \
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
|
||||
{ \
|
||||
return lhs._internal op rhs._internal; \
|
||||
}
|
||||
|
||||
DECLARE_RELATIONAL(<,slt);
|
||||
DECLARE_RELATIONAL(<=,sle);
|
||||
DECLARE_RELATIONAL(>,sgt);
|
||||
DECLARE_RELATIONAL(>=,sge);
|
||||
DECLARE_RELATIONAL(==,seq);
|
||||
DECLARE_RELATIONAL_EQ(==,seq);
|
||||
DECLARE_RELATIONAL(!=,sne);
|
||||
|
||||
#undef DECLARE_RELATIONAL
|
@@ -52,23 +52,5 @@ namespace Grid {
|
||||
}
|
||||
};
|
||||
|
||||
// LatticeCoordinate();
|
||||
// FIXME for debug; deprecate this; made obscelete by
|
||||
template<class vobj> void lex_sites(Lattice<vobj> &l){
|
||||
Real *v_ptr = (Real *)&l._odata[0];
|
||||
size_t o_len = l._grid->oSites();
|
||||
size_t v_len = sizeof(vobj)/sizeof(vRealF);
|
||||
size_t vec_len = vRealF::Nsimd();
|
||||
|
||||
for(int i=0;i<o_len;i++){
|
||||
for(int j=0;j<v_len;j++){
|
||||
for(int vv=0;vv<vec_len;vv+=2){
|
||||
v_ptr[i*v_len*vec_len+j*vec_len+vv ]= i+vv*500;
|
||||
v_ptr[i*v_len*vec_len+j*vec_len+vv+1]= i+vv*500;
|
||||
}
|
||||
}}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#endif
|
@@ -33,7 +33,7 @@ namespace Grid {
|
||||
// Deterministic Reduction operations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
|
||||
ComplexD nrm = innerProduct(arg,arg);
|
||||
auto nrm = innerProduct(arg,arg);
|
||||
return std::real(nrm);
|
||||
}
|
||||
|
||||
@@ -43,32 +43,85 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
|
||||
{
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_typeD vector_type;
|
||||
scalar_type nrm;
|
||||
|
||||
GridBase *grid = left._grid;
|
||||
const int pad = 8;
|
||||
|
||||
std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
|
||||
ComplexD inner;
|
||||
Vector<ComplexD> sumarray(grid->SumArraySize()*pad);
|
||||
|
||||
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
decltype(innerProductD(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation
|
||||
decltype(innerProductD(left._odata[0],right._odata[0])) vinner=zero; // private to thread; sub summation
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vnrm = vnrm + innerProductD(left._odata[ss],right._odata[ss]);
|
||||
vinner = vinner + innerProductD(left._odata[ss],right._odata[ss]);
|
||||
}
|
||||
sumarray[thr]=TensorRemove(vnrm) ;
|
||||
// All threads sum across SIMD; reduce serial work at end
|
||||
// one write per cacheline with streaming store
|
||||
ComplexD tmp = Reduce(TensorRemove(vinner)) ;
|
||||
vstream(sumarray[thr*pad],tmp);
|
||||
}
|
||||
|
||||
vector_type vvnrm; vvnrm=zero; // sum across threads
|
||||
inner=0.0;
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
vvnrm = vvnrm+sumarray[i];
|
||||
inner = inner+sumarray[i*pad];
|
||||
}
|
||||
nrm = Reduce(vvnrm);// sum across simd
|
||||
right._grid->GlobalSum(nrm);
|
||||
right._grid->GlobalSum(inner);
|
||||
return inner;
|
||||
}
|
||||
|
||||
/////////////////////////
|
||||
// Fast axpby_norm
|
||||
// z = a x + b y
|
||||
// return norm z
|
||||
/////////////////////////
|
||||
template<class sobj,class vobj> strong_inline RealD
|
||||
axpy_norm_fast(Lattice<vobj> &z,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||
{
|
||||
sobj one(1.0);
|
||||
return axpby_norm_fast(z,a,one,x,y);
|
||||
}
|
||||
|
||||
template<class sobj,class vobj> strong_inline RealD
|
||||
axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||
{
|
||||
const int pad = 8;
|
||||
z.checkerboard = x.checkerboard;
|
||||
conformable(z,x);
|
||||
conformable(x,y);
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_typeD vector_type;
|
||||
RealD nrm;
|
||||
|
||||
GridBase *grid = x._grid;
|
||||
|
||||
Vector<RealD> sumarray(grid->SumArraySize()*pad);
|
||||
|
||||
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(x._grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
// private to thread; sub summation
|
||||
decltype(innerProductD(z._odata[0],z._odata[0])) vnrm=zero;
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
|
||||
vnrm = vnrm + innerProductD(tmp,tmp);
|
||||
vstream(z._odata[ss],tmp);
|
||||
}
|
||||
vstream(sumarray[thr*pad],real(Reduce(TensorRemove(vnrm)))) ;
|
||||
}
|
||||
|
||||
nrm = 0.0; // sum across threads; linear in thread count but fast
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
nrm = nrm+sumarray[i*pad];
|
||||
}
|
||||
z._grid->GlobalSum(nrm);
|
||||
return nrm;
|
||||
}
|
||||
|
||||
|
||||
template<class Op,class T1>
|
||||
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
|
||||
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
|
||||
@@ -221,6 +274,115 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
static void mySliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
|
||||
{
|
||||
// std::cout << GridLogMessage << "Start mySliceInnerProductVector" << std::endl;
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
std::vector<scalar_type> lsSum;
|
||||
localSliceInnerProductVector(result, lhs, rhs, lsSum, orthogdim);
|
||||
globalSliceInnerProductVector(result, lhs, lsSum, orthogdim);
|
||||
// std::cout << GridLogMessage << "End mySliceInnerProductVector" << std::endl;
|
||||
}
|
||||
|
||||
template <class vobj>
|
||||
static void localSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, const Lattice<vobj> &rhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
|
||||
{
|
||||
// std::cout << GridLogMessage << "Start prep" << std::endl;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
GridBase *grid = lhs._grid;
|
||||
assert(grid!=NULL);
|
||||
conformable(grid,rhs._grid);
|
||||
|
||||
const int Nd = grid->_ndimension;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
assert(orthogdim >= 0);
|
||||
assert(orthogdim < Nd);
|
||||
|
||||
int fd=grid->_fdimensions[orthogdim];
|
||||
int ld=grid->_ldimensions[orthogdim];
|
||||
int rd=grid->_rdimensions[orthogdim];
|
||||
// std::cout << GridLogMessage << "Start alloc" << std::endl;
|
||||
|
||||
std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first
|
||||
lsSum.resize(ld,scalar_type(0.0)); // sum across these down to scalars
|
||||
std::vector<iScalar<scalar_type>> extracted(Nsimd); // splitting the SIMD
|
||||
// std::cout << GridLogMessage << "End alloc" << std::endl;
|
||||
|
||||
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
|
||||
for(int r=0;r<rd;r++){
|
||||
lvSum[r]=zero;
|
||||
}
|
||||
|
||||
int e1= grid->_slice_nblock[orthogdim];
|
||||
int e2= grid->_slice_block [orthogdim];
|
||||
int stride=grid->_slice_stride[orthogdim];
|
||||
// std::cout << GridLogMessage << "End prep" << std::endl;
|
||||
// std::cout << GridLogMessage << "Start parallel inner product, _rd = " << rd << std::endl;
|
||||
vector_type vv;
|
||||
parallel_for(int r=0;r<rd;r++)
|
||||
{
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss = so + n * stride + b;
|
||||
vv = TensorRemove(innerProduct(lhs._odata[ss], rhs._odata[ss]));
|
||||
lvSum[r] = lvSum[r] + vv;
|
||||
}
|
||||
}
|
||||
}
|
||||
// std::cout << GridLogMessage << "End parallel inner product" << std::endl;
|
||||
|
||||
// Sum across simd lanes in the plane, breaking out orthog dir.
|
||||
std::vector<int> icoor(Nd);
|
||||
for(int rt=0;rt<rd;rt++){
|
||||
|
||||
iScalar<vector_type> temp;
|
||||
temp._internal = lvSum[rt];
|
||||
extract(temp,extracted);
|
||||
|
||||
for(int idx=0;idx<Nsimd;idx++){
|
||||
|
||||
grid->iCoorFromIindex(icoor,idx);
|
||||
|
||||
int ldx =rt+icoor[orthogdim]*rd;
|
||||
|
||||
lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
|
||||
|
||||
}
|
||||
}
|
||||
// std::cout << GridLogMessage << "End sum over simd lanes" << std::endl;
|
||||
}
|
||||
template <class vobj>
|
||||
static void globalSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
|
||||
{
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
GridBase *grid = lhs._grid;
|
||||
int fd = result.size();
|
||||
int ld = lsSum.size();
|
||||
// sum over nodes.
|
||||
std::vector<scalar_type> gsum;
|
||||
gsum.resize(fd, scalar_type(0.0));
|
||||
// std::cout << GridLogMessage << "Start of gsum[t] creation:" << std::endl;
|
||||
for(int t=0;t<fd;t++){
|
||||
int pt = t/ld; // processor plane
|
||||
int lt = t%ld;
|
||||
if ( pt == grid->_processor_coor[orthogdim] ) {
|
||||
gsum[t]=lsSum[lt];
|
||||
}
|
||||
}
|
||||
// std::cout << GridLogMessage << "End of gsum[t] creation:" << std::endl;
|
||||
// std::cout << GridLogMessage << "Start of GlobalSumVector:" << std::endl;
|
||||
grid->GlobalSumVector(&gsum[0], fd);
|
||||
// std::cout << GridLogMessage << "End of GlobalSumVector:" << std::endl;
|
||||
|
||||
result = gsum;
|
||||
}
|
||||
template<class vobj>
|
||||
static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
|
||||
{
|
@@ -158,10 +158,19 @@ namespace Grid {
|
||||
// tens of seconds per trajectory so this is clean in all reasonable cases,
|
||||
// and margin of safety is orders of magnitude.
|
||||
// We could hack Sitmo to skip in the higher order words of state if necessary
|
||||
//
|
||||
// Replace with 2^30 ; avoid problem on large volumes
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
// uint64_t skip = site+1; // Old init Skipped then drew. Checked compat with faster init
|
||||
const int shift = 30;
|
||||
|
||||
uint64_t skip = site;
|
||||
skip = skip<<40;
|
||||
|
||||
skip = skip<<shift;
|
||||
|
||||
assert((skip >> shift)==site); // check for overflow
|
||||
|
||||
eng.discard(skip);
|
||||
// std::cout << " Engine " <<site << " state " <<eng<<std::endl;
|
||||
}
|
||||
@@ -308,6 +317,19 @@ namespace Grid {
|
||||
std::seed_seq src(seeds.begin(),seeds.end());
|
||||
Seed(src,0);
|
||||
}
|
||||
|
||||
void SeedUniqueString(const std::string &s){
|
||||
std::vector<int> seeds;
|
||||
std::stringstream sha;
|
||||
seeds = GridChecksum::sha256_seeds(s);
|
||||
for(int i=0;i<seeds.size();i++) {
|
||||
sha << std::hex << seeds[i];
|
||||
}
|
||||
std::cout << GridLogMessage << "Intialising serial RNG with unique string '"
|
||||
<< s << "'" << std::endl;
|
||||
std::cout << GridLogMessage << "Seed SHA256: " << sha.str() << std::endl;
|
||||
SeedFixedIntegers(seeds);
|
||||
}
|
||||
};
|
||||
|
||||
class GridParallelRNG : public GridRNGbase {
|
||||
@@ -368,6 +390,14 @@ namespace Grid {
|
||||
_time_counter += usecond()- inner_time_counter;
|
||||
};
|
||||
|
||||
void SeedUniqueString(const std::string &s){
|
||||
std::vector<int> seeds;
|
||||
seeds = GridChecksum::sha256_seeds(s);
|
||||
std::cout << GridLogMessage << "Intialising parallel RNG with unique string '"
|
||||
<< s << "'" << std::endl;
|
||||
std::cout << GridLogMessage << "Seed SHA256: " << GridChecksum::sha256_string(seeds) << std::endl;
|
||||
SeedFixedIntegers(seeds);
|
||||
}
|
||||
void SeedFixedIntegers(const std::vector<int> &seeds){
|
||||
|
||||
// Everyone generates the same seed_seq based on input seeds
|
@@ -464,9 +464,11 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
|
||||
assert(orthog>=0);
|
||||
|
||||
for(int d=0;d<nh;d++){
|
||||
if ( d!=orthog ) {
|
||||
assert(lg->_processors[d] == hg->_processors[d]);
|
||||
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
|
||||
}
|
||||
}
|
||||
|
||||
// the above should guarantee that the operations are local
|
||||
parallel_for(int idx=0;idx<lg->lSites();idx++){
|
||||
@@ -485,7 +487,7 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
|
||||
|
||||
|
||||
template<class vobj>
|
||||
void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
|
||||
void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
@@ -499,9 +501,11 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slic
|
||||
assert(orthog>=0);
|
||||
|
||||
for(int d=0;d<nh;d++){
|
||||
if ( d!=orthog ) {
|
||||
assert(lg->_processors[d] == hg->_processors[d]);
|
||||
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
|
||||
}
|
||||
}
|
||||
|
||||
// the above should guarantee that the operations are local
|
||||
parallel_for(int idx=0;idx<lg->lSites();idx++){
|
||||
@@ -599,6 +603,51 @@ unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
|
||||
extract1(in_vobj, out_ptrs, 0);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename vobj, typename sobj>
|
||||
typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type
|
||||
unvectorizeToRevLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
|
||||
{
|
||||
|
||||
typedef typename vobj::vector_type vtype;
|
||||
|
||||
GridBase* in_grid = in._grid;
|
||||
out.resize(in_grid->lSites());
|
||||
|
||||
int ndim = in_grid->Nd();
|
||||
int in_nsimd = vtype::Nsimd();
|
||||
|
||||
std::vector<std::vector<int> > in_icoor(in_nsimd);
|
||||
|
||||
for(int lane=0; lane < in_nsimd; lane++){
|
||||
in_icoor[lane].resize(ndim);
|
||||
in_grid->iCoorFromIindex(in_icoor[lane], lane);
|
||||
}
|
||||
|
||||
parallel_for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index
|
||||
//Assemble vector of pointers to output elements
|
||||
std::vector<sobj*> out_ptrs(in_nsimd);
|
||||
|
||||
std::vector<int> in_ocoor(ndim);
|
||||
in_grid->oCoorFromOindex(in_ocoor, in_oidx);
|
||||
|
||||
std::vector<int> lcoor(in_grid->Nd());
|
||||
|
||||
for(int lane=0; lane < in_nsimd; lane++){
|
||||
for(int mu=0;mu<ndim;mu++)
|
||||
lcoor[mu] = in_ocoor[mu] + in_grid->_rdimensions[mu]*in_icoor[lane][mu];
|
||||
|
||||
int lex;
|
||||
Lexicographic::IndexFromCoorReversed(lcoor, lex, in_grid->_ldimensions);
|
||||
out_ptrs[lane] = &out[lex];
|
||||
}
|
||||
|
||||
//Unpack into those ptrs
|
||||
const vobj & in_vobj = in._odata[in_oidx];
|
||||
extract1(in_vobj, out_ptrs, 0);
|
||||
}
|
||||
}
|
||||
|
||||
//Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order
|
||||
template<typename vobj, typename sobj>
|
||||
typename std::enable_if<isSIMDvectorized<vobj>::value
|
||||
@@ -648,10 +697,59 @@ vectorizeFromLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
|
||||
}
|
||||
}
|
||||
|
||||
template<typename vobj, typename sobj>
|
||||
typename std::enable_if<isSIMDvectorized<vobj>::value
|
||||
&& !isSIMDvectorized<sobj>::value, void>::type
|
||||
vectorizeFromRevLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
|
||||
{
|
||||
|
||||
typedef typename vobj::vector_type vtype;
|
||||
|
||||
GridBase* grid = out._grid;
|
||||
assert(in.size()==grid->lSites());
|
||||
|
||||
int ndim = grid->Nd();
|
||||
int nsimd = vtype::Nsimd();
|
||||
|
||||
std::vector<std::vector<int> > icoor(nsimd);
|
||||
|
||||
for(int lane=0; lane < nsimd; lane++){
|
||||
icoor[lane].resize(ndim);
|
||||
grid->iCoorFromIindex(icoor[lane],lane);
|
||||
}
|
||||
|
||||
parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index
|
||||
//Assemble vector of pointers to output elements
|
||||
std::vector<sobj*> ptrs(nsimd);
|
||||
|
||||
std::vector<int> ocoor(ndim);
|
||||
grid->oCoorFromOindex(ocoor, oidx);
|
||||
|
||||
std::vector<int> lcoor(grid->Nd());
|
||||
|
||||
for(int lane=0; lane < nsimd; lane++){
|
||||
|
||||
for(int mu=0;mu<ndim;mu++){
|
||||
lcoor[mu] = ocoor[mu] + grid->_rdimensions[mu]*icoor[lane][mu];
|
||||
}
|
||||
|
||||
int lex;
|
||||
Lexicographic::IndexFromCoorReversed(lcoor, lex, grid->_ldimensions);
|
||||
ptrs[lane] = &in[lex];
|
||||
}
|
||||
|
||||
//pack from those ptrs
|
||||
vobj vecobj;
|
||||
merge1(vecobj, ptrs, 0);
|
||||
out._odata[oidx] = vecobj;
|
||||
}
|
||||
}
|
||||
|
||||
//Convert a Lattice from one precision to another
|
||||
template<class VobjOut, class VobjIn>
|
||||
void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
|
||||
assert(out._grid->Nd() == in._grid->Nd());
|
||||
assert(out._grid->FullDimensions() == in._grid->FullDimensions());
|
||||
out.checkerboard = in.checkerboard;
|
||||
GridBase *in_grid=in._grid;
|
||||
GridBase *out_grid = out._grid;
|
@@ -86,7 +86,7 @@ protected:
|
||||
Colours &Painter;
|
||||
int active;
|
||||
int timing_mode;
|
||||
int topWidth{-1};
|
||||
int topWidth{-1}, chanWidth{-1};
|
||||
static int timestamp;
|
||||
std::string name, topName;
|
||||
std::string COLOUR;
|
||||
@@ -126,6 +126,7 @@ public:
|
||||
}
|
||||
}
|
||||
void setTopWidth(const int w) {topWidth = w;}
|
||||
void setChanWidth(const int w) {chanWidth = w;}
|
||||
|
||||
friend std::ostream& operator<< (std::ostream& stream, Logger& log){
|
||||
|
||||
@@ -136,13 +137,20 @@ public:
|
||||
stream << std::setw(log.topWidth);
|
||||
}
|
||||
stream << log.topName << log.background()<< " : ";
|
||||
stream << log.colour() << std::left << log.name << log.background() << " : ";
|
||||
stream << log.colour() << std::left;
|
||||
if (log.chanWidth > 0)
|
||||
{
|
||||
stream << std::setw(log.chanWidth);
|
||||
}
|
||||
stream << log.name << log.background() << " : ";
|
||||
if ( log.timestamp ) {
|
||||
log.StopWatch->Stop();
|
||||
GridTime now = log.StopWatch->Elapsed();
|
||||
|
||||
if ( log.timing_mode==1 ) log.StopWatch->Reset();
|
||||
log.StopWatch->Start();
|
||||
stream << log.evidence()<< std::setw(6)<<now << log.background() << " : " ;
|
||||
stream << log.evidence()
|
||||
<< now << log.background() << " : " ;
|
||||
}
|
||||
stream << log.colour();
|
||||
return stream;
|
3
Grid/parallelIO/BinaryIO.cc
Normal file
3
Grid/parallelIO/BinaryIO.cc
Normal file
@@ -0,0 +1,3 @@
|
||||
#include <Grid/GridCore.h>
|
||||
|
||||
int Grid::BinaryIO::latticeWriteMaxRetry = -1;
|
@@ -81,6 +81,7 @@ inline void removeWhitespace(std::string &key)
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class BinaryIO {
|
||||
public:
|
||||
static int latticeWriteMaxRetry;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// more byte manipulation helpers
|
||||
@@ -91,7 +92,7 @@ class BinaryIO {
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
GridBase *grid = lat._grid;
|
||||
int lsites = grid->lSites();
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
std::vector<sobj> scalardata(lsites);
|
||||
unvectorizeToLexOrdArray(scalardata,lat);
|
||||
@@ -110,11 +111,11 @@ class BinaryIO {
|
||||
lsites = 1;
|
||||
}
|
||||
|
||||
#pragma omp parallel
|
||||
PARALLEL_REGION
|
||||
{
|
||||
uint32_t nersc_csum_thr = 0;
|
||||
|
||||
#pragma omp for
|
||||
PARALLEL_FOR_LOOP_INTERN
|
||||
for (uint64_t local_site = 0; local_site < lsites; local_site++)
|
||||
{
|
||||
uint32_t *site_buf = (uint32_t *)&fbuf[local_site];
|
||||
@@ -124,7 +125,7 @@ class BinaryIO {
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp critical
|
||||
PARALLEL_CRITICAL
|
||||
{
|
||||
nersc_csum += nersc_csum_thr;
|
||||
}
|
||||
@@ -146,21 +147,23 @@ class BinaryIO {
|
||||
std::vector<int> local_start =grid->LocalStarts();
|
||||
std::vector<int> global_vol =grid->FullDimensions();
|
||||
|
||||
#pragma omp parallel
|
||||
PARALLEL_REGION
|
||||
{
|
||||
std::vector<int> coor(nd);
|
||||
uint32_t scidac_csuma_thr=0;
|
||||
uint32_t scidac_csumb_thr=0;
|
||||
uint32_t site_crc=0;
|
||||
|
||||
#pragma omp for
|
||||
PARALLEL_FOR_LOOP_INTERN
|
||||
for(uint64_t local_site=0;local_site<lsites;local_site++){
|
||||
|
||||
uint32_t * site_buf = (uint32_t *)&fbuf[local_site];
|
||||
|
||||
/*
|
||||
* Scidac csum is rather more heavyweight
|
||||
* FIXME -- 128^3 x 256 x 16 will overflow.
|
||||
*/
|
||||
|
||||
int global_site;
|
||||
|
||||
Lexicographic::CoorFromIndex(coor,local_site,local_vol);
|
||||
@@ -181,7 +184,7 @@ class BinaryIO {
|
||||
scidac_csumb_thr ^= site_crc<<gsite31 | site_crc>>(32-gsite31);
|
||||
}
|
||||
|
||||
#pragma omp critical
|
||||
PARALLEL_CRITICAL
|
||||
{
|
||||
scidac_csuma^= scidac_csuma_thr;
|
||||
scidac_csumb^= scidac_csumb_thr;
|
||||
@@ -261,7 +264,7 @@ class BinaryIO {
|
||||
GridBase *grid,
|
||||
std::vector<fobj> &iodata,
|
||||
std::string file,
|
||||
Integer offset,
|
||||
uint64_t& offset,
|
||||
const std::string &format, int control,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
@@ -368,7 +371,7 @@ class BinaryIO {
|
||||
#endif
|
||||
} else {
|
||||
std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : "
|
||||
<< iodata.size() * sizeof(fobj) << " bytes" << std::endl;
|
||||
<< iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
|
||||
std::ifstream fin;
|
||||
fin.open(file, std::ios::binary | std::ios::in);
|
||||
if (control & BINARYIO_MASTER_APPEND)
|
||||
@@ -429,14 +432,20 @@ class BinaryIO {
|
||||
MPI_Abort(MPI_COMM_WORLD, 1); //assert(ierr == 0);
|
||||
}
|
||||
|
||||
std::cout << GridLogDebug << "MPI read I/O set view " << file << std::endl;
|
||||
std::cout << GridLogDebug << "MPI write I/O set view " << file << std::endl;
|
||||
ierr = MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL);
|
||||
assert(ierr == 0);
|
||||
|
||||
std::cout << GridLogDebug << "MPI read I/O write all " << file << std::endl;
|
||||
std::cout << GridLogDebug << "MPI write I/O write all " << file << std::endl;
|
||||
ierr = MPI_File_write_all(fh, &iodata[0], 1, localArray, &status);
|
||||
assert(ierr == 0);
|
||||
|
||||
MPI_Offset os;
|
||||
MPI_File_get_position(fh, &os);
|
||||
MPI_File_get_byte_offset(fh, os, &disp);
|
||||
offset = disp;
|
||||
|
||||
|
||||
MPI_File_close(&fh);
|
||||
MPI_Type_free(&fileArray);
|
||||
MPI_Type_free(&localArray);
|
||||
@@ -446,16 +455,20 @@ class BinaryIO {
|
||||
} else {
|
||||
|
||||
std::cout << GridLogMessage << "IOobject: C++ write I/O " << file << " : "
|
||||
<< iodata.size() * sizeof(fobj) << " bytes" << std::endl;
|
||||
<< iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
|
||||
|
||||
std::ofstream fout;
|
||||
fout.exceptions ( std::fstream::failbit | std::fstream::badbit );
|
||||
try {
|
||||
if (offset) { // Must already exist and contain data
|
||||
fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
|
||||
} else { // Allow create
|
||||
fout.open(file,std::ios::binary|std::ios::out);
|
||||
}
|
||||
} catch (const std::fstream::failure& exc) {
|
||||
std::cout << GridLogError << "Error in opening the file " << file << " for output" <<std::endl;
|
||||
std::cout << GridLogError << "Exception description: " << exc.what() << std::endl;
|
||||
std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl;
|
||||
// std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl;
|
||||
#ifdef USE_MPI_IO
|
||||
MPI_Abort(MPI_COMM_WORLD,1);
|
||||
#else
|
||||
@@ -489,6 +502,7 @@ class BinaryIO {
|
||||
exit(1);
|
||||
#endif
|
||||
}
|
||||
offset = fout.tellp();
|
||||
fout.close();
|
||||
}
|
||||
timer.Stop();
|
||||
@@ -523,7 +537,7 @@ class BinaryIO {
|
||||
static inline void readLatticeObject(Lattice<vobj> &Umu,
|
||||
std::string file,
|
||||
munger munge,
|
||||
Integer offset,
|
||||
uint64_t offset,
|
||||
const std::string &format,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
@@ -533,7 +547,7 @@ class BinaryIO {
|
||||
typedef typename vobj::Realified::scalar_type word; word w=0;
|
||||
|
||||
GridBase *grid = Umu._grid;
|
||||
int lsites = grid->lSites();
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
std::vector<sobj> scalardata(lsites);
|
||||
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
|
||||
@@ -544,7 +558,7 @@ class BinaryIO {
|
||||
GridStopWatch timer;
|
||||
timer.Start();
|
||||
|
||||
parallel_for(int x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
|
||||
parallel_for(uint64_t x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
|
||||
|
||||
vectorizeFromLexOrdArray(scalardata,Umu);
|
||||
grid->Barrier();
|
||||
@@ -560,7 +574,7 @@ class BinaryIO {
|
||||
static inline void writeLatticeObject(Lattice<vobj> &Umu,
|
||||
std::string file,
|
||||
munger munge,
|
||||
Integer offset,
|
||||
uint64_t offset,
|
||||
const std::string &format,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
@@ -569,7 +583,9 @@ class BinaryIO {
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::Realified::scalar_type word; word w=0;
|
||||
GridBase *grid = Umu._grid;
|
||||
int lsites = grid->lSites();
|
||||
uint64_t lsites = grid->lSites(), offsetCopy = offset;
|
||||
int attemptsLeft = std::max(0, BinaryIO::latticeWriteMaxRetry);
|
||||
bool checkWrite = (BinaryIO::latticeWriteMaxRetry >= 0);
|
||||
|
||||
std::vector<sobj> scalardata(lsites);
|
||||
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
|
||||
@@ -580,13 +596,39 @@ class BinaryIO {
|
||||
GridStopWatch timer; timer.Start();
|
||||
unvectorizeToLexOrdArray(scalardata,Umu);
|
||||
|
||||
parallel_for(int x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
|
||||
parallel_for(uint64_t x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
|
||||
|
||||
grid->Barrier();
|
||||
timer.Stop();
|
||||
|
||||
while (attemptsLeft >= 0)
|
||||
{
|
||||
grid->Barrier();
|
||||
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
if (checkWrite)
|
||||
{
|
||||
std::vector<fobj> ckiodata(lsites);
|
||||
uint32_t cknersc_csum, ckscidac_csuma, ckscidac_csumb;
|
||||
uint64_t ckoffset = offsetCopy;
|
||||
|
||||
std::cout << GridLogMessage << "writeLatticeObject: read back object" << std::endl;
|
||||
grid->Barrier();
|
||||
IOobject(w,grid,ckiodata,file,ckoffset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
|
||||
cknersc_csum,ckscidac_csuma,ckscidac_csumb);
|
||||
if ((cknersc_csum != nersc_csum) or (ckscidac_csuma != scidac_csuma) or (ckscidac_csumb != scidac_csumb))
|
||||
{
|
||||
std::cout << GridLogMessage << "writeLatticeObject: read test checksum failure, re-writing (" << attemptsLeft << " attempt(s) remaining)" << std::endl;
|
||||
offset = offsetCopy;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << GridLogMessage << "writeLatticeObject: read test checksum correct" << std::endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
attemptsLeft--;
|
||||
}
|
||||
|
||||
|
||||
std::cout<<GridLogMessage<<"writeLatticeObject: unvectorize overhead "<<timer.Elapsed() <<std::endl;
|
||||
}
|
||||
@@ -597,7 +639,7 @@ class BinaryIO {
|
||||
static inline void readRNG(GridSerialRNG &serial,
|
||||
GridParallelRNG ¶llel,
|
||||
std::string file,
|
||||
Integer offset,
|
||||
uint64_t offset,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
uint32_t &scidac_csumb)
|
||||
@@ -610,8 +652,8 @@ class BinaryIO {
|
||||
std::string format = "IEEE32BIG";
|
||||
|
||||
GridBase *grid = parallel._grid;
|
||||
int gsites = grid->gSites();
|
||||
int lsites = grid->lSites();
|
||||
uint64_t gsites = grid->gSites();
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
uint32_t nersc_csum_tmp = 0;
|
||||
uint32_t scidac_csuma_tmp = 0;
|
||||
@@ -626,7 +668,7 @@ class BinaryIO {
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
timer.Start();
|
||||
parallel_for(int lidx=0;lidx<lsites;lidx++){
|
||||
parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
|
||||
std::vector<RngStateType> tmp(RngStateCount);
|
||||
std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin());
|
||||
parallel.SetState(tmp,lidx);
|
||||
@@ -659,7 +701,7 @@ class BinaryIO {
|
||||
static inline void writeRNG(GridSerialRNG &serial,
|
||||
GridParallelRNG ¶llel,
|
||||
std::string file,
|
||||
Integer offset,
|
||||
uint64_t offset,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
uint32_t &scidac_csumb)
|
||||
@@ -670,8 +712,8 @@ class BinaryIO {
|
||||
typedef std::array<RngStateType,RngStateCount> RNGstate;
|
||||
|
||||
GridBase *grid = parallel._grid;
|
||||
int gsites = grid->gSites();
|
||||
int lsites = grid->lSites();
|
||||
uint64_t gsites = grid->gSites();
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
uint32_t nersc_csum_tmp;
|
||||
uint32_t scidac_csuma_tmp;
|
||||
@@ -684,7 +726,7 @@ class BinaryIO {
|
||||
|
||||
timer.Start();
|
||||
std::vector<RNGstate> iodata(lsites);
|
||||
parallel_for(int lidx=0;lidx<lsites;lidx++){
|
||||
parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
|
||||
std::vector<RngStateType> tmp(RngStateCount);
|
||||
parallel.GetState(tmp,lidx);
|
||||
std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin());
|
||||
@@ -693,7 +735,6 @@ class BinaryIO {
|
||||
|
||||
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
iodata.resize(1);
|
||||
{
|
||||
std::vector<RngStateType> tmp(RngStateCount);
|
||||
@@ -713,5 +754,6 @@ class BinaryIO {
|
||||
std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
@@ -182,6 +182,11 @@ class GridLimeReader : public BinaryIO {
|
||||
{
|
||||
filename= _filename;
|
||||
File = fopen(filename.c_str(), "r");
|
||||
if (File == nullptr)
|
||||
{
|
||||
std::cerr << "cannot open file '" << filename << "'" << std::endl;
|
||||
abort();
|
||||
}
|
||||
LimeR = limeCreateReader(File);
|
||||
}
|
||||
/////////////////////////////////////////////
|
||||
@@ -228,7 +233,8 @@ class GridLimeReader : public BinaryIO {
|
||||
// std::cout << " ReadLatticeObject from offset "<<offset << std::endl;
|
||||
BinarySimpleMunger<sobj,sobj> munge;
|
||||
BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
std::cout << GridLogMessage << "SciDAC checksum A " << std::hex << scidac_csuma << std::dec << std::endl;
|
||||
std::cout << GridLogMessage << "SciDAC checksum B " << std::hex << scidac_csumb << std::dec << std::endl;
|
||||
/////////////////////////////////////////////
|
||||
// Insist checksum is next record
|
||||
/////////////////////////////////////////////
|
||||
@@ -245,10 +251,8 @@ class GridLimeReader : public BinaryIO {
|
||||
////////////////////////////////////////////
|
||||
// Read a generic serialisable object
|
||||
////////////////////////////////////////////
|
||||
template<class serialisable_object>
|
||||
void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
|
||||
void readLimeObject(std::string &xmlstring,std::string record_name)
|
||||
{
|
||||
std::string xmlstring;
|
||||
// should this be a do while; can we miss a first record??
|
||||
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
|
||||
|
||||
@@ -262,18 +266,29 @@ class GridLimeReader : public BinaryIO {
|
||||
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
|
||||
// std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl;
|
||||
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
read(RD,object_name,object);
|
||||
xmlstring = std::string(&xmlc[0]);
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<class serialisable_object>
|
||||
void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
|
||||
{
|
||||
std::string xmlstring;
|
||||
|
||||
readLimeObject(xmlstring, record_name);
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,object_name,object);
|
||||
}
|
||||
};
|
||||
|
||||
class GridLimeWriter : public BinaryIO {
|
||||
class GridLimeWriter : public BinaryIO
|
||||
{
|
||||
public:
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// FIXME: format for RNG? Now just binary out instead
|
||||
// FIXME: collective calls or not ?
|
||||
@@ -282,17 +297,24 @@ class GridLimeWriter : public BinaryIO {
|
||||
FILE *File;
|
||||
LimeWriter *LimeW;
|
||||
std::string filename;
|
||||
|
||||
bool boss_node;
|
||||
GridLimeWriter( bool isboss = true) {
|
||||
boss_node = isboss;
|
||||
}
|
||||
void open(const std::string &_filename) {
|
||||
filename= _filename;
|
||||
if ( boss_node ) {
|
||||
File = fopen(filename.c_str(), "w");
|
||||
LimeW = limeCreateWriter(File); assert(LimeW != NULL );
|
||||
}
|
||||
}
|
||||
/////////////////////////////////////////////
|
||||
// Close the file
|
||||
/////////////////////////////////////////////
|
||||
void close(void) {
|
||||
if ( boss_node ) {
|
||||
fclose(File);
|
||||
}
|
||||
// limeDestroyWriter(LimeW);
|
||||
}
|
||||
///////////////////////////////////////////////////////
|
||||
@@ -300,24 +322,22 @@ class GridLimeWriter : public BinaryIO {
|
||||
///////////////////////////////////////////////////////
|
||||
int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize)
|
||||
{
|
||||
if ( boss_node ) {
|
||||
LimeRecordHeader *h;
|
||||
h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize);
|
||||
assert(limeWriteRecordHeader(h, LimeW) >= 0);
|
||||
limeDestroyHeader(h);
|
||||
}
|
||||
return LIME_SUCCESS;
|
||||
}
|
||||
////////////////////////////////////////////
|
||||
// Write a generic serialisable object
|
||||
////////////////////////////////////////////
|
||||
template<class serialisable_object>
|
||||
void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name)
|
||||
void writeLimeObject(int MB,int ME,XmlWriter &writer,std::string object_name,std::string record_name)
|
||||
{
|
||||
std::string xmlstring;
|
||||
{
|
||||
XmlWriter WR("","");
|
||||
write(WR,object_name,object);
|
||||
xmlstring = WR.XmlString();
|
||||
}
|
||||
if ( boss_node ) {
|
||||
std::string xmlstring = writer.docString();
|
||||
|
||||
// std::cout << "WriteLimeObject" << record_name <<std::endl;
|
||||
uint64_t nbytes = xmlstring.size();
|
||||
// std::cout << " xmlstring "<< nbytes<< " " << xmlstring <<std::endl;
|
||||
@@ -329,48 +349,95 @@ class GridLimeWriter : public BinaryIO {
|
||||
err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0);
|
||||
err=limeWriterCloseRecord(LimeW); assert(err>=0);
|
||||
limeDestroyHeader(h);
|
||||
// std::cout << " File offset is now"<<ftello(File) << std::endl;
|
||||
}
|
||||
////////////////////////////////////////////
|
||||
}
|
||||
|
||||
template<class serialisable_object>
|
||||
void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name, const unsigned int scientificPrec = 0)
|
||||
{
|
||||
XmlWriter WR("","");
|
||||
|
||||
if (scientificPrec)
|
||||
{
|
||||
WR.scientificFormat(true);
|
||||
WR.setPrecision(scientificPrec);
|
||||
}
|
||||
write(WR,object_name,object);
|
||||
writeLimeObject(MB, ME, WR, object_name, record_name);
|
||||
}
|
||||
////////////////////////////////////////////////////
|
||||
// Write a generic lattice field and csum
|
||||
////////////////////////////////////////////
|
||||
// This routine is Collectively called by all nodes
|
||||
// in communicator used by the field._grid
|
||||
////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
|
||||
{
|
||||
////////////////////////////////////////////
|
||||
// Create record header
|
||||
////////////////////////////////////////////
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
int err;
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites;
|
||||
createLimeRecordHeader(record_name, 0, 0, PayloadSize);
|
||||
|
||||
// std::cout << "W sizeof(sobj)" <<sizeof(sobj)<<std::endl;
|
||||
// std::cout << "W Gsites " <<field._grid->_gsites<<std::endl;
|
||||
// std::cout << "W Payload expected " <<PayloadSize<<std::endl;
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// NB: FILE and iostream are jointly writing disjoint sequences in the
|
||||
// the same file through different file handles (integer units).
|
||||
//
|
||||
// These are both buffered, so why I think this code is right is as follows.
|
||||
//
|
||||
// i) write record header to FILE *File, telegraphing the size.
|
||||
// i) write record header to FILE *File, telegraphing the size; flush
|
||||
// ii) ftello reads the offset from FILE *File .
|
||||
// iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
|
||||
// Closes iostream and flushes.
|
||||
// iv) fseek on FILE * to end of this disjoint section.
|
||||
// v) Continue writing scidac record.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
uint64_t offset = ftello(File);
|
||||
// std::cout << " Writing to offset "<<offset << std::endl;
|
||||
|
||||
GridBase *grid = field._grid;
|
||||
assert(boss_node == field._grid->IsBoss() );
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Create record header
|
||||
////////////////////////////////////////////
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
int err;
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
uint64_t PayloadSize = sizeof(sobj) * grid->_gsites;
|
||||
if ( boss_node ) {
|
||||
createLimeRecordHeader(record_name, 0, 0, PayloadSize);
|
||||
fflush(File);
|
||||
}
|
||||
|
||||
// std::cout << "W sizeof(sobj)" <<sizeof(sobj)<<std::endl;
|
||||
// std::cout << "W Gsites " <<field._grid->_gsites<<std::endl;
|
||||
// std::cout << "W Payload expected " <<PayloadSize<<std::endl;
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Check all nodes agree on file position
|
||||
////////////////////////////////////////////////
|
||||
uint64_t offset1;
|
||||
if ( boss_node ) {
|
||||
offset1 = ftello(File);
|
||||
}
|
||||
grid->Broadcast(0,(void *)&offset1,sizeof(offset1));
|
||||
|
||||
///////////////////////////////////////////
|
||||
// The above is collective. Write by other means into the binary record
|
||||
///////////////////////////////////////////
|
||||
std::string format = getFormatString<vobj>();
|
||||
BinarySimpleMunger<sobj,sobj> munge;
|
||||
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
// fseek(File,0,SEEK_END); offset = ftello(File);std::cout << " offset now "<<offset << std::endl;
|
||||
err=limeWriterCloseRecord(LimeW); assert(err>=0);
|
||||
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Wind forward and close the record
|
||||
///////////////////////////////////////////
|
||||
if ( boss_node ) {
|
||||
fseek(File,0,SEEK_END);
|
||||
uint64_t offset2 = ftello(File); // std::cout << " now at offset "<<offset2 << std::endl;
|
||||
assert( (offset2-offset1) == PayloadSize);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Check MPI-2 I/O did what we expect to file
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
if ( boss_node ) {
|
||||
err=limeWriterCloseRecord(LimeW); assert(err>=0);
|
||||
}
|
||||
////////////////////////////////////////
|
||||
// Write checksum element, propagaing forward from the BinaryIO
|
||||
// Always pair a checksum with a binary object, and close message
|
||||
@@ -380,26 +447,32 @@ class GridLimeWriter : public BinaryIO {
|
||||
std::stringstream streamb; streamb << std::hex << scidac_csumb;
|
||||
checksum.suma= streama.str();
|
||||
checksum.sumb= streamb.str();
|
||||
// std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl;
|
||||
if ( boss_node ) {
|
||||
writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class ScidacWriter : public GridLimeWriter {
|
||||
public:
|
||||
|
||||
ScidacWriter(bool isboss =true ) : GridLimeWriter(isboss) { };
|
||||
|
||||
template<class SerialisableUserFile>
|
||||
void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
|
||||
{
|
||||
scidacFile _scidacFile(grid);
|
||||
if ( this->boss_node ) {
|
||||
writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
|
||||
writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
|
||||
}
|
||||
}
|
||||
////////////////////////////////////////////////
|
||||
// Write generic lattice field in scidac format
|
||||
////////////////////////////////////////////////
|
||||
template <class vobj, class userRecord>
|
||||
void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord)
|
||||
void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord,
|
||||
const unsigned int recordScientificPrec = 0)
|
||||
{
|
||||
GridBase * grid = field._grid;
|
||||
|
||||
@@ -415,9 +488,12 @@ class ScidacWriter : public GridLimeWriter {
|
||||
//////////////////////////////////////////////
|
||||
// Fill the Lime file record by record
|
||||
//////////////////////////////////////////////
|
||||
if ( this->boss_node ) {
|
||||
writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
|
||||
writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
|
||||
writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML), recordScientificPrec);
|
||||
writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
|
||||
}
|
||||
// Collective call
|
||||
writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
|
||||
}
|
||||
};
|
||||
@@ -485,6 +561,8 @@ class ScidacReader : public GridLimeReader {
|
||||
class IldgWriter : public ScidacWriter {
|
||||
public:
|
||||
|
||||
IldgWriter(bool isboss) : ScidacWriter(isboss) {};
|
||||
|
||||
///////////////////////////////////
|
||||
// A little helper
|
||||
///////////////////////////////////
|
||||
@@ -568,7 +646,6 @@ class IldgWriter : public ScidacWriter {
|
||||
writeLimeIldgLFN(header.ildg_lfn); // rec
|
||||
writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
|
||||
// limeDestroyWriter(LimeW);
|
||||
fclose(File);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -644,9 +721,11 @@ class IldgReader : public GridLimeReader {
|
||||
|
||||
//////////////////////////////////
|
||||
// ILDG format record
|
||||
|
||||
std::string xmlstring(&xmlc[0]);
|
||||
if ( !strncmp(limeReaderType(LimeR), ILDG_FORMAT,strlen(ILDG_FORMAT)) ) {
|
||||
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"ildgFormat",ildgFormat_);
|
||||
|
||||
if ( ildgFormat_.precision == 64 ) format = std::string("IEEE64BIG");
|
||||
@@ -661,13 +740,13 @@ class IldgReader : public GridLimeReader {
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), ILDG_DATA_LFN,strlen(ILDG_DATA_LFN)) ) {
|
||||
FieldMetaData_.ildg_lfn = std::string(&xmlc[0]);
|
||||
FieldMetaData_.ildg_lfn = xmlstring;
|
||||
found_ildgLFN = 1;
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), GRID_FORMAT,strlen(ILDG_FORMAT)) ) {
|
||||
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"FieldMetaData",FieldMetaData_);
|
||||
|
||||
format = FieldMetaData_.floating_point;
|
||||
@@ -681,18 +760,17 @@ class IldgReader : public GridLimeReader {
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), SCIDAC_RECORD_XML,strlen(SCIDAC_RECORD_XML)) ) {
|
||||
std::string xmls(&xmlc[0]);
|
||||
// is it a USQCD info field
|
||||
if ( xmls.find(std::string("usqcdInfo")) != std::string::npos ) {
|
||||
if ( xmlstring.find(std::string("usqcdInfo")) != std::string::npos ) {
|
||||
// std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl;
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"usqcdInfo",usqcdInfo_);
|
||||
found_usqcdInfo = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), SCIDAC_CHECKSUM,strlen(SCIDAC_CHECKSUM)) ) {
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"scidacChecksum",scidacChecksum_);
|
||||
found_scidacChecksum = 1;
|
||||
}
|
@@ -136,8 +136,9 @@ struct scidacRecord : Serializable {
|
||||
int, typesize,
|
||||
int, datacount);
|
||||
|
||||
scidacRecord() { version =1.0; }
|
||||
|
||||
scidacRecord()
|
||||
: version(1.0), recordtype(0), colors(0), spins(0), typesize(0), datacount(0)
|
||||
{}
|
||||
};
|
||||
|
||||
////////////////////////
|
@@ -81,18 +81,16 @@ namespace Grid {
|
||||
std::string, creation_date,
|
||||
std::string, archive_date,
|
||||
std::string, floating_point);
|
||||
FieldMetaData(void) {
|
||||
nd=4;
|
||||
dimension.resize(4);
|
||||
boundary.resize(4);
|
||||
scidac_checksuma=0;
|
||||
scidac_checksumb=0;
|
||||
checksum=0;
|
||||
}
|
||||
// WARNING: non-initialised values might lead to twisted parallel IO
|
||||
// issues, std::string are fine because they initliase to size 0
|
||||
// as per C++ standard.
|
||||
FieldMetaData(void)
|
||||
: nd(4), dimension(4,0), boundary(4, ""), data_start(0),
|
||||
link_trace(0.), plaquette(0.), checksum(0),
|
||||
scidac_checksuma(0), scidac_checksumb(0), sequence_number(0)
|
||||
{}
|
||||
};
|
||||
|
||||
|
||||
|
||||
namespace QCD {
|
||||
|
||||
using namespace Grid;
|
@@ -57,7 +57,7 @@ namespace Grid {
|
||||
// for the header-reader
|
||||
static inline int readHeader(std::string file,GridBase *grid, FieldMetaData &field)
|
||||
{
|
||||
int offset=0;
|
||||
uint64_t offset=0;
|
||||
std::map<std::string,std::string> header;
|
||||
std::string line;
|
||||
|
||||
@@ -139,7 +139,7 @@ namespace Grid {
|
||||
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
||||
|
||||
GridBase *grid = Umu._grid;
|
||||
int offset = readHeader(file,Umu._grid,header);
|
||||
uint64_t offset = readHeader(file,Umu._grid,header);
|
||||
|
||||
FieldMetaData clone(header);
|
||||
|
||||
@@ -236,21 +236,25 @@ namespace Grid {
|
||||
GaugeStatistics(Umu,header);
|
||||
MachineCharacteristics(header);
|
||||
|
||||
int offset;
|
||||
|
||||
truncate(file);
|
||||
uint64_t offset;
|
||||
|
||||
// Sod it -- always write 3x3 double
|
||||
header.floating_point = std::string("IEEE64BIG");
|
||||
header.data_type = std::string("4D_SU3_GAUGE_3x3");
|
||||
GaugeSimpleUnmunger<fobj3D,sobj> munge;
|
||||
if ( grid->IsBoss() ) {
|
||||
truncate(file);
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
grid->Broadcast(0,(void *)&offset,sizeof(offset));
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
header.checksum = nersc_csum;
|
||||
if ( grid->IsBoss() ) {
|
||||
writeHeader(header,file);
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum "
|
||||
<<std::hex<<header.checksum
|
||||
@@ -278,7 +282,7 @@ namespace Grid {
|
||||
header.plaquette=0.0;
|
||||
MachineCharacteristics(header);
|
||||
|
||||
int offset;
|
||||
uint64_t offset;
|
||||
|
||||
#ifdef RNG_RANLUX
|
||||
header.floating_point = std::string("UINT64");
|
||||
@@ -293,12 +297,18 @@ namespace Grid {
|
||||
header.data_type = std::string("SITMO");
|
||||
#endif
|
||||
|
||||
if ( grid->IsBoss() ) {
|
||||
truncate(file);
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
grid->Broadcast(0,(void *)&offset,sizeof(offset));
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
header.checksum = nersc_csum;
|
||||
if ( grid->IsBoss() ) {
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage
|
||||
<<"Written NERSC RNG STATE "<<file<< " checksum "
|
||||
@@ -313,7 +323,7 @@ namespace Grid {
|
||||
|
||||
GridBase *grid = parallel._grid;
|
||||
|
||||
int offset = readHeader(file,grid,header);
|
||||
uint64_t offset = readHeader(file,grid,header);
|
||||
|
||||
FieldMetaData clone(header);
|
||||
|
@@ -49,14 +49,38 @@ inline double usecond(void) {
|
||||
|
||||
typedef std::chrono::system_clock GridClock;
|
||||
typedef std::chrono::time_point<GridClock> GridTimePoint;
|
||||
typedef std::chrono::milliseconds GridTime;
|
||||
typedef std::chrono::microseconds GridUsecs;
|
||||
|
||||
inline std::ostream& operator<< (std::ostream & stream, const std::chrono::milliseconds & time)
|
||||
typedef std::chrono::seconds GridSecs;
|
||||
typedef std::chrono::milliseconds GridMillisecs;
|
||||
typedef std::chrono::microseconds GridUsecs;
|
||||
typedef std::chrono::microseconds GridTime;
|
||||
|
||||
inline std::ostream& operator<< (std::ostream & stream, const GridSecs & time)
|
||||
{
|
||||
stream << time.count()<<" ms";
|
||||
stream << time.count()<<" s";
|
||||
return stream;
|
||||
}
|
||||
inline std::ostream& operator<< (std::ostream & stream, const GridMillisecs & now)
|
||||
{
|
||||
GridSecs second(1);
|
||||
auto secs = now/second ;
|
||||
auto subseconds = now%second ;
|
||||
auto fill = stream.fill();
|
||||
stream << secs<<"."<<std::setw(3)<<std::setfill('0')<<subseconds.count()<<" s";
|
||||
stream.fill(fill);
|
||||
return stream;
|
||||
}
|
||||
inline std::ostream& operator<< (std::ostream & stream, const GridUsecs & now)
|
||||
{
|
||||
GridSecs second(1);
|
||||
auto seconds = now/second ;
|
||||
auto subseconds = now%second ;
|
||||
auto fill = stream.fill();
|
||||
stream << seconds<<"."<<std::setw(6)<<std::setfill('0')<<subseconds.count()<<" s";
|
||||
stream.fill(fill);
|
||||
return stream;
|
||||
}
|
||||
|
||||
|
||||
class GridStopWatch {
|
||||
private:
|
||||
@@ -96,6 +120,9 @@ public:
|
||||
assert(running == false);
|
||||
return (uint64_t) accumulator.count();
|
||||
}
|
||||
bool isRunning(void){
|
||||
return running;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@@ -1,7 +1,7 @@
|
||||
/**
|
||||
* pugixml parser - version 1.6
|
||||
* pugixml parser - version 1.9
|
||||
* --------------------------------------------------------
|
||||
* Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Report bugs and download new versions at http://pugixml.org/
|
||||
*
|
||||
* This library is distributed under the MIT License. See notice at the end
|
||||
@@ -17,6 +17,9 @@
|
||||
// Uncomment this to enable wchar_t mode
|
||||
// #define PUGIXML_WCHAR_MODE
|
||||
|
||||
// Uncomment this to enable compact mode
|
||||
// #define PUGIXML_COMPACT
|
||||
|
||||
// Uncomment this to disable XPath
|
||||
// #define PUGIXML_NO_XPATH
|
||||
|
||||
@@ -46,7 +49,7 @@
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Copyright (c) 2006-2015 Arseny Kapoulkine
|
||||
* Copyright (c) 2006-2018 Arseny Kapoulkine
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
File diff suppressed because it is too large
Load Diff
@@ -1,7 +1,7 @@
|
||||
/**
|
||||
* pugixml parser - version 1.6
|
||||
* pugixml parser - version 1.9
|
||||
* --------------------------------------------------------
|
||||
* Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Report bugs and download new versions at http://pugixml.org/
|
||||
*
|
||||
* This library is distributed under the MIT License. See notice at the end
|
||||
@@ -13,7 +13,7 @@
|
||||
|
||||
#ifndef PUGIXML_VERSION
|
||||
// Define version macro; evaluates to major * 100 + minor so that it's safe to use in less-than comparisons
|
||||
# define PUGIXML_VERSION 160
|
||||
# define PUGIXML_VERSION 190
|
||||
#endif
|
||||
|
||||
// Include user configuration file (this can define various configuration macros)
|
||||
@@ -72,6 +72,44 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// If the platform is known to have move semantics support, compile move ctor/operator implementation
|
||||
#ifndef PUGIXML_HAS_MOVE
|
||||
# if __cplusplus >= 201103
|
||||
# define PUGIXML_HAS_MOVE
|
||||
# elif defined(_MSC_VER) && _MSC_VER >= 1600
|
||||
# define PUGIXML_HAS_MOVE
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// If C++ is 2011 or higher, add 'noexcept' specifiers
|
||||
#ifndef PUGIXML_NOEXCEPT
|
||||
# if __cplusplus >= 201103
|
||||
# define PUGIXML_NOEXCEPT noexcept
|
||||
# elif defined(_MSC_VER) && _MSC_VER >= 1900
|
||||
# define PUGIXML_NOEXCEPT noexcept
|
||||
# else
|
||||
# define PUGIXML_NOEXCEPT
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// Some functions can not be noexcept in compact mode
|
||||
#ifdef PUGIXML_COMPACT
|
||||
# define PUGIXML_NOEXCEPT_IF_NOT_COMPACT
|
||||
#else
|
||||
# define PUGIXML_NOEXCEPT_IF_NOT_COMPACT PUGIXML_NOEXCEPT
|
||||
#endif
|
||||
|
||||
// If C++ is 2011 or higher, add 'override' qualifiers
|
||||
#ifndef PUGIXML_OVERRIDE
|
||||
# if __cplusplus >= 201103
|
||||
# define PUGIXML_OVERRIDE override
|
||||
# elif defined(_MSC_VER) && _MSC_VER >= 1700
|
||||
# define PUGIXML_OVERRIDE override
|
||||
# else
|
||||
# define PUGIXML_OVERRIDE
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// Character interface macros
|
||||
#ifdef PUGIXML_WCHAR_MODE
|
||||
# define PUGIXML_TEXT(t) L ## t
|
||||
@@ -158,6 +196,11 @@ namespace pugi
|
||||
// is a valid document. This flag is off by default.
|
||||
const unsigned int parse_fragment = 0x1000;
|
||||
|
||||
// This flag determines if plain character data is be stored in the parent element's value. This significantly changes the structure of
|
||||
// the document; this flag is only recommended for parsing documents with many PCDATA nodes in memory-constrained environments.
|
||||
// This flag is off by default.
|
||||
const unsigned int parse_embed_pcdata = 0x2000;
|
||||
|
||||
// The default parsing mode.
|
||||
// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
|
||||
// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
|
||||
@@ -206,6 +249,9 @@ namespace pugi
|
||||
// Write every attribute on a new line with appropriate indentation. This flag is off by default.
|
||||
const unsigned int format_indent_attributes = 0x40;
|
||||
|
||||
// Don't output empty element tags, instead writing an explicit start and end tag even if there are no children. This flag is off by default.
|
||||
const unsigned int format_no_empty_element_tags = 0x80;
|
||||
|
||||
// The default set of formatting flags.
|
||||
// Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none.
|
||||
const unsigned int format_default = format_indent;
|
||||
@@ -268,7 +314,7 @@ namespace pugi
|
||||
// Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio
|
||||
xml_writer_file(void* file);
|
||||
|
||||
virtual void write(const void* data, size_t size);
|
||||
virtual void write(const void* data, size_t size) PUGIXML_OVERRIDE;
|
||||
|
||||
private:
|
||||
void* file;
|
||||
@@ -283,7 +329,7 @@ namespace pugi
|
||||
xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream);
|
||||
xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream);
|
||||
|
||||
virtual void write(const void* data, size_t size);
|
||||
virtual void write(const void* data, size_t size) PUGIXML_OVERRIDE;
|
||||
|
||||
private:
|
||||
std::basic_ostream<char, std::char_traits<char> >* narrow_stream;
|
||||
@@ -354,6 +400,8 @@ namespace pugi
|
||||
// Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
|
||||
bool set_value(int rhs);
|
||||
bool set_value(unsigned int rhs);
|
||||
bool set_value(long rhs);
|
||||
bool set_value(unsigned long rhs);
|
||||
bool set_value(double rhs);
|
||||
bool set_value(float rhs);
|
||||
bool set_value(bool rhs);
|
||||
@@ -367,6 +415,8 @@ namespace pugi
|
||||
xml_attribute& operator=(const char_t* rhs);
|
||||
xml_attribute& operator=(int rhs);
|
||||
xml_attribute& operator=(unsigned int rhs);
|
||||
xml_attribute& operator=(long rhs);
|
||||
xml_attribute& operator=(unsigned long rhs);
|
||||
xml_attribute& operator=(double rhs);
|
||||
xml_attribute& operator=(float rhs);
|
||||
xml_attribute& operator=(bool rhs);
|
||||
@@ -601,8 +651,8 @@ namespace pugi
|
||||
xpath_node_set select_nodes(const xpath_query& query) const;
|
||||
|
||||
// (deprecated: use select_node instead) Select single node by evaluating XPath query.
|
||||
xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
|
||||
xpath_node select_single_node(const xpath_query& query) const;
|
||||
PUGIXML_DEPRECATED xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
|
||||
PUGIXML_DEPRECATED xpath_node select_single_node(const xpath_query& query) const;
|
||||
|
||||
#endif
|
||||
|
||||
@@ -701,6 +751,8 @@ namespace pugi
|
||||
// Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
|
||||
bool set(int rhs);
|
||||
bool set(unsigned int rhs);
|
||||
bool set(long rhs);
|
||||
bool set(unsigned long rhs);
|
||||
bool set(double rhs);
|
||||
bool set(float rhs);
|
||||
bool set(bool rhs);
|
||||
@@ -714,6 +766,8 @@ namespace pugi
|
||||
xml_text& operator=(const char_t* rhs);
|
||||
xml_text& operator=(int rhs);
|
||||
xml_text& operator=(unsigned int rhs);
|
||||
xml_text& operator=(long rhs);
|
||||
xml_text& operator=(unsigned long rhs);
|
||||
xml_text& operator=(double rhs);
|
||||
xml_text& operator=(float rhs);
|
||||
xml_text& operator=(bool rhs);
|
||||
@@ -945,10 +999,11 @@ namespace pugi
|
||||
|
||||
// Non-copyable semantics
|
||||
xml_document(const xml_document&);
|
||||
const xml_document& operator=(const xml_document&);
|
||||
xml_document& operator=(const xml_document&);
|
||||
|
||||
void create();
|
||||
void destroy();
|
||||
void _create();
|
||||
void _destroy();
|
||||
void _move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
|
||||
|
||||
public:
|
||||
// Default constructor, makes empty document
|
||||
@@ -957,6 +1012,12 @@ namespace pugi
|
||||
// Destructor, invalidates all node/attribute handles to this document
|
||||
~xml_document();
|
||||
|
||||
#ifdef PUGIXML_HAS_MOVE
|
||||
// Move semantics support
|
||||
xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
|
||||
xml_document& operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
|
||||
#endif
|
||||
|
||||
// Removes all nodes, leaving the empty document
|
||||
void reset();
|
||||
|
||||
@@ -970,7 +1031,7 @@ namespace pugi
|
||||
#endif
|
||||
|
||||
// (deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied.
|
||||
xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
|
||||
PUGIXML_DEPRECATED xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
|
||||
|
||||
// Load document from zero-terminated string. No encoding conversions are applied.
|
||||
xml_parse_result load_string(const char_t* contents, unsigned int options = parse_default);
|
||||
@@ -1095,10 +1156,10 @@ namespace pugi
|
||||
xpath_variable_set(const xpath_variable_set& rhs);
|
||||
xpath_variable_set& operator=(const xpath_variable_set& rhs);
|
||||
|
||||
#if __cplusplus >= 201103
|
||||
#ifdef PUGIXML_HAS_MOVE
|
||||
// Move semantics support
|
||||
xpath_variable_set(xpath_variable_set&& rhs);
|
||||
xpath_variable_set& operator=(xpath_variable_set&& rhs);
|
||||
xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT;
|
||||
xpath_variable_set& operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT;
|
||||
#endif
|
||||
|
||||
// Add a new variable or get the existing one, if the types match
|
||||
@@ -1139,10 +1200,10 @@ namespace pugi
|
||||
// Destructor
|
||||
~xpath_query();
|
||||
|
||||
#if __cplusplus >= 201103
|
||||
#ifdef PUGIXML_HAS_MOVE
|
||||
// Move semantics support
|
||||
xpath_query(xpath_query&& rhs);
|
||||
xpath_query& operator=(xpath_query&& rhs);
|
||||
xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT;
|
||||
xpath_query& operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT;
|
||||
#endif
|
||||
|
||||
// Get query expression return type
|
||||
@@ -1201,7 +1262,7 @@ namespace pugi
|
||||
explicit xpath_exception(const xpath_parse_result& result);
|
||||
|
||||
// Get error message
|
||||
virtual const char* what() const throw();
|
||||
virtual const char* what() const throw() PUGIXML_OVERRIDE;
|
||||
|
||||
// Get parse result
|
||||
const xpath_parse_result& result() const;
|
||||
@@ -1280,10 +1341,10 @@ namespace pugi
|
||||
xpath_node_set(const xpath_node_set& ns);
|
||||
xpath_node_set& operator=(const xpath_node_set& ns);
|
||||
|
||||
#if __cplusplus >= 201103
|
||||
#ifdef PUGIXML_HAS_MOVE
|
||||
// Move semantics support
|
||||
xpath_node_set(xpath_node_set&& rhs);
|
||||
xpath_node_set& operator=(xpath_node_set&& rhs);
|
||||
xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT;
|
||||
xpath_node_set& operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT;
|
||||
#endif
|
||||
|
||||
// Get collection type
|
||||
@@ -1317,7 +1378,7 @@ namespace pugi
|
||||
xpath_node* _end;
|
||||
|
||||
void _assign(const_iterator begin, const_iterator end, type_t type);
|
||||
void _move(xpath_node_set& rhs);
|
||||
void _move(xpath_node_set& rhs) PUGIXML_NOEXCEPT;
|
||||
};
|
||||
#endif
|
||||
|
||||
@@ -1375,7 +1436,7 @@ namespace std
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Copyright (c) 2006-2015 Arseny Kapoulkine
|
||||
* Copyright (c) 2006-2018 Arseny Kapoulkine
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
@@ -1,6 +1,6 @@
|
||||
pugixml 1.6 - an XML processing library
|
||||
pugixml 1.9 - an XML processing library
|
||||
|
||||
Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
Report bugs and download new versions at http://pugixml.org/
|
||||
|
||||
This is the distribution of pugixml, which is a C++ XML processing library,
|
||||
@@ -28,7 +28,7 @@ The distribution contains the following folders:
|
||||
|
||||
This library is distributed under the MIT License:
|
||||
|
||||
Copyright (c) 2006-2015 Arseny Kapoulkine
|
||||
Copyright (c) 2006-2018 Arseny Kapoulkine
|
||||
|
||||
Permission is hereby granted, free of charge, to any person
|
||||
obtaining a copy of this software and associated documentation
|
@@ -90,6 +90,7 @@ namespace QCD {
|
||||
// That probably makes for GridRedBlack4dCartesian grid.
|
||||
|
||||
// s,sp,c,spc,lc
|
||||
|
||||
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
|
||||
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
|
||||
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
|
||||
@@ -101,6 +102,8 @@ namespace QCD {
|
||||
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
|
||||
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
|
||||
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
|
||||
template<typename vtype> using iSpinColourSpinColourMatrix = iScalar<iMatrix<iMatrix<iMatrix<iMatrix<vtype, Nc>, Ns>, Nc>, Ns> >;
|
||||
|
||||
|
||||
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >;
|
||||
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
|
||||
@@ -132,6 +135,24 @@ namespace QCD {
|
||||
typedef iSpinColourMatrix<vComplexF> vSpinColourMatrixF;
|
||||
typedef iSpinColourMatrix<vComplexD> vSpinColourMatrixD;
|
||||
|
||||
// SpinColourSpinColour matrix
|
||||
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
|
||||
|
||||
typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
|
||||
|
||||
// SpinColourSpinColour matrix
|
||||
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
|
||||
|
||||
typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
|
||||
|
||||
// LorentzColour
|
||||
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
|
||||
typedef iLorentzColourMatrix<ComplexF > LorentzColourMatrixF;
|
||||
@@ -229,6 +250,9 @@ namespace QCD {
|
||||
typedef Lattice<vSpinColourMatrixF> LatticeSpinColourMatrixF;
|
||||
typedef Lattice<vSpinColourMatrixD> LatticeSpinColourMatrixD;
|
||||
|
||||
typedef Lattice<vSpinColourSpinColourMatrix> LatticeSpinColourSpinColourMatrix;
|
||||
typedef Lattice<vSpinColourSpinColourMatrixF> LatticeSpinColourSpinColourMatrixF;
|
||||
typedef Lattice<vSpinColourSpinColourMatrixD> LatticeSpinColourSpinColourMatrixD;
|
||||
|
||||
typedef Lattice<vLorentzColourMatrix> LatticeLorentzColourMatrix;
|
||||
typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user