mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-14 13:57:07 +01:00
Compare commits
608 Commits
feature/cl
...
feature/np
Author | SHA1 | Date | |
---|---|---|---|
ac1d655de8 | |||
3023287fd9 | |||
b3d6805638 | |||
291bc2a1f0 | |||
2f368c33fc | |||
9592115341 | |||
24c07694bc | |||
f0229025e2 | |||
6de9a45a09 | |||
03c3d495a2 | |||
49f25e08e8 | |||
efc0c65056 | |||
936eaac8e1 | |||
fe6a372f75 | |||
148fc052bd | |||
c073341a10 | |||
78299daaac | |||
866449c804 | |||
d69a52079f | |||
9f4f8a14a3 | |||
f6593dc881 | |||
b46d31d4b6 | |||
58567fc650 | |||
7c57cac670 | |||
d0b21bf1ff | |||
a1825d1f59 | |||
5a3e83ff7b | |||
52569d98d8 | |||
b351103c29 | |||
118cca4681 | |||
44de727cd2 | |||
888ebc3cf9 | |||
6c031a1b81 | |||
02aa4bd762 | |||
9aafa8ee60 | |||
430b98b354 | |||
84189867ef | |||
4ab8cfbe2a | |||
aadd9f4468 | |||
8fbb27ce13 | |||
21bba95909 | |||
6448fe7121 | |||
2458a11d1d | |||
d0ca7c3fe6 | |||
57f899d79c | |||
e881a0c157 | |||
f411657118 | |||
7458c6174b | |||
21b269d0f9 | |||
083af92ac2 | |||
2c162577b5 | |||
b1c4e96382 | |||
a55c6f34f3 | |||
beed527ea3 | |||
eaa633cf69 | |||
c632455129 | |||
c012899ed5 | |||
8bab544c2f | |||
76fc06a5dc | |||
4af6c7e7aa | |||
f60fbcfc4d | |||
464c81706e | |||
408130b808 | |||
375edd1370 | |||
6d912f6c67 | |||
6d1d28955e | |||
920b471761 | |||
63c21767ba | |||
7b6b712565 | |||
35abd05ee9 | |||
dd36e60f6a | |||
cb6c548e21 | |||
02c4ccf621 | |||
fd24588212 | |||
b800bb3ecb | |||
f8abd0978b | |||
12c7c493bf | |||
c7c9072313 | |||
2bf3be5fae | |||
3a40e4fc69 | |||
2e69e03f6f | |||
a09f9bb528 | |||
f0e341d726 | |||
6f09df0daf | |||
26cee605b8 | |||
b3fa18c229 | |||
2940c9bcfd | |||
0bb532f72b | |||
fada2aa0f7 | |||
c193e4e675 | |||
3ee682f676 | |||
d85ec3bac2 | |||
b52d8eb1e3 | |||
ee630d2e8b | |||
2f0af79869 | |||
1b7fb79ec0 | |||
2db1a4628c | |||
6aa047d842 | |||
8779c32ae1 | |||
c527dc3358 | |||
6b42577b6b | |||
fb3596f968 | |||
f3a0158213 | |||
0250aa9347 | |||
3df6743396 | |||
fb7d021b9d | |||
5f206df775 | |||
7727e81113 | |||
c4115544a5 | |||
08c47328ba | |||
09001aedca | |||
2c67304716 | |||
dc6d8686de | |||
cc2780bea3 | |||
6e5a2b7922 | |||
f4878d3a13 | |||
89d2fac92e | |||
f2d3e41cf2 | |||
3c27bb36d4 | |||
603d59f389 | |||
07a0ef3f95 | |||
503259f9c9 | |||
5be6a51044 | |||
ac69f042b1 | |||
133d5c2e34 | |||
2a94244890 | |||
a15a2dfd29 | |||
093bb02633 | |||
99a85116f8 | |||
27cdb79063 | |||
f4cbfd63ff | |||
2b794b6aa7 | |||
d0244a059f | |||
dcdd891d7d | |||
6d2df9de79 | |||
41d4e37bae | |||
ee5c0cc9b6 | |||
0a4020eb4d | |||
b2de26589b | |||
0677adb4dd | |||
231cc95be6 | |||
639f9cab82 | |||
4eac4e575e | |||
3f0f92cda6 | |||
d2650e89bd | |||
2962123cba | |||
830168ec37 | |||
584c921ca0 | |||
81347b4d16 | |||
2cfa0b0e6b | |||
fa5dee76b1 | |||
8d1679c6b8 | |||
3791a38f7c | |||
142f7b0c86 | |||
891ad66eab | |||
60c43151c5 | |||
e036800261 | |||
62900def36 | |||
e3a309a73f | |||
ad6c1c0c4e | |||
00b92a91b5 | |||
65533741f7 | |||
dc0259fbda | |||
131a6785d4 | |||
44f4f5c8e2 | |||
2679df034f | |||
bf71162b97 | |||
299e828d83 | |||
ef5452cddf | |||
80de748737 | |||
71e1006ba8 | |||
00f31ae83f | |||
cce339deaf | |||
24128ff109 | |||
34e9d3f0ca | |||
c995788259 | |||
94c7198001 | |||
04d86fe9f3 | |||
b78074b6a0 | |||
7dfd3cdae8 | |||
cecee1ef2c | |||
355d4b58be | |||
2c54a536f3 | |||
d868a45120 | |||
9deae8c962 | |||
db86cdd7bd | |||
ec9939c1ba | |||
f74617c124 | |||
8c6a3921ed | |||
a8a15dd9d0 | |||
3ce68a751a | |||
daa0977d01 | |||
a2929f4384 | |||
7fe3974c0a | |||
f7e86f81a0 | |||
fecec803d9 | |||
8fe9a13cdd | |||
d2c42e6f42 | |||
049cc518f4 | |||
2e1c66897f | |||
adcef36189 | |||
2f121c41c9 | |||
e0ed7e300f | |||
485207901b | |||
c760f0a4c3 | |||
c84eeedec3 | |||
1ac3526f33 | |||
0de090ee74 | |||
91405de3f7 | |||
8fccda301a | |||
7a0abfac89 | |||
ae37fda699 | |||
b5fc5e2030 | |||
8db0ef9736 | |||
95d4b46446 | |||
5dfd216a34 | |||
c2e8d0aa88 | |||
0fe5aeffbb | |||
7fbc469046 | |||
bf96a4bdbf | |||
84685c9bc3 | |||
a8d4156997 | |||
c18074869b | |||
f4c6d39238 | |||
200d35b38a | |||
eb52e84d09 | |||
72abc34764 | |||
e3164d4c7b | |||
f5db386c55 | |||
294ee70a7a | |||
013ea4e8d1 | |||
7fbbb31a50 | |||
0e127b1fc7 | |||
68c028b0a6 | |||
255d4992e1 | |||
a0d399e5ce | |||
fd3b2e945a | |||
b999984501 | |||
7836cc2d74 | |||
a61e0df54b | |||
9d835afa35 | |||
5e3be47117 | |||
48de706dd5 | |||
f871fb0c6d | |||
93771f3099 | |||
8cb205725b | |||
9ad580d82f | |||
899f961d0d | |||
54d789204f | |||
25828746f3 | |||
f362c00739 | |||
25d1cadd3b | |||
c24d53bbd1 | |||
2017e4e3b4 | |||
27a4d4c951 | |||
2f92721249 | |||
3c7a4106ed | |||
3252059daf | |||
6eed167f0c | |||
4ad0df6fde | |||
661381e881 | |||
68a5079f33 | |||
8634e19f1b | |||
9ada378e38 | |||
9d9692d439 | |||
0659ae4014 | |||
bfbf2f1fa0 | |||
dd6b796a01 | |||
52a856b4a8 | |||
04190ee7f3 | |||
587bfcc0f4 | |||
2700992ef5 | |||
8c658de179 | |||
ba37d51ee9 | |||
4f4181c54a | |||
4d4ac2517b | |||
e568c24d1d | |||
b458326744 | |||
6e7d5e2243 | |||
b35169f1dd | |||
441ad7498d | |||
6f6c5c549a | |||
1584e17b54 | |||
12982a4455 | |||
172f412102 | |||
a64497265d | |||
ca639c195f | |||
edc28dcfbf | |||
c45f24a1b5 | |||
aaf37ee4d7 | |||
1dddd17e3c | |||
661f1d3e8e | |||
edcf9b9293 | |||
fe6860b4dd | |||
d6406b13e1 | |||
e369d7306d | |||
9f8d63e104 | |||
9b0240d101 | |||
b27f0e5a53 | |||
75e4483407 | |||
0734e9ddd4 | |||
809b1cdd58 | |||
1be8089604 | |||
3e0eff6468 | |||
7ecc47ac89 | |||
e9f1ac09de | |||
fa0d8feff4 | |||
49b8501fd4 | |||
d47484717e | |||
05b44aef6b | |||
03e9832efa | |||
28a375d35d | |||
3b06381745 | |||
91a0a3f820 | |||
8f44c799a6 | |||
96272f3841 | |||
5c936d88a0 | |||
1c64ee926e | |||
2cbb72a81c | |||
31d83ee046 | |||
a9e8758a01 | |||
3e125c5b61 | |||
eac6ec4b5e | |||
213f8db6a2 | |||
6358f35b7e | |||
43f5a0df50 | |||
c897878776 | |||
cc6eb51e3e | |||
507009089b | |||
2baf193031 | |||
362ba0443a | |||
276a2353df | |||
b234784c8e | |||
6ea2a8b7ca | |||
c1d0359aaa | |||
047ee4ad0b | |||
a13106da0c | |||
75113e6523 | |||
325c73d051 | |||
b25a59e95e | |||
c5b9147b53 | |||
64ac815fd9 | |||
a1be533329 | |||
7c4533797f | |||
af84fd65bb | |||
1a2613086a | |||
4f110c09a5 | |||
6764362237 | |||
2fa2b0e0b1 | |||
b61292f735 | |||
ce7720e221 | |||
853a5528dc | |||
169f405c9c | |||
c6125b01ce | |||
b0b5b34bff | |||
1c9722357d | |||
141da3ae71 | |||
94edf9cf8b | |||
c11a3ca0a7 | |||
870b1a85ae | |||
b5510427f9 | |||
26ed65c8f8 | |||
f7f043d8cf | |||
ddcaa6ad29 | |||
334da7f452 | |||
4669ecd4ba | |||
4573b34cac | |||
17f57e85d1 | |||
c8d4d184ee | |||
17f27b1ebd | |||
a16bbecb8a | |||
7c9b0dd842 | |||
6b7228b3e6 | |||
f117552334 | |||
a21a160029 | |||
1569a374a9 | |||
eddf023b8a | |||
6b8ffbe735 | |||
81050535a5 | |||
7dcf5c90e3 | |||
9ce00f26f9 | |||
85c253ed4a | |||
ccfc0a5a89 | |||
d3f857b1c9 | |||
fb62035aa0 | |||
0260bc7705 | |||
68e6a58f12 | |||
640515e3d8 | |||
f089bf5629 | |||
276f113f28 | |||
97c579f637 | |||
a13c109111 | |||
ab6afd18ac | |||
5bde64d48b | |||
2f5add4d5f | |||
c5a885dcd6 | |||
a4d8512fb8 | |||
5ec903044d | |||
8a0cf0194f | |||
1c680d4b7a | |||
c9c073eee4 | |||
f290b2e908 | |||
5f8225461b | |||
e9323460c7 | |||
20e186a1e0 | |||
6ef4af989b | |||
ccde8b817f | |||
68168bf72d | |||
e93d0feaa7 | |||
8f601d9b39 | |||
5436308e4a | |||
07fe7d0cbe | |||
60b57706c4 | |||
58c2f60b69 | |||
bfa3a7b3b0 | |||
954e38bebe | |||
b1a38bde7a | |||
2581875edc | |||
f212b0a963 | |||
62702dbcb8 | |||
41d6cab033 | |||
5a31e747c9 | |||
cbc73a3fd1 | |||
6c6d43eb4e | |||
e1dcfd3553 | |||
888838473a | |||
01568b0e62 | |||
d5ce66f6ab | |||
d86936a3de | |||
d516938707 | |||
72344d1418 | |||
7ecf6ab38b | |||
2d4d70d3ec | |||
78f8d47528 | |||
b85f987b0b | |||
f57afe2079 | |||
0fb84fa34b | |||
8462bbfe63 | |||
229977c955 | |||
e485a07133 | |||
0880747edb | |||
b801e1fcd6 | |||
70ec2faa98 | |||
2f849ee252 | |||
bb6ed44339 | |||
360cface33 | |||
80302e95a8 | |||
caf2f6b274 | |||
c49be8988b | |||
971c2379bd | |||
94b0d66e4c | |||
5e8af396fd | |||
9942723189 | |||
a7d19dbb64 | |||
90dbe03e17 | |||
8b14096990 | |||
b938202081 | |||
e79ef469ac | |||
485c5db0fe | |||
c793947209 | |||
3e9ee053a1 | |||
dda6c69d5b | |||
cd51b9af99 | |||
c399c2b44d | |||
af7de7a294 | |||
1dc86efd26 | |||
f32555dcc5 | |||
30391cb2eb | |||
e93c883470 | |||
2e88408f5c | |||
fcac5c0772 | |||
90f4000935 | |||
480708b9a0 | |||
c4baf876d4 | |||
2f4dac3531 | |||
3ec6890850 | |||
018801d973 | |||
1d83521daa | |||
fc5670c6a4 | |||
d9c435e282 | |||
614a0e8277 | |||
aaf39222c3 | |||
550142bd6a | |||
c0a929aef7 | |||
37fe944224 | |||
315a42843f | |||
83a101db83 | |||
c4274e1660 | |||
ba6db55cb0 | |||
e5ea84d531 | |||
15767a1491 | |||
4d2a32ae7a | |||
5b937e3644 | |||
e418b044f7 | |||
b8b05f143f | |||
6ec42b4b82 | |||
abb7d4d2f5 | |||
16ebbfff29 | |||
4828226095 | |||
8a049f27b8 | |||
43578a3eb4 | |||
fdbd42e542 | |||
e7e4cee4f3 | |||
ec3954ff5f | |||
0f468e2179 | |||
8e61286741 | |||
4790e99817 | |||
2dd63aa7a4 | |||
559a501140 | |||
945684c470 | |||
e30a80a234 | |||
69e4ecc1d2 | |||
5f483df16b | |||
4680a977c3 | |||
de42456171 | |||
d55212c998 | |||
c96483e3bd | |||
c6e1f64573 | |||
ae31a6a760 | |||
dd8f2a64fe | |||
724cf02d4a | |||
7b8b2731e7 | |||
237a8ec918 | |||
49a0ae73eb | |||
315f1146cd | |||
9f202782c5 | |||
594a262dcc | |||
7f8ca54285 | |||
c5b23c367e | |||
b6fe03eb26 | |||
f37ed4958b | |||
896f3a8002 | |||
5f85473d6b | |||
ac3b0ebc58 | |||
f0fcdf75b5 | |||
53bffb83d4 | |||
cd44e851f1 | |||
fb24e3a7d2 | |||
655a69259a | |||
4e0cf0cc28 | |||
507c4e9efc | |||
cdf550845f | |||
3db7a5387b | |||
f8a5194c70 | |||
cff3bae155 | |||
90dffc73c8 | |||
a1151fc734 | |||
ab3baeb38f | |||
389731d373 | |||
6e3ce7423e | |||
15f15a7cfd | |||
0e5f626226 | |||
97b9c6f03d | |||
63982819c6 | |||
6fec507bef | |||
219b3bd34f | |||
b00d2d2c39 | |||
f1b3e21830 | |||
24162c9ead | |||
935cd1e173 | |||
55e39df30f | |||
581be32ed2 | |||
6bc136b1d0 | |||
0c668bf46a | |||
840814c776 | |||
95af55128e | |||
9f2a57e334 | |||
c645d33db5 | |||
e0f1349524 | |||
79b761f923 | |||
0d4e31ca58 | |||
b07a354a33 | |||
c433939795 | |||
b6a4c31b48 | |||
98b1439ff9 | |||
564738b1ff | |||
a80e43dbcf | |||
b99622d9fb | |||
937c77ead2 | |||
95e5a2ade3 | |||
91676d1dda | |||
ac3611bb19 | |||
cc4afb978d | |||
20e92a7009 | |||
42f0afcbfa | |||
20ac13fdf3 | |||
e38612e6fa | |||
c2b2b71c5d | |||
009f48a904 | |||
5cfc0180aa | |||
914f180fa3 | |||
6cb563a40c | |||
db3837be22 | |||
2f0dd83016 | |||
3ac27e5596 | |||
bd466a55a8 | |||
c8e6f58e24 | |||
888988ad37 | |||
e4a105a30b | |||
26ebe41fef | |||
1e496fee74 | |||
9f755e0379 | |||
4512dbdf58 | |||
483fd3cfa1 | |||
85516e9c7c | |||
0c006fbfaa | |||
54c10a42cc | |||
ef0fe2bcc1 |
25
.gitignore
vendored
25
.gitignore
vendored
@ -83,6 +83,7 @@ ltmain.sh
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
.dirstamp
|
||||
|
||||
# build directory #
|
||||
###################
|
||||
@ -97,11 +98,8 @@ build.sh
|
||||
|
||||
# Eigen source #
|
||||
################
|
||||
lib/Eigen/*
|
||||
|
||||
# FFTW source #
|
||||
################
|
||||
lib/fftw/*
|
||||
Grid/Eigen
|
||||
Eigen/*
|
||||
|
||||
# libtool macros #
|
||||
##################
|
||||
@ -112,20 +110,7 @@ m4/libtool.m4
|
||||
################
|
||||
gh-pages/
|
||||
|
||||
# Buck files #
|
||||
##############
|
||||
.buck*
|
||||
buck-out
|
||||
BUCK
|
||||
make-bin-BUCK.sh
|
||||
|
||||
# generated sources #
|
||||
#####################
|
||||
lib/qcd/spin/gamma-gen/*.h
|
||||
lib/qcd/spin/gamma-gen/*.cc
|
||||
|
||||
# vs code editor files #
|
||||
########################
|
||||
.vscode/
|
||||
.vscode/settings.json
|
||||
settings.json
|
||||
Grid/qcd/spin/gamma-gen/*.h
|
||||
Grid/qcd/spin/gamma-gen/*.cc
|
||||
|
28
.travis.yml
28
.travis.yml
@ -9,6 +9,11 @@ matrix:
|
||||
- os: osx
|
||||
osx_image: xcode8.3
|
||||
compiler: clang
|
||||
env: PREC=single
|
||||
- os: osx
|
||||
osx_image: xcode8.3
|
||||
compiler: clang
|
||||
env: PREC=double
|
||||
|
||||
before_install:
|
||||
- export GRIDDIR=`pwd`
|
||||
@ -16,9 +21,11 @@ before_install:
|
||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc openssl; fi
|
||||
|
||||
install:
|
||||
- export CWD=`pwd`
|
||||
- echo $CWD
|
||||
- export CC=$CC$VERSION
|
||||
- export CXX=$CXX$VERSION
|
||||
- echo $PATH
|
||||
@ -31,17 +38,24 @@ install:
|
||||
- which $CXX
|
||||
- $CXX --version
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export EXTRACONF='--with-openssl=/usr/local/opt/openssl'; fi
|
||||
|
||||
script:
|
||||
- ./bootstrap.sh
|
||||
- mkdir build
|
||||
- cd build
|
||||
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
|
||||
- mkdir lime
|
||||
- cd lime
|
||||
- mkdir build
|
||||
- cd build
|
||||
- wget http://usqcd-software.github.io/downloads/c-lime/lime-1.3.2.tar.gz
|
||||
- tar xf lime-1.3.2.tar.gz
|
||||
- cd lime-1.3.2
|
||||
- ./configure --prefix=$CWD/build/lime/install
|
||||
- make -j4
|
||||
- make install
|
||||
- cd $CWD/build
|
||||
- ../configure --enable-precision=$PREC --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF}
|
||||
- make -j4
|
||||
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
|
||||
- echo make clean
|
||||
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
|
||||
- make -j4
|
||||
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
|
||||
- make check
|
||||
|
||||
|
@ -48,6 +48,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/serialisation/Serialisation.h>
|
||||
#include <Grid/threads/Threads.h>
|
||||
#include <Grid/util/Util.h>
|
||||
#include <Grid/util/Sha.h>
|
||||
#include <Grid/communicator/Communicator.h>
|
||||
#include <Grid/cartesian/Cartesian.h>
|
||||
#include <Grid/tensors/Tensors.h>
|
@ -1,4 +1,9 @@
|
||||
#pragma once
|
||||
// Force Eigen to use MKL if Grid has been configured with --enable-mkl
|
||||
#ifdef USE_MKL
|
||||
#define EIGEN_USE_MKL_ALL
|
||||
#endif
|
||||
|
||||
#if defined __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
@ -21,6 +21,32 @@ if BUILD_HDF5
|
||||
extra_headers+=serialisation/Hdf5Type.h
|
||||
endif
|
||||
|
||||
all: version-cache
|
||||
|
||||
version-cache:
|
||||
@if [ `git status --porcelain | grep -v '??' | wc -l` -gt 0 ]; then\
|
||||
a="uncommited changes";\
|
||||
else\
|
||||
a="clean";\
|
||||
fi;\
|
||||
echo "`git log -n 1 --format=format:"#define GITHASH \\"%H:%d $$a\\"%n" HEAD`" > vertmp;\
|
||||
if [ -e version-cache ]; then\
|
||||
d=`diff vertmp version-cache`;\
|
||||
if [ "$${d}" != "" ]; then\
|
||||
mv vertmp version-cache;\
|
||||
rm -f Version.h;\
|
||||
fi;\
|
||||
else\
|
||||
mv vertmp version-cache;\
|
||||
rm -f Version.h;\
|
||||
fi;\
|
||||
rm -f vertmp
|
||||
|
||||
Version.h:
|
||||
cp version-cache Version.h
|
||||
|
||||
.PHONY: version-cache
|
||||
|
||||
#
|
||||
# Libraries
|
||||
#
|
||||
@ -30,8 +56,8 @@ include Eigen.inc
|
||||
lib_LIBRARIES = libGrid.a
|
||||
|
||||
CCFILES += $(extra_sources)
|
||||
HFILES += $(extra_headers)
|
||||
HFILES += $(extra_headers) Config.h Version.h
|
||||
|
||||
libGrid_a_SOURCES = $(CCFILES)
|
||||
libGrid_adir = $(pkgincludedir)
|
||||
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h
|
||||
libGrid_adir = $(includedir)/Grid
|
||||
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) $(eigen_unsupp_files)
|
@ -39,6 +39,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/algorithms/approx/MultiShiftFunction.h>
|
||||
#include <Grid/algorithms/approx/Forecast.h>
|
||||
|
||||
#include <Grid/algorithms/iterative/Deflation.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradient.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateResidual.h>
|
||||
#include <Grid/algorithms/iterative/NormalEquations.h>
|
@ -51,7 +51,7 @@ namespace Grid {
|
||||
|
||||
virtual void Op (const Field &in, Field &out) = 0; // Abstract base
|
||||
virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0;
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) = 0;
|
||||
virtual void HermOp(const Field &in, Field &out)=0;
|
||||
};
|
||||
|
||||
@ -309,36 +309,59 @@ namespace Grid {
|
||||
class SchurStaggeredOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
Field tmp;
|
||||
RealD mass;
|
||||
double tMpc;
|
||||
double tIP;
|
||||
double tMeo;
|
||||
double taxpby_norm;
|
||||
uint64_t ncall;
|
||||
public:
|
||||
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){};
|
||||
void Report(void)
|
||||
{
|
||||
std::cout << GridLogMessage << " HermOpAndNorm.Mpc "<< tMpc/ncall<<" usec "<<std::endl;
|
||||
std::cout << GridLogMessage << " HermOpAndNorm.IP "<< tIP /ncall<<" usec "<<std::endl;
|
||||
std::cout << GridLogMessage << " Mpc.MeoMoe "<< tMeo/ncall<<" usec "<<std::endl;
|
||||
std::cout << GridLogMessage << " Mpc.axpby_norm "<< taxpby_norm/ncall<<" usec "<<std::endl;
|
||||
}
|
||||
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())
|
||||
{
|
||||
assert( _Mat.isTrivialEE() );
|
||||
mass = _Mat.Mass();
|
||||
tMpc=0;
|
||||
tIP =0;
|
||||
tMeo=0;
|
||||
taxpby_norm=0;
|
||||
ncall=0;
|
||||
}
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
GridLogIterative.TimingMode(1);
|
||||
std::cout << GridLogIterative << " HermOpAndNorm "<<std::endl;
|
||||
ncall++;
|
||||
tMpc-=usecond();
|
||||
n2 = Mpc(in,out);
|
||||
std::cout << GridLogIterative << " HermOpAndNorm.Mpc "<<std::endl;
|
||||
tMpc+=usecond();
|
||||
tIP-=usecond();
|
||||
ComplexD dot= innerProduct(in,out);
|
||||
std::cout << GridLogIterative << " HermOpAndNorm.innerProduct "<<std::endl;
|
||||
tIP+=usecond();
|
||||
n1 = real(dot);
|
||||
}
|
||||
virtual void HermOp(const Field &in, Field &out){
|
||||
std::cout << GridLogIterative << " HermOp "<<std::endl;
|
||||
Mpc(in,out);
|
||||
ncall++;
|
||||
tMpc-=usecond();
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
tMpc+=usecond();
|
||||
taxpby_norm-=usecond();
|
||||
axpby(out,-1.0,mass*mass,tmp,in);
|
||||
taxpby_norm+=usecond();
|
||||
}
|
||||
virtual RealD Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in._grid);
|
||||
Field tmp2(in._grid);
|
||||
|
||||
std::cout << GridLogIterative << " HermOp.Mpc "<<std::endl;
|
||||
_Mat.Mooee(in,out);
|
||||
_Mat.Mooee(out,tmp);
|
||||
std::cout << GridLogIterative << " HermOp.MooeeMooee "<<std::endl;
|
||||
|
||||
tMeo-=usecond();
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.Meooe(out,tmp2);
|
||||
std::cout << GridLogIterative << " HermOp.MeooeMeooe "<<std::endl;
|
||||
|
||||
RealD nn=axpy_norm(out,-1.0,tmp2,tmp);
|
||||
std::cout << GridLogIterative << " HermOp.axpy_norm "<<std::endl;
|
||||
_Mat.Meooe(out,tmp);
|
||||
tMeo+=usecond();
|
||||
taxpby_norm-=usecond();
|
||||
RealD nn=axpby_norm(out,-1.0,mass*mass,tmp,in);
|
||||
taxpby_norm+=usecond();
|
||||
return nn;
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
@ -54,6 +54,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
|
||||
|
||||
void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
|
||||
|
||||
|
||||
psi.checkerboard = src.checkerboard;
|
||||
conformable(psi, src);
|
||||
|
||||
@ -69,7 +70,6 @@ class ConjugateGradient : public OperatorFunction<Field> {
|
||||
|
||||
|
||||
Linop.HermOpAndNorm(psi, mmp, d, b);
|
||||
|
||||
|
||||
r = src - mmp;
|
||||
p = r;
|
||||
@ -96,38 +96,44 @@ class ConjugateGradient : public OperatorFunction<Field> {
|
||||
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
|
||||
|
||||
GridStopWatch LinalgTimer;
|
||||
GridStopWatch InnerTimer;
|
||||
GridStopWatch AxpyNormTimer;
|
||||
GridStopWatch LinearCombTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
for (k = 1; k <= MaxIterations*1000; k++) {
|
||||
c = cp;
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop.HermOpAndNorm(p, mmp, d, qq);
|
||||
Linop.HermOp(p, mmp);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
// RealD qqck = norm2(mmp);
|
||||
// ComplexD dck = innerProduct(p,mmp);
|
||||
|
||||
InnerTimer.Start();
|
||||
ComplexD dc = innerProduct(p,mmp);
|
||||
InnerTimer.Stop();
|
||||
d = dc.real();
|
||||
a = c / d;
|
||||
b_pred = a * (a * qq - d) / c;
|
||||
|
||||
AxpyNormTimer.Start();
|
||||
cp = axpy_norm(r, -a, mmp, r);
|
||||
AxpyNormTimer.Stop();
|
||||
b = cp / c;
|
||||
|
||||
// Fuse these loops ; should be really easy
|
||||
psi = a * p + psi;
|
||||
p = p * b + r;
|
||||
|
||||
LinearCombTimer.Start();
|
||||
parallel_for(int ss=0;ss<src._grid->oSites();ss++){
|
||||
vstream(psi[ss], a * p[ss] + psi[ss]);
|
||||
vstream(p [ss], b * p[ss] + r[ss]);
|
||||
}
|
||||
LinearCombTimer.Stop();
|
||||
LinalgTimer.Stop();
|
||||
|
||||
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
|
||||
<< " residual " << cp << " target " << rsq << std::endl;
|
||||
std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl;
|
||||
std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl;
|
||||
|
||||
// Stopping condition
|
||||
if (cp <= rsq) {
|
||||
@ -144,10 +150,13 @@ class ConjugateGradient : public OperatorFunction<Field> {
|
||||
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
|
||||
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogPerformance << "Time breakdown "<<std::endl;
|
||||
std::cout << GridLogPerformance << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogPerformance << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogPerformance << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogPerformance << "\tInner " << InnerTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogPerformance << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogPerformance << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
||||
|
@ -43,6 +43,7 @@ namespace Grid {
|
||||
public:
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
int verbose;
|
||||
MultiShiftFunction shifts;
|
||||
|
||||
@ -163,7 +164,16 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
||||
for(int s=0;s<nshift;s++) {
|
||||
axpby(psi[s],0.,-bs[s]*alpha[s],src,src);
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////
|
||||
// Timers
|
||||
///////////////////////////////////////
|
||||
GridStopWatch AXPYTimer;
|
||||
GridStopWatch ShiftTimer;
|
||||
GridStopWatch QRTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
// Iteration loop
|
||||
int k;
|
||||
@ -171,7 +181,9 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
||||
for (k=1;k<=MaxIterations;k++){
|
||||
|
||||
a = c /cp;
|
||||
AXPYTimer.Start();
|
||||
axpy(p,a,p,r);
|
||||
AXPYTimer.Stop();
|
||||
|
||||
// Note to self - direction ps is iterated seperately
|
||||
// for each shift. Does not appear to have any scope
|
||||
@ -180,6 +192,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
||||
// However SAME r is used. Could load "r" and update
|
||||
// ALL ps[s]. 2/3 Bandwidth saving
|
||||
// New Kernel: Load r, vector of coeffs, vector of pointers ps
|
||||
AXPYTimer.Start();
|
||||
for(int s=0;s<nshift;s++){
|
||||
if ( ! converged[s] ) {
|
||||
if (s==0){
|
||||
@ -190,22 +203,34 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
||||
}
|
||||
}
|
||||
}
|
||||
AXPYTimer.Stop();
|
||||
|
||||
cp=c;
|
||||
MatrixTimer.Start();
|
||||
//Linop.HermOpAndNorm(p,mmp,d,qq); // d is used
|
||||
// The below is faster on KNL
|
||||
Linop.HermOp(p,mmp);
|
||||
d=real(innerProduct(p,mmp));
|
||||
|
||||
Linop.HermOpAndNorm(p,mmp,d,qq);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
AXPYTimer.Start();
|
||||
axpy(mmp,mass[0],p,mmp);
|
||||
AXPYTimer.Stop();
|
||||
RealD rn = norm2(p);
|
||||
d += rn*mass[0];
|
||||
|
||||
bp=b;
|
||||
b=-cp/d;
|
||||
|
||||
AXPYTimer.Start();
|
||||
c=axpy_norm(r,b,mmp,r);
|
||||
AXPYTimer.Stop();
|
||||
|
||||
// Toggle the recurrence history
|
||||
bs[0] = b;
|
||||
iz = 1-iz;
|
||||
ShiftTimer.Start();
|
||||
for(int s=1;s<nshift;s++){
|
||||
if((!converged[s])){
|
||||
RealD z0 = z[s][1-iz];
|
||||
@ -215,6 +240,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
||||
bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
|
||||
}
|
||||
}
|
||||
ShiftTimer.Stop();
|
||||
|
||||
for(int s=0;s<nshift;s++){
|
||||
int ss = s;
|
||||
@ -257,6 +283,9 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
||||
|
||||
if ( all_converged ){
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
|
||||
std::cout<<GridLogMessage<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
|
||||
std::cout<<GridLogMessage<< "CGMultiShift: Checking solutions"<<std::endl;
|
||||
|
||||
@ -269,8 +298,19 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
||||
RealD cn = norm2(src);
|
||||
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tAXPY " << AXPYTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMarix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tShift " << ShiftTimer.Elapsed() <<std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
// ugly hack
|
||||
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
|
104
Grid/algorithms/iterative/Deflation.h
Normal file
104
Grid/algorithms/iterative/Deflation.h
Normal file
@ -0,0 +1,104 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_DEFLATION_H
|
||||
#define GRID_DEFLATION_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class Field>
|
||||
class ZeroGuesser: public LinearFunction<Field> {
|
||||
public:
|
||||
virtual void operator()(const Field &src, Field &guess) { guess = zero; };
|
||||
};
|
||||
|
||||
template<class Field>
|
||||
class SourceGuesser: public LinearFunction<Field> {
|
||||
public:
|
||||
virtual void operator()(const Field &src, Field &guess) { guess = src; };
|
||||
};
|
||||
|
||||
////////////////////////////////
|
||||
// Fine grid deflation
|
||||
////////////////////////////////
|
||||
template<class Field>
|
||||
class DeflatedGuesser: public LinearFunction<Field> {
|
||||
private:
|
||||
const std::vector<Field> &evec;
|
||||
const std::vector<RealD> &eval;
|
||||
|
||||
public:
|
||||
|
||||
DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};
|
||||
|
||||
virtual void operator()(const Field &src,Field &guess) {
|
||||
guess = zero;
|
||||
assert(evec.size()==eval.size());
|
||||
auto N = evec.size();
|
||||
for (int i=0;i<N;i++) {
|
||||
const Field& tmp = evec[i];
|
||||
axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
|
||||
}
|
||||
guess.checkerboard = src.checkerboard;
|
||||
}
|
||||
};
|
||||
|
||||
template<class FineField, class CoarseField>
|
||||
class LocalCoherenceDeflatedGuesser: public LinearFunction<FineField> {
|
||||
private:
|
||||
const std::vector<FineField> &subspace;
|
||||
const std::vector<CoarseField> &evec_coarse;
|
||||
const std::vector<RealD> &eval_coarse;
|
||||
public:
|
||||
|
||||
LocalCoherenceDeflatedGuesser(const std::vector<FineField> &_subspace,
|
||||
const std::vector<CoarseField> &_evec_coarse,
|
||||
const std::vector<RealD> &_eval_coarse)
|
||||
: subspace(_subspace),
|
||||
evec_coarse(_evec_coarse),
|
||||
eval_coarse(_eval_coarse)
|
||||
{
|
||||
}
|
||||
|
||||
void operator()(const FineField &src,FineField &guess) {
|
||||
int N = (int)evec_coarse.size();
|
||||
CoarseField src_coarse(evec_coarse[0]._grid);
|
||||
CoarseField guess_coarse(evec_coarse[0]._grid); guess_coarse = zero;
|
||||
blockProject(src_coarse,src,subspace);
|
||||
for (int i=0;i<N;i++) {
|
||||
const CoarseField & tmp = evec_coarse[i];
|
||||
axpy(guess_coarse,TensorRemove(innerProduct(tmp,src_coarse)) / eval_coarse[i],tmp,guess_coarse);
|
||||
}
|
||||
blockPromote(guess_coarse,guess,subspace);
|
||||
guess.checkerboard = src.checkerboard;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
}
|
||||
#endif
|
@ -57,8 +57,9 @@ void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, i
|
||||
|
||||
parallel_region
|
||||
{
|
||||
std::vector < vobj > B(Nm); // Thread private
|
||||
|
||||
|
||||
std::vector < vobj , commAllocator<vobj> > B(Nm); // Thread private
|
||||
|
||||
parallel_for_internal(int ss=0;ss < grid->oSites();ss++){
|
||||
for(int j=j0; j<j1; ++j) B[j]=0.;
|
||||
|
||||
@ -149,19 +150,6 @@ void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, boo
|
||||
basisReorderInPlace(_v,sort_vals,idx);
|
||||
}
|
||||
|
||||
// PAB: faster to compute the inner products first then fuse loops.
|
||||
// If performance critical can improve.
|
||||
template<class Field>
|
||||
void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
|
||||
result = zero;
|
||||
assert(_v.size()==eval.size());
|
||||
int N = (int)_v.size();
|
||||
for (int i=0;i<N;i++) {
|
||||
Field& tmp = _v[i];
|
||||
axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Implicitly restarted lanczos
|
||||
/////////////////////////////////////////////////////////////
|
||||
@ -181,6 +169,7 @@ enum IRLdiagonalisation {
|
||||
template<class Field> class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester<Field>
|
||||
{
|
||||
public:
|
||||
|
||||
LinearFunction<Field> &_HermOp;
|
||||
ImplicitlyRestartedLanczosHermOpTester(LinearFunction<Field> &HermOp) : _HermOp(HermOp) { };
|
||||
int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox)
|
||||
@ -243,6 +232,7 @@ class ImplicitlyRestartedLanczos {
|
||||
/////////////////////////
|
||||
|
||||
public:
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// PAB:
|
||||
//////////////////////////////////////////////////////////////////
|
||||
@ -490,15 +480,13 @@ until convergence
|
||||
Field B(grid); B.checkerboard = evec[0].checkerboard;
|
||||
|
||||
// power of two search pattern; not every evalue in eval2 is assessed.
|
||||
int allconv =1;
|
||||
for(int jj = 1; jj<=Nstop; jj*=2){
|
||||
int j = Nstop-jj;
|
||||
RealD e = eval2_copy[j]; // Discard the evalue
|
||||
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
|
||||
if( _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
|
||||
if ( j > Nconv ) {
|
||||
Nconv=j+1;
|
||||
jj=Nstop; // Terminate the scan
|
||||
}
|
||||
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
|
||||
allconv=0;
|
||||
}
|
||||
}
|
||||
// Do evec[0] for good measure
|
||||
@ -506,8 +494,10 @@ until convergence
|
||||
int j=0;
|
||||
RealD e = eval2_copy[0];
|
||||
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
|
||||
_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox);
|
||||
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) allconv=0;
|
||||
}
|
||||
if ( allconv ) Nconv = Nstop;
|
||||
|
||||
// test if we converged, if so, terminate
|
||||
std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl;
|
||||
// if( Nconv>=Nstop || beta_k < betastp){
|
@ -28,7 +28,10 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LOCAL_COHERENCE_IRL_H
|
||||
#define GRID_LOCAL_COHERENCE_IRL_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
|
||||
struct LanczosParams : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams,
|
||||
@ -45,6 +48,7 @@ struct LanczosParams : Serializable {
|
||||
struct LocalCoherenceLanczosParams : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
|
||||
bool, saveEvecs,
|
||||
bool, doFine,
|
||||
bool, doFineRead,
|
||||
bool, doCoarse,
|
||||
@ -70,21 +74,24 @@ public:
|
||||
typedef Lattice<Fobj> FineField;
|
||||
|
||||
LinearOperatorBase<FineField> &_Linop;
|
||||
Aggregation<Fobj,CComplex,nbasis> &_Aggregate;
|
||||
std::vector<FineField> &subspace;
|
||||
|
||||
ProjectedHermOp(LinearOperatorBase<FineField>& linop, Aggregation<Fobj,CComplex,nbasis> &aggregate) :
|
||||
_Linop(linop),
|
||||
_Aggregate(aggregate) { };
|
||||
ProjectedHermOp(LinearOperatorBase<FineField>& linop, std::vector<FineField> & _subspace) :
|
||||
_Linop(linop), subspace(_subspace)
|
||||
{
|
||||
assert(subspace.size() >0);
|
||||
};
|
||||
|
||||
void operator()(const CoarseField& in, CoarseField& out) {
|
||||
GridBase *FineGrid = subspace[0]._grid;
|
||||
int checkerboard = subspace[0].checkerboard;
|
||||
|
||||
FineField fin (FineGrid); fin.checkerboard= checkerboard;
|
||||
FineField fout(FineGrid); fout.checkerboard = checkerboard;
|
||||
|
||||
GridBase *FineGrid = _Aggregate.FineGrid;
|
||||
FineField fin(FineGrid);
|
||||
FineField fout(FineGrid);
|
||||
|
||||
_Aggregate.PromoteFromSubspace(in,fin); std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl;
|
||||
_Linop.HermOp(fin,fout); std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl;
|
||||
_Aggregate.ProjectToSubspace(out,fout); std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl;
|
||||
blockPromote(in,fin,subspace); std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl;
|
||||
_Linop.HermOp(fin,fout); std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl;
|
||||
blockProject(out,fout,subspace); std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
@ -99,24 +106,27 @@ public:
|
||||
|
||||
OperatorFunction<FineField> & _poly;
|
||||
LinearOperatorBase<FineField> &_Linop;
|
||||
Aggregation<Fobj,CComplex,nbasis> &_Aggregate;
|
||||
std::vector<FineField> &subspace;
|
||||
|
||||
ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,LinearOperatorBase<FineField>& linop,
|
||||
Aggregation<Fobj,CComplex,nbasis> &aggregate) :
|
||||
ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,
|
||||
LinearOperatorBase<FineField>& linop,
|
||||
std::vector<FineField> & _subspace) :
|
||||
_poly(poly),
|
||||
_Linop(linop),
|
||||
_Aggregate(aggregate) { };
|
||||
subspace(_subspace)
|
||||
{ };
|
||||
|
||||
void operator()(const CoarseField& in, CoarseField& out) {
|
||||
|
||||
GridBase *FineGrid = _Aggregate.FineGrid;
|
||||
|
||||
FineField fin(FineGrid) ;fin.checkerboard =_Aggregate.checkerboard;
|
||||
FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard;
|
||||
|
||||
_Aggregate.PromoteFromSubspace(in,fin); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl;
|
||||
GridBase *FineGrid = subspace[0]._grid;
|
||||
int checkerboard = subspace[0].checkerboard;
|
||||
|
||||
FineField fin (FineGrid); fin.checkerboard =checkerboard;
|
||||
FineField fout(FineGrid);fout.checkerboard =checkerboard;
|
||||
|
||||
blockPromote(in,fin,subspace); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl;
|
||||
_poly(_Linop,fin,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl;
|
||||
_Aggregate.ProjectToSubspace(out,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl;
|
||||
blockProject(out,fout,subspace); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
@ -132,19 +142,23 @@ class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanc
|
||||
LinearFunction<CoarseField> & _Poly;
|
||||
OperatorFunction<FineField> & _smoother;
|
||||
LinearOperatorBase<FineField> &_Linop;
|
||||
Aggregation<Fobj,CComplex,nbasis> &_Aggregate;
|
||||
RealD _coarse_relax_tol;
|
||||
RealD _coarse_relax_tol;
|
||||
std::vector<FineField> &_subspace;
|
||||
|
||||
ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField> &Poly,
|
||||
OperatorFunction<FineField> &smoother,
|
||||
LinearOperatorBase<FineField> &Linop,
|
||||
Aggregation<Fobj,CComplex,nbasis> &Aggregate,
|
||||
std::vector<FineField> &subspace,
|
||||
RealD coarse_relax_tol=5.0e3)
|
||||
: _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly), _coarse_relax_tol(coarse_relax_tol) { };
|
||||
: _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace),
|
||||
_coarse_relax_tol(coarse_relax_tol)
|
||||
{ };
|
||||
|
||||
int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
CoarseField v(B);
|
||||
RealD eval_poly = eval;
|
||||
|
||||
// Apply operator
|
||||
_Poly(B,v);
|
||||
|
||||
@ -168,14 +182,13 @@ class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanc
|
||||
}
|
||||
int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
GridBase *FineGrid = _Aggregate.FineGrid;
|
||||
|
||||
int checkerboard = _Aggregate.checkerboard;
|
||||
|
||||
GridBase *FineGrid = _subspace[0]._grid;
|
||||
int checkerboard = _subspace[0].checkerboard;
|
||||
FineField fB(FineGrid);fB.checkerboard =checkerboard;
|
||||
FineField fv(FineGrid);fv.checkerboard =checkerboard;
|
||||
|
||||
_Aggregate.PromoteFromSubspace(B,fv);
|
||||
blockPromote(B,fv,_subspace);
|
||||
|
||||
_smoother(_Linop,fv,fB);
|
||||
|
||||
RealD eval_poly = eval;
|
||||
@ -217,27 +230,67 @@ protected:
|
||||
int _checkerboard;
|
||||
LinearOperatorBase<FineField> & _FineOp;
|
||||
|
||||
// FIXME replace Aggregation with vector of fine; the code reuse is too small for
|
||||
// the hassle and complexity of cross coupling.
|
||||
Aggregation<Fobj,CComplex,nbasis> _Aggregate;
|
||||
std::vector<RealD> evals_fine;
|
||||
std::vector<RealD> evals_coarse;
|
||||
std::vector<CoarseField> evec_coarse;
|
||||
std::vector<RealD> &evals_fine;
|
||||
std::vector<RealD> &evals_coarse;
|
||||
std::vector<FineField> &subspace;
|
||||
std::vector<CoarseField> &evec_coarse;
|
||||
|
||||
private:
|
||||
std::vector<RealD> _evals_fine;
|
||||
std::vector<RealD> _evals_coarse;
|
||||
std::vector<FineField> _subspace;
|
||||
std::vector<CoarseField> _evec_coarse;
|
||||
|
||||
public:
|
||||
|
||||
LocalCoherenceLanczos(GridBase *FineGrid,
|
||||
GridBase *CoarseGrid,
|
||||
LinearOperatorBase<FineField> &FineOp,
|
||||
int checkerboard) :
|
||||
GridBase *CoarseGrid,
|
||||
LinearOperatorBase<FineField> &FineOp,
|
||||
int checkerboard) :
|
||||
_CoarseGrid(CoarseGrid),
|
||||
_FineGrid(FineGrid),
|
||||
_Aggregate(CoarseGrid,FineGrid,checkerboard),
|
||||
_FineOp(FineOp),
|
||||
_checkerboard(checkerboard)
|
||||
_checkerboard(checkerboard),
|
||||
evals_fine (_evals_fine),
|
||||
evals_coarse(_evals_coarse),
|
||||
subspace (_subspace),
|
||||
evec_coarse(_evec_coarse)
|
||||
{
|
||||
evals_fine.resize(0);
|
||||
evals_coarse.resize(0);
|
||||
};
|
||||
void Orthogonalise(void ) { _Aggregate.Orthogonalise(); }
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Alternate constructore, external storage for use by Hadrons module
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
LocalCoherenceLanczos(GridBase *FineGrid,
|
||||
GridBase *CoarseGrid,
|
||||
LinearOperatorBase<FineField> &FineOp,
|
||||
int checkerboard,
|
||||
std::vector<FineField> &ext_subspace,
|
||||
std::vector<CoarseField> &ext_coarse,
|
||||
std::vector<RealD> &ext_eval_fine,
|
||||
std::vector<RealD> &ext_eval_coarse
|
||||
) :
|
||||
_CoarseGrid(CoarseGrid),
|
||||
_FineGrid(FineGrid),
|
||||
_FineOp(FineOp),
|
||||
_checkerboard(checkerboard),
|
||||
evals_fine (ext_eval_fine),
|
||||
evals_coarse(ext_eval_coarse),
|
||||
subspace (ext_subspace),
|
||||
evec_coarse (ext_coarse)
|
||||
{
|
||||
evals_fine.resize(0);
|
||||
evals_coarse.resize(0);
|
||||
};
|
||||
|
||||
void Orthogonalise(void ) {
|
||||
CoarseScalar InnerProd(_CoarseGrid);
|
||||
std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
|
||||
blockOrthogonalise(InnerProd,subspace);
|
||||
std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
|
||||
blockOrthogonalise(InnerProd,subspace);
|
||||
};
|
||||
|
||||
template<typename T> static RealD normalise(T& v)
|
||||
{
|
||||
@ -246,43 +299,44 @@ public:
|
||||
v = v * (1.0/nn);
|
||||
return nn;
|
||||
}
|
||||
|
||||
/*
|
||||
void fakeFine(void)
|
||||
{
|
||||
int Nk = nbasis;
|
||||
_Aggregate.subspace.resize(Nk,_FineGrid);
|
||||
_Aggregate.subspace[0]=1.0;
|
||||
_Aggregate.subspace[0].checkerboard=_checkerboard;
|
||||
normalise(_Aggregate.subspace[0]);
|
||||
subspace.resize(Nk,_FineGrid);
|
||||
subspace[0]=1.0;
|
||||
subspace[0].checkerboard=_checkerboard;
|
||||
normalise(subspace[0]);
|
||||
PlainHermOp<FineField> Op(_FineOp);
|
||||
for(int k=1;k<Nk;k++){
|
||||
_Aggregate.subspace[k].checkerboard=_checkerboard;
|
||||
Op(_Aggregate.subspace[k-1],_Aggregate.subspace[k]);
|
||||
normalise(_Aggregate.subspace[k]);
|
||||
subspace[k].checkerboard=_checkerboard;
|
||||
Op(subspace[k-1],subspace[k]);
|
||||
normalise(subspace[k]);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
void testFine(RealD resid)
|
||||
{
|
||||
assert(evals_fine.size() == nbasis);
|
||||
assert(_Aggregate.subspace.size() == nbasis);
|
||||
assert(subspace.size() == nbasis);
|
||||
PlainHermOp<FineField> Op(_FineOp);
|
||||
ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op);
|
||||
for(int k=0;k<nbasis;k++){
|
||||
assert(SimpleTester.ReconstructEval(k,resid,_Aggregate.subspace[k],evals_fine[k],1.0)==1);
|
||||
assert(SimpleTester.ReconstructEval(k,resid,subspace[k],evals_fine[k],1.0)==1);
|
||||
}
|
||||
}
|
||||
|
||||
void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax)
|
||||
{
|
||||
assert(evals_fine.size() == nbasis);
|
||||
assert(_Aggregate.subspace.size() == nbasis);
|
||||
assert(subspace.size() == nbasis);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
Chebyshev<FineField> ChebySmooth(cheby_smooth);
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,_Aggregate);
|
||||
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax);
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,subspace);
|
||||
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
|
||||
|
||||
for(int k=0;k<evec_coarse.size();k++){
|
||||
if ( k < nbasis ) {
|
||||
@ -302,34 +356,34 @@ public:
|
||||
PlainHermOp<FineField> Op(_FineOp);
|
||||
|
||||
evals_fine.resize(Nm);
|
||||
_Aggregate.subspace.resize(Nm,_FineGrid);
|
||||
subspace.resize(Nm,_FineGrid);
|
||||
|
||||
ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
|
||||
|
||||
FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard;
|
||||
|
||||
int Nconv;
|
||||
IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false);
|
||||
IRL.calc(evals_fine,subspace,src,Nconv,false);
|
||||
|
||||
// Shrink down to number saved
|
||||
assert(Nstop>=nbasis);
|
||||
assert(Nconv>=nbasis);
|
||||
evals_fine.resize(nbasis);
|
||||
_Aggregate.subspace.resize(nbasis,_FineGrid);
|
||||
subspace.resize(nbasis,_FineGrid);
|
||||
}
|
||||
void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax,
|
||||
int Nstop, int Nk, int Nm,RealD resid,
|
||||
RealD MaxIt, RealD betastp, int MinRes)
|
||||
{
|
||||
Chebyshev<FineField> Cheby(cheby_op);
|
||||
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,_Aggregate);
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,_Aggregate);
|
||||
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,subspace);
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Chebyshev<FineField> ChebySmooth(cheby_smooth);
|
||||
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax);
|
||||
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
|
||||
|
||||
evals_coarse.resize(Nm);
|
||||
evec_coarse.resize(Nm,_CoarseGrid);
|
@ -95,19 +95,34 @@ namespace Grid {
|
||||
private:
|
||||
OperatorFunction<Field> & _HermitianRBSolver;
|
||||
int CBfactorise;
|
||||
bool subGuess;
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations Schur trick
|
||||
/////////////////////////////////////////////////////
|
||||
SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver) :
|
||||
SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
|
||||
_HermitianRBSolver(HermitianRBSolver)
|
||||
{
|
||||
CBfactorise=0;
|
||||
subtractGuess(initSubGuess);
|
||||
};
|
||||
void subtractGuess(const bool initSubGuess)
|
||||
{
|
||||
subGuess = initSubGuess;
|
||||
}
|
||||
bool isSubtractGuess(void)
|
||||
{
|
||||
return subGuess;
|
||||
}
|
||||
|
||||
template<class Matrix>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
ZeroGuesser<Field> guess;
|
||||
(*this)(_Matrix,in,out,guess);
|
||||
}
|
||||
template<class Matrix, class Guesser>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out, Guesser &guess){
|
||||
|
||||
// FIXME CGdiagonalMee not implemented virtual function
|
||||
// FIXME use CBfactorise to control schur decomp
|
||||
@ -129,7 +144,6 @@ namespace Grid {
|
||||
pickCheckerboard(Odd ,src_o,in);
|
||||
pickCheckerboard(Even,sol_e,out);
|
||||
pickCheckerboard(Odd ,sol_o,out);
|
||||
|
||||
std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve checkerboards picked" <<std::endl;
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
@ -146,8 +160,12 @@ namespace Grid {
|
||||
// Call the red-black solver
|
||||
//////////////////////////////////////////////////////////////
|
||||
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl;
|
||||
guess(src_o, sol_o);
|
||||
Mtmp = sol_o;
|
||||
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
|
||||
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver called the Mpc solver" <<std::endl;
|
||||
// Fionn A2A boolean behavioural control
|
||||
if (subGuess) sol_o = sol_o-Mtmp;
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
@ -162,11 +180,15 @@ namespace Grid {
|
||||
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver inserted solution" <<std::endl;
|
||||
|
||||
// Verify the unprec residual
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
|
||||
if ( ! subGuess ) {
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
|
||||
} else {
|
||||
std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
|
||||
@ -179,17 +201,32 @@ namespace Grid {
|
||||
private:
|
||||
OperatorFunction<Field> & _HermitianRBSolver;
|
||||
int CBfactorise;
|
||||
bool subGuess;
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations Schur trick
|
||||
/////////////////////////////////////////////////////
|
||||
SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver,int cb=0) : _HermitianRBSolver(HermitianRBSolver)
|
||||
SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver,int cb=0, const bool initSubGuess = false) : _HermitianRBSolver(HermitianRBSolver)
|
||||
{
|
||||
CBfactorise=cb;
|
||||
subtractGuess(initSubGuess);
|
||||
};
|
||||
void subtractGuess(const bool initSubGuess)
|
||||
{
|
||||
subGuess = initSubGuess;
|
||||
}
|
||||
bool isSubtractGuess(void)
|
||||
{
|
||||
return subGuess;
|
||||
}
|
||||
template<class Matrix>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
ZeroGuesser<Field> guess;
|
||||
(*this)(_Matrix,in,out,guess);
|
||||
}
|
||||
template<class Matrix, class Guesser>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
|
||||
|
||||
// FIXME CGdiagonalMee not implemented virtual function
|
||||
// FIXME use CBfactorise to control schur decomp
|
||||
@ -225,7 +262,11 @@ namespace Grid {
|
||||
// Call the red-black solver
|
||||
//////////////////////////////////////////////////////////////
|
||||
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
|
||||
guess(src_o,sol_o);
|
||||
Mtmp = sol_o;
|
||||
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
|
||||
// Fionn A2A boolean behavioural control
|
||||
if (subGuess) sol_o = sol_o-Mtmp;
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
@ -238,12 +279,16 @@ namespace Grid {
|
||||
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
|
||||
|
||||
// Verify the unprec residual
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
if ( ! subGuess ) {
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout<<GridLogMessage << "SchurRedBlackDiagMooee solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
|
||||
std::cout<<GridLogMessage << "SchurRedBlackDiagMooee solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
|
||||
} else {
|
||||
std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -256,19 +301,34 @@ namespace Grid {
|
||||
private:
|
||||
OperatorFunction<Field> & _HermitianRBSolver;
|
||||
int CBfactorise;
|
||||
bool subGuess;
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations Schur trick
|
||||
/////////////////////////////////////////////////////
|
||||
SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver) :
|
||||
SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
|
||||
_HermitianRBSolver(HermitianRBSolver)
|
||||
{
|
||||
CBfactorise=0;
|
||||
CBfactorise = 0;
|
||||
subtractGuess(initSubGuess);
|
||||
};
|
||||
void subtractGuess(const bool initSubGuess)
|
||||
{
|
||||
subGuess = initSubGuess;
|
||||
}
|
||||
bool isSubtractGuess(void)
|
||||
{
|
||||
return subGuess;
|
||||
}
|
||||
|
||||
template<class Matrix>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
ZeroGuesser<Field> guess;
|
||||
(*this)(_Matrix,in,out,guess);
|
||||
}
|
||||
template<class Matrix,class Guesser>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
|
||||
|
||||
// FIXME CGdiagonalMee not implemented virtual function
|
||||
// FIXME use CBfactorise to control schur decomp
|
||||
@ -305,8 +365,12 @@ namespace Grid {
|
||||
//////////////////////////////////////////////////////////////
|
||||
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
|
||||
// _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
|
||||
guess(src_o,tmp);
|
||||
Mtmp = tmp;
|
||||
_HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
|
||||
_Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd);
|
||||
// Fionn A2A boolean behavioural control
|
||||
if (subGuess) tmp = tmp-Mtmp;
|
||||
_Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd);
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
@ -319,12 +383,16 @@ namespace Grid {
|
||||
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
|
||||
|
||||
// Verify the unprec residual
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
if ( ! subGuess ) {
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
|
||||
std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
|
||||
} else {
|
||||
std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -335,19 +403,34 @@ namespace Grid {
|
||||
private:
|
||||
LinearFunction<Field> & _HermitianRBSolver;
|
||||
int CBfactorise;
|
||||
bool subGuess;
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations Schur trick
|
||||
/////////////////////////////////////////////////////
|
||||
SchurRedBlackDiagTwoMixed(LinearFunction<Field> &HermitianRBSolver) :
|
||||
SchurRedBlackDiagTwoMixed(LinearFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
|
||||
_HermitianRBSolver(HermitianRBSolver)
|
||||
{
|
||||
CBfactorise=0;
|
||||
subtractGuess(initSubGuess);
|
||||
};
|
||||
void subtractGuess(const bool initSubGuess)
|
||||
{
|
||||
subGuess = initSubGuess;
|
||||
}
|
||||
bool isSubtractGuess(void)
|
||||
{
|
||||
return subGuess;
|
||||
}
|
||||
|
||||
template<class Matrix>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
ZeroGuesser<Field> guess;
|
||||
(*this)(_Matrix,in,out,guess);
|
||||
}
|
||||
template<class Matrix, class Guesser>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
|
||||
|
||||
// FIXME CGdiagonalMee not implemented virtual function
|
||||
// FIXME use CBfactorise to control schur decomp
|
||||
@ -385,7 +468,11 @@ namespace Grid {
|
||||
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
|
||||
// _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
|
||||
// _HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
|
||||
_HermitianRBSolver(src_o,tmp); assert(tmp.checkerboard==Odd);
|
||||
guess(src_o,tmp);
|
||||
Mtmp = tmp;
|
||||
_HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
|
||||
// Fionn A2A boolean behavioural control
|
||||
if (subGuess) tmp = tmp-Mtmp;
|
||||
_Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd);
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
@ -399,12 +486,16 @@ namespace Grid {
|
||||
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
|
||||
|
||||
// Verify the unprec residual
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
if ( ! subGuess ) {
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
|
||||
std::cout << GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid " << std::sqrt(nr / ns) << " nr " << nr << " ns " << ns << std::endl;
|
||||
} else {
|
||||
std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -277,7 +277,9 @@ public:
|
||||
uint8_t *cp = (uint8_t *)ptr;
|
||||
if ( ptr ) {
|
||||
// One touch per 4k page, static OMP loop to catch same loop order
|
||||
#ifdef GRID_OMP
|
||||
#pragma omp parallel for schedule(static)
|
||||
#endif
|
||||
for(size_type n=0;n<bytes;n+=4096){
|
||||
cp[n]=0;
|
||||
}
|
@ -44,11 +44,15 @@ void CartesianCommunicator::Init(int *argc, char ***argv)
|
||||
MPI_Initialized(&flag); // needed to coexist with other libs apparently
|
||||
if ( !flag ) {
|
||||
MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
|
||||
assert (provided == MPI_THREAD_MULTIPLE);
|
||||
//If only 1 comms thread we require any threading mode other than SINGLE, but for multiple comms threads we need MULTIPLE
|
||||
if( (nCommThreads == 1 && provided == MPI_THREAD_SINGLE) ||
|
||||
(nCommThreads > 1 && provided != MPI_THREAD_MULTIPLE) )
|
||||
assert(0);
|
||||
}
|
||||
|
||||
Grid_quiesce_nodes();
|
||||
|
||||
// Never clean up as done once.
|
||||
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
|
||||
|
||||
GlobalSharedMemory::Init(communicator_world);
|
||||
@ -85,9 +89,17 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &c
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
{
|
||||
MPI_Comm optimal_comm;
|
||||
GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm); // Remap using the shared memory optimising routine
|
||||
////////////////////////////////////////////////////
|
||||
// Remap using the shared memory optimising routine
|
||||
// The remap creates a comm which must be freed
|
||||
////////////////////////////////////////////////////
|
||||
GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm);
|
||||
InitFromMPICommunicator(processors,optimal_comm);
|
||||
SetCommunicator(optimal_comm);
|
||||
///////////////////////////////////////////////////
|
||||
// Free the temp communicator
|
||||
///////////////////////////////////////////////////
|
||||
MPI_Comm_free(&optimal_comm);
|
||||
}
|
||||
|
||||
//////////////////////////////////
|
||||
@ -183,8 +195,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
|
||||
|
||||
} else {
|
||||
srank = 0;
|
||||
comm_split = parent.communicator;
|
||||
// std::cout << " Inherited communicator " <<comm_split <<std::endl;
|
||||
int ierr = MPI_Comm_dup (parent.communicator,&comm_split);
|
||||
assert(ierr==0);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -196,6 +208,11 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
|
||||
// Take the right SHM buffers
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
SetCommunicator(comm_split);
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Free the temp communicator
|
||||
///////////////////////////////////////////////
|
||||
MPI_Comm_free(&comm_split);
|
||||
|
||||
if(0){
|
||||
std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
|
||||
@ -210,6 +227,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
|
||||
|
||||
void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
|
||||
{
|
||||
////////////////////////////////////////////////////
|
||||
// Creates communicator, and the communicator_halo
|
||||
////////////////////////////////////////////////////
|
||||
_ndimension = processors.size();
|
||||
_processor_coor.resize(_ndimension);
|
||||
|
@ -133,6 +133,7 @@ class SharedMemory
|
||||
|
||||
public:
|
||||
SharedMemory() {};
|
||||
~SharedMemory();
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// set the buffers & sizes
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
@ -27,6 +27,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/GridCore.h>
|
||||
#include <pwd.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
@ -113,19 +114,150 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
|
||||
assert(WorldNode!=-1);
|
||||
_ShmSetup=1;
|
||||
}
|
||||
|
||||
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
|
||||
// Gray encode support
|
||||
int BinaryToGray (int binary) {
|
||||
int gray = (binary>>1)^binary;
|
||||
return gray;
|
||||
}
|
||||
int Log2Size(int TwoToPower,int MAXLOG2)
|
||||
{
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Assert power of two shm_size.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int log2size = -1;
|
||||
for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){
|
||||
if ( (0x1<<i) == WorldShmSize ) {
|
||||
for(int i=0;i<=MAXLOG2;i++){
|
||||
if ( (0x1<<i) == TwoToPower ) {
|
||||
log2size = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return log2size;
|
||||
}
|
||||
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
|
||||
{
|
||||
#ifdef HYPERCUBE
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Assert power of two shm_size.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
|
||||
assert(log2size != -1);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Identify the hypercube coordinate of this node using hostname
|
||||
////////////////////////////////////////////////////////////////
|
||||
// n runs 0...7 9...16 18...25 27...34 (8*4) 5 bits
|
||||
// i runs 0..7 3 bits
|
||||
// r runs 0..3 2 bits
|
||||
// 2^10 = 1024 nodes
|
||||
const int maxhdim = 10;
|
||||
std::vector<int> HyperCubeCoords(maxhdim,0);
|
||||
std::vector<int> RootHyperCubeCoords(maxhdim,0);
|
||||
int R;
|
||||
int I;
|
||||
int N;
|
||||
const int namelen = _POSIX_HOST_NAME_MAX;
|
||||
char name[namelen];
|
||||
|
||||
// Parse ICE-XA hostname to get hypercube location
|
||||
gethostname(name,namelen);
|
||||
int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
|
||||
assert(nscan==3);
|
||||
|
||||
int nlo = N%9;
|
||||
int nhi = N/9;
|
||||
uint32_t hypercoor = (R<<8)|(I<<5)|(nhi<<3)|nlo ;
|
||||
uint32_t rootcoor = hypercoor;
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// Print debug info
|
||||
//////////////////////////////////////////////////////////////////
|
||||
for(int d=0;d<maxhdim;d++){
|
||||
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
|
||||
}
|
||||
|
||||
std::string hname(name);
|
||||
std::cout << "hostname "<<hname<<std::endl;
|
||||
std::cout << "R " << R << " I " << I << " N "<< N
|
||||
<< " hypercoor 0x"<<std::hex<<hypercoor<<std::dec<<std::endl;
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// broadcast node 0's base coordinate for this partition.
|
||||
//////////////////////////////////////////////////////////////////
|
||||
MPI_Bcast(&rootcoor, sizeof(rootcoor), MPI_BYTE, 0, WorldComm);
|
||||
hypercoor=hypercoor-rootcoor;
|
||||
assert(hypercoor<WorldSize);
|
||||
assert(hypercoor>=0);
|
||||
|
||||
//////////////////////////////////////
|
||||
// Printing
|
||||
//////////////////////////////////////
|
||||
for(int d=0;d<maxhdim;d++){
|
||||
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Identify subblock of ranks on node spreading across dims
|
||||
// in a maximally symmetrical way
|
||||
////////////////////////////////////////////////////////////////
|
||||
int ndimension = processors.size();
|
||||
std::vector<int> processor_coor(ndimension);
|
||||
std::vector<int> WorldDims = processors; std::vector<int> ShmDims (ndimension,1); std::vector<int> NodeDims (ndimension);
|
||||
std::vector<int> ShmCoor (ndimension); std::vector<int> NodeCoor (ndimension); std::vector<int> WorldCoor(ndimension);
|
||||
std::vector<int> HyperCoor(ndimension);
|
||||
int dim = 0;
|
||||
for(int l2=0;l2<log2size;l2++){
|
||||
while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
|
||||
ShmDims[dim]*=2;
|
||||
dim=(dim+1)%ndimension;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Establish torus of processes and nodes with sub-blockings
|
||||
////////////////////////////////////////////////////////////////
|
||||
for(int d=0;d<ndimension;d++){
|
||||
NodeDims[d] = WorldDims[d]/ShmDims[d];
|
||||
}
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Map Hcube according to physical lattice
|
||||
// must partition. Loop over dims and find out who would join.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int hcoor = hypercoor;
|
||||
for(int d=0;d<ndimension;d++){
|
||||
int bits = Log2Size(NodeDims[d],MAXLOG2RANKSPERNODE);
|
||||
int msk = (0x1<<bits)-1;
|
||||
HyperCoor[d]=hcoor & msk;
|
||||
HyperCoor[d]=BinaryToGray(HyperCoor[d]); // Space filling curve magic
|
||||
hcoor = hcoor >> bits;
|
||||
}
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Check processor counts match
|
||||
////////////////////////////////////////////////////////////////
|
||||
int Nprocessors=1;
|
||||
for(int i=0;i<ndimension;i++){
|
||||
Nprocessors*=processors[i];
|
||||
}
|
||||
assert(WorldSize==Nprocessors);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Establish mapping between lexico physics coord and WorldRank
|
||||
////////////////////////////////////////////////////////////////
|
||||
int rank;
|
||||
|
||||
Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode ,NodeDims);
|
||||
|
||||
for(int d=0;d<ndimension;d++) NodeCoor[d]=HyperCoor[d];
|
||||
|
||||
Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
|
||||
for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
|
||||
Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Build the new communicator
|
||||
/////////////////////////////////////////////////////////////////
|
||||
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
|
||||
assert(ierr==0);
|
||||
#else
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Assert power of two shm_size.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
|
||||
assert(log2size != -1);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
@ -174,14 +306,77 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
|
||||
/////////////////////////////////////////////////////////////////
|
||||
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
|
||||
assert(ierr==0);
|
||||
#endif
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SHMGET
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef GRID_MPI3_SHMGET
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// allocate the shared windows for our group
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
MPI_Barrier(WorldShmComm);
|
||||
WorldShmCommBufs.resize(WorldShmSize);
|
||||
std::vector<int> shmids(WorldShmSize);
|
||||
|
||||
if ( WorldShmRank == 0 ) {
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
size_t size = bytes;
|
||||
key_t key = IPC_PRIVATE;
|
||||
int flags = IPC_CREAT | SHM_R | SHM_W;
|
||||
#ifdef SHM_HUGETLB
|
||||
if (Hugepages) flags|=SHM_HUGETLB;
|
||||
#endif
|
||||
if ((shmids[r]= shmget(key,size, flags)) ==-1) {
|
||||
int errsv = errno;
|
||||
printf("Errno %d\n",errsv);
|
||||
printf("key %d\n",key);
|
||||
printf("size %lld\n",size);
|
||||
printf("flags %d\n",flags);
|
||||
perror("shmget");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
MPI_Barrier(WorldShmComm);
|
||||
MPI_Bcast(&shmids[0],WorldShmSize*sizeof(int),MPI_BYTE,0,WorldShmComm);
|
||||
MPI_Barrier(WorldShmComm);
|
||||
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
WorldShmCommBufs[r] = (uint64_t *)shmat(shmids[r], NULL,0);
|
||||
if (WorldShmCommBufs[r] == (uint64_t *)-1) {
|
||||
perror("Shared memory attach failure");
|
||||
shmctl(shmids[r], IPC_RMID, NULL);
|
||||
exit(2);
|
||||
}
|
||||
}
|
||||
MPI_Barrier(WorldShmComm);
|
||||
///////////////////////////////////
|
||||
// Mark for clean up
|
||||
///////////////////////////////////
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
shmctl(shmids[r], IPC_RMID,(struct shmid_ds *)NULL);
|
||||
}
|
||||
MPI_Barrier(WorldShmComm);
|
||||
|
||||
_ShmAlloc=1;
|
||||
_ShmAllocBytes = bytes;
|
||||
}
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbfs mapping intended
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef GRID_MPI3_SHMMMAP
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -191,7 +386,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
WorldShmCommBufs.resize(WorldShmSize);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbf and others map filesystems as mappable huge pages
|
||||
// Hugetlbfs and others map filesystems as mappable huge pages
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
char shm_name [NAME_MAX];
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
@ -218,6 +413,49 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
assert(((uint64_t)ptr&0x3F)==0);
|
||||
close(fd);
|
||||
WorldShmCommBufs[r] =ptr;
|
||||
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
|
||||
}
|
||||
_ShmAlloc=1;
|
||||
_ShmAllocBytes = bytes;
|
||||
};
|
||||
#endif // MMAP
|
||||
|
||||
#ifdef GRID_MPI3_SHM_NONE
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// allocate the shared windows for our group
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
MPI_Barrier(WorldShmComm);
|
||||
WorldShmCommBufs.resize(WorldShmSize);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbf and others map filesystems as mappable huge pages
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
char shm_name [NAME_MAX];
|
||||
assert(WorldShmSize == 1);
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
|
||||
int fd=-1;
|
||||
int mmap_flag = MAP_SHARED |MAP_ANONYMOUS ;
|
||||
#ifdef MAP_POPULATE
|
||||
mmap_flag|=MAP_POPULATE;
|
||||
#endif
|
||||
#ifdef MAP_HUGETLB
|
||||
if ( flags ) mmap_flag |= MAP_HUGETLB;
|
||||
#endif
|
||||
void *ptr = (void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);
|
||||
if ( ptr == (void *)MAP_FAILED ) {
|
||||
printf("mmap %s failed\n",shm_name);
|
||||
perror("failed mmap"); assert(0);
|
||||
}
|
||||
assert(((uint64_t)ptr&0x3F)==0);
|
||||
close(fd);
|
||||
WorldShmCommBufs[r] =ptr;
|
||||
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
|
||||
}
|
||||
_ShmAlloc=1;
|
||||
_ShmAllocBytes = bytes;
|
||||
@ -232,6 +470,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
MPI_Barrier(WorldShmComm);
|
||||
@ -243,7 +482,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
|
||||
size_t size = bytes;
|
||||
|
||||
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldNode,r);
|
||||
struct passwd *pw = getpwuid (getuid());
|
||||
sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
|
||||
|
||||
shm_unlink(shm_name);
|
||||
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
|
||||
@ -259,7 +499,11 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
#endif
|
||||
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
|
||||
|
||||
if ( ptr == (void * )MAP_FAILED ) { perror("failed mmap"); assert(0); }
|
||||
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl;
|
||||
if ( ptr == (void * )MAP_FAILED ) {
|
||||
perror("failed mmap");
|
||||
assert(0);
|
||||
}
|
||||
assert(((uint64_t)ptr&0x3F)==0);
|
||||
|
||||
WorldShmCommBufs[r] =ptr;
|
||||
@ -274,7 +518,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
|
||||
size_t size = bytes ;
|
||||
|
||||
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldNode,r);
|
||||
struct passwd *pw = getpwuid (getuid());
|
||||
sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
|
||||
|
||||
int fd=shm_open(shm_name,O_RDWR,0666);
|
||||
if ( fd<0 ) { perror("failed shm_open"); assert(0); }
|
||||
@ -292,6 +537,9 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Global shared functionality finished
|
||||
// Now move to per communicator functionality
|
||||
@ -318,11 +566,12 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
|
||||
heap_size = GlobalSharedMemory::ShmAllocBytes();
|
||||
for(int r=0;r<ShmSize;r++){
|
||||
|
||||
uint32_t sr = (r==ShmRank) ? GlobalSharedMemory::WorldRank : 0 ;
|
||||
uint32_t wsr = (r==ShmRank) ? GlobalSharedMemory::WorldShmRank : 0 ;
|
||||
|
||||
MPI_Allreduce(MPI_IN_PLACE,&sr,1,MPI_UINT32_T,MPI_SUM,comm);
|
||||
MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,ShmComm);
|
||||
|
||||
ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[sr];
|
||||
ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[wsr];
|
||||
// std::cout << "SetCommunicator ShmCommBufs ["<< r<< "] = "<< ShmCommBufs[r]<< " wsr = "<<wsr<<std::endl;
|
||||
}
|
||||
ShmBufferFreeAll();
|
||||
|
||||
@ -391,5 +640,12 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
|
||||
return (void *) remote;
|
||||
}
|
||||
}
|
||||
SharedMemory::~SharedMemory()
|
||||
{
|
||||
int MPI_is_finalised; MPI_Finalized(&MPI_is_finalised);
|
||||
if ( !MPI_is_finalised ) {
|
||||
MPI_Comm_free(&ShmComm);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -122,5 +122,7 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
SharedMemory::~SharedMemory()
|
||||
{};
|
||||
|
||||
}
|
@ -45,31 +45,33 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimen
|
||||
int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
int e1=rhs._grid->_slice_nblock[dimension];
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
int ent = 0;
|
||||
|
||||
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
|
||||
int stride=rhs._grid->_slice_stride[dimension];
|
||||
if ( cbmask == 0x3 ) {
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*stride;
|
||||
int bo = n*e2;
|
||||
buffer[off+bo+b]=rhs._odata[so+o+b];
|
||||
table[ent++] = std::pair<int,int>(off+bo+b,so+o+b);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int bo=0;
|
||||
std::vector<std::pair<int,int> > table;
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*stride;
|
||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
|
||||
if ( ocb &cbmask ) {
|
||||
table.push_back(std::pair<int,int> (bo++,o+b));
|
||||
table[ent++]=std::pair<int,int> (off+bo++,so+o+b);
|
||||
}
|
||||
}
|
||||
}
|
||||
parallel_for(int i=0;i<table.size();i++){
|
||||
buffer[off+table[i].first]=rhs._odata[so+table[i].second];
|
||||
}
|
||||
}
|
||||
parallel_for(int i=0;i<ent;i++){
|
||||
buffer[table[i].first]=rhs._odata[table[i].second];
|
||||
}
|
||||
}
|
||||
|
||||
@ -140,31 +142,35 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
|
||||
int e1=rhs._grid->_slice_nblock[dimension];
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
int stride=rhs._grid->_slice_stride[dimension];
|
||||
|
||||
|
||||
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
int ent =0;
|
||||
|
||||
if ( cbmask ==0x3 ) {
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*rhs._grid->_slice_stride[dimension];
|
||||
int bo =n*rhs._grid->_slice_block[dimension];
|
||||
rhs._odata[so+o+b]=buffer[bo+b];
|
||||
table[ent++] = std::pair<int,int>(so+o+b,bo+b);
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
std::vector<std::pair<int,int> > table;
|
||||
int bo=0;
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*rhs._grid->_slice_stride[dimension];
|
||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
||||
if ( ocb & cbmask ) {
|
||||
table.push_back(std::pair<int,int> (so+o+b,bo++));
|
||||
table[ent++]=std::pair<int,int> (so+o+b,bo++);
|
||||
}
|
||||
}
|
||||
}
|
||||
parallel_for(int i=0;i<table.size();i++){
|
||||
// std::cout << "Rcv"<< table[i].first << " " << table[i].second << " " <<buffer[table[i].second]<<std::endl;
|
||||
rhs._odata[table[i].first]=buffer[table[i].second];
|
||||
}
|
||||
}
|
||||
|
||||
parallel_for(int i=0;i<ent;i++){
|
||||
rhs._odata[table[i].first]=buffer[table[i].second];
|
||||
}
|
||||
}
|
||||
|
||||
@ -228,29 +234,32 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
|
||||
int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
int stride = rhs._grid->_slice_stride[dimension];
|
||||
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
int ent=0;
|
||||
|
||||
if(cbmask == 0x3 ){
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
|
||||
int o =n*stride+b;
|
||||
//lhs._odata[lo+o]=rhs._odata[ro+o];
|
||||
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
|
||||
table[ent++] = std::pair<int,int>(lo+o,ro+o);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
|
||||
int o =n*stride+b;
|
||||
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
|
||||
if ( ocb&cbmask ) {
|
||||
//lhs._odata[lo+o]=rhs._odata[ro+o];
|
||||
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
|
||||
table[ent++] = std::pair<int,int>(lo+o,ro+o);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
parallel_for(int i=0;i<ent;i++){
|
||||
lhs._odata[table[i].first]=rhs._odata[table[i].second];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
|
||||
@ -269,16 +278,28 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
|
||||
int e2=rhs._grid->_slice_block [dimension];
|
||||
int stride = rhs._grid->_slice_stride[dimension];
|
||||
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
int ent=0;
|
||||
|
||||
double t_tab,t_perm;
|
||||
if ( cbmask == 0x3 ) {
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*stride;
|
||||
table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
|
||||
}}
|
||||
} else {
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*stride;
|
||||
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
|
||||
if ( ocb&cbmask ) {
|
||||
permute(lhs._odata[lo+o+b],rhs._odata[ro+o+b],permute_type);
|
||||
}
|
||||
if ( ocb&cbmask ) table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
|
||||
}}
|
||||
}
|
||||
|
||||
}}
|
||||
parallel_for(int i=0;i<ent;i++){
|
||||
permute(lhs._odata[table[i].first],rhs._odata[table[i].second],permute_type);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
@ -291,6 +312,8 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
|
||||
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
|
||||
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
|
||||
|
||||
double t_local;
|
||||
|
||||
if ( sshift[0] == sshift[1] ) {
|
||||
Cshift_local(ret,rhs,dimension,shift,0x3);
|
||||
} else {
|
||||
@ -299,7 +322,7 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
template<class vobj> void Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
{
|
||||
GridBase *grid = rhs._grid;
|
||||
int fd = grid->_fdimensions[dimension];
|
||||
@ -325,11 +348,7 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
|
||||
|
||||
int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
|
||||
int sx = (x+sshift)%rd;
|
||||
|
||||
// FIXME : This must change where we have a
|
||||
// Rotate slice.
|
||||
|
||||
// Document how this works ; why didn't I do this when I first wrote it...
|
||||
// wrap is whether sshift > rd.
|
||||
// num is sshift mod rd.
|
||||
//
|
||||
@ -365,10 +384,8 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
|
||||
|
||||
if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
|
||||
else Copy_plane(ret,rhs,dimension,x,sx,cbmask);
|
||||
|
||||
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#endif
|
@ -54,13 +54,13 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
|
||||
|
||||
|
||||
if ( !comm_dim ) {
|
||||
// std::cout << "Cshift_local" <<std::endl;
|
||||
//std::cout << "CSHIFT: Cshift_local" <<std::endl;
|
||||
Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
|
||||
} else if ( splice_dim ) {
|
||||
// std::cout << "Cshift_comms_simd" <<std::endl;
|
||||
//std::cout << "CSHIFT: Cshift_comms_simd call - splice_dim = " << splice_dim << " shift " << shift << " dimension = " << dimension << std::endl;
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift);
|
||||
} else {
|
||||
// std::cout << "Cshift_comms" <<std::endl;
|
||||
//std::cout << "CSHIFT: Cshift_comms" <<std::endl;
|
||||
Cshift_comms(ret,rhs,dimension,shift);
|
||||
}
|
||||
return ret;
|
||||
@ -91,9 +91,12 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,const Lattice<vob
|
||||
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
|
||||
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
|
||||
|
||||
//std::cout << "Cshift_comms_simd dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
|
||||
if ( sshift[0] == sshift[1] ) {
|
||||
//std::cout << "Single pass Cshift_comms" <<std::endl;
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x3);
|
||||
} else {
|
||||
//std::cout << "Two pass Cshift_comms" <<std::endl;
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
|
||||
}
|
||||
@ -175,6 +178,10 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
||||
int simd_layout = grid->_simd_layout[dimension];
|
||||
int comm_dim = grid->_processors[dimension] >1 ;
|
||||
|
||||
//std::cout << "Cshift_comms_simd dim "<< dimension << " fd "<<fd<<" rd "<<rd
|
||||
// << " ld "<<ld<<" pd " << pd<<" simd_layout "<<simd_layout
|
||||
// << " comm_dim " << comm_dim << " cbmask " << cbmask <<std::endl;
|
||||
|
||||
assert(comm_dim==1);
|
||||
assert(simd_layout==2);
|
||||
assert(shift>=0);
|
File diff suppressed because it is too large
Load Diff
@ -244,19 +244,11 @@ namespace Grid {
|
||||
|
||||
template<class sobj,class vobj> strong_inline
|
||||
RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
ret.checkerboard = x.checkerboard;
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
axpy(ret,a,x,y);
|
||||
return norm2(ret);
|
||||
return axpy_norm_fast(ret,a,x,y);
|
||||
}
|
||||
template<class sobj,class vobj> strong_inline
|
||||
RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
ret.checkerboard = x.checkerboard;
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
axpby(ret,a,b,x,y);
|
||||
return norm2(ret); // FIXME implement parallel norm in ss loop
|
||||
return axpby_norm_fast(ret,a,b,x,y);
|
||||
}
|
||||
|
||||
}
|
@ -256,9 +256,42 @@ public:
|
||||
_odata[ss]=r._odata[ss];
|
||||
}
|
||||
}
|
||||
|
||||
Lattice(Lattice&& r){ // move constructor
|
||||
_grid = r._grid;
|
||||
checkerboard = r.checkerboard;
|
||||
_odata=std::move(r._odata);
|
||||
}
|
||||
|
||||
|
||||
|
||||
inline Lattice<vobj> & operator = (Lattice<vobj> && r)
|
||||
{
|
||||
_grid = r._grid;
|
||||
checkerboard = r.checkerboard;
|
||||
_odata =std::move(r._odata);
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
|
||||
_grid = r._grid;
|
||||
checkerboard = r.checkerboard;
|
||||
_odata.resize(_grid->oSites());// essential
|
||||
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
_odata[ss]=r._odata[ss];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
||||
this->checkerboard = r.checkerboard;
|
||||
conformable(*this,r);
|
||||
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
this->_odata[ss]=r._odata[ss];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
virtual ~Lattice(void) = default;
|
||||
|
||||
void reset(GridBase* grid) {
|
||||
@ -277,15 +310,6 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
||||
this->checkerboard = r.checkerboard;
|
||||
conformable(*this,r);
|
||||
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
this->_odata[ss]=r._odata[ss];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// *=,+=,-= operators inherit behvour from correspond */+/- operation
|
||||
template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
|
@ -179,7 +179,7 @@ namespace Grid {
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define DECLARE_RELATIONAL(op,functor) \
|
||||
#define DECLARE_RELATIONAL_EQ(op,functor) \
|
||||
template<class vsimd,IfSimd<vsimd> = 0>\
|
||||
inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\
|
||||
{\
|
||||
@ -198,11 +198,6 @@ namespace Grid {
|
||||
typedef typename vsimd::scalar_type scalar;\
|
||||
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
|
||||
}\
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
|
||||
{ \
|
||||
return lhs._internal op rhs._internal; \
|
||||
} \
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs) \
|
||||
{ \
|
||||
@ -212,14 +207,21 @@ namespace Grid {
|
||||
inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
|
||||
{ \
|
||||
return lhs op rhs._internal; \
|
||||
}
|
||||
} \
|
||||
|
||||
#define DECLARE_RELATIONAL(op,functor) \
|
||||
DECLARE_RELATIONAL_EQ(op,functor) \
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
|
||||
{ \
|
||||
return lhs._internal op rhs._internal; \
|
||||
}
|
||||
|
||||
DECLARE_RELATIONAL(<,slt);
|
||||
DECLARE_RELATIONAL(<=,sle);
|
||||
DECLARE_RELATIONAL(>,sgt);
|
||||
DECLARE_RELATIONAL(>=,sge);
|
||||
DECLARE_RELATIONAL(==,seq);
|
||||
DECLARE_RELATIONAL_EQ(==,seq);
|
||||
DECLARE_RELATIONAL(!=,sne);
|
||||
|
||||
#undef DECLARE_RELATIONAL
|
@ -52,23 +52,5 @@ namespace Grid {
|
||||
}
|
||||
};
|
||||
|
||||
// LatticeCoordinate();
|
||||
// FIXME for debug; deprecate this; made obscelete by
|
||||
template<class vobj> void lex_sites(Lattice<vobj> &l){
|
||||
Real *v_ptr = (Real *)&l._odata[0];
|
||||
size_t o_len = l._grid->oSites();
|
||||
size_t v_len = sizeof(vobj)/sizeof(vRealF);
|
||||
size_t vec_len = vRealF::Nsimd();
|
||||
|
||||
for(int i=0;i<o_len;i++){
|
||||
for(int j=0;j<v_len;j++){
|
||||
for(int vv=0;vv<vec_len;vv+=2){
|
||||
v_ptr[i*v_len*vec_len+j*vec_len+vv ]= i+vv*500;
|
||||
v_ptr[i*v_len*vec_len+j*vec_len+vv+1]= i+vv*500;
|
||||
}
|
||||
}}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#endif
|
@ -33,41 +33,94 @@ namespace Grid {
|
||||
// Deterministic Reduction operations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
|
||||
ComplexD nrm = innerProduct(arg,arg);
|
||||
auto nrm = innerProduct(arg,arg);
|
||||
return std::real(nrm);
|
||||
}
|
||||
|
||||
// Double inner product
|
||||
template<class vobj>
|
||||
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
|
||||
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
|
||||
{
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_typeD vector_type;
|
||||
scalar_type nrm;
|
||||
|
||||
GridBase *grid = left._grid;
|
||||
|
||||
std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
|
||||
|
||||
const int pad = 8;
|
||||
|
||||
ComplexD inner;
|
||||
Vector<ComplexD> sumarray(grid->SumArraySize()*pad);
|
||||
|
||||
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
decltype(innerProductD(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation
|
||||
decltype(innerProductD(left._odata[0],right._odata[0])) vinner=zero; // private to thread; sub summation
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vnrm = vnrm + innerProductD(left._odata[ss],right._odata[ss]);
|
||||
vinner = vinner + innerProductD(left._odata[ss],right._odata[ss]);
|
||||
}
|
||||
sumarray[thr]=TensorRemove(vnrm) ;
|
||||
// All threads sum across SIMD; reduce serial work at end
|
||||
// one write per cacheline with streaming store
|
||||
ComplexD tmp = Reduce(TensorRemove(vinner)) ;
|
||||
vstream(sumarray[thr*pad],tmp);
|
||||
}
|
||||
|
||||
vector_type vvnrm; vvnrm=zero; // sum across threads
|
||||
inner=0.0;
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
vvnrm = vvnrm+sumarray[i];
|
||||
inner = inner+sumarray[i*pad];
|
||||
}
|
||||
nrm = Reduce(vvnrm);// sum across simd
|
||||
right._grid->GlobalSum(nrm);
|
||||
return nrm;
|
||||
right._grid->GlobalSum(inner);
|
||||
return inner;
|
||||
}
|
||||
|
||||
/////////////////////////
|
||||
// Fast axpby_norm
|
||||
// z = a x + b y
|
||||
// return norm z
|
||||
/////////////////////////
|
||||
template<class sobj,class vobj> strong_inline RealD
|
||||
axpy_norm_fast(Lattice<vobj> &z,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||
{
|
||||
sobj one(1.0);
|
||||
return axpby_norm_fast(z,a,one,x,y);
|
||||
}
|
||||
|
||||
template<class sobj,class vobj> strong_inline RealD
|
||||
axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||
{
|
||||
const int pad = 8;
|
||||
z.checkerboard = x.checkerboard;
|
||||
conformable(z,x);
|
||||
conformable(x,y);
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_typeD vector_type;
|
||||
RealD nrm;
|
||||
|
||||
GridBase *grid = x._grid;
|
||||
|
||||
Vector<RealD> sumarray(grid->SumArraySize()*pad);
|
||||
|
||||
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(x._grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
// private to thread; sub summation
|
||||
decltype(innerProductD(z._odata[0],z._odata[0])) vnrm=zero;
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
|
||||
vnrm = vnrm + innerProductD(tmp,tmp);
|
||||
vstream(z._odata[ss],tmp);
|
||||
}
|
||||
vstream(sumarray[thr*pad],real(Reduce(TensorRemove(vnrm)))) ;
|
||||
}
|
||||
|
||||
nrm = 0.0; // sum across threads; linear in thread count but fast
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
nrm = nrm+sumarray[i*pad];
|
||||
}
|
||||
z._grid->GlobalSum(nrm);
|
||||
return nrm;
|
||||
}
|
||||
|
||||
|
||||
template<class Op,class T1>
|
||||
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
|
||||
@ -221,6 +274,115 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
static void mySliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
|
||||
{
|
||||
// std::cout << GridLogMessage << "Start mySliceInnerProductVector" << std::endl;
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
std::vector<scalar_type> lsSum;
|
||||
localSliceInnerProductVector(result, lhs, rhs, lsSum, orthogdim);
|
||||
globalSliceInnerProductVector(result, lhs, lsSum, orthogdim);
|
||||
// std::cout << GridLogMessage << "End mySliceInnerProductVector" << std::endl;
|
||||
}
|
||||
|
||||
template <class vobj>
|
||||
static void localSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, const Lattice<vobj> &rhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
|
||||
{
|
||||
// std::cout << GridLogMessage << "Start prep" << std::endl;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
GridBase *grid = lhs._grid;
|
||||
assert(grid!=NULL);
|
||||
conformable(grid,rhs._grid);
|
||||
|
||||
const int Nd = grid->_ndimension;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
assert(orthogdim >= 0);
|
||||
assert(orthogdim < Nd);
|
||||
|
||||
int fd=grid->_fdimensions[orthogdim];
|
||||
int ld=grid->_ldimensions[orthogdim];
|
||||
int rd=grid->_rdimensions[orthogdim];
|
||||
// std::cout << GridLogMessage << "Start alloc" << std::endl;
|
||||
|
||||
std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first
|
||||
lsSum.resize(ld,scalar_type(0.0)); // sum across these down to scalars
|
||||
std::vector<iScalar<scalar_type>> extracted(Nsimd); // splitting the SIMD
|
||||
// std::cout << GridLogMessage << "End alloc" << std::endl;
|
||||
|
||||
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
|
||||
for(int r=0;r<rd;r++){
|
||||
lvSum[r]=zero;
|
||||
}
|
||||
|
||||
int e1= grid->_slice_nblock[orthogdim];
|
||||
int e2= grid->_slice_block [orthogdim];
|
||||
int stride=grid->_slice_stride[orthogdim];
|
||||
// std::cout << GridLogMessage << "End prep" << std::endl;
|
||||
// std::cout << GridLogMessage << "Start parallel inner product, _rd = " << rd << std::endl;
|
||||
vector_type vv;
|
||||
parallel_for(int r=0;r<rd;r++)
|
||||
{
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss = so + n * stride + b;
|
||||
vv = TensorRemove(innerProduct(lhs._odata[ss], rhs._odata[ss]));
|
||||
lvSum[r] = lvSum[r] + vv;
|
||||
}
|
||||
}
|
||||
}
|
||||
// std::cout << GridLogMessage << "End parallel inner product" << std::endl;
|
||||
|
||||
// Sum across simd lanes in the plane, breaking out orthog dir.
|
||||
std::vector<int> icoor(Nd);
|
||||
for(int rt=0;rt<rd;rt++){
|
||||
|
||||
iScalar<vector_type> temp;
|
||||
temp._internal = lvSum[rt];
|
||||
extract(temp,extracted);
|
||||
|
||||
for(int idx=0;idx<Nsimd;idx++){
|
||||
|
||||
grid->iCoorFromIindex(icoor,idx);
|
||||
|
||||
int ldx =rt+icoor[orthogdim]*rd;
|
||||
|
||||
lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
|
||||
|
||||
}
|
||||
}
|
||||
// std::cout << GridLogMessage << "End sum over simd lanes" << std::endl;
|
||||
}
|
||||
template <class vobj>
|
||||
static void globalSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
|
||||
{
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
GridBase *grid = lhs._grid;
|
||||
int fd = result.size();
|
||||
int ld = lsSum.size();
|
||||
// sum over nodes.
|
||||
std::vector<scalar_type> gsum;
|
||||
gsum.resize(fd, scalar_type(0.0));
|
||||
// std::cout << GridLogMessage << "Start of gsum[t] creation:" << std::endl;
|
||||
for(int t=0;t<fd;t++){
|
||||
int pt = t/ld; // processor plane
|
||||
int lt = t%ld;
|
||||
if ( pt == grid->_processor_coor[orthogdim] ) {
|
||||
gsum[t]=lsSum[lt];
|
||||
}
|
||||
}
|
||||
// std::cout << GridLogMessage << "End of gsum[t] creation:" << std::endl;
|
||||
// std::cout << GridLogMessage << "Start of GlobalSumVector:" << std::endl;
|
||||
grid->GlobalSumVector(&gsum[0], fd);
|
||||
// std::cout << GridLogMessage << "End of GlobalSumVector:" << std::endl;
|
||||
|
||||
result = gsum;
|
||||
}
|
||||
template<class vobj>
|
||||
static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
|
||||
{
|
@ -158,10 +158,19 @@ namespace Grid {
|
||||
// tens of seconds per trajectory so this is clean in all reasonable cases,
|
||||
// and margin of safety is orders of magnitude.
|
||||
// We could hack Sitmo to skip in the higher order words of state if necessary
|
||||
//
|
||||
// Replace with 2^30 ; avoid problem on large volumes
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
// uint64_t skip = site+1; // Old init Skipped then drew. Checked compat with faster init
|
||||
const int shift = 30;
|
||||
|
||||
uint64_t skip = site;
|
||||
skip = skip<<40;
|
||||
|
||||
skip = skip<<shift;
|
||||
|
||||
assert((skip >> shift)==site); // check for overflow
|
||||
|
||||
eng.discard(skip);
|
||||
// std::cout << " Engine " <<site << " state " <<eng<<std::endl;
|
||||
}
|
||||
@ -242,7 +251,7 @@ namespace Grid {
|
||||
|
||||
dist[0].reset();
|
||||
for(int idx=0;idx<words;idx++){
|
||||
fillScalar(buf[idx],dist[0],_generators[0]);
|
||||
fillScalar(buf[idx],dist[0],_generators[0]);
|
||||
}
|
||||
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
@ -274,7 +283,7 @@ namespace Grid {
|
||||
RealF *pointer=(RealF *)&l;
|
||||
dist[0].reset();
|
||||
for(int i=0;i<2*vComplexF::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
@ -282,7 +291,7 @@ namespace Grid {
|
||||
RealD *pointer=(RealD *)&l;
|
||||
dist[0].reset();
|
||||
for(int i=0;i<2*vComplexD::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
@ -290,7 +299,7 @@ namespace Grid {
|
||||
RealF *pointer=(RealF *)&l;
|
||||
dist[0].reset();
|
||||
for(int i=0;i<vRealF::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
@ -308,6 +317,19 @@ namespace Grid {
|
||||
std::seed_seq src(seeds.begin(),seeds.end());
|
||||
Seed(src,0);
|
||||
}
|
||||
|
||||
void SeedUniqueString(const std::string &s){
|
||||
std::vector<int> seeds;
|
||||
std::stringstream sha;
|
||||
seeds = GridChecksum::sha256_seeds(s);
|
||||
for(int i=0;i<seeds.size();i++) {
|
||||
sha << std::hex << seeds[i];
|
||||
}
|
||||
std::cout << GridLogMessage << "Intialising serial RNG with unique string '"
|
||||
<< s << "'" << std::endl;
|
||||
std::cout << GridLogMessage << "Seed SHA256: " << sha.str() << std::endl;
|
||||
SeedFixedIntegers(seeds);
|
||||
}
|
||||
};
|
||||
|
||||
class GridParallelRNG : public GridRNGbase {
|
||||
@ -368,6 +390,14 @@ namespace Grid {
|
||||
_time_counter += usecond()- inner_time_counter;
|
||||
};
|
||||
|
||||
void SeedUniqueString(const std::string &s){
|
||||
std::vector<int> seeds;
|
||||
seeds = GridChecksum::sha256_seeds(s);
|
||||
std::cout << GridLogMessage << "Intialising parallel RNG with unique string '"
|
||||
<< s << "'" << std::endl;
|
||||
std::cout << GridLogMessage << "Seed SHA256: " << GridChecksum::sha256_string(seeds) << std::endl;
|
||||
SeedFixedIntegers(seeds);
|
||||
}
|
||||
void SeedFixedIntegers(const std::vector<int> &seeds){
|
||||
|
||||
// Everyone generates the same seed_seq based on input seeds
|
@ -485,7 +485,7 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
|
||||
|
||||
|
||||
template<class vobj>
|
||||
void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
|
||||
void ExtractSliceLocal(Lattice<vobj> &lowDim, const Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
@ -520,7 +520,7 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slic
|
||||
|
||||
|
||||
template<class vobj>
|
||||
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
|
||||
void Replicate(const Lattice<vobj> &coarse,Lattice<vobj> & fine)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
@ -599,6 +599,51 @@ unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
|
||||
extract1(in_vobj, out_ptrs, 0);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename vobj, typename sobj>
|
||||
typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type
|
||||
unvectorizeToRevLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
|
||||
{
|
||||
|
||||
typedef typename vobj::vector_type vtype;
|
||||
|
||||
GridBase* in_grid = in._grid;
|
||||
out.resize(in_grid->lSites());
|
||||
|
||||
int ndim = in_grid->Nd();
|
||||
int in_nsimd = vtype::Nsimd();
|
||||
|
||||
std::vector<std::vector<int> > in_icoor(in_nsimd);
|
||||
|
||||
for(int lane=0; lane < in_nsimd; lane++){
|
||||
in_icoor[lane].resize(ndim);
|
||||
in_grid->iCoorFromIindex(in_icoor[lane], lane);
|
||||
}
|
||||
|
||||
parallel_for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index
|
||||
//Assemble vector of pointers to output elements
|
||||
std::vector<sobj*> out_ptrs(in_nsimd);
|
||||
|
||||
std::vector<int> in_ocoor(ndim);
|
||||
in_grid->oCoorFromOindex(in_ocoor, in_oidx);
|
||||
|
||||
std::vector<int> lcoor(in_grid->Nd());
|
||||
|
||||
for(int lane=0; lane < in_nsimd; lane++){
|
||||
for(int mu=0;mu<ndim;mu++)
|
||||
lcoor[mu] = in_ocoor[mu] + in_grid->_rdimensions[mu]*in_icoor[lane][mu];
|
||||
|
||||
int lex;
|
||||
Lexicographic::IndexFromCoorReversed(lcoor, lex, in_grid->_ldimensions);
|
||||
out_ptrs[lane] = &out[lex];
|
||||
}
|
||||
|
||||
//Unpack into those ptrs
|
||||
const vobj & in_vobj = in._odata[in_oidx];
|
||||
extract1(in_vobj, out_ptrs, 0);
|
||||
}
|
||||
}
|
||||
|
||||
//Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order
|
||||
template<typename vobj, typename sobj>
|
||||
typename std::enable_if<isSIMDvectorized<vobj>::value
|
||||
@ -648,10 +693,59 @@ vectorizeFromLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
|
||||
}
|
||||
}
|
||||
|
||||
template<typename vobj, typename sobj>
|
||||
typename std::enable_if<isSIMDvectorized<vobj>::value
|
||||
&& !isSIMDvectorized<sobj>::value, void>::type
|
||||
vectorizeFromRevLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
|
||||
{
|
||||
|
||||
typedef typename vobj::vector_type vtype;
|
||||
|
||||
GridBase* grid = out._grid;
|
||||
assert(in.size()==grid->lSites());
|
||||
|
||||
int ndim = grid->Nd();
|
||||
int nsimd = vtype::Nsimd();
|
||||
|
||||
std::vector<std::vector<int> > icoor(nsimd);
|
||||
|
||||
for(int lane=0; lane < nsimd; lane++){
|
||||
icoor[lane].resize(ndim);
|
||||
grid->iCoorFromIindex(icoor[lane],lane);
|
||||
}
|
||||
|
||||
parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index
|
||||
//Assemble vector of pointers to output elements
|
||||
std::vector<sobj*> ptrs(nsimd);
|
||||
|
||||
std::vector<int> ocoor(ndim);
|
||||
grid->oCoorFromOindex(ocoor, oidx);
|
||||
|
||||
std::vector<int> lcoor(grid->Nd());
|
||||
|
||||
for(int lane=0; lane < nsimd; lane++){
|
||||
|
||||
for(int mu=0;mu<ndim;mu++){
|
||||
lcoor[mu] = ocoor[mu] + grid->_rdimensions[mu]*icoor[lane][mu];
|
||||
}
|
||||
|
||||
int lex;
|
||||
Lexicographic::IndexFromCoorReversed(lcoor, lex, grid->_ldimensions);
|
||||
ptrs[lane] = &in[lex];
|
||||
}
|
||||
|
||||
//pack from those ptrs
|
||||
vobj vecobj;
|
||||
merge1(vecobj, ptrs, 0);
|
||||
out._odata[oidx] = vecobj;
|
||||
}
|
||||
}
|
||||
|
||||
//Convert a Lattice from one precision to another
|
||||
template<class VobjOut, class VobjIn>
|
||||
void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
|
||||
assert(out._grid->Nd() == in._grid->Nd());
|
||||
assert(out._grid->FullDimensions() == in._grid->FullDimensions());
|
||||
out.checkerboard = in.checkerboard;
|
||||
GridBase *in_grid=in._grid;
|
||||
GridBase *out_grid = out._grid;
|
@ -86,7 +86,7 @@ protected:
|
||||
Colours &Painter;
|
||||
int active;
|
||||
int timing_mode;
|
||||
int topWidth{-1};
|
||||
int topWidth{-1}, chanWidth{-1};
|
||||
static int timestamp;
|
||||
std::string name, topName;
|
||||
std::string COLOUR;
|
||||
@ -126,6 +126,7 @@ public:
|
||||
}
|
||||
}
|
||||
void setTopWidth(const int w) {topWidth = w;}
|
||||
void setChanWidth(const int w) {chanWidth = w;}
|
||||
|
||||
friend std::ostream& operator<< (std::ostream& stream, Logger& log){
|
||||
|
||||
@ -136,7 +137,12 @@ public:
|
||||
stream << std::setw(log.topWidth);
|
||||
}
|
||||
stream << log.topName << log.background()<< " : ";
|
||||
stream << log.colour() << std::left << log.name << log.background() << " : ";
|
||||
stream << log.colour() << std::left;
|
||||
if (log.chanWidth > 0)
|
||||
{
|
||||
stream << std::setw(log.chanWidth);
|
||||
}
|
||||
stream << log.name << log.background() << " : ";
|
||||
if ( log.timestamp ) {
|
||||
log.StopWatch->Stop();
|
||||
GridTime now = log.StopWatch->Elapsed();
|
@ -91,7 +91,7 @@ class BinaryIO {
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
GridBase *grid = lat._grid;
|
||||
int lsites = grid->lSites();
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
std::vector<sobj> scalardata(lsites);
|
||||
unvectorizeToLexOrdArray(scalardata,lat);
|
||||
@ -110,11 +110,11 @@ class BinaryIO {
|
||||
lsites = 1;
|
||||
}
|
||||
|
||||
#pragma omp parallel
|
||||
PARALLEL_REGION
|
||||
{
|
||||
uint32_t nersc_csum_thr = 0;
|
||||
|
||||
#pragma omp for
|
||||
PARALLEL_FOR_LOOP_INTERN
|
||||
for (uint64_t local_site = 0; local_site < lsites; local_site++)
|
||||
{
|
||||
uint32_t *site_buf = (uint32_t *)&fbuf[local_site];
|
||||
@ -124,7 +124,7 @@ class BinaryIO {
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp critical
|
||||
PARALLEL_CRITICAL
|
||||
{
|
||||
nersc_csum += nersc_csum_thr;
|
||||
}
|
||||
@ -146,21 +146,23 @@ class BinaryIO {
|
||||
std::vector<int> local_start =grid->LocalStarts();
|
||||
std::vector<int> global_vol =grid->FullDimensions();
|
||||
|
||||
#pragma omp parallel
|
||||
PARALLEL_REGION
|
||||
{
|
||||
std::vector<int> coor(nd);
|
||||
uint32_t scidac_csuma_thr=0;
|
||||
uint32_t scidac_csumb_thr=0;
|
||||
uint32_t site_crc=0;
|
||||
|
||||
#pragma omp for
|
||||
PARALLEL_FOR_LOOP_INTERN
|
||||
for(uint64_t local_site=0;local_site<lsites;local_site++){
|
||||
|
||||
uint32_t * site_buf = (uint32_t *)&fbuf[local_site];
|
||||
|
||||
/*
|
||||
* Scidac csum is rather more heavyweight
|
||||
* FIXME -- 128^3 x 256 x 16 will overflow.
|
||||
*/
|
||||
|
||||
int global_site;
|
||||
|
||||
Lexicographic::CoorFromIndex(coor,local_site,local_vol);
|
||||
@ -181,7 +183,7 @@ class BinaryIO {
|
||||
scidac_csumb_thr ^= site_crc<<gsite31 | site_crc>>(32-gsite31);
|
||||
}
|
||||
|
||||
#pragma omp critical
|
||||
PARALLEL_CRITICAL
|
||||
{
|
||||
scidac_csuma^= scidac_csuma_thr;
|
||||
scidac_csumb^= scidac_csumb_thr;
|
||||
@ -261,7 +263,7 @@ class BinaryIO {
|
||||
GridBase *grid,
|
||||
std::vector<fobj> &iodata,
|
||||
std::string file,
|
||||
Integer offset,
|
||||
uint64_t& offset,
|
||||
const std::string &format, int control,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
@ -370,7 +372,7 @@ class BinaryIO {
|
||||
std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : "
|
||||
<< iodata.size() * sizeof(fobj) << " bytes" << std::endl;
|
||||
std::ifstream fin;
|
||||
fin.open(file, std::ios::binary | std::ios::in);
|
||||
fin.open(file, std::ios::binary | std::ios::in);
|
||||
if (control & BINARYIO_MASTER_APPEND)
|
||||
{
|
||||
fin.seekg(-sizeof(fobj), fin.end);
|
||||
@ -429,14 +431,20 @@ class BinaryIO {
|
||||
MPI_Abort(MPI_COMM_WORLD, 1); //assert(ierr == 0);
|
||||
}
|
||||
|
||||
std::cout << GridLogDebug << "MPI read I/O set view " << file << std::endl;
|
||||
std::cout << GridLogDebug << "MPI write I/O set view " << file << std::endl;
|
||||
ierr = MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL);
|
||||
assert(ierr == 0);
|
||||
|
||||
std::cout << GridLogDebug << "MPI read I/O write all " << file << std::endl;
|
||||
std::cout << GridLogDebug << "MPI write I/O write all " << file << std::endl;
|
||||
ierr = MPI_File_write_all(fh, &iodata[0], 1, localArray, &status);
|
||||
assert(ierr == 0);
|
||||
|
||||
MPI_Offset os;
|
||||
MPI_File_get_position(fh, &os);
|
||||
MPI_File_get_byte_offset(fh, os, &disp);
|
||||
offset = disp;
|
||||
|
||||
|
||||
MPI_File_close(&fh);
|
||||
MPI_Type_free(&fileArray);
|
||||
MPI_Type_free(&localArray);
|
||||
@ -446,16 +454,20 @@ class BinaryIO {
|
||||
} else {
|
||||
|
||||
std::cout << GridLogMessage << "IOobject: C++ write I/O " << file << " : "
|
||||
<< iodata.size() * sizeof(fobj) << " bytes" << std::endl;
|
||||
<< iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
|
||||
|
||||
std::ofstream fout;
|
||||
fout.exceptions ( std::fstream::failbit | std::fstream::badbit );
|
||||
try {
|
||||
fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
|
||||
if (offset) { // Must already exist and contain data
|
||||
fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
|
||||
} else { // Allow create
|
||||
fout.open(file,std::ios::binary|std::ios::out);
|
||||
}
|
||||
} catch (const std::fstream::failure& exc) {
|
||||
std::cout << GridLogError << "Error in opening the file " << file << " for output" <<std::endl;
|
||||
std::cout << GridLogError << "Exception description: " << exc.what() << std::endl;
|
||||
std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl;
|
||||
// std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl;
|
||||
#ifdef USE_MPI_IO
|
||||
MPI_Abort(MPI_COMM_WORLD,1);
|
||||
#else
|
||||
@ -489,6 +501,7 @@ class BinaryIO {
|
||||
exit(1);
|
||||
#endif
|
||||
}
|
||||
offset = fout.tellp();
|
||||
fout.close();
|
||||
}
|
||||
timer.Stop();
|
||||
@ -523,7 +536,7 @@ class BinaryIO {
|
||||
static inline void readLatticeObject(Lattice<vobj> &Umu,
|
||||
std::string file,
|
||||
munger munge,
|
||||
Integer offset,
|
||||
uint64_t offset,
|
||||
const std::string &format,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
@ -533,7 +546,7 @@ class BinaryIO {
|
||||
typedef typename vobj::Realified::scalar_type word; word w=0;
|
||||
|
||||
GridBase *grid = Umu._grid;
|
||||
int lsites = grid->lSites();
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
std::vector<sobj> scalardata(lsites);
|
||||
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
|
||||
@ -544,7 +557,7 @@ class BinaryIO {
|
||||
GridStopWatch timer;
|
||||
timer.Start();
|
||||
|
||||
parallel_for(int x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
|
||||
parallel_for(uint64_t x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
|
||||
|
||||
vectorizeFromLexOrdArray(scalardata,Umu);
|
||||
grid->Barrier();
|
||||
@ -560,7 +573,7 @@ class BinaryIO {
|
||||
static inline void writeLatticeObject(Lattice<vobj> &Umu,
|
||||
std::string file,
|
||||
munger munge,
|
||||
Integer offset,
|
||||
uint64_t offset,
|
||||
const std::string &format,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
@ -569,7 +582,7 @@ class BinaryIO {
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::Realified::scalar_type word; word w=0;
|
||||
GridBase *grid = Umu._grid;
|
||||
int lsites = grid->lSites();
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
std::vector<sobj> scalardata(lsites);
|
||||
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
|
||||
@ -580,7 +593,7 @@ class BinaryIO {
|
||||
GridStopWatch timer; timer.Start();
|
||||
unvectorizeToLexOrdArray(scalardata,Umu);
|
||||
|
||||
parallel_for(int x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
|
||||
parallel_for(uint64_t x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
|
||||
|
||||
grid->Barrier();
|
||||
timer.Stop();
|
||||
@ -597,7 +610,7 @@ class BinaryIO {
|
||||
static inline void readRNG(GridSerialRNG &serial,
|
||||
GridParallelRNG ¶llel,
|
||||
std::string file,
|
||||
Integer offset,
|
||||
uint64_t offset,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
uint32_t &scidac_csumb)
|
||||
@ -610,8 +623,8 @@ class BinaryIO {
|
||||
std::string format = "IEEE32BIG";
|
||||
|
||||
GridBase *grid = parallel._grid;
|
||||
int gsites = grid->gSites();
|
||||
int lsites = grid->lSites();
|
||||
uint64_t gsites = grid->gSites();
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
uint32_t nersc_csum_tmp = 0;
|
||||
uint32_t scidac_csuma_tmp = 0;
|
||||
@ -626,7 +639,7 @@ class BinaryIO {
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
timer.Start();
|
||||
parallel_for(int lidx=0;lidx<lsites;lidx++){
|
||||
parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
|
||||
std::vector<RngStateType> tmp(RngStateCount);
|
||||
std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin());
|
||||
parallel.SetState(tmp,lidx);
|
||||
@ -659,7 +672,7 @@ class BinaryIO {
|
||||
static inline void writeRNG(GridSerialRNG &serial,
|
||||
GridParallelRNG ¶llel,
|
||||
std::string file,
|
||||
Integer offset,
|
||||
uint64_t offset,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
uint32_t &scidac_csumb)
|
||||
@ -670,8 +683,8 @@ class BinaryIO {
|
||||
typedef std::array<RngStateType,RngStateCount> RNGstate;
|
||||
|
||||
GridBase *grid = parallel._grid;
|
||||
int gsites = grid->gSites();
|
||||
int lsites = grid->lSites();
|
||||
uint64_t gsites = grid->gSites();
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
uint32_t nersc_csum_tmp;
|
||||
uint32_t scidac_csuma_tmp;
|
||||
@ -684,7 +697,7 @@ class BinaryIO {
|
||||
|
||||
timer.Start();
|
||||
std::vector<RNGstate> iodata(lsites);
|
||||
parallel_for(int lidx=0;lidx<lsites;lidx++){
|
||||
parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
|
||||
std::vector<RngStateType> tmp(RngStateCount);
|
||||
parallel.GetState(tmp,lidx);
|
||||
std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin());
|
||||
@ -693,7 +706,6 @@ class BinaryIO {
|
||||
|
||||
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
iodata.resize(1);
|
||||
{
|
||||
std::vector<RngStateType> tmp(RngStateCount);
|
@ -182,6 +182,11 @@ class GridLimeReader : public BinaryIO {
|
||||
{
|
||||
filename= _filename;
|
||||
File = fopen(filename.c_str(), "r");
|
||||
if (File == nullptr)
|
||||
{
|
||||
std::cerr << "cannot open file '" << filename << "'" << std::endl;
|
||||
abort();
|
||||
}
|
||||
LimeR = limeCreateReader(File);
|
||||
}
|
||||
/////////////////////////////////////////////
|
||||
@ -245,10 +250,8 @@ class GridLimeReader : public BinaryIO {
|
||||
////////////////////////////////////////////
|
||||
// Read a generic serialisable object
|
||||
////////////////////////////////////////////
|
||||
template<class serialisable_object>
|
||||
void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
|
||||
void readLimeObject(std::string &xmlstring,std::string record_name)
|
||||
{
|
||||
std::string xmlstring;
|
||||
// should this be a do while; can we miss a first record??
|
||||
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
|
||||
|
||||
@ -262,18 +265,29 @@ class GridLimeReader : public BinaryIO {
|
||||
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
|
||||
// std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl;
|
||||
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
read(RD,object_name,object);
|
||||
xmlstring = std::string(&xmlc[0]);
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<class serialisable_object>
|
||||
void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
|
||||
{
|
||||
std::string xmlstring;
|
||||
|
||||
readLimeObject(xmlstring, record_name);
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,object_name,object);
|
||||
}
|
||||
};
|
||||
|
||||
class GridLimeWriter : public BinaryIO {
|
||||
class GridLimeWriter : public BinaryIO
|
||||
{
|
||||
public:
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// FIXME: format for RNG? Now just binary out instead
|
||||
// FIXME: collective calls or not ?
|
||||
@ -282,17 +296,24 @@ class GridLimeWriter : public BinaryIO {
|
||||
FILE *File;
|
||||
LimeWriter *LimeW;
|
||||
std::string filename;
|
||||
|
||||
bool boss_node;
|
||||
GridLimeWriter( bool isboss = true) {
|
||||
boss_node = isboss;
|
||||
}
|
||||
void open(const std::string &_filename) {
|
||||
filename= _filename;
|
||||
File = fopen(filename.c_str(), "w");
|
||||
LimeW = limeCreateWriter(File); assert(LimeW != NULL );
|
||||
if ( boss_node ) {
|
||||
File = fopen(filename.c_str(), "w");
|
||||
LimeW = limeCreateWriter(File); assert(LimeW != NULL );
|
||||
}
|
||||
}
|
||||
/////////////////////////////////////////////
|
||||
// Close the file
|
||||
/////////////////////////////////////////////
|
||||
void close(void) {
|
||||
fclose(File);
|
||||
if ( boss_node ) {
|
||||
fclose(File);
|
||||
}
|
||||
// limeDestroyWriter(LimeW);
|
||||
}
|
||||
///////////////////////////////////////////////////////
|
||||
@ -300,77 +321,122 @@ class GridLimeWriter : public BinaryIO {
|
||||
///////////////////////////////////////////////////////
|
||||
int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize)
|
||||
{
|
||||
LimeRecordHeader *h;
|
||||
h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize);
|
||||
assert(limeWriteRecordHeader(h, LimeW) >= 0);
|
||||
limeDestroyHeader(h);
|
||||
if ( boss_node ) {
|
||||
LimeRecordHeader *h;
|
||||
h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize);
|
||||
assert(limeWriteRecordHeader(h, LimeW) >= 0);
|
||||
limeDestroyHeader(h);
|
||||
}
|
||||
return LIME_SUCCESS;
|
||||
}
|
||||
////////////////////////////////////////////
|
||||
// Write a generic serialisable object
|
||||
////////////////////////////////////////////
|
||||
template<class serialisable_object>
|
||||
void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name)
|
||||
void writeLimeObject(int MB,int ME,XmlWriter &writer,std::string object_name,std::string record_name)
|
||||
{
|
||||
std::string xmlstring;
|
||||
{
|
||||
XmlWriter WR("","");
|
||||
write(WR,object_name,object);
|
||||
xmlstring = WR.XmlString();
|
||||
}
|
||||
// std::cout << "WriteLimeObject" << record_name <<std::endl;
|
||||
uint64_t nbytes = xmlstring.size();
|
||||
// std::cout << " xmlstring "<< nbytes<< " " << xmlstring <<std::endl;
|
||||
int err;
|
||||
LimeRecordHeader *h = limeCreateHeader(MB, ME,const_cast<char *>(record_name.c_str()), nbytes);
|
||||
assert(h!= NULL);
|
||||
if ( boss_node ) {
|
||||
std::string xmlstring = writer.docString();
|
||||
|
||||
err=limeWriteRecordHeader(h, LimeW); assert(err>=0);
|
||||
err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0);
|
||||
err=limeWriterCloseRecord(LimeW); assert(err>=0);
|
||||
limeDestroyHeader(h);
|
||||
// std::cout << " File offset is now"<<ftello(File) << std::endl;
|
||||
// std::cout << "WriteLimeObject" << record_name <<std::endl;
|
||||
uint64_t nbytes = xmlstring.size();
|
||||
// std::cout << " xmlstring "<< nbytes<< " " << xmlstring <<std::endl;
|
||||
int err;
|
||||
LimeRecordHeader *h = limeCreateHeader(MB, ME,const_cast<char *>(record_name.c_str()), nbytes);
|
||||
assert(h!= NULL);
|
||||
|
||||
err=limeWriteRecordHeader(h, LimeW); assert(err>=0);
|
||||
err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0);
|
||||
err=limeWriterCloseRecord(LimeW); assert(err>=0);
|
||||
limeDestroyHeader(h);
|
||||
}
|
||||
}
|
||||
////////////////////////////////////////////
|
||||
|
||||
template<class serialisable_object>
|
||||
void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name, const unsigned int scientificPrec = 0)
|
||||
{
|
||||
XmlWriter WR("","");
|
||||
|
||||
if (scientificPrec)
|
||||
{
|
||||
WR.scientificFormat(true);
|
||||
WR.setPrecision(scientificPrec);
|
||||
}
|
||||
write(WR,object_name,object);
|
||||
writeLimeObject(MB, ME, WR, object_name, record_name);
|
||||
}
|
||||
////////////////////////////////////////////////////
|
||||
// Write a generic lattice field and csum
|
||||
////////////////////////////////////////////
|
||||
// This routine is Collectively called by all nodes
|
||||
// in communicator used by the field._grid
|
||||
////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
|
||||
{
|
||||
////////////////////////////////////////////
|
||||
// Create record header
|
||||
////////////////////////////////////////////
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
int err;
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites;
|
||||
createLimeRecordHeader(record_name, 0, 0, PayloadSize);
|
||||
|
||||
// std::cout << "W sizeof(sobj)" <<sizeof(sobj)<<std::endl;
|
||||
// std::cout << "W Gsites " <<field._grid->_gsites<<std::endl;
|
||||
// std::cout << "W Payload expected " <<PayloadSize<<std::endl;
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// NB: FILE and iostream are jointly writing disjoint sequences in the
|
||||
// the same file through different file handles (integer units).
|
||||
//
|
||||
// These are both buffered, so why I think this code is right is as follows.
|
||||
//
|
||||
// i) write record header to FILE *File, telegraphing the size.
|
||||
// ii) ftello reads the offset from FILE *File .
|
||||
// i) write record header to FILE *File, telegraphing the size; flush
|
||||
// ii) ftello reads the offset from FILE *File .
|
||||
// iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
|
||||
// Closes iostream and flushes.
|
||||
// iv) fseek on FILE * to end of this disjoint section.
|
||||
// v) Continue writing scidac record.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
uint64_t offset = ftello(File);
|
||||
// std::cout << " Writing to offset "<<offset << std::endl;
|
||||
|
||||
GridBase *grid = field._grid;
|
||||
assert(boss_node == field._grid->IsBoss() );
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Create record header
|
||||
////////////////////////////////////////////
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
int err;
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
uint64_t PayloadSize = sizeof(sobj) * grid->_gsites;
|
||||
if ( boss_node ) {
|
||||
createLimeRecordHeader(record_name, 0, 0, PayloadSize);
|
||||
fflush(File);
|
||||
}
|
||||
|
||||
// std::cout << "W sizeof(sobj)" <<sizeof(sobj)<<std::endl;
|
||||
// std::cout << "W Gsites " <<field._grid->_gsites<<std::endl;
|
||||
// std::cout << "W Payload expected " <<PayloadSize<<std::endl;
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Check all nodes agree on file position
|
||||
////////////////////////////////////////////////
|
||||
uint64_t offset1;
|
||||
if ( boss_node ) {
|
||||
offset1 = ftello(File);
|
||||
}
|
||||
grid->Broadcast(0,(void *)&offset1,sizeof(offset1));
|
||||
|
||||
///////////////////////////////////////////
|
||||
// The above is collective. Write by other means into the binary record
|
||||
///////////////////////////////////////////
|
||||
std::string format = getFormatString<vobj>();
|
||||
BinarySimpleMunger<sobj,sobj> munge;
|
||||
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
// fseek(File,0,SEEK_END); offset = ftello(File);std::cout << " offset now "<<offset << std::endl;
|
||||
err=limeWriterCloseRecord(LimeW); assert(err>=0);
|
||||
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Wind forward and close the record
|
||||
///////////////////////////////////////////
|
||||
if ( boss_node ) {
|
||||
fseek(File,0,SEEK_END);
|
||||
uint64_t offset2 = ftello(File); // std::cout << " now at offset "<<offset2 << std::endl;
|
||||
assert( (offset2-offset1) == PayloadSize);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Check MPI-2 I/O did what we expect to file
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
if ( boss_node ) {
|
||||
err=limeWriterCloseRecord(LimeW); assert(err>=0);
|
||||
}
|
||||
////////////////////////////////////////
|
||||
// Write checksum element, propagaing forward from the BinaryIO
|
||||
// Always pair a checksum with a binary object, and close message
|
||||
@ -380,26 +446,32 @@ class GridLimeWriter : public BinaryIO {
|
||||
std::stringstream streamb; streamb << std::hex << scidac_csumb;
|
||||
checksum.suma= streama.str();
|
||||
checksum.sumb= streamb.str();
|
||||
// std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl;
|
||||
writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
|
||||
if ( boss_node ) {
|
||||
writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class ScidacWriter : public GridLimeWriter {
|
||||
public:
|
||||
|
||||
template<class SerialisableUserFile>
|
||||
void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
|
||||
{
|
||||
scidacFile _scidacFile(grid);
|
||||
writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
|
||||
writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
|
||||
}
|
||||
ScidacWriter(bool isboss =true ) : GridLimeWriter(isboss) { };
|
||||
|
||||
template<class SerialisableUserFile>
|
||||
void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
|
||||
{
|
||||
scidacFile _scidacFile(grid);
|
||||
if ( this->boss_node ) {
|
||||
writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
|
||||
writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
|
||||
}
|
||||
}
|
||||
////////////////////////////////////////////////
|
||||
// Write generic lattice field in scidac format
|
||||
////////////////////////////////////////////////
|
||||
template <class vobj, class userRecord>
|
||||
void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord)
|
||||
void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord,
|
||||
const unsigned int recordScientificPrec = 0)
|
||||
{
|
||||
GridBase * grid = field._grid;
|
||||
|
||||
@ -415,9 +487,12 @@ class ScidacWriter : public GridLimeWriter {
|
||||
//////////////////////////////////////////////
|
||||
// Fill the Lime file record by record
|
||||
//////////////////////////////////////////////
|
||||
writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
|
||||
writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
|
||||
writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
|
||||
if ( this->boss_node ) {
|
||||
writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
|
||||
writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML), recordScientificPrec);
|
||||
writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
|
||||
}
|
||||
// Collective call
|
||||
writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
|
||||
}
|
||||
};
|
||||
@ -484,6 +559,8 @@ class ScidacReader : public GridLimeReader {
|
||||
|
||||
class IldgWriter : public ScidacWriter {
|
||||
public:
|
||||
|
||||
IldgWriter(bool isboss) : ScidacWriter(isboss) {};
|
||||
|
||||
///////////////////////////////////
|
||||
// A little helper
|
||||
@ -568,7 +645,6 @@ class IldgWriter : public ScidacWriter {
|
||||
writeLimeIldgLFN(header.ildg_lfn); // rec
|
||||
writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
|
||||
// limeDestroyWriter(LimeW);
|
||||
fclose(File);
|
||||
}
|
||||
};
|
||||
|
||||
@ -644,9 +720,11 @@ class IldgReader : public GridLimeReader {
|
||||
|
||||
//////////////////////////////////
|
||||
// ILDG format record
|
||||
|
||||
std::string xmlstring(&xmlc[0]);
|
||||
if ( !strncmp(limeReaderType(LimeR), ILDG_FORMAT,strlen(ILDG_FORMAT)) ) {
|
||||
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"ildgFormat",ildgFormat_);
|
||||
|
||||
if ( ildgFormat_.precision == 64 ) format = std::string("IEEE64BIG");
|
||||
@ -661,13 +739,13 @@ class IldgReader : public GridLimeReader {
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), ILDG_DATA_LFN,strlen(ILDG_DATA_LFN)) ) {
|
||||
FieldMetaData_.ildg_lfn = std::string(&xmlc[0]);
|
||||
FieldMetaData_.ildg_lfn = xmlstring;
|
||||
found_ildgLFN = 1;
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), GRID_FORMAT,strlen(ILDG_FORMAT)) ) {
|
||||
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"FieldMetaData",FieldMetaData_);
|
||||
|
||||
format = FieldMetaData_.floating_point;
|
||||
@ -681,18 +759,17 @@ class IldgReader : public GridLimeReader {
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), SCIDAC_RECORD_XML,strlen(SCIDAC_RECORD_XML)) ) {
|
||||
std::string xmls(&xmlc[0]);
|
||||
// is it a USQCD info field
|
||||
if ( xmls.find(std::string("usqcdInfo")) != std::string::npos ) {
|
||||
if ( xmlstring.find(std::string("usqcdInfo")) != std::string::npos ) {
|
||||
// std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl;
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"usqcdInfo",usqcdInfo_);
|
||||
found_usqcdInfo = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), SCIDAC_CHECKSUM,strlen(SCIDAC_CHECKSUM)) ) {
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"scidacChecksum",scidacChecksum_);
|
||||
found_scidacChecksum = 1;
|
||||
}
|
@ -136,8 +136,9 @@ struct scidacRecord : Serializable {
|
||||
int, typesize,
|
||||
int, datacount);
|
||||
|
||||
scidacRecord() { version =1.0; }
|
||||
|
||||
scidacRecord()
|
||||
: version(1.0), recordtype(0), colors(0), spins(0), typesize(0), datacount(0)
|
||||
{}
|
||||
};
|
||||
|
||||
////////////////////////
|
@ -81,18 +81,16 @@ namespace Grid {
|
||||
std::string, creation_date,
|
||||
std::string, archive_date,
|
||||
std::string, floating_point);
|
||||
FieldMetaData(void) {
|
||||
nd=4;
|
||||
dimension.resize(4);
|
||||
boundary.resize(4);
|
||||
scidac_checksuma=0;
|
||||
scidac_checksumb=0;
|
||||
checksum=0;
|
||||
}
|
||||
// WARNING: non-initialised values might lead to twisted parallel IO
|
||||
// issues, std::string are fine because they initliase to size 0
|
||||
// as per C++ standard.
|
||||
FieldMetaData(void)
|
||||
: nd(4), dimension(4,0), boundary(4, ""), data_start(0),
|
||||
link_trace(0.), plaquette(0.), checksum(0),
|
||||
scidac_checksuma(0), scidac_checksumb(0), sequence_number(0)
|
||||
{}
|
||||
};
|
||||
|
||||
|
||||
|
||||
namespace QCD {
|
||||
|
||||
using namespace Grid;
|
@ -57,7 +57,7 @@ namespace Grid {
|
||||
// for the header-reader
|
||||
static inline int readHeader(std::string file,GridBase *grid, FieldMetaData &field)
|
||||
{
|
||||
int offset=0;
|
||||
uint64_t offset=0;
|
||||
std::map<std::string,std::string> header;
|
||||
std::string line;
|
||||
|
||||
@ -139,7 +139,7 @@ namespace Grid {
|
||||
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
||||
|
||||
GridBase *grid = Umu._grid;
|
||||
int offset = readHeader(file,Umu._grid,header);
|
||||
uint64_t offset = readHeader(file,Umu._grid,header);
|
||||
|
||||
FieldMetaData clone(header);
|
||||
|
||||
@ -236,21 +236,25 @@ namespace Grid {
|
||||
GaugeStatistics(Umu,header);
|
||||
MachineCharacteristics(header);
|
||||
|
||||
int offset;
|
||||
|
||||
truncate(file);
|
||||
uint64_t offset;
|
||||
|
||||
// Sod it -- always write 3x3 double
|
||||
header.floating_point = std::string("IEEE64BIG");
|
||||
header.data_type = std::string("4D_SU3_GAUGE_3x3");
|
||||
GaugeSimpleUnmunger<fobj3D,sobj> munge;
|
||||
offset = writeHeader(header,file);
|
||||
if ( grid->IsBoss() ) {
|
||||
truncate(file);
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
grid->Broadcast(0,(void *)&offset,sizeof(offset));
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
header.checksum = nersc_csum;
|
||||
writeHeader(header,file);
|
||||
if ( grid->IsBoss() ) {
|
||||
writeHeader(header,file);
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum "
|
||||
<<std::hex<<header.checksum
|
||||
@ -278,7 +282,7 @@ namespace Grid {
|
||||
header.plaquette=0.0;
|
||||
MachineCharacteristics(header);
|
||||
|
||||
int offset;
|
||||
uint64_t offset;
|
||||
|
||||
#ifdef RNG_RANLUX
|
||||
header.floating_point = std::string("UINT64");
|
||||
@ -293,12 +297,18 @@ namespace Grid {
|
||||
header.data_type = std::string("SITMO");
|
||||
#endif
|
||||
|
||||
truncate(file);
|
||||
offset = writeHeader(header,file);
|
||||
if ( grid->IsBoss() ) {
|
||||
truncate(file);
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
grid->Broadcast(0,(void *)&offset,sizeof(offset));
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
header.checksum = nersc_csum;
|
||||
offset = writeHeader(header,file);
|
||||
if ( grid->IsBoss() ) {
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage
|
||||
<<"Written NERSC RNG STATE "<<file<< " checksum "
|
||||
@ -313,7 +323,7 @@ namespace Grid {
|
||||
|
||||
GridBase *grid = parallel._grid;
|
||||
|
||||
int offset = readHeader(file,grid,header);
|
||||
uint64_t offset = readHeader(file,grid,header);
|
||||
|
||||
FieldMetaData clone(header);
|
||||
|
@ -49,7 +49,8 @@ inline double usecond(void) {
|
||||
|
||||
typedef std::chrono::system_clock GridClock;
|
||||
typedef std::chrono::time_point<GridClock> GridTimePoint;
|
||||
typedef std::chrono::milliseconds GridTime;
|
||||
typedef std::chrono::milliseconds GridMillisecs;
|
||||
typedef std::chrono::microseconds GridTime;
|
||||
typedef std::chrono::microseconds GridUsecs;
|
||||
|
||||
inline std::ostream& operator<< (std::ostream & stream, const std::chrono::milliseconds & time)
|
||||
@ -57,6 +58,11 @@ inline std::ostream& operator<< (std::ostream & stream, const std::chrono::milli
|
||||
stream << time.count()<<" ms";
|
||||
return stream;
|
||||
}
|
||||
inline std::ostream& operator<< (std::ostream & stream, const std::chrono::microseconds & time)
|
||||
{
|
||||
stream << time.count()<<" usec";
|
||||
return stream;
|
||||
}
|
||||
|
||||
class GridStopWatch {
|
||||
private:
|
||||
@ -96,6 +102,9 @@ public:
|
||||
assert(running == false);
|
||||
return (uint64_t) accumulator.count();
|
||||
}
|
||||
bool isRunning(void){
|
||||
return running;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -1,7 +1,7 @@
|
||||
/**
|
||||
* pugixml parser - version 1.6
|
||||
* pugixml parser - version 1.9
|
||||
* --------------------------------------------------------
|
||||
* Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Report bugs and download new versions at http://pugixml.org/
|
||||
*
|
||||
* This library is distributed under the MIT License. See notice at the end
|
||||
@ -17,6 +17,9 @@
|
||||
// Uncomment this to enable wchar_t mode
|
||||
// #define PUGIXML_WCHAR_MODE
|
||||
|
||||
// Uncomment this to enable compact mode
|
||||
// #define PUGIXML_COMPACT
|
||||
|
||||
// Uncomment this to disable XPath
|
||||
// #define PUGIXML_NO_XPATH
|
||||
|
||||
@ -46,7 +49,7 @@
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Copyright (c) 2006-2015 Arseny Kapoulkine
|
||||
* Copyright (c) 2006-2018 Arseny Kapoulkine
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
@ -59,7 +62,7 @@
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
File diff suppressed because it is too large
Load Diff
@ -1,7 +1,7 @@
|
||||
/**
|
||||
* pugixml parser - version 1.6
|
||||
* pugixml parser - version 1.9
|
||||
* --------------------------------------------------------
|
||||
* Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Report bugs and download new versions at http://pugixml.org/
|
||||
*
|
||||
* This library is distributed under the MIT License. See notice at the end
|
||||
@ -13,7 +13,7 @@
|
||||
|
||||
#ifndef PUGIXML_VERSION
|
||||
// Define version macro; evaluates to major * 100 + minor so that it's safe to use in less-than comparisons
|
||||
# define PUGIXML_VERSION 160
|
||||
# define PUGIXML_VERSION 190
|
||||
#endif
|
||||
|
||||
// Include user configuration file (this can define various configuration macros)
|
||||
@ -72,6 +72,44 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// If the platform is known to have move semantics support, compile move ctor/operator implementation
|
||||
#ifndef PUGIXML_HAS_MOVE
|
||||
# if __cplusplus >= 201103
|
||||
# define PUGIXML_HAS_MOVE
|
||||
# elif defined(_MSC_VER) && _MSC_VER >= 1600
|
||||
# define PUGIXML_HAS_MOVE
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// If C++ is 2011 or higher, add 'noexcept' specifiers
|
||||
#ifndef PUGIXML_NOEXCEPT
|
||||
# if __cplusplus >= 201103
|
||||
# define PUGIXML_NOEXCEPT noexcept
|
||||
# elif defined(_MSC_VER) && _MSC_VER >= 1900
|
||||
# define PUGIXML_NOEXCEPT noexcept
|
||||
# else
|
||||
# define PUGIXML_NOEXCEPT
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// Some functions can not be noexcept in compact mode
|
||||
#ifdef PUGIXML_COMPACT
|
||||
# define PUGIXML_NOEXCEPT_IF_NOT_COMPACT
|
||||
#else
|
||||
# define PUGIXML_NOEXCEPT_IF_NOT_COMPACT PUGIXML_NOEXCEPT
|
||||
#endif
|
||||
|
||||
// If C++ is 2011 or higher, add 'override' qualifiers
|
||||
#ifndef PUGIXML_OVERRIDE
|
||||
# if __cplusplus >= 201103
|
||||
# define PUGIXML_OVERRIDE override
|
||||
# elif defined(_MSC_VER) && _MSC_VER >= 1700
|
||||
# define PUGIXML_OVERRIDE override
|
||||
# else
|
||||
# define PUGIXML_OVERRIDE
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// Character interface macros
|
||||
#ifdef PUGIXML_WCHAR_MODE
|
||||
# define PUGIXML_TEXT(t) L ## t
|
||||
@ -133,13 +171,13 @@ namespace pugi
|
||||
|
||||
// This flag determines if EOL characters are normalized (converted to #xA) during parsing. This flag is on by default.
|
||||
const unsigned int parse_eol = 0x0020;
|
||||
|
||||
|
||||
// This flag determines if attribute values are normalized using CDATA normalization rules during parsing. This flag is on by default.
|
||||
const unsigned int parse_wconv_attribute = 0x0040;
|
||||
|
||||
// This flag determines if attribute values are normalized using NMTOKENS normalization rules during parsing. This flag is off by default.
|
||||
const unsigned int parse_wnorm_attribute = 0x0080;
|
||||
|
||||
|
||||
// This flag determines if document declaration (node_declaration) is added to the DOM tree. This flag is off by default.
|
||||
const unsigned int parse_declaration = 0x0100;
|
||||
|
||||
@ -158,6 +196,11 @@ namespace pugi
|
||||
// is a valid document. This flag is off by default.
|
||||
const unsigned int parse_fragment = 0x1000;
|
||||
|
||||
// This flag determines if plain character data is be stored in the parent element's value. This significantly changes the structure of
|
||||
// the document; this flag is only recommended for parsing documents with many PCDATA nodes in memory-constrained environments.
|
||||
// This flag is off by default.
|
||||
const unsigned int parse_embed_pcdata = 0x2000;
|
||||
|
||||
// The default parsing mode.
|
||||
// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
|
||||
// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
|
||||
@ -184,16 +227,16 @@ namespace pugi
|
||||
};
|
||||
|
||||
// Formatting flags
|
||||
|
||||
|
||||
// Indent the nodes that are written to output stream with as many indentation strings as deep the node is in DOM tree. This flag is on by default.
|
||||
const unsigned int format_indent = 0x01;
|
||||
|
||||
|
||||
// Write encoding-specific BOM to the output stream. This flag is off by default.
|
||||
const unsigned int format_write_bom = 0x02;
|
||||
|
||||
// Use raw output mode (no indentation and no line breaks are written). This flag is off by default.
|
||||
const unsigned int format_raw = 0x04;
|
||||
|
||||
|
||||
// Omit default XML declaration even if there is no declaration in the document. This flag is off by default.
|
||||
const unsigned int format_no_declaration = 0x08;
|
||||
|
||||
@ -206,6 +249,9 @@ namespace pugi
|
||||
// Write every attribute on a new line with appropriate indentation. This flag is off by default.
|
||||
const unsigned int format_indent_attributes = 0x40;
|
||||
|
||||
// Don't output empty element tags, instead writing an explicit start and end tag even if there are no children. This flag is off by default.
|
||||
const unsigned int format_no_empty_element_tags = 0x80;
|
||||
|
||||
// The default set of formatting flags.
|
||||
// Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none.
|
||||
const unsigned int format_default = format_indent;
|
||||
@ -225,7 +271,7 @@ namespace pugi
|
||||
class xml_node;
|
||||
|
||||
class xml_text;
|
||||
|
||||
|
||||
#ifndef PUGIXML_NO_XPATH
|
||||
class xpath_node;
|
||||
class xpath_node_set;
|
||||
@ -268,7 +314,7 @@ namespace pugi
|
||||
// Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio
|
||||
xml_writer_file(void* file);
|
||||
|
||||
virtual void write(const void* data, size_t size);
|
||||
virtual void write(const void* data, size_t size) PUGIXML_OVERRIDE;
|
||||
|
||||
private:
|
||||
void* file;
|
||||
@ -283,7 +329,7 @@ namespace pugi
|
||||
xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream);
|
||||
xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream);
|
||||
|
||||
virtual void write(const void* data, size_t size);
|
||||
virtual void write(const void* data, size_t size) PUGIXML_OVERRIDE;
|
||||
|
||||
private:
|
||||
std::basic_ostream<char, std::char_traits<char> >* narrow_stream;
|
||||
@ -299,13 +345,13 @@ namespace pugi
|
||||
|
||||
private:
|
||||
xml_attribute_struct* _attr;
|
||||
|
||||
|
||||
typedef void (*unspecified_bool_type)(xml_attribute***);
|
||||
|
||||
public:
|
||||
// Default constructor. Constructs an empty attribute.
|
||||
xml_attribute();
|
||||
|
||||
|
||||
// Constructs attribute from internal pointer
|
||||
explicit xml_attribute(xml_attribute_struct* attr);
|
||||
|
||||
@ -354,6 +400,8 @@ namespace pugi
|
||||
// Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
|
||||
bool set_value(int rhs);
|
||||
bool set_value(unsigned int rhs);
|
||||
bool set_value(long rhs);
|
||||
bool set_value(unsigned long rhs);
|
||||
bool set_value(double rhs);
|
||||
bool set_value(float rhs);
|
||||
bool set_value(bool rhs);
|
||||
@ -367,6 +415,8 @@ namespace pugi
|
||||
xml_attribute& operator=(const char_t* rhs);
|
||||
xml_attribute& operator=(int rhs);
|
||||
xml_attribute& operator=(unsigned int rhs);
|
||||
xml_attribute& operator=(long rhs);
|
||||
xml_attribute& operator=(unsigned long rhs);
|
||||
xml_attribute& operator=(double rhs);
|
||||
xml_attribute& operator=(float rhs);
|
||||
xml_attribute& operator=(bool rhs);
|
||||
@ -417,7 +467,7 @@ namespace pugi
|
||||
|
||||
// Borland C++ workaround
|
||||
bool operator!() const;
|
||||
|
||||
|
||||
// Comparison operators (compares wrapped node pointers)
|
||||
bool operator==(const xml_node& r) const;
|
||||
bool operator!=(const xml_node& r) const;
|
||||
@ -438,7 +488,7 @@ namespace pugi
|
||||
// Get node value, or "" if node is empty or it has no value
|
||||
// Note: For <node>text</node> node.value() does not return "text"! Use child_value() or text() methods to access text inside nodes.
|
||||
const char_t* value() const;
|
||||
|
||||
|
||||
// Get attribute list
|
||||
xml_attribute first_attribute() const;
|
||||
xml_attribute last_attribute() const;
|
||||
@ -450,7 +500,7 @@ namespace pugi
|
||||
// Get next/previous sibling in the children list of the parent node
|
||||
xml_node next_sibling() const;
|
||||
xml_node previous_sibling() const;
|
||||
|
||||
|
||||
// Get parent node
|
||||
xml_node parent() const;
|
||||
|
||||
@ -478,7 +528,7 @@ namespace pugi
|
||||
// Set node name/value (returns false if node is empty, there is not enough memory, or node can not have name/value)
|
||||
bool set_name(const char_t* rhs);
|
||||
bool set_value(const char_t* rhs);
|
||||
|
||||
|
||||
// Add attribute with specified name. Returns added attribute, or empty attribute on errors.
|
||||
xml_attribute append_attribute(const char_t* name);
|
||||
xml_attribute prepend_attribute(const char_t* name);
|
||||
@ -532,11 +582,11 @@ namespace pugi
|
||||
template <typename Predicate> xml_attribute find_attribute(Predicate pred) const
|
||||
{
|
||||
if (!_root) return xml_attribute();
|
||||
|
||||
|
||||
for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute())
|
||||
if (pred(attrib))
|
||||
return attrib;
|
||||
|
||||
|
||||
return xml_attribute();
|
||||
}
|
||||
|
||||
@ -544,11 +594,11 @@ namespace pugi
|
||||
template <typename Predicate> xml_node find_child(Predicate pred) const
|
||||
{
|
||||
if (!_root) return xml_node();
|
||||
|
||||
|
||||
for (xml_node node = first_child(); node; node = node.next_sibling())
|
||||
if (pred(node))
|
||||
return node;
|
||||
|
||||
|
||||
return xml_node();
|
||||
}
|
||||
|
||||
@ -558,7 +608,7 @@ namespace pugi
|
||||
if (!_root) return xml_node();
|
||||
|
||||
xml_node cur = first_child();
|
||||
|
||||
|
||||
while (cur._root && cur._root != _root)
|
||||
{
|
||||
if (pred(cur)) return cur;
|
||||
@ -590,7 +640,7 @@ namespace pugi
|
||||
|
||||
// Recursively traverse subtree with xml_tree_walker
|
||||
bool traverse(xml_tree_walker& walker);
|
||||
|
||||
|
||||
#ifndef PUGIXML_NO_XPATH
|
||||
// Select single node by evaluating XPath query. Returns first node from the resulting node set.
|
||||
xpath_node select_node(const char_t* query, xpath_variable_set* variables = 0) const;
|
||||
@ -601,11 +651,11 @@ namespace pugi
|
||||
xpath_node_set select_nodes(const xpath_query& query) const;
|
||||
|
||||
// (deprecated: use select_node instead) Select single node by evaluating XPath query.
|
||||
xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
|
||||
xpath_node select_single_node(const xpath_query& query) const;
|
||||
PUGIXML_DEPRECATED xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
|
||||
PUGIXML_DEPRECATED xpath_node select_single_node(const xpath_query& query) const;
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
// Print subtree using a writer object
|
||||
void print(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
|
||||
|
||||
@ -701,6 +751,8 @@ namespace pugi
|
||||
// Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
|
||||
bool set(int rhs);
|
||||
bool set(unsigned int rhs);
|
||||
bool set(long rhs);
|
||||
bool set(unsigned long rhs);
|
||||
bool set(double rhs);
|
||||
bool set(float rhs);
|
||||
bool set(bool rhs);
|
||||
@ -714,6 +766,8 @@ namespace pugi
|
||||
xml_text& operator=(const char_t* rhs);
|
||||
xml_text& operator=(int rhs);
|
||||
xml_text& operator=(unsigned int rhs);
|
||||
xml_text& operator=(long rhs);
|
||||
xml_text& operator=(unsigned long rhs);
|
||||
xml_text& operator=(double rhs);
|
||||
xml_text& operator=(float rhs);
|
||||
xml_text& operator=(bool rhs);
|
||||
@ -867,11 +921,11 @@ namespace pugi
|
||||
|
||||
private:
|
||||
int _depth;
|
||||
|
||||
|
||||
protected:
|
||||
// Get current traversal depth
|
||||
int depth() const;
|
||||
|
||||
|
||||
public:
|
||||
xml_tree_walker();
|
||||
virtual ~xml_tree_walker();
|
||||
@ -942,13 +996,14 @@ namespace pugi
|
||||
char_t* _buffer;
|
||||
|
||||
char _memory[192];
|
||||
|
||||
|
||||
// Non-copyable semantics
|
||||
xml_document(const xml_document&);
|
||||
const xml_document& operator=(const xml_document&);
|
||||
xml_document& operator=(const xml_document&);
|
||||
|
||||
void create();
|
||||
void destroy();
|
||||
void _create();
|
||||
void _destroy();
|
||||
void _move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
|
||||
|
||||
public:
|
||||
// Default constructor, makes empty document
|
||||
@ -957,6 +1012,12 @@ namespace pugi
|
||||
// Destructor, invalidates all node/attribute handles to this document
|
||||
~xml_document();
|
||||
|
||||
#ifdef PUGIXML_HAS_MOVE
|
||||
// Move semantics support
|
||||
xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
|
||||
xml_document& operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
|
||||
#endif
|
||||
|
||||
// Removes all nodes, leaving the empty document
|
||||
void reset();
|
||||
|
||||
@ -970,7 +1031,7 @@ namespace pugi
|
||||
#endif
|
||||
|
||||
// (deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied.
|
||||
xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
|
||||
PUGIXML_DEPRECATED xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
|
||||
|
||||
// Load document from zero-terminated string. No encoding conversions are applied.
|
||||
xml_parse_result load_string(const char_t* contents, unsigned int options = parse_default);
|
||||
@ -1051,7 +1112,7 @@ namespace pugi
|
||||
// Non-copyable semantics
|
||||
xpath_variable(const xpath_variable&);
|
||||
xpath_variable& operator=(const xpath_variable&);
|
||||
|
||||
|
||||
public:
|
||||
// Get variable name
|
||||
const char_t* name() const;
|
||||
@ -1095,10 +1156,10 @@ namespace pugi
|
||||
xpath_variable_set(const xpath_variable_set& rhs);
|
||||
xpath_variable_set& operator=(const xpath_variable_set& rhs);
|
||||
|
||||
#if __cplusplus >= 201103
|
||||
#ifdef PUGIXML_HAS_MOVE
|
||||
// Move semantics support
|
||||
xpath_variable_set(xpath_variable_set&& rhs);
|
||||
xpath_variable_set& operator=(xpath_variable_set&& rhs);
|
||||
xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT;
|
||||
xpath_variable_set& operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT;
|
||||
#endif
|
||||
|
||||
// Add a new variable or get the existing one, if the types match
|
||||
@ -1139,29 +1200,29 @@ namespace pugi
|
||||
// Destructor
|
||||
~xpath_query();
|
||||
|
||||
#if __cplusplus >= 201103
|
||||
#ifdef PUGIXML_HAS_MOVE
|
||||
// Move semantics support
|
||||
xpath_query(xpath_query&& rhs);
|
||||
xpath_query& operator=(xpath_query&& rhs);
|
||||
xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT;
|
||||
xpath_query& operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT;
|
||||
#endif
|
||||
|
||||
// Get query expression return type
|
||||
xpath_value_type return_type() const;
|
||||
|
||||
|
||||
// Evaluate expression as boolean value in the specified context; performs type conversion if necessary.
|
||||
// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
|
||||
bool evaluate_boolean(const xpath_node& n) const;
|
||||
|
||||
|
||||
// Evaluate expression as double value in the specified context; performs type conversion if necessary.
|
||||
// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
|
||||
double evaluate_number(const xpath_node& n) const;
|
||||
|
||||
|
||||
#ifndef PUGIXML_NO_STL
|
||||
// Evaluate expression as string value in the specified context; performs type conversion if necessary.
|
||||
// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
|
||||
string_t evaluate_string(const xpath_node& n) const;
|
||||
#endif
|
||||
|
||||
|
||||
// Evaluate expression as string value in the specified context; performs type conversion if necessary.
|
||||
// At most capacity characters are written to the destination buffer, full result size is returned (includes terminating zero).
|
||||
// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
|
||||
@ -1188,7 +1249,7 @@ namespace pugi
|
||||
// Borland C++ workaround
|
||||
bool operator!() const;
|
||||
};
|
||||
|
||||
|
||||
#ifndef PUGIXML_NO_EXCEPTIONS
|
||||
// XPath exception class
|
||||
class PUGIXML_CLASS xpath_exception: public std::exception
|
||||
@ -1201,26 +1262,26 @@ namespace pugi
|
||||
explicit xpath_exception(const xpath_parse_result& result);
|
||||
|
||||
// Get error message
|
||||
virtual const char* what() const throw();
|
||||
virtual const char* what() const throw() PUGIXML_OVERRIDE;
|
||||
|
||||
// Get parse result
|
||||
const xpath_parse_result& result() const;
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
// XPath node class (either xml_node or xml_attribute)
|
||||
class PUGIXML_CLASS xpath_node
|
||||
{
|
||||
private:
|
||||
xml_node _node;
|
||||
xml_attribute _attribute;
|
||||
|
||||
|
||||
typedef void (*unspecified_bool_type)(xpath_node***);
|
||||
|
||||
public:
|
||||
// Default constructor; constructs empty XPath node
|
||||
xpath_node();
|
||||
|
||||
|
||||
// Construct XPath node from XML node/attribute
|
||||
xpath_node(const xml_node& node);
|
||||
xpath_node(const xml_attribute& attribute, const xml_node& parent);
|
||||
@ -1228,13 +1289,13 @@ namespace pugi
|
||||
// Get node/attribute, if any
|
||||
xml_node node() const;
|
||||
xml_attribute attribute() const;
|
||||
|
||||
|
||||
// Get parent of contained node/attribute
|
||||
xml_node parent() const;
|
||||
|
||||
// Safe bool conversion operator
|
||||
operator unspecified_bool_type() const;
|
||||
|
||||
|
||||
// Borland C++ workaround
|
||||
bool operator!() const;
|
||||
|
||||
@ -1260,13 +1321,13 @@ namespace pugi
|
||||
type_sorted, // Sorted by document order (ascending)
|
||||
type_sorted_reverse // Sorted by document order (descending)
|
||||
};
|
||||
|
||||
|
||||
// Constant iterator type
|
||||
typedef const xpath_node* const_iterator;
|
||||
|
||||
// We define non-constant iterator to be the same as constant iterator so that various generic algorithms (i.e. boost foreach) work
|
||||
typedef const xpath_node* iterator;
|
||||
|
||||
|
||||
// Default constructor. Constructs empty set.
|
||||
xpath_node_set();
|
||||
|
||||
@ -1275,49 +1336,49 @@ namespace pugi
|
||||
|
||||
// Destructor
|
||||
~xpath_node_set();
|
||||
|
||||
|
||||
// Copy constructor/assignment operator
|
||||
xpath_node_set(const xpath_node_set& ns);
|
||||
xpath_node_set& operator=(const xpath_node_set& ns);
|
||||
|
||||
#if __cplusplus >= 201103
|
||||
#ifdef PUGIXML_HAS_MOVE
|
||||
// Move semantics support
|
||||
xpath_node_set(xpath_node_set&& rhs);
|
||||
xpath_node_set& operator=(xpath_node_set&& rhs);
|
||||
xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT;
|
||||
xpath_node_set& operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT;
|
||||
#endif
|
||||
|
||||
// Get collection type
|
||||
type_t type() const;
|
||||
|
||||
|
||||
// Get collection size
|
||||
size_t size() const;
|
||||
|
||||
// Indexing operator
|
||||
const xpath_node& operator[](size_t index) const;
|
||||
|
||||
|
||||
// Collection iterators
|
||||
const_iterator begin() const;
|
||||
const_iterator end() const;
|
||||
|
||||
// Sort the collection in ascending/descending order by document order
|
||||
void sort(bool reverse = false);
|
||||
|
||||
|
||||
// Get first node in the collection by document order
|
||||
xpath_node first() const;
|
||||
|
||||
|
||||
// Check if collection is empty
|
||||
bool empty() const;
|
||||
|
||||
|
||||
private:
|
||||
type_t _type;
|
||||
|
||||
|
||||
xpath_node _storage;
|
||||
|
||||
|
||||
xpath_node* _begin;
|
||||
xpath_node* _end;
|
||||
|
||||
void _assign(const_iterator begin, const_iterator end, type_t type);
|
||||
void _move(xpath_node_set& rhs);
|
||||
void _move(xpath_node_set& rhs) PUGIXML_NOEXCEPT;
|
||||
};
|
||||
#endif
|
||||
|
||||
@ -1325,7 +1386,7 @@ namespace pugi
|
||||
// Convert wide string to UTF8
|
||||
std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const wchar_t* str);
|
||||
std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >& str);
|
||||
|
||||
|
||||
// Convert UTF8 to wide string
|
||||
std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const char* str);
|
||||
std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const std::basic_string<char, std::char_traits<char>, std::allocator<char> >& str);
|
||||
@ -1333,13 +1394,13 @@ namespace pugi
|
||||
|
||||
// Memory allocation function interface; returns pointer to allocated memory or NULL on failure
|
||||
typedef void* (*allocation_function)(size_t size);
|
||||
|
||||
|
||||
// Memory deallocation function interface
|
||||
typedef void (*deallocation_function)(void* ptr);
|
||||
|
||||
// Override default memory management functions. All subsequent allocations/deallocations will be performed via supplied functions.
|
||||
void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate);
|
||||
|
||||
|
||||
// Get current memory management functions
|
||||
allocation_function PUGIXML_FUNCTION get_memory_allocation_function();
|
||||
deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function();
|
||||
@ -1375,7 +1436,7 @@ namespace std
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Copyright (c) 2006-2015 Arseny Kapoulkine
|
||||
* Copyright (c) 2006-2018 Arseny Kapoulkine
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
@ -1388,7 +1449,7 @@ namespace std
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
@ -1,6 +1,6 @@
|
||||
pugixml 1.6 - an XML processing library
|
||||
pugixml 1.9 - an XML processing library
|
||||
|
||||
Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
Report bugs and download new versions at http://pugixml.org/
|
||||
|
||||
This is the distribution of pugixml, which is a C++ XML processing library,
|
||||
@ -28,7 +28,7 @@ The distribution contains the following folders:
|
||||
|
||||
This library is distributed under the MIT License:
|
||||
|
||||
Copyright (c) 2006-2015 Arseny Kapoulkine
|
||||
Copyright (c) 2006-2018 Arseny Kapoulkine
|
||||
|
||||
Permission is hereby granted, free of charge, to any person
|
||||
obtaining a copy of this software and associated documentation
|
@ -90,17 +90,20 @@ namespace QCD {
|
||||
// That probably makes for GridRedBlack4dCartesian grid.
|
||||
|
||||
// s,sp,c,spc,lc
|
||||
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
|
||||
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
|
||||
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
|
||||
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
|
||||
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
|
||||
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
|
||||
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
|
||||
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
|
||||
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
|
||||
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
|
||||
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
|
||||
|
||||
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
|
||||
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
|
||||
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
|
||||
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
|
||||
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
|
||||
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
|
||||
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
|
||||
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
|
||||
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
|
||||
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
|
||||
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
|
||||
template<typename vtype> using iSpinColourSpinColourMatrix = iScalar<iMatrix<iMatrix<iMatrix<iMatrix<vtype, Nc>, Ns>, Nc>, Ns> >;
|
||||
|
||||
|
||||
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >;
|
||||
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
|
||||
@ -127,10 +130,28 @@ namespace QCD {
|
||||
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
|
||||
typedef iSpinColourMatrix<ComplexF > SpinColourMatrixF;
|
||||
typedef iSpinColourMatrix<ComplexD > SpinColourMatrixD;
|
||||
|
||||
|
||||
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
|
||||
typedef iSpinColourMatrix<vComplexF> vSpinColourMatrixF;
|
||||
typedef iSpinColourMatrix<vComplexD> vSpinColourMatrixD;
|
||||
|
||||
// SpinColourSpinColour matrix
|
||||
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
|
||||
|
||||
typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
|
||||
|
||||
// SpinColourSpinColour matrix
|
||||
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
|
||||
|
||||
typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
|
||||
|
||||
// LorentzColour
|
||||
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
|
||||
@ -229,6 +250,9 @@ namespace QCD {
|
||||
typedef Lattice<vSpinColourMatrixF> LatticeSpinColourMatrixF;
|
||||
typedef Lattice<vSpinColourMatrixD> LatticeSpinColourMatrixD;
|
||||
|
||||
typedef Lattice<vSpinColourSpinColourMatrix> LatticeSpinColourSpinColourMatrix;
|
||||
typedef Lattice<vSpinColourSpinColourMatrixF> LatticeSpinColourSpinColourMatrixF;
|
||||
typedef Lattice<vSpinColourSpinColourMatrixD> LatticeSpinColourSpinColourMatrixD;
|
||||
|
||||
typedef Lattice<vLorentzColourMatrix> LatticeLorentzColourMatrix;
|
||||
typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user