Mirror of https://github.com/paboyle/Grid.git (synced 2025-06-14 13:57:07 +01:00)

Compare commits: feature/ha ... FgridStagg (688 commits)
.gitignore (vendored, 8 changes)

@@ -92,6 +92,7 @@ build*/*
 #####################
 *.xcodeproj/*
 build.sh
+.vscode
 
 # Eigen source #
 ################
@@ -106,6 +107,10 @@ lib/fftw/*
 m4/lt*
 m4/libtool.m4
 
+# github pages #
+################
+gh-pages/
+
 # Buck files #
 ##############
 .buck*
@@ -116,4 +121,5 @@ make-bin-BUCK.sh
 # generated sources #
 #####################
 lib/qcd/spin/gamma-gen/*.h
 lib/qcd/spin/gamma-gen/*.cc
+lib/qcd/spin/gamma-gen/*.cc
.travis.yml (67 changes)

@@ -9,62 +9,6 @@ matrix:
     - os: osx
       osx_image: xcode8.3
       compiler: clang
-    - compiler: gcc
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-          packages:
-            - g++-4.9
-            - libmpfr-dev
-            - libgmp-dev
-            - libmpc-dev
-            - libopenmpi-dev
-            - openmpi-bin
-            - binutils-dev
-      env: VERSION=-4.9
-    - compiler: gcc
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-          packages:
-            - g++-5
-            - libmpfr-dev
-            - libgmp-dev
-            - libmpc-dev
-            - libopenmpi-dev
-            - openmpi-bin
-            - binutils-dev
-      env: VERSION=-5
-    - compiler: clang
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-          packages:
-            - g++-4.8
-            - libmpfr-dev
-            - libgmp-dev
-            - libmpc-dev
-            - libopenmpi-dev
-            - openmpi-bin
-            - binutils-dev
-      env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
-    - compiler: clang
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-          packages:
-            - g++-4.8
-            - libmpfr-dev
-            - libgmp-dev
-            - libmpc-dev
-            - libopenmpi-dev
-            - openmpi-bin
-            - binutils-dev
-      env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
 
 before_install:
     - export GRIDDIR=`pwd`
@@ -78,6 +22,10 @@ install:
     - export CC=$CC$VERSION
     - export CXX=$CXX$VERSION
     - echo $PATH
+    - which autoconf
+    - autoconf --version
+    - which automake
+    - automake --version
    - which $CC
    - $CC --version
    - which $CXX
@@ -95,9 +43,4 @@ script:
    - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
    - make -j4
    - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
-    - echo make clean
-    - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi
-    - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make -j4; fi
-    - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
+    - make check
Makefile.am (11 changes)

@@ -3,10 +3,15 @@ SUBDIRS = lib benchmarks tests extras
 
 include $(top_srcdir)/doxygen.inc
 
-tests: all
-	$(MAKE) -C tests tests
+bin_SCRIPTS=grid-config
 
-.PHONY: tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL)
+.PHONY: bench check tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL)
+
+tests-local: all
+bench-local: all
+check-local: all
+
+AM_CXXFLAGS += -I$(top_builddir)/include
 
 ACLOCAL_AMFLAGS = -I m4
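The new `bin_SCRIPTS=grid-config` entry installs a small pkg-config-style helper for downstream builds. A hedged usage sketch follows; the flag names are assumptions, not confirmed by this diff:

``` bash
# Hypothetical: compile a user program against an installed Grid
g++ `grid-config --cxxflags` -o myprog myprog.cc `grid-config --ldflags` `grid-config --libs`
```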
README.md (302 changes)

@@ -1,41 +1,13 @@
-# Grid
-<table>
-<tr>
-    <td>Last stable release</td>
-    <td><a href="https://travis-ci.org/paboyle/Grid">
-    <img src="https://travis-ci.org/paboyle/Grid.svg?branch=master"></a>
-    </td>
-</tr>
-<tr>
-    <td>Development branch</td>
-    <td><a href="https://travis-ci.org/paboyle/Grid">
-    <img src="https://travis-ci.org/paboyle/Grid.svg?branch=develop"></a>
-    </td>
-</tr>
-</table>
+# Grid [![Teamcity status](http://ci.cliath.ph.ed.ac.uk/app/rest/builds/aggregated/strob:(buildType:(affectedProject(id:Grid)),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=Grid&tab=projectOverview) [![Travis status](https://travis-ci.org/paboyle/Grid.svg?branch=develop)](https://travis-ci.org/paboyle/Grid)
 
 **Data parallel C++ mathematical object library.**
 
 License: GPL v2.
 
-Last update Nov 2016.
+Last update June 2017.
+
+_Please do not send pull requests to the `master` branch which is reserved for releases._
 
-### Bug report
-
-_To help us track and solve issues with Grid more efficiently, please report problems using the GitHub issue system rather than sending emails to Grid developers._
-
-When you file an issue, please go through the following checklist:
-
-1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number.
-2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS), and the full output of the `--version` option of your compiler.
-3. Give the exact `configure` command used.
-4. Attach `config.log`.
-5. Attach `config.summary`.
-6. Attach the output of `make V=1`.
-7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example.
 
 ### Description
@@ -58,13 +30,68 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a significant simplification
 for most programmers.
 
 The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
-Presently SSE4 (128 bit), AVX, AVX2, QPX (256 bit), IMCI, and AVX512 (512 bit) targets are supported (ARM NEON on the way).
+Presently SSE4, ARM NEON (128 bits), AVX, AVX2, QPX (256 bits), IMCI and AVX512 (512 bits) targets are supported.
 
-These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers.
+These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types.
+The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`.
 
 MPI, OpenMP, and SIMD parallelism are present in the library.
-Please see https://arxiv.org/abs/1512.03487 for more detail.
+Please see [this paper](https://arxiv.org/abs/1512.03487) for more detail.
+
+### Compilers
+
+Intel ICPC v16.0.3 and later
+
+Clang v3.5 and later (3.8 and later needed for OpenMP)
+
+GCC v4.9.x (recommended)
+
+GCC v6.3 and later
+
+### Important:
+
+Some versions of GCC appear to have a bug under high optimisation (-O2, -O3).
+
+The safety of these compiler versions cannot be guaranteed at this time. Follow Issue 100 for details and updates.
+
+GCC v5.x
+
+GCC v6.1, v6.2
+
+### Bug report
+
+_To help us track and solve issues with Grid more efficiently, please report problems using the GitHub issue system rather than sending emails to Grid developers._
+
+When you file an issue, please go through the following checklist:
+
+1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number.
+2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS), and the full output of the `--version` option of your compiler.
+3. Give the exact `configure` command used.
+4. Attach `config.log`.
+5. Attach `grid.config.summary`.
+6. Attach the output of `make V=1`.
+7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example.
 
 ### Required libraries
 Grid requires:
 
 [GMP](https://gmplib.org/),
 
 [MPFR](http://www.mpfr.org/)
 
 Bootstrapping Grid downloads the Eigen library and uses it internally for dense matrix (non-QCD) operations.
 
 Grid optionally uses:
 
 [HDF5](https://support.hdfgroup.org/HDF5/)
 
 [LIME](http://usqcd-software.github.io/c-lime/) for ILDG and SciDAC file format support.
 
 [FFTW](http://www.fftw.org), either the generic version or via the Intel MKL library.
 
 LAPACK, either the generic version or the Intel MKL library.
 
 ### Quick start
 First, start by cloning the repository:
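The clone command itself falls outside the hunks shown here; a minimal sketch of the full sequence the README describes, with the bootstrap script name and build-directory layout being assumptions:

``` bash
git clone https://github.com/paboyle/Grid.git
cd Grid
./bootstrap.sh                  # assumed name; this is the step that downloads Eigen
mkdir build; cd build
../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
make; make check; make install
```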
@@ -95,10 +122,10 @@ install Grid. Other options are detailed in the next section; you can also use
 `CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to
 customise the build.
 
-Finally, you can build and install Grid:
+Finally, you can build, check, and install Grid:
 
 ``` bash
-make; make install
+make; make check; make install
 ```
 
 To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute:
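The command itself is elided by this diff view; judging from the `tests` target pattern in Makefile.am above, a plausible form (the exact target layout is an assumption) is:

``` bash
make -C tests/<subdir> tests
```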
@@ -121,7 +148,7 @@ If you want to build all the tests at once just use `make tests`.
 - `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes).
 - `--enable-precision={single|double}`: set the default precision (default: `double`).
 - `--enable-comms=<comm>`: use `<comm>` for message passing (default: `none`). A list of the possible comms targets is detailed in a section below.
-- `--enable-rng={ranlux48|mt19937}`: choose the RNG (default: `ranlux48`).
+- `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo`).
 - `--disable-timers`: disable system dependent high-resolution timers.
 - `--enable-chroma`: enable Chroma regression tests.
 - `--enable-doxygen-doc`: enable the Doxygen documentation generation (build with `make doxygen-doc`)
@@ -135,7 +162,6 @@ The following options can be used with the `--enable-comms=` option to target different communication interfaces:
 | `none`         | no communications                                             |
 | `mpi[-auto]`   | MPI communications                                            |
 | `mpi3[-auto]`  | MPI communications using MPI 3 shared memory                  |
-| `mpi3l[-auto]` | MPI communications using MPI 3 shared memory and leader model |
 | `shmem`        | Cray SHMEM communications                                     |
 
 For the MPI interfaces the optional `-auto` suffix instructs the `configure` scripts to determine all the necessary compilation and linking flags. This is done by extracting the information from the MPI wrapper specified in the environment variable `MPICXX` (if not specified, `configure` will scan through a list of default names). The `-auto` suffix is not supported by the Cray environment wrapper scripts. Use the standard versions instead.
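For example, a hedged invocation that lets `configure` extract flags from an OpenMPI wrapper (the wrapper name and SIMD target here are assumptions):

``` bash
MPICXX=mpicxx ../configure --enable-precision=double --enable-simd=AVX2 --enable-comms=mpi-auto
```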
@@ -153,13 +179,13 @@ The following options can be used with the `--enable-simd=` option to target different SIMD architectures:
 | `AVXFMA4` | AVX (256 bit) + FMA4 |
 | `AVX2`    | AVX 2 (256 bit)      |
 | `AVX512`  | AVX 512 bit          |
-| `QPX`     | QPX (256 bit)        |
+| `NEONv8`  | [ARM NEON](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/ch07s03.html) (128 bit) |
+| `QPX`     | IBM QPX (256 bit)    |
 
 Alternatively, some CPU codenames can be directly used:
 
 | `<code>`    | Description                            |
 | ----------- | -------------------------------------- |
 | `KNC`       | [Intel Xeon Phi codename Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) |
 | `KNL`       | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) |
 | `BGQ`       | Blue Gene/Q                            |
@@ -176,21 +202,205 @@ The following configuration is recommended for the Intel Knights Landing platform:

``` bash
../configure --enable-precision=double\
             --enable-simd=KNL        \
             --enable-comms=mpi-auto  \
             --enable-mkl             \
             CXX=icpc MPICXX=mpiicpc
```
The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.

If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:

``` bash
../configure --enable-precision=double\
             --enable-simd=KNL        \
             --enable-comms=mpi       \
             --enable-mkl             \
             CXX=CC CC=cc
```

If GMP and MPFR are NOT in standard places (/usr/), these flags may be needed:
``` bash
             --with-gmp=<path>        \
             --with-mpfr=<path>       \
```
where `<path>` is the UNIX prefix where GMP and MPFR are installed.

Knights Landing with Intel Omni-Path adapters, with two adapters per node,
presently performs better with more than one rank per node, using shared memory
for intra-node communication. This is the mpi3 communications implementation.
We recommend four ranks per node for best performance, but the optimum is local-volume dependent.

``` bash
../configure --enable-precision=double\
             --enable-simd=KNL        \
             --enable-comms=mpi3-auto \
             --enable-mkl             \
             CXX=icpc MPICXX=mpiicpc
```
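To illustrate the four-ranks-per-node recommendation, a hypothetical single-node launch, modelled on the benchmark invocations later in this README (the rank decomposition, thread count and volume are assumptions):

``` bash
mpirun -n 4 ./benchmarks/Benchmark_dwf --mpi 1.1.2.2 --threads 16 --grid 16.16.32.32
```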
### Build setup for Intel Haswell Xeon platform

The following configuration is recommended for the Intel Haswell platform:

``` bash
../configure --enable-precision=double\
             --enable-simd=AVX2       \
             --enable-comms=mpi3-auto \
             --enable-mkl             \
             CXX=icpc MPICXX=mpiicpc
```
The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.

If GMP and MPFR are NOT in standard places (/usr/), these flags may be needed:
``` bash
             --with-gmp=<path>        \
             --with-mpfr=<path>       \
```
where `<path>` is the UNIX prefix where GMP and MPFR are installed.

If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:

``` bash
../configure --enable-precision=double\
             --enable-simd=AVX2       \
             --enable-comms=mpi3      \
             --enable-mkl             \
             CXX=CC CC=cc
```
Since dual-socket nodes are commonplace, we recommend MPI-3 as the default, with one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using
```
export I_MPI_PIN=1
```
This is the default.
### Build setup for Intel Skylake Xeon platform

The following configuration is recommended for the Intel Skylake platform:

``` bash
../configure --enable-precision=double\
             --enable-simd=AVX512     \
             --enable-comms=mpi3      \
             --enable-mkl             \
             CXX=mpiicpc
```
The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.

If GMP and MPFR are NOT in standard places (/usr/), these flags may be needed:
``` bash
             --with-gmp=<path>        \
             --with-mpfr=<path>       \
```
where `<path>` is the UNIX prefix where GMP and MPFR are installed.

If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:

``` bash
../configure --enable-precision=double\
             --enable-simd=AVX512     \
             --enable-comms=mpi3      \
             --enable-mkl             \
             CXX=CC CC=cc
```
Since dual-socket nodes are commonplace, we recommend MPI-3 as the default, with one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using
```
export I_MPI_PIN=1
```
This is the default.

#### Expected Skylake Gold 6148 dual socket (single prec, single node 20+20 cores) performance using NUMA MPI mapping:

    mpirun -n 2 benchmarks/Benchmark_dwf --grid 16.16.16.16 --mpi 2.1.1.1 --cacheblocking 2.2.2.2 --dslash-asm --shm 1024 --threads 18

TBA
### Build setup for AMD EPYC / RYZEN

The AMD EPYC is a multichip module comprising 32 cores spread over four distinct chips, each with 8 cores.
So, even with a single-socket node there is a quad-chip module. Dual-socket nodes with 64 cores in total
are common. Each chip within the module exposes a separate NUMA domain.
There are four NUMA domains per socket and we recommend one MPI rank per NUMA domain:
MPI-3 with four ranks per socket and 8 threads per rank.

The following configuration is recommended for the AMD EPYC platform:

``` bash
../configure --enable-precision=double\
             --enable-simd=AVX2       \
             --enable-comms=mpi3      \
             CXX=mpicxx
```

If GMP and MPFR are NOT in standard places (/usr/), these flags may be needed:
``` bash
             --with-gmp=<path>        \
             --with-mpfr=<path>       \
```
where `<path>` is the UNIX prefix where GMP and MPFR are installed.

Using MPICH and g++ v4.9.2, best performance can be obtained by setting explicit GOMP_CPU_AFFINITY flags for each MPI rank.
This can be done by invoking MPI on a wrapper script, omp_bind.sh, that handles the binding.

It is recommended to run 8 MPI ranks on a single dual-socket AMD EPYC, with 8 threads per rank, using MPI3 and
shared memory to communicate within the node:

    mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --mpi 2.2.2.1 --dslash-unroll --threads 8 --grid 16.16.16.16 --cacheblocking 4.4.4.4

where omp_bind.sh does the following:
```
#!/bin/bash

# Map this rank's NUMA node and spread its 8 threads over even cores
numanode=`expr $PMI_RANK % 8`
basecore=`expr $numanode \* 16`
core0=`expr $basecore + 0 `
core1=`expr $basecore + 2 `
core2=`expr $basecore + 4 `
core3=`expr $basecore + 6 `
core4=`expr $basecore + 8 `
core5=`expr $basecore + 10 `
core6=`expr $basecore + 12 `
core7=`expr $basecore + 14 `

export GOMP_CPU_AFFINITY="$core0 $core1 $core2 $core3 $core4 $core5 $core6 $core7"
echo GOMP_CPU_AFFINITY $GOMP_CPU_AFFINITY

$@
```
Performance:

#### Expected AMD EPYC 7601 dual socket (single prec, single node 32+32 cores) performance using NUMA MPI mapping:

    mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --threads 8 --mpi 2.2.2.1 --dslash-unroll --grid 16.16.16.16 --cacheblocking 4.4.4.4

TBA

### Build setup for BlueGene/Q

To be written...

### Build setup for ARM Neon

To be written...

### Build setup for laptops, other compilers, non-cluster builds

Many versions of g++ and clang++ work with Grid: simply replace CXX (and MPICXX) accordingly,
and omit the --enable-mkl flag.

Single node builds are enabled with
```
--enable-comms=none
```

FFTW support that is not in the default search path may then be enabled with
```
--with-fftw=<installpath>
```

BLAS will not be compiled in by default, and Lanczos will default to Eigen diagonalisation.
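Putting those pieces together, a minimal laptop-style configure sketch, assuming clang++ and an FFTW installation under /usr/local:

``` bash
../configure --enable-precision=double \
             --enable-simd=AVX         \
             --enable-comms=none       \
             --with-fftw=/usr/local    \
             CXX=clang++
```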
TODO (33 changes)

@@ -1,23 +1,32 @@
TODO:
---------------

Peter's work list:
2)- Precision conversion and sort out localConvert <--
3)- Remove DenseVector, DenseMatrix; Use Eigen instead. <-- started
4)- Binary I/O speed up & x-strips
-- Profile CG, BlockCG, etc... Flop count/rate -- PARTIAL, time but no flop/s yet
-- Physical propagator interface
-- Conserved currents
-- GaugeFix into central location
-- Multigrid Wilson and DWF, compare to other Multigrid implementations
-- HDCR resume
Large item work list:

1)- BG/Q port and check ; Andrew says ok.
2)- Christoph's local basis expansion Lanczos
3a)- RNG I/O in ILDG/SciDAC (minor)
3b)- Precision conversion and sort out localConvert <-- partial/easy
3c)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet
4)- Physical propagator interface
5)- Conserved currents
6)- Multigrid Wilson and DWF, compare to other Multigrid implementations
7)- HDCR resume

Recent DONE
-- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O ; <-- DONE ; bmark cori
-- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE
-- GaugeFix into central location <-- DONE
-- Scidac and Ildg metadata handling <-- DONE
-- Binary I/O MPI2 IO <-- DONE
-- Binary I/O speed up & x-strips <-- DONE
-- Cut down the exterior overhead <-- DONE
-- Interior legs from SHM comms <-- DONE
-- Half-precision comms <-- DONE
-- Merge high precision reduction into develop
-- multiRHS DWF; benchmark on Cori/BNL for comms elimination
-- Merge high precision reduction into develop <-- DONE
-- BlockCG, BCGrQ <-- DONE
-- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE
-- slice* linalg routines for multiRHS, BlockCG

-----
VERSION (9 changes)

@@ -1,6 +1,5 @@
-Version : 0.6.0
+Version : 0.7.0
 
 - AVX512, AVX2, AVX, SSE good
-- Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above
-- MPI and MPI3
-- HiRep, Smearing, Generic gauge group
+- Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended
+- MPI and MPI3 comms optimisations for KNL and OPA finished
+- Half precision comms
benchmarks/Benchmark_ITT.cc (new file, 800 lines)

@@ -0,0 +1,800 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./benchmarks/Benchmark_memory_bandwidth.cc

    Copyright (C) 2015

Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/*  END LEGAL */
#include <Grid/Grid.h>

using namespace std;
using namespace Grid;
using namespace Grid::QCD;

typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF;
typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD;


std::vector<int> L_list;
std::vector<int> Ls_list;
std::vector<double> mflop_list;

double mflop_ref;
double mflop_ref_err;

int NN_global;

struct time_statistics{
  double mean;
  double err;
  double min;
  double max;

  void statistics(std::vector<double> v){
    double sum = std::accumulate(v.begin(), v.end(), 0.0);
    mean = sum / v.size();

    std::vector<double> diff(v.size());
    std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; });
    double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
    err = std::sqrt(sq_sum / (v.size()*(v.size() - 1)));

    auto result = std::minmax_element(v.begin(), v.end());
    min = *result.first;
    max = *result.second;
  }
};
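// For reference, with n = v.size() timing samples x_i, the struct above computes
//   mean = (1/n) * sum_i x_i
//   err  = sqrt( sum_i (x_i - mean)^2 / (n*(n-1)) )   i.e. the standard error of the mean,
// while min/max record the fastest and slowest individual iterations.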
|
||||
|
||||
void comms_header(){
|
||||
std::cout <<GridLogMessage << " L "<<"\t"<<" Ls "<<"\t"
|
||||
<<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl;
|
||||
};
|
||||
|
||||
Gamma::Algebra Gmu [] = {
|
||||
Gamma::Algebra::GammaX,
|
||||
Gamma::Algebra::GammaY,
|
||||
Gamma::Algebra::GammaZ,
|
||||
Gamma::Algebra::GammaT
|
||||
};
|
||||
struct controls {
|
||||
int Opt;
|
||||
int CommsOverlap;
|
||||
Grid::CartesianCommunicator::CommunicatorPolicy_t CommsAsynch;
|
||||
// int HugePages;
|
||||
};
|
||||
|
||||
class Benchmark {
|
||||
public:
|
||||
static void Decomposition (void ) {
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n";
|
||||
std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl;
|
||||
std::cout<<GridLogMessage<<"\tMPI tasks : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl;
|
||||
std::cout<<GridLogMessage<<"\tvReal : "<<sizeof(vReal )*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vReal::Nsimd()))<<std::endl;
|
||||
std::cout<<GridLogMessage<<"\tvRealF : "<<sizeof(vRealF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl;
|
||||
std::cout<<GridLogMessage<<"\tvRealD : "<<sizeof(vRealD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealD::Nsimd()))<<std::endl;
|
||||
std::cout<<GridLogMessage<<"\tvComplex : "<<sizeof(vComplex )*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplex::Nsimd()))<<std::endl;
|
||||
std::cout<<GridLogMessage<<"\tvComplexF : "<<sizeof(vComplexF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexF::Nsimd()))<<std::endl;
|
||||
std::cout<<GridLogMessage<<"\tvComplexD : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
|
||||
}
|
||||
|
||||
static void Comms(void)
|
||||
{
|
||||
int Nloop=200;
|
||||
int nmu=0;
|
||||
int maxlat=32;
|
||||
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++;
|
||||
|
||||
std::vector<double> t_time(Nloop);
|
||||
time_statistics timestat;
|
||||
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
comms_header();
|
||||
|
||||
for(int lat=4;lat<=maxlat;lat+=4){
|
||||
for(int Ls=8;Ls<=8;Ls*=2){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],
|
||||
lat*mpi_layout[1],
|
||||
lat*mpi_layout[2],
|
||||
lat*mpi_layout[3]});
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
RealD Nrank = Grid._Nprocessors;
|
||||
RealD Nnode = Grid.NodeCount();
|
||||
RealD ppn = Nrank/Nnode;
|
||||
|
||||
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||
Grid.ShmBufferFreeAll();
|
||||
for(int d=0;d<8;d++){
|
||||
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
}
|
||||
|
||||
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
int ncomm;
|
||||
double dbytes;
|
||||
std::vector<double> times(Nloop);
|
||||
for(int i=0;i<Nloop;i++){
|
||||
|
||||
double start=usecond();
|
||||
|
||||
dbytes=0;
|
||||
ncomm=0;
|
||||
|
||||
parallel_for(int dir=0;dir<8;dir++){
|
||||
|
||||
double tbytes;
|
||||
int mu =dir % 4;
|
||||
|
||||
if (mpi_layout[mu]>1 ) {
|
||||
|
||||
int xmit_to_rank;
|
||||
int recv_from_rank;
|
||||
if ( dir == mu ) {
|
||||
int comm_proc=1;
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
} else {
|
||||
int comm_proc = mpi_layout[mu]-1;
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
}
|
||||
tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
|
||||
(void *)&rbuf[dir][0], recv_from_rank,
|
||||
bytes,dir);
|
||||
|
||||
#ifdef GRID_OMP
|
||||
#pragma omp atomic
|
||||
#endif
|
||||
ncomm++;
|
||||
|
||||
#ifdef GRID_OMP
|
||||
#pragma omp atomic
|
||||
#endif
|
||||
dbytes+=tbytes;
|
||||
}
|
||||
}
|
||||
Grid.Barrier();
|
||||
double stop=usecond();
|
||||
t_time[i] = stop-start; // microseconds
|
||||
}
|
||||
|
||||
timestat.statistics(t_time);
|
||||
// for(int i=0;i<t_time.size();i++){
|
||||
// std::cout << i<<" "<<t_time[i]<<std::endl;
|
||||
// }
|
||||
|
||||
dbytes=dbytes*ppn;
|
||||
double xbytes = dbytes*0.5;
|
||||
double rbytes = dbytes*0.5;
|
||||
double bidibytes = dbytes;
|
||||
|
||||
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
|
||||
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
|
||||
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
|
||||
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
|
||||
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
|
||||
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static void Memory(void)
|
||||
{
|
||||
const int Nvec=8;
|
||||
typedef Lattice< iVector< vReal,Nvec> > LatticeVec;
|
||||
typedef iVector<vReal,Nvec> Vec;
|
||||
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vReal::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking a*x + y bandwidth"<<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<< "\t\tGB/s / node"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
uint64_t NP;
|
||||
uint64_t NN;
|
||||
|
||||
|
||||
uint64_t lmax=48;
|
||||
#define NLOOP (100*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
|
||||
|
||||
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
for(int lat=8;lat<=lmax;lat+=4){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
NP= Grid.RankCount();
|
||||
NN =Grid.NodeCount();
|
||||
|
||||
Vec rn ; random(sRNG,rn);
|
||||
|
||||
LatticeVec z(&Grid); z=rn;
|
||||
LatticeVec x(&Grid); x=rn;
|
||||
LatticeVec y(&Grid); y=rn;
|
||||
double a=2.0;
|
||||
|
||||
uint64_t Nloop=NLOOP;
|
||||
|
||||
double start=usecond();
|
||||
for(int i=0;i<Nloop;i++){
|
||||
z=a*x-y;
|
||||
x._odata[0]=z._odata[0]; // force serial dependency to prevent optimise away
|
||||
y._odata[4]=z._odata[4];
|
||||
}
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000;
|
||||
|
||||
double flops=vol*Nvec*2;// mul,add
|
||||
double bytes=3.0*vol*Nvec*sizeof(Real);
|
||||
std::cout<<GridLogMessage<<std::setprecision(3)
|
||||
<< lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.
|
||||
<< "\t\t"<< bytes/time/NN <<std::endl;
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
static double DWF5(int Ls,int L)
|
||||
{
|
||||
RealD mass=0.1;
|
||||
RealD M5 =1.8;
|
||||
|
||||
double mflops;
|
||||
double mflops_best = 0;
|
||||
double mflops_worst= 0;
|
||||
std::vector<double> mflops_all;
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// Set/Get the layout & grid size
|
||||
///////////////////////////////////////////////////////
|
||||
int threads = GridThread::GetThreads();
|
||||
std::vector<int> mpi = GridDefaultMpi(); assert(mpi.size()==4);
|
||||
std::vector<int> local({L,L,L,L});
|
||||
|
||||
GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(std::vector<int>({64,64,64,64}),
|
||||
GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||
uint64_t NP = TmpGrid->RankCount();
|
||||
uint64_t NN = TmpGrid->NodeCount();
|
||||
NN_global=NN;
|
||||
uint64_t SHM=NP/NN;
|
||||
|
||||
std::vector<int> internal;
|
||||
if ( SHM == 1 ) internal = std::vector<int>({1,1,1,1});
|
||||
else if ( SHM == 2 ) internal = std::vector<int>({2,1,1,1});
|
||||
else if ( SHM == 4 ) internal = std::vector<int>({2,2,1,1});
|
||||
else if ( SHM == 8 ) internal = std::vector<int>({2,2,2,1});
|
||||
else assert(0);
|
||||
|
||||
std::vector<int> nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]});
|
||||
std::vector<int> latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]});
|
||||
|
||||
///////// Welcome message ////////////
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "Benchmark DWF Ls vec on "<<L<<"^4 local volume "<<std::endl;
|
||||
std::cout<<GridLogMessage << "* Global volume : "<<GridCmdVectorIntToString(latt4)<<std::endl;
|
||||
std::cout<<GridLogMessage << "* Ls : "<<Ls<<std::endl;
|
||||
std::cout<<GridLogMessage << "* MPI ranks : "<<GridCmdVectorIntToString(mpi)<<std::endl;
|
||||
std::cout<<GridLogMessage << "* Intranode : "<<GridCmdVectorIntToString(internal)<<std::endl;
|
||||
std::cout<<GridLogMessage << "* nodes : "<<GridCmdVectorIntToString(nodes)<<std::endl;
|
||||
std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
|
||||
///////// Lattice Init ////////////
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi());
|
||||
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
||||
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
||||
|
||||
///////// RNG Init ////////////
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
std::vector<int> seeds5({5,6,7,8});
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
GridParallelRNG RNG5(sFGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
|
||||
|
||||
///////// Source preparation ////////////
|
||||
LatticeFermion src (sFGrid); random(RNG5,src);
|
||||
LatticeFermion tmp (sFGrid);
|
||||
|
||||
RealD N2 = 1.0/::sqrt(norm2(src));
|
||||
src = src*N2;
|
||||
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
|
||||
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
|
||||
LatticeFermion src_e (sFrbGrid);
|
||||
LatticeFermion src_o (sFrbGrid);
|
||||
LatticeFermion r_e (sFrbGrid);
|
||||
LatticeFermion r_o (sFrbGrid);
|
||||
LatticeFermion r_eo (sFGrid);
|
||||
LatticeFermion err (sFGrid);
|
||||
{
|
||||
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd,src_o,src);
|
||||
|
||||
#if defined(AVX512)
|
||||
const int num_cases = 6;
|
||||
std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O ");
|
||||
#else
|
||||
const int num_cases = 4;
|
||||
std::string fmt("U/S ; U/O ; G/S ; G/O ");
|
||||
#endif
|
||||
controls Cases [] = {
|
||||
#ifdef AVX512
|
||||
{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
||||
{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
||||
#endif
|
||||
{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
||||
{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
||||
{ QCD::WilsonKernelsStatic::OptGeneric , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
||||
{ QCD::WilsonKernelsStatic::OptGeneric , QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }
|
||||
};
|
||||
|
||||
for(int c=0;c<num_cases;c++) {
|
||||
|
||||
QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
|
||||
QCD::WilsonKernelsStatic::Opt = Cases[c].Opt;
|
||||
CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
|
||||
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
|
||||
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
|
||||
int nwarm = 100;
|
||||
uint64_t ncall = 1000;
|
||||
|
||||
double t0=usecond();
|
||||
sFGrid->Barrier();
|
||||
for(int i=0;i<nwarm;i++){
|
||||
sDw.DhopEO(src_o,r_e,DaggerNo);
|
||||
}
|
||||
sFGrid->Barrier();
|
||||
double t1=usecond();
|
||||
|
||||
sDw.ZeroCounters();
|
||||
time_statistics timestat;
|
||||
std::vector<double> t_time(ncall);
|
||||
for(uint64_t i=0;i<ncall;i++){
|
||||
t0=usecond();
|
||||
sDw.DhopEO(src_o,r_e,DaggerNo);
|
||||
t1=usecond();
|
||||
t_time[i] = t1-t0;
|
||||
}
|
||||
sFGrid->Barrier();
|
||||
|
||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||
double flops=(1344.0*volume)/2;
|
||||
double mf_hi, mf_lo, mf_err;
|
||||
|
||||
timestat.statistics(t_time);
|
||||
mf_hi = flops/timestat.min;
|
||||
mf_lo = flops/timestat.max;
|
||||
mf_err= flops/timestat.min * timestat.err/timestat.mean;
|
||||
|
||||
mflops = flops/timestat.mean;
|
||||
mflops_all.push_back(mflops);
|
||||
if ( mflops_best == 0 ) mflops_best = mflops;
|
||||
if ( mflops_worst== 0 ) mflops_worst= mflops;
|
||||
if ( mflops>mflops_best ) mflops_best = mflops;
|
||||
if ( mflops<mflops_worst) mflops_worst= mflops;
|
||||
|
||||
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl;
|
||||
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per rank "<< mflops/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per node "<< mflops/NN<<std::endl;
|
||||
|
||||
sDw.Report();
|
||||
|
||||
}
|
||||
double robust = mflops_worst/mflops_best;;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Best mflop/s = "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl;
|
||||
std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Worst mflop/s = "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage <<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness = "<< robust <<std::endl;
|
||||
std::cout<<GridLogMessage <<fmt << std::endl;
|
||||
std::cout<<GridLogMessage;
|
||||
|
||||
for(int i=0;i<mflops_all.size();i++){
|
||||
std::cout<<mflops_all[i]/NN<<" ; " ;
|
||||
}
|
||||
std::cout<<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
|
||||
}
|
||||
return mflops_best;
|
||||
}
  static double DWF(int Ls,int L, double & robust)
  {
    RealD mass=0.1;
    RealD M5  =1.8;

    double mflops;
    double mflops_best = 0;
    double mflops_worst= 0;
    std::vector<double> mflops_all;

    ///////////////////////////////////////////////////////
    // Set/Get the layout & grid size
    ///////////////////////////////////////////////////////
    int threads = GridThread::GetThreads();
    std::vector<int> mpi = GridDefaultMpi(); assert(mpi.size()==4);
    std::vector<int> local({L,L,L,L});

    GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(std::vector<int>({64,64,64,64}),
                                                             GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
    uint64_t NP = TmpGrid->RankCount();
    uint64_t NN = TmpGrid->NodeCount();
    NN_global=NN;
    uint64_t SHM=NP/NN;

    std::vector<int> internal;
    if      ( SHM == 1 ) internal = std::vector<int>({1,1,1,1});
    else if ( SHM == 2 ) internal = std::vector<int>({2,1,1,1});
    else if ( SHM == 4 ) internal = std::vector<int>({2,2,1,1});
    else if ( SHM == 8 ) internal = std::vector<int>({2,2,2,1});
    else assert(0);

    std::vector<int> nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]});
    std::vector<int> latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]});

    ///////// Welcome message ////////////
    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
    std::cout<<GridLogMessage << "Benchmark DWF on "<<L<<"^4 local volume "<<std::endl;
    std::cout<<GridLogMessage << "* Global volume : "<<GridCmdVectorIntToString(latt4)<<std::endl;
    std::cout<<GridLogMessage << "* Ls : "<<Ls<<std::endl;
    std::cout<<GridLogMessage << "* MPI ranks : "<<GridCmdVectorIntToString(mpi)<<std::endl;
    std::cout<<GridLogMessage << "* Intranode : "<<GridCmdVectorIntToString(internal)<<std::endl;
    std::cout<<GridLogMessage << "* nodes : "<<GridCmdVectorIntToString(nodes)<<std::endl;
    std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl;
    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;

    ///////// Lattice Init ////////////
    GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
    GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
    GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
    GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);

    ///////// RNG Init ////////////
    std::vector<int> seeds4({1,2,3,4});
    std::vector<int> seeds5({5,6,7,8});
    GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
    GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
    std::cout << GridLogMessage << "Initialised RNGs" << std::endl;

    ///////// Source preparation ////////////
    LatticeFermion src (FGrid); random(RNG5,src);
    LatticeFermion ref (FGrid);
    LatticeFermion tmp (FGrid);

    RealD N2 = 1.0/::sqrt(norm2(src));
    src = src*N2;

    LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);

    DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);

    ////////////////////////////////////
    // Naive wilson implementation
    ////////////////////////////////////
    {
      LatticeGaugeField Umu5d(FGrid);
      std::vector<LatticeColourMatrix> U(4,FGrid);
      for(int ss=0;ss<Umu._grid->oSites();ss++){
        for(int s=0;s<Ls;s++){
          Umu5d._odata[Ls*ss+s] = Umu._odata[ss];
        }
      }
      ref = zero;
      for(int mu=0;mu<Nd;mu++){
        U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
      }
      for(int mu=0;mu<Nd;mu++){
        tmp = U[mu]*Cshift(src,mu+1,1);
        ref = ref + tmp - Gamma(Gmu[mu])*tmp;

        tmp = adj(U[mu])*src;
        tmp = Cshift(tmp,mu+1,-1);
        ref = ref + tmp + Gamma(Gmu[mu])*tmp;
      }
      ref = -0.5*ref;
    }

    LatticeFermion src_e (FrbGrid);
    LatticeFermion src_o (FrbGrid);
    LatticeFermion r_e   (FrbGrid);
    LatticeFermion r_o   (FrbGrid);
    LatticeFermion r_eo  (FGrid);
    LatticeFermion err   (FGrid);
    {
      pickCheckerboard(Even,src_e,src);
      pickCheckerboard(Odd,src_o,src);

#if defined(AVX512)
      const int num_cases = 6;
      std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O ");
#else
      const int num_cases = 4;
      std::string fmt("U/S ; U/O ; G/S ; G/O ");
#endif
      controls Cases [] = {
#ifdef AVX512
        { QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
        { QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential },
#endif
        { QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
        { QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential },
        { QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
        { QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential }
      };

      for(int c=0;c<num_cases;c++) {

        QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
        QCD::WilsonKernelsStatic::Opt   = Cases[c].Opt;
        CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);

        std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
        if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
        if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
        if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
        if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
        if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
        if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
        if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
        std::cout<<GridLogMessage << "=================================================================================="<<std::endl;

        int nwarm = 200;
        double t0=usecond();
        FGrid->Barrier();
        for(int i=0;i<nwarm;i++){
          Dw.DhopEO(src_o,r_e,DaggerNo);
        }
        FGrid->Barrier();
        double t1=usecond();
        // uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0);
        // if (ncall < 500) ncall = 500;
        uint64_t ncall = 1000;

        FGrid->Broadcast(0,&ncall,sizeof(ncall));

        // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
        Dw.ZeroCounters();

        time_statistics timestat;
        std::vector<double> t_time(ncall);
        for(uint64_t i=0;i<ncall;i++){
          t0=usecond();
          Dw.DhopEO(src_o,r_e,DaggerNo);
          t1=usecond();
          t_time[i] = t1-t0;
        }
        FGrid->Barrier();

        double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
        double flops=(1344.0*volume)/2;
        double mf_hi, mf_lo, mf_err;

        timestat.statistics(t_time);
        mf_hi = flops/timestat.min;
        mf_lo = flops/timestat.max;
        mf_err= flops/timestat.min * timestat.err/timestat.mean;

        mflops = flops/timestat.mean;
        mflops_all.push_back(mflops);
        if ( mflops_best == 0   ) mflops_best = mflops;
        if ( mflops_worst== 0   ) mflops_worst= mflops;
        if ( mflops>mflops_best ) mflops_best = mflops;
        if ( mflops<mflops_worst) mflops_worst= mflops;

        std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl;
        std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank "<< mflops/NP<<std::endl;
        std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node "<< mflops/NN<<std::endl;

        Dw.Report();

        Dw.DhopEO(src_o,r_e,DaggerNo);
        Dw.DhopOE(src_e,r_o,DaggerNo);
        setCheckerboard(r_eo,r_o);
        setCheckerboard(r_eo,r_e);
        err = r_eo-ref;
        std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
        assert((norm2(err)<1.0e-4));

      }

      robust = mflops_worst/mflops_best;
      std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
      std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Best mflop/s = "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl;
      std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Worst mflop/s = "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl;
      std::cout<<GridLogMessage << std::fixed<<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness = "<< robust <<std::endl;
      std::cout<<GridLogMessage <<fmt << std::endl;
      std::cout<<GridLogMessage ;
      for(int i=0;i<mflops_all.size();i++){
        std::cout<<mflops_all[i]/NN<<" ; " ;
      }
      std::cout<<std::endl;
      std::cout<<GridLogMessage << "=================================================================================="<<std::endl;

    }
    return mflops_best;
  }

};
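// ----------------------------------------------------------------------
// Added sketch (editor's addition, not in the original file): the flop
// model behind the mflop/s numbers above. The benchmark charges 1344
// floating point operations per 4d site per s-slice for the Wilson dslash,
// and a single-checkerboard DhopEO touches half the sites. With the mean
// time in microseconds, flops/time is already in Mflop/s.
static double DeoMflops(double L, double Ls, double mean_usec) {
  double volume = Ls*L*L*L*L;           // 5d site count, e.g. 16^4 x 16 local
  double flops  = (1344.0*volume)/2.0;  // one checkerboard application
  return flops/mean_usec;               // flop per usec == Mflop/s
}
// ----------------------------------------------------------------------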
int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);

  CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential);
#ifdef KNL
  LebesgueOrder::Block = std::vector<int>({8,2,2,2});
#else
  LebesgueOrder::Block = std::vector<int>({2,2,2,2});
#endif
  Benchmark::Decomposition();

  int do_memory=1;
  int do_comms =1;
  int do_su3   =0;
  int do_wilson=1;
  int do_dwf   =1;

  if ( do_su3 ) {
    // empty for now
  }
#if 1
  int sel=2;
  std::vector<int> L_list({8,12,16,24});
#else
  int sel=1;
  std::vector<int> L_list({8,12});
#endif
  int selm1=sel-1;
  std::vector<double> robust_list;

  std::vector<double> wilson;
  std::vector<double> dwf4;
  std::vector<double> dwf5;

  if ( do_wilson ) {
    int Ls=1;
    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
    std::cout<<GridLogMessage << " Wilson dslash 4D vectorised" <<std::endl;
    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
    for(int l=0;l<L_list.size();l++){
      double robust;
      wilson.push_back(Benchmark::DWF(1,L_list[l],robust));
    }
  }

  int Ls=16;
  if ( do_dwf ) {
    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
    std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl;
    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
    for(int l=0;l<L_list.size();l++){
      double robust;
      double result = Benchmark::DWF(Ls,L_list[l],robust) ;
      dwf4.push_back(result);
      robust_list.push_back(robust);
    }
  }

  if ( do_dwf ) {
    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
    std::cout<<GridLogMessage << " Domain wall dslash 5D vectorised" <<std::endl;
    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
    for(int l=0;l<L_list.size();l++){
      dwf5.push_back(Benchmark::DWF5(Ls,L_list[l]));
    }
  }

  if ( do_dwf ) {

    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
    std::cout<<GridLogMessage << " Summary table Ls="<<Ls <<std::endl;
    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
    std::cout<<GridLogMessage << "L \t\t Wilson \t DWF4 \t DWF5 " <<std::endl;
    for(int l=0;l<L_list.size();l++){
      std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]<<" \t "<<dwf4[l]<<" \t "<<dwf5[l] <<std::endl;
    }
    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
  }

  int NN=NN_global;
  if ( do_memory ) {
    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
    std::cout<<GridLogMessage << " Memory benchmark " <<std::endl;
    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
    Benchmark::Memory();
  }

  if ( do_comms && (NN>1) ) {
    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
    std::cout<<GridLogMessage << " Communications benchmark " <<std::endl;
    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
    Benchmark::Comms();
  }

  if ( do_dwf ) {
    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
    std::cout<<GridLogMessage << " Per Node Summary table Ls="<<Ls <<std::endl;
    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
    std::cout<<GridLogMessage << " L \t\t Wilson\t\t DWF4 \t\t DWF5 " <<std::endl;
    for(int l=0;l<L_list.size();l++){
      std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]/NN<<" \t "<<dwf4[l]/NN<<" \t "<<dwf5[l]/NN<<std::endl;
    }
    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;

    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
    std::cout<<GridLogMessage << " Comparison point result: " << 0.5*(dwf4[sel]+dwf4[selm1])/NN << " Mflop/s per node"<<std::endl;
    std::cout<<GridLogMessage << " Comparison point is 0.5*("<<dwf4[sel]/NN<<"+"<<dwf4[selm1]/NN << ") "<<std::endl;
    std::cout<<std::setprecision(3);
    std::cout<<GridLogMessage << " Comparison point robustness: " << robust_list[sel] <<std::endl;
    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;

  }

  Grid_finalize();
}
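// ----------------------------------------------------------------------
// Added sketch (editor's addition): the "comparison point" above averages
// the per-node DWF4 throughput at indices sel and sel-1 of L_list. With
// hypothetical results dwf4 = {1.1e5, 2.3e5, 3.0e5, 3.4e5} Mflop/s, sel=2
// and an invented node count NN=4, it is 0.5*(3.0e5+2.3e5)/4 = 66250.
static double ComparisonPoint(const std::vector<double> &dwf4, int sel, double NN) {
  return 0.5*(dwf4[sel]+dwf4[sel-1])/NN; // Mflop/s per node
}
// ----------------------------------------------------------------------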
@@ -31,6 +31,32 @@ using namespace std;
using namespace Grid;
using namespace Grid::QCD;

struct time_statistics{
  double mean;
  double err;
  double min;
  double max;

  void statistics(std::vector<double> v){
    double sum = std::accumulate(v.begin(), v.end(), 0.0);
    mean = sum / v.size();

    std::vector<double> diff(v.size());
    std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; });
    double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
    err = std::sqrt(sq_sum / (v.size()*(v.size() - 1)));

    auto result = std::minmax_element(v.begin(), v.end());
    min = *result.first;
    max = *result.second;
  }
};
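// Added usage sketch (editor's addition, not part of the diff): for samples
// {101,99,100,102,98} us, statistics() yields mean==100, err==sqrt(10/(5*4))
// ~0.707 (the standard error of the mean), min==98 and max==102.
static void time_statistics_example(void){
  std::vector<double> samples({101.0, 99.0, 100.0, 102.0, 98.0});
  time_statistics ts;
  ts.statistics(samples);
}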
void header(){
  std::cout <<GridLogMessage << " L "<<"\t"<<" Ls "<<"\t"
            <<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl;
};

int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);
@@ -40,17 +66,21 @@ int main (int argc, char ** argv)
  int threads = GridThread::GetThreads();
  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

  int Nloop=10;
  int Nloop=100;
  int nmu=0;
  int maxlat=32;
  for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++;

  std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl;
  std::vector<double> t_time(Nloop);
  time_statistics timestat;

  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
  int maxlat=24;
  header();
  for(int lat=4;lat<=maxlat;lat+=4){
    for(int Ls=8;Ls<=32;Ls*=2){
    for(int Ls=8;Ls<=8;Ls*=2){

      std::vector<int> latt_size ({lat*mpi_layout[0],
                                   lat*mpi_layout[1],
@@ -58,15 +88,23 @@ int main (int argc, char ** argv)
                                   lat*mpi_layout[3]});

      GridCartesian Grid(latt_size,simd_layout,mpi_layout);
      RealD Nrank = Grid._Nprocessors;
      RealD Nnode = Grid.NodeCount();
      RealD ppn = Nrank/Nnode;

      std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
      std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
      std::vector<Vector<HalfSpinColourVectorD> > xbuf(8);
      std::vector<Vector<HalfSpinColourVectorD> > rbuf(8);

      int ncomm;
      int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
      for(int mu=0;mu<8;mu++){
        xbuf[mu].resize(lat*lat*lat*Ls);
        rbuf[mu].resize(lat*lat*lat*Ls);
        // std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl;
      }

      double start=usecond();
      for(int i=0;i<Nloop;i++){
        double start=usecond();

        std::vector<CartesianCommunicator::CommsRequest_t> requests;

@@ -79,7 +117,6 @@ int main (int argc, char ** argv)
            int comm_proc=1;
            int xmit_to_rank;
            int recv_from_rank;

            Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
            Grid.SendToRecvFromBegin(requests,
                                     (void *)&xbuf[mu][0],
@@ -102,18 +139,24 @@ int main (int argc, char ** argv)
        }
        Grid.SendToRecvFromComplete(requests);
        Grid.Barrier();

        double stop=usecond();
        t_time[i] = stop-start; // microseconds
      }
      double stop=usecond();

      double dbytes    = bytes;
      double xbytes    = Nloop*dbytes*2.0*ncomm;
      timestat.statistics(t_time);

      double dbytes    = bytes*ppn;
      double xbytes    = dbytes*2.0*ncomm;
      double rbytes    = xbytes;
      double bidibytes = xbytes+rbytes;

      double time = stop-start; // microseconds
      std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
               <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
               <<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
               <<xbytes/timestat.max <<" "<< xbytes/timestat.min
               << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
               << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;

      std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
    }
  }
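  {
    // Added sketch (editor's addition, not Grid code): the traffic model
    // used above, with invented numbers; bytes per microsecond equals MB/s.
    // sizeof(HalfSpinColourVectorD) is assumed to be 96 here
    // (2 spins x 3 colours x complex double).
    double ex_bytes = 16.0*16.0*16.0*8.0*96.0; // hypothetical 16^3 x Ls=8 halo slab
    double ex_ppn   = 2.0;                     // hypothetical ranks per node
    double ex_uni   = ex_bytes*ex_ppn*2.0*4.0; // fwd+bwd exchange in ncomm=4 dims
    double ex_bidi  = 2.0*ex_uni;              // send plus receive
    std::cout<<GridLogMessage<<"example model: uni "<<ex_uni/500.0<<" MB/s, bidi "
             <<ex_bidi/500.0<<" MB/s at a 500 us mean"<<std::endl;
  }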
@@ -121,25 +164,32 @@ int main (int argc, char ** argv)
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl;
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;

  header();

  for(int lat=4;lat<=maxlat;lat+=4){
    for(int Ls=8;Ls<=32;Ls*=2){
    for(int Ls=8;Ls<=8;Ls*=2){

      std::vector<int> latt_size ({lat,lat,lat,lat});

      GridCartesian Grid(latt_size,simd_layout,mpi_layout);
      RealD Nrank = Grid._Nprocessors;
      RealD Nnode = Grid.NodeCount();
      RealD ppn = Nrank/Nnode;

      std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
      std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
      std::vector<Vector<HalfSpinColourVectorD> > xbuf(8);
      std::vector<Vector<HalfSpinColourVectorD> > rbuf(8);

      for(int mu=0;mu<8;mu++){
        xbuf[mu].resize(lat*lat*lat*Ls);
        rbuf[mu].resize(lat*lat*lat*Ls);
        // std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl;
      }

      int ncomm;
      int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);

      double start=usecond();
      for(int i=0;i<Nloop;i++){
        double start=usecond();

        ncomm=0;
        for(int mu=0;mu<4;mu++){
@@ -178,30 +228,37 @@ int main (int argc, char ** argv)
          }
        }
        Grid.Barrier();
        double stop=usecond();
        t_time[i] = stop-start; // microseconds

      }

      double stop=usecond();
      timestat.statistics(t_time);

      double dbytes    = bytes;
      double xbytes    = Nloop*dbytes*2.0*ncomm;
      double dbytes    = bytes*ppn;
      double xbytes    = dbytes*2.0*ncomm;
      double rbytes    = xbytes;
      double bidibytes = xbytes+rbytes;

      double time = stop-start;
      std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
               <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
               <<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
               <<xbytes/timestat.max <<" "<< xbytes/timestat.min
               << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
               << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;

      std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;

    }
  }

  Nloop=10;
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
  header();

  for(int lat=4;lat<=maxlat;lat+=4){
    for(int Ls=8;Ls<=32;Ls*=2){
    for(int Ls=8;Ls<=8;Ls*=2){

      std::vector<int> latt_size ({lat*mpi_layout[0],
                                   lat*mpi_layout[1],
@@ -209,6 +266,9 @@ int main (int argc, char ** argv)
                                   lat*mpi_layout[3]});

      GridCartesian Grid(latt_size,simd_layout,mpi_layout);
      RealD Nrank = Grid._Nprocessors;
      RealD Nnode = Grid.NodeCount();
      RealD ppn = Nrank/Nnode;

      std::vector<HalfSpinColourVectorD *> xbuf(8);
      std::vector<HalfSpinColourVectorD *> rbuf(8);
@@ -216,73 +276,86 @@ int main (int argc, char ** argv)
      for(int d=0;d<8;d++){
        xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
        rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
        bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
        bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
      }

      int ncomm;
      int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);

      double start=usecond();
      double dbytes;
      for(int i=0;i<Nloop;i++){
        double start=usecond();

        dbytes=0;
        ncomm=0;

        std::vector<CartesianCommunicator::CommsRequest_t> requests;

        ncomm=0;
        for(int mu=0;mu<4;mu++){

          if (mpi_layout[mu]>1 ) {

            ncomm++;
            int comm_proc=1;
            int xmit_to_rank;
            int recv_from_rank;

            Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
            Grid.StencilSendToRecvFromBegin(requests,
                                            (void *)&xbuf[mu][0],
                                            xmit_to_rank,
                                            (void *)&rbuf[mu][0],
                                            recv_from_rank,
                                            bytes);
            dbytes+=
              Grid.StencilSendToRecvFromBegin(requests,
                                              (void *)&xbuf[mu][0],
                                              xmit_to_rank,
                                              (void *)&rbuf[mu][0],
                                              recv_from_rank,
                                              bytes,mu);

            comm_proc = mpi_layout[mu]-1;

            Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
            Grid.StencilSendToRecvFromBegin(requests,
                                            (void *)&xbuf[mu+4][0],
                                            xmit_to_rank,
                                            (void *)&rbuf[mu+4][0],
                                            recv_from_rank,
                                            bytes);
            dbytes+=
              Grid.StencilSendToRecvFromBegin(requests,
                                              (void *)&xbuf[mu+4][0],
                                              xmit_to_rank,
                                              (void *)&rbuf[mu+4][0],
                                              recv_from_rank,
                                              bytes,mu+4);

          }
        }
        Grid.StencilSendToRecvFromComplete(requests);
        Grid.StencilSendToRecvFromComplete(requests,0);
        Grid.Barrier();

        double stop=usecond();
        t_time[i] = stop-start; // microseconds

      }
      double stop=usecond();

      double dbytes    = bytes;
      double xbytes    = Nloop*dbytes*2.0*ncomm;
      double rbytes    = xbytes;
      double bidibytes = xbytes+rbytes;
      timestat.statistics(t_time);

      dbytes=dbytes*ppn;
      double xbytes    = dbytes*0.5;
      double rbytes    = dbytes*0.5;
      double bidibytes = dbytes;

      std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
               <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
               <<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
               <<xbytes/timestat.max <<" "<< xbytes/timestat.min
               << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
               << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;

      double time = stop-start; // microseconds

      std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
    }
  }

  Nloop=100;
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
  header();

  for(int lat=4;lat<=maxlat;lat+=4){
    for(int Ls=8;Ls<=32;Ls*=2){
    for(int Ls=8;Ls<=8;Ls*=2){

      std::vector<int> latt_size ({lat*mpi_layout[0],
                                   lat*mpi_layout[1],
@@ -290,6 +363,9 @@ int main (int argc, char ** argv)
                                   lat*mpi_layout[3]});

      GridCartesian Grid(latt_size,simd_layout,mpi_layout);
      RealD Nrank = Grid._Nprocessors;
      RealD Nnode = Grid.NodeCount();
      RealD ppn = Nrank/Nnode;

      std::vector<HalfSpinColourVectorD *> xbuf(8);
      std::vector<HalfSpinColourVectorD *> rbuf(8);
@@ -297,16 +373,18 @@ int main (int argc, char ** argv)
      for(int d=0;d<8;d++){
        xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
        rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
        bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
        bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
      }

      int ncomm;
      int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);

      double start=usecond();
      double dbytes;
      for(int i=0;i<Nloop;i++){
        double start=usecond();

        std::vector<CartesianCommunicator::CommsRequest_t> requests;

        dbytes=0;
        ncomm=0;
        for(int mu=0;mu<4;mu++){

@@ -318,44 +396,146 @@ int main (int argc, char ** argv)
            int recv_from_rank;

            Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
            Grid.StencilSendToRecvFromBegin(requests,
                                            (void *)&xbuf[mu][0],
                                            xmit_to_rank,
                                            (void *)&rbuf[mu][0],
                                            recv_from_rank,
                                            bytes);
            Grid.StencilSendToRecvFromComplete(requests);
            dbytes+=
              Grid.StencilSendToRecvFromBegin(requests,
                                              (void *)&xbuf[mu][0],
                                              xmit_to_rank,
                                              (void *)&rbuf[mu][0],
                                              recv_from_rank,
                                              bytes,mu);
            Grid.StencilSendToRecvFromComplete(requests,mu);
            requests.resize(0);

            comm_proc = mpi_layout[mu]-1;

            Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
            Grid.StencilSendToRecvFromBegin(requests,
                                            (void *)&xbuf[mu+4][0],
                                            xmit_to_rank,
                                            (void *)&rbuf[mu+4][0],
                                            recv_from_rank,
                                            bytes);
            Grid.StencilSendToRecvFromComplete(requests);
            dbytes+=
              Grid.StencilSendToRecvFromBegin(requests,
                                              (void *)&xbuf[mu+4][0],
                                              xmit_to_rank,
                                              (void *)&rbuf[mu+4][0],
                                              recv_from_rank,
                                              bytes,mu+4);
            Grid.StencilSendToRecvFromComplete(requests,mu+4);
            requests.resize(0);

          }
        }
        Grid.Barrier();

        double stop=usecond();
        t_time[i] = stop-start; // microseconds

      }
      double stop=usecond();

      double dbytes    = bytes;
      double xbytes    = Nloop*dbytes*2.0*ncomm;
      double rbytes    = xbytes;
      double bidibytes = xbytes+rbytes;
      timestat.statistics(t_time);

      double time = stop-start; // microseconds
      dbytes=dbytes*ppn;
      double xbytes    = dbytes*0.5;
      double rbytes    = dbytes*0.5;
      double bidibytes = dbytes;

      std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;

      std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
               <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
               <<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
               <<xbytes/timestat.max <<" "<< xbytes/timestat.min
               << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
               << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;

    }
  }

  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  header();

  for(int lat=4;lat<=maxlat;lat+=4){
    for(int Ls=8;Ls<=8;Ls*=2){

      std::vector<int> latt_size ({lat*mpi_layout[0],
                                   lat*mpi_layout[1],
                                   lat*mpi_layout[2],
                                   lat*mpi_layout[3]});

      GridCartesian Grid(latt_size,simd_layout,mpi_layout);
      RealD Nrank = Grid._Nprocessors;
      RealD Nnode = Grid.NodeCount();
      RealD ppn = Nrank/Nnode;

      std::vector<HalfSpinColourVectorD *> xbuf(8);
      std::vector<HalfSpinColourVectorD *> rbuf(8);
      Grid.ShmBufferFreeAll();
      for(int d=0;d<8;d++){
        xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
        rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
        bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
        bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
      }

      int ncomm;
      int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
      double dbytes;
      for(int i=0;i<Nloop;i++){
        double start=usecond();

        std::vector<CartesianCommunicator::CommsRequest_t> requests;
        dbytes=0;
        ncomm=0;

        parallel_for(int dir=0;dir<8;dir++){

          double tbytes;
          int mu =dir % 4;

          if (mpi_layout[mu]>1 ) {

            ncomm++;
            int xmit_to_rank;
            int recv_from_rank;
            if ( dir == mu ) {
              int comm_proc=1;
              Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
            } else {
              int comm_proc = mpi_layout[mu]-1;
              Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
            }

            tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
                                               (void *)&rbuf[dir][0], recv_from_rank, bytes,dir);

#pragma omp atomic
            dbytes+=tbytes;
          }
        }
        Grid.Barrier();
        double stop=usecond();
        t_time[i] = stop-start; // microseconds
      }

      timestat.statistics(t_time);

      dbytes=dbytes*ppn;
      double xbytes    = dbytes*0.5;
      double rbytes    = dbytes*0.5;
      double bidibytes = dbytes;

      std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
               <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
               <<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
               <<xbytes/timestat.max <<" "<< xbytes/timestat.min
               << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
               << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;

    }
  }
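  {
    // Added sketch (editor's addition, not Grid code): the accumulation
    // pattern used by the threaded loop above. Each direction's thread adds
    // its own byte count into a shared total; omp atomic keeps += race-free.
    double total = 0;
    parallel_for(int dir=0;dir<8;dir++){
      double tbytes = 1.0e6; // stand-in for the bytes moved in this direction
#pragma omp atomic
      total += tbytes;
    }
    std::cout<<GridLogMessage<<"example threaded byte total "<<total<<std::endl;
  }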
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "= All done; Bye Bye"<<std::endl;
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;

  Grid_finalize();
}
@@ -1,28 +1,22 @@
/*************************************************************************************

/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./benchmarks/Benchmark_dwf.cc

    Copyright (C) 2015

Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
@@ -57,7 +51,13 @@ int main (int argc, char ** argv)
  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

  std::vector<int> latt4 = GridDefaultLatt();
  const int Ls=16;
  int Ls=16;
  for(int i=0;i<argc;i++)
    if(std::string(argv[i]) == "-Ls"){
      std::stringstream ss(argv[i+1]); ss >> Ls;
    }

  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
@@ -151,6 +151,7 @@ int main (int argc, char ** argv)
  RealD M5  =1.8;

  RealD NP = UGrid->_Nprocessors;
  RealD NN = UGrid->NodeCount();

  std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
  std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
@@ -160,13 +161,17 @@ int main (int argc, char ** argv)
  std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
  if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
  if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
  if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
  if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
  if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
  std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;

  DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
  int ncall =1000;
  int ncall =500;
  if (1) {
    FGrid->Barrier();
    Dw.ZeroCounters();
@@ -189,6 +194,7 @@ int main (int argc, char ** argv)
    // std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
    std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
    std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
    std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
    err = ref-result;
    std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;

@@ -225,6 +231,7 @@ int main (int argc, char ** argv)
    std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
    std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
    std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
    std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
    err = ref-result;
    std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;

@@ -240,6 +247,10 @@ int main (int argc, char ** argv)
    std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
    if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
    if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
    if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
    if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
    if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
    if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
    if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
@@ -271,6 +282,7 @@ int main (int argc, char ** argv)
    std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
    std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
    std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
    std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
    // std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
    sDw.Report();
    RealD sum=0;
@@ -296,6 +308,7 @@ int main (int argc, char ** argv)
      std::cout<< "sD ERR \n " << err <<std::endl;
    }
    assert(sum < 1.0e-4);

    if(1){
      std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
@@ -303,6 +316,10 @@ int main (int argc, char ** argv)
      std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
      if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
      if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
      if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
      if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
      if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   )
        std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
      if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)
@@ -342,6 +359,7 @@ int main (int argc, char ** argv)

      std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl;
      std::cout<<GridLogMessage << "sDeo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
      std::cout<<GridLogMessage << "sDeo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
      sDw.Report();

      sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
@@ -370,8 +388,23 @@ int main (int argc, char ** argv)
      }
      assert(error<1.0e-4);
    }

    if(0){
      std::cout << "Single cache warm call to sDw.Dhop " <<std::endl;
      for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
        sDw.Dhop(ssrc,sresult,0);
        PerformanceCounter Counter(i);
        Counter.Start();
        sDw.Dhop(ssrc,sresult,0);
        Counter.Stop();
        Counter.Report();
      }
    }

  }

  if (1)
  { // Naive wilson dag implementation
    ref = zero;
@@ -420,14 +453,15 @@ int main (int argc, char ** argv)

  // S-direction is INNERMOST and takes no part in the parity.
  static int Opt;   // these are a temporary hack
  static int Comms; // these are a temporary hack

  std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
  std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO "<<std::endl;
  std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
  if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
  if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
  if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
  if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
  if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
@@ -448,6 +482,7 @@ int main (int argc, char ** argv)

  std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
  std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
  std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
  Dw.Report();
  }
  Dw.DhopEO(src_o,r_e,DaggerNo);
@@ -474,8 +509,9 @@ int main (int argc, char ** argv)
  std::cout<<GridLogMessage << "norm diff even "<< norm2(src_e)<<std::endl;
  std::cout<<GridLogMessage << "norm diff odd "<< norm2(src_o)<<std::endl;

  //assert(norm2(src_e)<1.0e-4);
  //assert(norm2(src_o)<1.0e-4);

  assert(norm2(src_e)<1.0e-4);
  assert(norm2(src_o)<1.0e-4);
  Grid_finalize();
  exit(0);
}
benchmarks/Benchmark_gparity.cc (new file, 190 lines)
@@ -0,0 +1,190 @@
#include <Grid/Grid.h>
#include <sstream>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;

template<class d>
struct scal {
  d internal;
};

Gamma::Algebra Gmu [] = {
  Gamma::Algebra::GammaX,
  Gamma::Algebra::GammaY,
  Gamma::Algebra::GammaZ,
  Gamma::Algebra::GammaT
};

typedef typename GparityDomainWallFermionF::FermionField GparityLatticeFermionF;
typedef typename GparityDomainWallFermionD::FermionField GparityLatticeFermionD;


int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);

  int Ls=16;
  for(int i=0;i<argc;i++)
    if(std::string(argv[i]) == "-Ls"){
      std::stringstream ss(argv[i+1]); ss >> Ls;
    }

  int threads = GridThread::GetThreads();
  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
  std::cout<<GridLogMessage << "Ls = " << Ls << std::endl;

  std::vector<int> latt4 = GridDefaultLatt();

  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);

  std::vector<int> seeds4({1,2,3,4});
  std::vector<int> seeds5({5,6,7,8});

  std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
  GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
  std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
  GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
  std::cout << GridLogMessage << "Initialised RNGs" << std::endl;

  GparityLatticeFermionF src (FGrid); random(RNG5,src);
  RealD N2 = 1.0/::sqrt(norm2(src));
  src = src*N2;

  GparityLatticeFermionF result(FGrid); result=zero;
  GparityLatticeFermionF ref(FGrid);    ref=zero;
  GparityLatticeFermionF tmp(FGrid);
  GparityLatticeFermionF err(FGrid);

  std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
  LatticeGaugeFieldF Umu(UGrid);
  SU3::HotConfiguration(RNG4,Umu);
  std::cout << GridLogMessage << "Random gauge initialised " << std::endl;

  RealD mass=0.1;
  RealD M5  =1.8;

  RealD NP = UGrid->_Nprocessors;
  RealD NN = UGrid->NodeCount();

  std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
  std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
  std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
  std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
  std::cout << GridLogMessage<< "* Benchmarking DomainWallFermion::Dhop "<<std::endl;
  std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplexF::Nsimd()<<std::endl;
#ifdef GRID_OMP
  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
  if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
  if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
  if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
  std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;

  std::cout << GridLogMessage<< "* SINGLE/SINGLE"<<std::endl;
  GparityDomainWallFermionF Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
  int ncall =1000;
  if (1) {
    FGrid->Barrier();
    Dw.ZeroCounters();
    Dw.Dhop(src,result,0);
    std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
    double t0=usecond();
    for(int i=0;i<ncall;i++){
      __SSC_START;
      Dw.Dhop(src,result,0);
      __SSC_STOP;
    }
    double t1=usecond();
    FGrid->Barrier();

    double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
    double flops=2*1344*volume*ncall;

    std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
    // std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
    // std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
    std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
    std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
    std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
    Dw.Report();
  }

  std::cout << GridLogMessage<< "* SINGLE/HALF"<<std::endl;
  GparityDomainWallFermionFH DwH(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
  if (1) {
    FGrid->Barrier();
    DwH.ZeroCounters();
    DwH.Dhop(src,result,0);
    double t0=usecond();
    for(int i=0;i<ncall;i++){
      __SSC_START;
      DwH.Dhop(src,result,0);
      __SSC_STOP;
    }
    double t1=usecond();
    FGrid->Barrier();

    double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
    double flops=2*1344*volume*ncall;

    std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
    std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
    std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
    std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
    DwH.Report();
  }

  GridCartesian         * UGrid_d   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
  GridRedBlackCartesian * UrbGrid_d = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_d);
  GridCartesian         * FGrid_d   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_d);
  GridRedBlackCartesian * FrbGrid_d = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_d);

  std::cout << GridLogMessage<< "* DOUBLE/DOUBLE"<<std::endl;
  GparityLatticeFermionD src_d(FGrid_d);
  precisionChange(src_d,src);

  LatticeGaugeFieldD Umu_d(UGrid_d);
  precisionChange(Umu_d,Umu);

  GparityLatticeFermionD result_d(FGrid_d);

  GparityDomainWallFermionD DwD(Umu_d,*FGrid_d,*FrbGrid_d,*UGrid_d,*UrbGrid_d,mass,M5);
  if (1) {
    FGrid_d->Barrier();
    DwD.ZeroCounters();
    DwD.Dhop(src_d,result_d,0);
    std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
    double t0=usecond();
    for(int i=0;i<ncall;i++){
      __SSC_START;
      DwD.Dhop(src_d,result_d,0);
      __SSC_STOP;
    }
    double t1=usecond();
    FGrid_d->Barrier();

    double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
    double flops=2*1344*volume*ncall;

    std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
    // std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
    // std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
    std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
    std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
    std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
    DwD.Report();
  }

  Grid_finalize();
}
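// ----------------------------------------------------------------------
// Added note (editor's addition): the double-precision pass above follows
// a simple pattern worth pulling out: rebuild the grids for vComplexD,
// then convert the existing single-precision fields with precisionChange
// before constructing and timing the double operator, condensed here:
//
//   GparityLatticeFermionD src_d(FGrid_d);  precisionChange(src_d,src);
//   LatticeGaugeFieldD     Umu_d(UGrid_d);  precisionChange(Umu_d,Umu);
//   GparityDomainWallFermionD DwD(Umu_d,*FGrid_d,*FrbGrid_d,*UGrid_d,*UrbGrid_d,mass,M5);
// ----------------------------------------------------------------------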
|
||||
|
@@ -55,21 +55,21 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
uint64_t lmax=44;
#define NLOOP (1*lmax*lmax*lmax*lmax/vol)
for(int lat=4;lat<=lmax;lat+=4){
uint64_t lmax=96;
#define NLOOP (10*lmax*lmax*lmax*lmax/vol)
for(int lat=8;lat<=lmax;lat+=8){

std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);

uint64_t Nloop=NLOOP;

// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));

LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
LatticeVec y(&Grid); //random(pRNG,y);
LatticeVec z(&Grid);// random(pRNG,z);
LatticeVec x(&Grid);// random(pRNG,x);
LatticeVec y(&Grid);// random(pRNG,y);
double a=2.0;

@@ -83,7 +83,7 @@ int main (int argc, char ** argv)
double time = (stop-start)/Nloop*1000;

double flops=vol*Nvec*2;// mul,add
double bytes=3*vol*Nvec*sizeof(Real);
double bytes=3.0*vol*Nvec*sizeof(Real);
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;

}
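For orientation (again inferred from the code, not stated in it): time = (stop-start)/Nloop*1000 turns microseconds per iteration into nanoseconds, so bytes/time is bytes per nanosecond, i.e. GB/s, and flops/time is Gflop/s. The byte count models the z = a*x + y update as three streams per site vector, two loads and one store:

    \mathrm{bytes} = (\underbrace{2}_{\text{load } x,\,y} + \underbrace{1}_{\text{store } z}) \times V \times N_\mathrm{vec} \times \mathrm{sizeof(Real)}\,.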
@@ -94,17 +94,17 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;

for(int lat=4;lat<=lmax;lat+=4){
for(int lat=8;lat<=lmax;lat+=8){

std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);

// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));

LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
LatticeVec y(&Grid); //random(pRNG,y);
LatticeVec z(&Grid);// random(pRNG,z);
LatticeVec x(&Grid);// random(pRNG,x);
LatticeVec y(&Grid);// random(pRNG,y);
double a=2.0;

uint64_t Nloop=NLOOP;
@@ -119,7 +119,7 @@ int main (int argc, char ** argv)
double time = (stop-start)/Nloop*1000;

double flops=vol*Nvec*2;// mul,add
double bytes=3*vol*Nvec*sizeof(Real);
double bytes=3.0*vol*Nvec*sizeof(Real);
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;

}
@@ -129,20 +129,20 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;

for(int lat=4;lat<=lmax;lat+=4){
for(int lat=8;lat<=lmax;lat+=8){

std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
uint64_t Nloop=NLOOP;

GridCartesian Grid(latt_size,simd_layout,mpi_layout);

// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));

LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
LatticeVec y(&Grid); //random(pRNG,y);
LatticeVec z(&Grid);// random(pRNG,z);
LatticeVec x(&Grid);// random(pRNG,x);
LatticeVec y(&Grid);// random(pRNG,y);
RealD a=2.0;

@@ -154,7 +154,7 @@ int main (int argc, char ** argv)
double stop=usecond();
double time = (stop-start)/Nloop*1000;

double bytes=2*vol*Nvec*sizeof(Real);
double bytes=2.0*vol*Nvec*sizeof(Real);
double flops=vol*Nvec*1;// mul
std::cout<<GridLogMessage <<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;

@@ -166,17 +166,17 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;

for(int lat=4;lat<=lmax;lat+=4){
for(int lat=8;lat<=lmax;lat+=8){

std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
uint64_t Nloop=NLOOP;
GridCartesian Grid(latt_size,simd_layout,mpi_layout);

// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
LatticeVec y(&Grid); //random(pRNG,y);
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
LatticeVec z(&Grid);// random(pRNG,z);
LatticeVec x(&Grid);// random(pRNG,x);
LatticeVec y(&Grid);// random(pRNG,y);
RealD a=2.0;
Real nn;
double start=usecond();
@@ -187,7 +187,7 @@ int main (int argc, char ** argv)
double stop=usecond();
double time = (stop-start)/Nloop*1000;

double bytes=vol*Nvec*sizeof(Real);
double bytes=1.0*vol*Nvec*sizeof(Real);
double flops=vol*Nvec*2;// mul,add
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<< "\t\t"<<(stop-start)/1000./1000.<< "\t\t " <<std::endl;

@@ -40,7 +40,7 @@ int main (int argc, char ** argv)
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);

int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

@@ -35,13 +35,14 @@ using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
#define LMAX (64)

int Nloop=1000;
int64_t Nloop=20;

std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();

int threads = GridThread::GetThreads();
int64_t threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
@@ -50,19 +51,19 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;

for(int lat=2;lat<=32;lat+=2){
for(int lat=2;lat<=LMAX;lat+=2){

std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));

LatticeColourMatrix z(&Grid);// random(pRNG,z);
LatticeColourMatrix x(&Grid);// random(pRNG,x);
LatticeColourMatrix y(&Grid);// random(pRNG,y);
LatticeColourMatrix z(&Grid); random(pRNG,z);
LatticeColourMatrix x(&Grid); random(pRNG,x);
LatticeColourMatrix y(&Grid); random(pRNG,y);

double start=usecond();
for(int i=0;i<Nloop;i++){
for(int64_t i=0;i<Nloop;i++){
x=x*y;
}
double stop=usecond();
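For the SU(3) sections here and below, the textbook flop count for one 3x3 complex matrix multiply per site (quoted as the standard number; the counting line itself falls outside these hunks) is

    N_\mathrm{flop} = 27 \times \underbrace{6}_{\text{cmul}} + 18 \times \underbrace{2}_{\text{cadd}} = 198 \ \text{per site}\,,

a plausible basis for the GFlop/s column.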
@@ -82,20 +83,20 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;

for(int lat=2;lat<=32;lat+=2){
for(int lat=2;lat<=LMAX;lat+=2){

std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];

GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));

LatticeColourMatrix z(&Grid); //random(pRNG,z);
LatticeColourMatrix x(&Grid); //random(pRNG,x);
LatticeColourMatrix y(&Grid); //random(pRNG,y);
LatticeColourMatrix z(&Grid); random(pRNG,z);
LatticeColourMatrix x(&Grid); random(pRNG,x);
LatticeColourMatrix y(&Grid); random(pRNG,y);

double start=usecond();
for(int i=0;i<Nloop;i++){
for(int64_t i=0;i<Nloop;i++){
z=x*y;
}
double stop=usecond();
@@ -113,20 +114,20 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;

for(int lat=2;lat<=32;lat+=2){
for(int lat=2;lat<=LMAX;lat+=2){

std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];

GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));

LatticeColourMatrix z(&Grid); //random(pRNG,z);
LatticeColourMatrix x(&Grid); //random(pRNG,x);
LatticeColourMatrix y(&Grid); //random(pRNG,y);
LatticeColourMatrix z(&Grid); random(pRNG,z);
LatticeColourMatrix x(&Grid); random(pRNG,x);
LatticeColourMatrix y(&Grid); random(pRNG,y);

double start=usecond();
for(int i=0;i<Nloop;i++){
for(int64_t i=0;i<Nloop;i++){
mult(z,x,y);
}
double stop=usecond();
@@ -144,20 +145,20 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;

for(int lat=2;lat<=32;lat+=2){
for(int lat=2;lat<=LMAX;lat+=2){

std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];

GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));

LatticeColourMatrix z(&Grid); //random(pRNG,z);
LatticeColourMatrix x(&Grid); //random(pRNG,x);
LatticeColourMatrix y(&Grid); //random(pRNG,y);
LatticeColourMatrix z(&Grid); random(pRNG,z);
LatticeColourMatrix x(&Grid); random(pRNG,x);
LatticeColourMatrix y(&Grid); random(pRNG,y);

double start=usecond();
for(int i=0;i<Nloop;i++){
for(int64_t i=0;i<Nloop;i++){
mac(z,x,y);
}
double stop=usecond();
@@ -58,7 +58,7 @@ int main (int argc, char ** argv)
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);

int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

@@ -93,7 +93,7 @@ int main (int argc, char ** argv)
std::cout << latt_size.back() << "\t\t";

GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);

GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);

@@ -1,11 +1,7 @@
include Make.inc

simple: simple_su3_test.o simple_su3_expr.o simple_simd_test.o

EXTRA_LIBRARIES = libsimple_su3_test.a libsimple_su3_expr.a libsimple_simd_test.a

libsimple_su3_test_a_SOURCES = simple_su3_test.cc

libsimple_su3_expr_a_SOURCES = simple_su3_expr.cc

libsimple_simd_test_a_SOURCES = simple_simd_test.cc
bench-local: all
./Benchmark_su3
./Benchmark_memory_bandwidth
./Benchmark_wilson
./Benchmark_dwf --dslash-unroll
@@ -1,11 +1,11 @@
#!/usr/bin/env bash

EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2'
EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2'

echo "-- deploying Eigen source..."
wget ${EIGEN_URL} --no-check-certificate
./scripts/update_eigen.sh `basename ${EIGEN_URL}`
rm `basename ${EIGEN_URL}`
#rm `basename ${EIGEN_URL}`

echo '-- generating Make.inc files...'
./scripts/filelist
configure.ac
@@ -1,16 +1,23 @@
AC_PREREQ([2.63])
AC_INIT([Grid], [0.6.0], [https://github.com/paboyle/Grid], [Grid])
AC_INIT([Grid], [0.7.0], [https://github.com/paboyle/Grid], [Grid])
AC_CANONICAL_BUILD
AC_CANONICAL_HOST
AC_CANONICAL_TARGET
AM_INIT_AUTOMAKE(subdir-objects)
AM_INIT_AUTOMAKE([subdir-objects 1.13])
AM_EXTRA_RECURSIVE_TARGETS([tests bench])
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_SRCDIR([lib/Grid.h])
AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])

############### Checks for programs
################ Get git info
#AC_REVISION([m4_esyscmd_s([./scripts/configure.commit])])

################ Set flags
# do not move!
CXXFLAGS="-O3 $CXXFLAGS"

############### Checks for programs
AC_PROG_CXX
AC_PROG_RANLIB

@@ -24,12 +31,14 @@ AX_GXX_VERSION
AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
[version of g++ that will compile the code])

############### Checks for typedefs, structures, and compiler characteristics
AC_TYPE_SIZE_T
AC_TYPE_UINT32_T
AC_TYPE_UINT64_T

############### OpenMP
AC_OPENMP
ac_openmp=no
if test "${OPENMP_CXXFLAGS}X" != "X"; then
@@ -45,9 +54,14 @@ AC_CHECK_HEADERS(malloc/malloc.h)
AC_CHECK_HEADERS(malloc.h)
AC_CHECK_HEADERS(endian.h)
AC_CHECK_HEADERS(execinfo.h)
AC_CHECK_HEADERS(numaif.h)
AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])

############## Standard libraries
AC_CHECK_LIB([m],[cos])
AC_CHECK_LIB([stdc++],[abort])

############### GMP and MPFR
AC_ARG_WITH([gmp],
[AS_HELP_STRING([--with-gmp=prefix],
@@ -60,16 +74,23 @@ AC_ARG_WITH([mpfr],
[AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])

############### FFTW3
AC_ARG_WITH([fftw],
[AS_HELP_STRING([--with-fftw=prefix],
[try this for a non-standard install prefix of the FFTW3 library])],
[AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])

############### lapack
############### LIME
AC_ARG_WITH([lime],
[AS_HELP_STRING([--with-lime=prefix],
[try this for a non-standard install prefix of the LIME library])],
[AM_CXXFLAGS="-I$with_lime/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_lime/lib $AM_LDFLAGS"])

############### lapack
AC_ARG_ENABLE([lapack],
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
[ac_LAPACK=${enable_lapack}], [ac_LAPACK=no])

case ${ac_LAPACK} in
@@ -85,7 +106,7 @@ esac

############### FP16 conversions
AC_ARG_ENABLE([sfw-fp16],
[AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])],
[ac_SFW_FP16=${enable_sfw_fp16}], [ac_SFW_FP16=yes])
case ${ac_SFW_FP16} in
yes)
@@ -120,7 +141,7 @@ AC_ARG_WITH([hdf5],

############### first-touch
AC_ARG_ENABLE([numa],
[AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
[ac_NUMA=${enable_NUMA}],[ac_NUMA=no])

case ${ac_NUMA} in
@@ -146,8 +167,8 @@ if test "${ac_MKL}x" != "nox"; then
fi

AC_SEARCH_LIBS([__gmpf_init], [gmp],
[AC_SEARCH_LIBS([mpfr_init], [mpfr],
[AC_DEFINE([HAVE_LIBMPFR], [1],
[Define to 1 if you have the `MPFR' library])]
[have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])]
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])]
@@ -156,7 +177,7 @@ AC_SEARCH_LIBS([__gmpf_init], [gmp],
if test "${ac_LAPACK}x" != "nox"; then
AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [],
[AC_MSG_ERROR("LAPACK enabled but library not found")])
fi

AC_SEARCH_LIBS([fftw_execute], [fftw3],
[AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [],
@@ -164,6 +185,23 @@ AC_SEARCH_LIBS([fftw_execute], [fftw3],
[AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])]
[have_fftw=true])

AC_SEARCH_LIBS([limeCreateReader], [lime],
[AC_DEFINE([HAVE_LIME], [1], [Define to 1 if you have the `LIME' library])]
[have_lime=true],
[AC_MSG_WARN(C-LIME library was not found in your system.
In order to use ILGG file format please install or provide the correct path to your installation
Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/)])

AC_SEARCH_LIBS([crc32], [z],
[AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the `LIBZ' library])]
[have_zlib=true] [LIBS="${LIBS} -lz"],
[AC_MSG_ERROR(zlib library was not found in your system.)])

AC_SEARCH_LIBS([move_pages], [numa],
[AC_DEFINE([HAVE_LIBNUMA], [1], [Define to 1 if you have the `LIBNUMA' library])]
[have_libnuma=true] [LIBS="${LIBS} -lnuma"],
[AC_MSG_WARN(libnuma library was not found in your system. Some optimisations will not apply)])

AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp],
[AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])]
[have_hdf5=true]
@@ -216,6 +254,7 @@ case ${ax_cv_cxx_compiler_vendor} in
SIMD_FLAGS='';;
KNL)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
AC_DEFINE([KNL],[1],[Knights landing processor])
SIMD_FLAGS='-march=knl';;
GEN)
AC_DEFINE([GEN],[1],[generic vector code])
@@ -223,6 +262,9 @@ case ${ax_cv_cxx_compiler_vendor} in
[generic SIMD vector width (in bytes)])
SIMD_GEN_WIDTH_MSG=" (width= $ac_gen_simd_width)"
SIMD_FLAGS='';;
NEONv8)
AC_DEFINE([NEONV8],[1],[ARMv8 NEON])
SIMD_FLAGS='-march=armv8-a';;
QPX|BGQ)
AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q])
SIMD_FLAGS='';;
@@ -251,6 +293,7 @@ case ${ax_cv_cxx_compiler_vendor} in
SIMD_FLAGS='';;
KNL)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing])
AC_DEFINE([KNL],[1],[Knights landing processor])
SIMD_FLAGS='-xmic-avx512';;
GEN)
AC_DEFINE([GEN],[1],[generic vector code])
@@ -288,8 +331,41 @@ case ${ac_PRECISION} in
double)
AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
;;
*)
AC_MSG_ERROR([${ac_PRECISION} unsupported --enable-precision option]);
;;
esac

###################### Shared memory allocation technique under MPI3
AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmget|shmopen|hugetlbfs],
[Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=shmopen])

case ${ac_SHM} in

shmget)
AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] )
;;

shmopen)
AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] )
;;

hugetlbfs)
AC_DEFINE([GRID_MPI3_SHMMMAP],[1],[GRID_MPI3_SHMMMAP] )
;;

*)
AC_MSG_ERROR([${ac_SHM} unsupported --enable-shm option]);
;;
esac

###################### Shared base path for SHMMMAP
AC_ARG_ENABLE([shmpath],[AC_HELP_STRING([--enable-shmpath=path],
[Select SHM mmap base path for hugetlbfs])],
[ac_SHMPATH=${enable_shmpath}],
[ac_SHMPATH=/var/lib/hugetlbfs/pagesize-2MB/])
AC_DEFINE_UNQUOTED([GRID_SHM_PATH],["$ac_SHMPATH"],[Path to a hugetlbfs filesystem for MMAPing])

############### communication type selection
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem],
[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
@@ -299,14 +375,14 @@ case ${ac_COMMS} in
AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] )
comms_type='none'
;;
mpi3l*)
AC_DEFINE([GRID_COMMS_MPI3L],[1],[GRID_COMMS_MPI3L] )
comms_type='mpi3l'
;;
mpi3*)
AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] )
comms_type='mpi3'
;;
mpit)
AC_DEFINE([GRID_COMMS_MPIT],[1],[GRID_COMMS_MPIT] )
comms_type='mpit'
;;
mpi*)
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
comms_type='mpi'
@@ -316,7 +392,7 @@ case ${ac_COMMS} in
comms_type='shmem'
;;
*)
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
;;
esac
case ${ac_COMMS} in
@@ -334,7 +410,7 @@ esac
AM_CONDITIONAL(BUILD_COMMS_SHMEM, [ test "${comms_type}X" == "shmemX" ])
AM_CONDITIONAL(BUILD_COMMS_MPI, [ test "${comms_type}X" == "mpiX" ])
AM_CONDITIONAL(BUILD_COMMS_MPI3, [ test "${comms_type}X" == "mpi3X" ] )
AM_CONDITIONAL(BUILD_COMMS_MPI3L, [ test "${comms_type}X" == "mpi3lX" ] )
AM_CONDITIONAL(BUILD_COMMS_MPIT, [ test "${comms_type}X" == "mpitX" ] )
AM_CONDITIONAL(BUILD_COMMS_NONE, [ test "${comms_type}X" == "noneX" ])
############### RNG selection
@@ -353,7 +429,7 @@ case ${ac_RNG} in
AC_DEFINE([RNG_SITMO],[1],[RNG_SITMO] )
;;
*)
AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
;;
esac

@@ -370,7 +446,7 @@ case ${ac_TIMERS} in
AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] )
;;
*)
AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
;;
esac

@@ -382,7 +458,7 @@ case ${ac_CHROMA} in
yes|no)
;;
*)
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
;;
esac

@@ -403,12 +479,67 @@ DX_INIT_DOXYGEN([$PACKAGE_NAME], [doxygen.cfg])

############### Ouput
cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd}
GRID_CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
GRID_LDFLAGS="$AM_LDFLAGS $LDFLAGS"
GRID_LIBS=$LIBS
GRID_SHORT_SHA=`git rev-parse --short HEAD`
GRID_SHA=`git rev-parse HEAD`
GRID_BRANCH=`git rev-parse --abbrev-ref HEAD`
AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS"
AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS"
AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS"
AC_SUBST([AM_CFLAGS])
AC_SUBST([AM_CXXFLAGS])
AC_SUBST([AM_LDFLAGS])
AC_SUBST([GRID_CXXFLAGS])
AC_SUBST([GRID_LDFLAGS])
AC_SUBST([GRID_LIBS])
AC_SUBST([GRID_SHA])
AC_SUBST([GRID_BRANCH])

git_commit=`cd $srcdir && ./scripts/configure.commit`

echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Summary of configuration for $PACKAGE v$VERSION
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
----- GIT VERSION -------------------------------------
$git_commit
----- PLATFORM ----------------------------------------
architecture (build)        : $build_cpu
os (build)                  : $build_os
architecture (target)       : $target_cpu
os (target)                 : $target_os
compiler vendor             : ${ax_cv_cxx_compiler_vendor}
compiler version            : ${ax_cv_gxx_version}
----- BUILD OPTIONS -----------------------------------
SIMD                        : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
Threading                   : ${ac_openmp}
Communications type         : ${comms_type}
Shared memory allocator     : ${ac_SHM}
Shared memory mmap path     : ${ac_SHMPATH}
Default precision           : ${ac_PRECISION}
Software FP16 conversion    : ${ac_SFW_FP16}
RNG choice                  : ${ac_RNG}
GMP                         : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
LAPACK                      : ${ac_LAPACK}
FFTW                        : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
LIME (ILDG support)         : `if test "x$have_lime" = xtrue; then echo yes; else echo no; fi`
HDF5                        : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi`
build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi`
----- BUILD FLAGS -------------------------------------
CXXFLAGS:
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'`
LDFLAGS:
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'`
LIBS:
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/    -/g'`
-------------------------------------------------------" > grid.configure.summary

GRID_SUMMARY="`cat grid.configure.summary`"
AM_SUBST_NOTMAKE([GRID_SUMMARY])
AC_SUBST([GRID_SUMMARY])

AC_CONFIG_FILES([grid-config], [chmod +x grid-config])
AC_CONFIG_FILES(Makefile)
AC_CONFIG_FILES(lib/Makefile)
AC_CONFIG_FILES(tests/Makefile)
@@ -419,6 +550,8 @@ AC_CONFIG_FILES(tests/forces/Makefile)
AC_CONFIG_FILES(tests/hadrons/Makefile)
AC_CONFIG_FILES(tests/hmc/Makefile)
AC_CONFIG_FILES(tests/solver/Makefile)
AC_CONFIG_FILES(tests/lanczos/Makefile)
AC_CONFIG_FILES(tests/smearing/Makefile)
AC_CONFIG_FILES(tests/qdpxx/Makefile)
AC_CONFIG_FILES(tests/testu01/Makefile)
AC_CONFIG_FILES(benchmarks/Makefile)
@@ -426,36 +559,7 @@ AC_CONFIG_FILES(extras/Makefile)
AC_CONFIG_FILES(extras/Hadrons/Makefile)
AC_OUTPUT

echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Summary of configuration for $PACKAGE v$VERSION
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
----- PLATFORM ----------------------------------------
architecture (build)        : $build_cpu
os (build)                  : $build_os
architecture (target)       : $target_cpu
os (target)                 : $target_os
compiler vendor             : ${ax_cv_cxx_compiler_vendor}
compiler version            : ${ax_cv_gxx_version}
----- BUILD OPTIONS -----------------------------------
SIMD                        : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
Threading                   : ${ac_openmp}
Communications type         : ${comms_type}
Default precision           : ${ac_PRECISION}
Software FP16 conversion    : ${ac_SFW_FP16}
RNG choice                  : ${ac_RNG}
GMP                         : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
LAPACK                      : ${ac_LAPACK}
FFTW                        : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
HDF5                        : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi`
build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi`
----- BUILD FLAGS -------------------------------------
CXXFLAGS:
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'`
LDFLAGS:
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'`
LIBS:
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/    -/g'`
-------------------------------------------------------" > config.summary
echo ""
cat config.summary
cat grid.configure.summary
echo ""

@@ -162,7 +162,8 @@ void Application::saveParameterFile(const std::string parameterFileName)
sizeString((size)*locVol_) << " (" << sizeString(size)  << "/site)"

#define DEFINE_MEMPEAK \
auto memPeak = [this](const std::vector<unsigned int> &program)\
GeneticScheduler<unsigned int>::ObjFunc memPeak = \
[this](const std::vector<unsigned int> &program)\
{\
unsigned int memPeak;\
bool msg;\

@@ -41,9 +41,10 @@ using namespace Hadrons;
// constructor /////////////////////////////////////////////////////////////////
Environment::Environment(void)
{
nd_ = GridDefaultLatt().size();
dim_ = GridDefaultLatt();
nd_  = dim_.size();
grid4d_.reset(SpaceTimeGrid::makeFourDimGrid(
GridDefaultLatt(), GridDefaultSimd(nd_, vComplex::Nsimd()),
dim_, GridDefaultSimd(nd_, vComplex::Nsimd()),
GridDefaultMpi()));
gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get()));
auto loc = getGrid()->LocalDimensions();
@@ -132,6 +133,16 @@ unsigned int Environment::getNd(void) const
return nd_;
}

std::vector<int> Environment::getDim(void) const
{
return dim_;
}

int Environment::getDim(const unsigned int mu) const
{
return dim_[mu];
}

// random number generator /////////////////////////////////////////////////////
void Environment::setSeed(const std::vector<int> &seed)
{
@@ -271,6 +282,21 @@ std::string Environment::getModuleType(const std::string name) const
return getModuleType(getModuleAddress(name));
}

std::string Environment::getModuleNamespace(const unsigned int address) const
{
std::string type = getModuleType(address), ns;

auto pos2 = type.rfind("::");
auto pos1 = type.rfind("::", pos2 - 2);

return type.substr(pos1 + 2, pos2 - pos1 - 2);
}

std::string Environment::getModuleNamespace(const std::string name) const
{
return getModuleNamespace(getModuleAddress(name));
}
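The two rfind calls above step back over the last two "::" separators, so getModuleNamespace returns the next-to-last component of the fully qualified type name. A standalone sketch of the same string logic (the input is a mock qualified name, not something produced here):

#include <iostream>
#include <string>

int main(void)
{
    std::string type = "Hadrons::MContraction::TDiscLoop"; // mock input

    auto pos2 = type.rfind("::");           // separator before "TDiscLoop"
    auto pos1 = type.rfind("::", pos2 - 2); // separator before "MContraction"
    // prints "MContraction"
    std::cout << type.substr(pos1 + 2, pos2 - pos1 - 2) << std::endl;

    return 0;
}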
bool Environment::hasModule(const unsigned int address) const
{
return (address < module_.size());
@@ -492,7 +518,14 @@ std::string Environment::getObjectType(const unsigned int address) const
{
if (hasRegisteredObject(address))
{
return typeName(object_[address].type);
if (object_[address].type)
{
return typeName(object_[address].type);
}
else
{
return "<no type>";
}
}
else if (hasObject(address))
{
@@ -532,6 +565,23 @@ Environment::Size Environment::getObjectSize(const std::string name) const
return getObjectSize(getObjectAddress(name));
}

unsigned int Environment::getObjectModule(const unsigned int address) const
{
if (hasObject(address))
{
return object_[address].module;
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
}
}

unsigned int Environment::getObjectModule(const std::string name) const
{
return getObjectModule(getObjectAddress(name));
}

unsigned int Environment::getObjectLs(const unsigned int address) const
{
if (hasRegisteredObject(address))

@@ -106,6 +106,8 @@ public:
void createGrid(const unsigned int Ls);
GridCartesian * getGrid(const unsigned int Ls = 1) const;
GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const;
std::vector<int> getDim(void) const;
int getDim(const unsigned int mu) const;
unsigned int getNd(void) const;
// random number generator
void setSeed(const std::vector<int> &seed);
@@ -131,6 +133,8 @@ public:
std::string getModuleName(const unsigned int address) const;
std::string getModuleType(const unsigned int address) const;
std::string getModuleType(const std::string name) const;
std::string getModuleNamespace(const unsigned int address) const;
std::string getModuleNamespace(const std::string name) const;
bool hasModule(const unsigned int address) const;
bool hasModule(const std::string name) const;
Graph<unsigned int> makeModuleGraph(void) const;
@@ -171,6 +175,8 @@ public:
std::string getObjectType(const std::string name) const;
Size getObjectSize(const unsigned int address) const;
Size getObjectSize(const std::string name) const;
unsigned int getObjectModule(const unsigned int address) const;
unsigned int getObjectModule(const std::string name) const;
unsigned int getObjectLs(const unsigned int address) const;
unsigned int getObjectLs(const std::string name) const;
bool hasObject(const unsigned int address) const;
@@ -181,6 +187,10 @@ public:
bool hasCreatedObject(const std::string name) const;
bool isObject5d(const unsigned int address) const;
bool isObject5d(const std::string name) const;
template <typename T>
bool isObjectOfType(const unsigned int address) const;
template <typename T>
bool isObjectOfType(const std::string name) const;
Environment::Size getTotalSize(void) const;
void addOwnership(const unsigned int owner,
const unsigned int property);
@@ -197,6 +207,7 @@ private:
bool dryRun_{false};
unsigned int traj_, locVol_;
// grids
std::vector<int> dim_;
GridPt grid4d_;
std::map<unsigned int, GridPt> grid5d_;
GridRbPt gridRb4d_;
@@ -343,7 +354,7 @@ T * Environment::getObject(const unsigned int address) const
else
{
HADRON_ERROR("object with address " + std::to_string(address) +
" does not have type '" + typeid(T).name() +
" does not have type '" + typeName(&typeid(T)) +
"' (has type '" + getObjectType(address) + "')");
}
}
@@ -380,6 +391,37 @@ T * Environment::createLattice(const std::string name)
return createLattice<T>(getObjectAddress(name));
}

template <typename T>
bool Environment::isObjectOfType(const unsigned int address) const
{
if (hasRegisteredObject(address))
{
if (auto h = dynamic_cast<Holder<T> *>(object_[address].data.get()))
{
return true;
}
else
{
return false;
}
}
else if (hasObject(address))
{
HADRON_ERROR("object with address " + std::to_string(address) +
" exists but is not registered");
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
}
}

template <typename T>
bool Environment::isObjectOfType(const std::string name) const
{
return isObjectOfType<T>(getObjectAddress(name));
}

END_HADRONS_NAMESPACE

#endif // Hadrons_Environment_hpp_

@@ -51,23 +51,43 @@ using Grid::operator<<;
* error with GCC 5 (clang & GCC 6 compile fine without it).
*/

// FIXME: find a way to do that in a more general fashion
#ifndef FIMPL
#define FIMPL WilsonImplR
#endif
#ifndef SIMPL
#define SIMPL ScalarImplCR
#endif

BEGIN_HADRONS_NAMESPACE

// type aliases
#define TYPE_ALIASES(FImpl, suffix)\
#define FERM_TYPE_ALIASES(FImpl, suffix)\
typedef FermionOperator<FImpl> FMat##suffix; \
typedef typename FImpl::FermionField FermionField##suffix; \
typedef typename FImpl::PropagatorField PropagatorField##suffix; \
typedef typename FImpl::SitePropagator SitePropagator##suffix; \
typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix;\
typedef std::function<void(FermionField##suffix &, \
typedef std::vector<typename FImpl::SitePropagator::scalar_object> \
SlicedPropagator##suffix;

#define GAUGE_TYPE_ALIASES(FImpl, suffix)\
typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix;

#define SCALAR_TYPE_ALIASES(SImpl, suffix)\
typedef typename SImpl::Field ScalarField##suffix;\
typedef typename SImpl::Field PropagatorField##suffix;

#define SOLVER_TYPE_ALIASES(FImpl, suffix)\
typedef std::function<void(FermionField##suffix &,\
const FermionField##suffix &)> SolverFn##suffix;

#define SINK_TYPE_ALIASES(suffix)\
typedef std::function<SlicedPropagator##suffix(const PropagatorField##suffix &)> SinkFn##suffix;

#define FGS_TYPE_ALIASES(FImpl, suffix)\
FERM_TYPE_ALIASES(FImpl, suffix)\
GAUGE_TYPE_ALIASES(FImpl, suffix)\
SOLVER_TYPE_ALIASES(FImpl, suffix)
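The ## token pasting above lets a single module instantiate several alias families side by side (suffixes 1, 2, 3 in the contraction modules further down, an empty suffix otherwise), and FGS_TYPE_ALIASES stacks the fermion, gauge and solver families in one shot. A self-contained toy of the same pattern, with a mock implementation type standing in for a Grid FImpl:

#include <functional>
#include <iostream>

struct MockImpl { typedef double FermionField; }; // stand-in for a fermion implementation

#define FERM_TYPE_ALIASES(FImpl, suffix)\
typedef typename FImpl::FermionField FermionField##suffix;

#define SOLVER_TYPE_ALIASES(FImpl, suffix)\
typedef std::function<void(FermionField##suffix &,\
                           const FermionField##suffix &)> SolverFn##suffix;

template <typename FImpl>
struct TExample
{
    FERM_TYPE_ALIASES(FImpl,);   // FermionField
    SOLVER_TYPE_ALIASES(FImpl,); // SolverFn, built on FermionField
};

int main(void)
{
    TExample<MockImpl>::SolverFn solve =
        [](double &out, const double &in){ out = 2.*in; };
    double in = 1.5, out = 0.;
    solve(out, in);
    std::cout << out << std::endl; // prints 3

    return 0;
}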
// logger
class HadronsLogger: public Logger
{
@@ -145,6 +165,15 @@ std::string typeName(void)
return typeName(typeIdPt<T>());
}

// default writers/readers
#ifdef HAVE_HDF5
typedef Hdf5Reader CorrReader;
typedef Hdf5Writer CorrWriter;
#else
typedef XmlReader CorrReader;
typedef XmlWriter CorrWriter;
#endif

END_HADRONS_NAMESPACE

#endif // Hadrons_Global_hpp_

@@ -1,40 +1,25 @@
/*************************************************************************************

Grid physics library, www.github.com/paboyle/Grid

Source file: extras/Hadrons/Modules.hpp

Copyright (C) 2015
Copyright (C) 2016

Author: Antonin Portelli <antonin.portelli@me.com>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MAction/DWF.hpp>
#include <Grid/Hadrons/Modules/MAction/Wilson.hpp>
#include <Grid/Hadrons/Modules/MContraction/Baryon.hpp>
#include <Grid/Hadrons/Modules/MContraction/DiscLoop.hpp>
#include <Grid/Hadrons/Modules/MContraction/Gamma3pt.hpp>
#include <Grid/Hadrons/Modules/MContraction/Meson.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp>
#include <Grid/Hadrons/Modules/MFermion/GaugeProp.hpp>
#include <Grid/Hadrons/Modules/MGauge/Load.hpp>
#include <Grid/Hadrons/Modules/MGauge/Random.hpp>
#include <Grid/Hadrons/Modules/MGauge/StochEm.hpp>
#include <Grid/Hadrons/Modules/MGauge/Unit.hpp>
#include <Grid/Hadrons/Modules/MLoop/NoiseLoop.hpp>
#include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp>
#include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp>
#include <Grid/Hadrons/Modules/MScalar/Scalar.hpp>
#include <Grid/Hadrons/Modules/MSink/Point.hpp>
#include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp>
#include <Grid/Hadrons/Modules/MSource/Point.hpp>
#include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp>
#include <Grid/Hadrons/Modules/MSource/Wall.hpp>
#include <Grid/Hadrons/Modules/MSource/Z2.hpp>
#include <Grid/Hadrons/Modules/Quark.hpp>
@@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/
/* END LEGAL */

#ifndef Hadrons_DWF_hpp_
#define Hadrons_DWF_hpp_
#ifndef Hadrons_MAction_DWF_hpp_
#define Hadrons_MAction_DWF_hpp_

#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
@@ -48,14 +48,15 @@ public:
std::string, gauge,
unsigned int, Ls,
double      , mass,
double      , M5);
double      , M5,
std::string , boundary);
};

template <typename FImpl>
class TDWF: public Module<DWFPar>
{
public:
TYPE_ALIASES(FImpl,);
FGS_TYPE_ALIASES(FImpl,);
public:
// constructor
TDWF(const std::string name);
@@ -116,14 +117,19 @@ void TDWF<FImpl>::execute(void)
<< par().mass << ", M5= " << par().M5 << " and Ls= "
<< par().Ls << " using gauge field '" << par().gauge << "'"
<< std::endl;
LOG(Message) << "Fermion boundary conditions: " << par().boundary
<< std::endl;
env().createGrid(par().Ls);
auto &U    = *env().template getObject<LatticeGaugeField>(par().gauge);
auto &g4   = *env().getGrid();
auto &grb4 = *env().getRbGrid();
auto &g5   = *env().getGrid(par().Ls);
auto &grb5 = *env().getRbGrid(par().Ls);
std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
typename DomainWallFermion<FImpl>::ImplParams implParams(boundary);
FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4,
par().mass, par().M5);
par().mass, par().M5,
implParams);
env().setObject(getName(), fMatPt);
}

@@ -131,4 +137,4 @@ END_MODULE_NAMESPACE

END_HADRONS_NAMESPACE

#endif // Hadrons_DWF_hpp_
#endif // Hadrons_MAction_DWF_hpp_
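Both actions now take the boundary phases as a string, one entry per direction, parsed by strToVec<Complex> and forwarded through ImplParams. A standalone sketch of the string convention with a mock parser (the value "1 1 1 -1", i.e. antiperiodic in time, is an assumed example, not a project default):

#include <complex>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main(void)
{
    std::string boundary = "1 1 1 -1"; // assumed example: periodic space, antiperiodic time

    // mock of what strToVec<Complex> does for purely real phases
    std::istringstream is(boundary);
    std::vector<std::complex<double>> phases;
    double re;
    while (is >> re) phases.push_back(std::complex<double>(re, 0.));

    for (auto &p: phases) std::cout << p << " "; // (1,0) (1,0) (1,0) (-1,0)
    std::cout << std::endl;

    return 0;
}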
@@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/
/* END LEGAL */

#ifndef Hadrons_Wilson_hpp_
#define Hadrons_Wilson_hpp_
#ifndef Hadrons_MAction_Wilson_hpp_
#define Hadrons_MAction_Wilson_hpp_

#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
@@ -46,14 +46,15 @@ class WilsonPar: Serializable
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar,
std::string, gauge,
double     , mass);
double     , mass,
std::string, boundary);
};

template <typename FImpl>
class TWilson: public Module<WilsonPar>
{
public:
TYPE_ALIASES(FImpl,);
FGS_TYPE_ALIASES(FImpl,);
public:
// constructor
TWilson(const std::string name);
@@ -112,10 +113,15 @@ void TWilson<FImpl>::execute()
{
LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass
<< " using gauge field '" << par().gauge << "'" << std::endl;
LOG(Message) << "Fermion boundary conditions: " << par().boundary
<< std::endl;
auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge);
auto &grid   = *env().getGrid();
auto &gridRb = *env().getRbGrid();
FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass);
std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
typename WilsonFermion<FImpl>::ImplParams implParams(boundary);
FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass,
implParams);
env().setObject(getName(), fMatPt);
}

@@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/
/* END LEGAL */

#ifndef Hadrons_Baryon_hpp_
#define Hadrons_Baryon_hpp_
#ifndef Hadrons_MContraction_Baryon_hpp_
#define Hadrons_MContraction_Baryon_hpp_

#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
@@ -55,9 +55,9 @@ template <typename FImpl1, typename FImpl2, typename FImpl3>
class TBaryon: public Module<BaryonPar>
{
public:
TYPE_ALIASES(FImpl1, 1);
TYPE_ALIASES(FImpl2, 2);
TYPE_ALIASES(FImpl3, 3);
FERM_TYPE_ALIASES(FImpl1, 1);
FERM_TYPE_ALIASES(FImpl2, 2);
FERM_TYPE_ALIASES(FImpl3, 3);
class Result: Serializable
{
public:
@@ -112,7 +112,7 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void)
<< " quarks '" << par().q1 << "', '" << par().q2 << "', and '"
<< par().q3 << "'" << std::endl;

XmlWriter writer(par().output);
CorrWriter writer(par().output);
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
PropagatorField3 &q3 = *env().template getObject<PropagatorField3>(par().q2);
@@ -121,11 +121,11 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void)

// FIXME: do contractions

write(writer, "meson", result);
// write(writer, "meson", result);
}

END_MODULE_NAMESPACE

END_HADRONS_NAMESPACE

#endif // Hadrons_Baryon_hpp_
#endif // Hadrons_MContraction_Baryon_hpp_
extras/Hadrons/Modules/MContraction/DiscLoop.hpp (new file, 144 lines)
@@ -0,0 +1,144 @@
/*************************************************************************************

Grid physics library, www.github.com/paboyle/Grid

Source file: extras/Hadrons/Modules/MContraction/DiscLoop.hpp

Copyright (C) 2017

Author: Andrew Lawson <andrew.lawson1991@gmail.com>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */

#ifndef Hadrons_MContraction_DiscLoop_hpp_
#define Hadrons_MContraction_DiscLoop_hpp_

#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>

BEGIN_HADRONS_NAMESPACE

/******************************************************************************
*                                DiscLoop                                    *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)

class DiscLoopPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(DiscLoopPar,
std::string,    q_loop,
Gamma::Algebra, gamma,
std::string,    output);
};

template <typename FImpl>
class TDiscLoop: public Module<DiscLoopPar>
{
FERM_TYPE_ALIASES(FImpl,);
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
Gamma::Algebra, gamma,
std::vector<Complex>, corr);
};
public:
// constructor
TDiscLoop(const std::string name);
// destructor
virtual ~TDiscLoop(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};

MODULE_REGISTER_NS(DiscLoop, TDiscLoop<FIMPL>, MContraction);

/******************************************************************************
*                       TDiscLoop implementation                             *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TDiscLoop<FImpl>::TDiscLoop(const std::string name)
: Module<DiscLoopPar>(name)
{}

// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TDiscLoop<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().q_loop};

return in;
}

template <typename FImpl>
std::vector<std::string> TDiscLoop<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};

return out;
}

// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TDiscLoop<FImpl>::setup(void)
{

}

// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TDiscLoop<FImpl>::execute(void)
{
LOG(Message) << "Computing disconnected loop contraction '" << getName()
<< "' using '" << par().q_loop << "' with " << par().gamma
<< " insertion." << std::endl;

CorrWriter writer(par().output);
PropagatorField &q_loop = *env().template getObject<PropagatorField>(par().q_loop);
LatticeComplex c(env().getGrid());
Gamma gamma(par().gamma);
std::vector<TComplex> buf;
Result result;

c = trace(gamma*q_loop);
sliceSum(c, buf, Tp);

result.gamma = par().gamma;
result.corr.resize(buf.size());
for (unsigned int t = 0; t < buf.size(); ++t)
{
result.corr[t] = TensorRemove(buf[t]);
}

write(writer, "disc", result);
}
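In effect the execute() above computes, per timeslice, the spin-colour trace of the gamma insertion against the loop propagator; written out (my reading of trace + sliceSum, for orientation only):

    C(t) = \sum_{\vec{x}} \mathrm{tr}\left[\, \Gamma \, q_\mathrm{loop}(\vec{x}, t) \,\right].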

END_MODULE_NAMESPACE

END_HADRONS_NAMESPACE

#endif // Hadrons_MContraction_DiscLoop_hpp_

extras/Hadrons/Modules/MContraction/Gamma3pt.hpp (new file, 170 lines)
@@ -0,0 +1,170 @@
/*************************************************************************************

Grid physics library, www.github.com/paboyle/Grid

Source file: extras/Hadrons/Modules/MContraction/Gamma3pt.hpp

Copyright (C) 2017

Author: Andrew Lawson <andrew.lawson1991@gmail.com>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/*  END LEGAL */

#ifndef Hadrons_MContraction_Gamma3pt_hpp_
#define Hadrons_MContraction_Gamma3pt_hpp_

#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>

BEGIN_HADRONS_NAMESPACE

/*
 * 3pt contraction with gamma matrix insertion.
 *
 * Schematic:
 *
 *             q2            q3
 *        /----<------*------<----¬
 *       /          gamma          \
 *      /                           \
 *   i *                             * f
 *      \                           /
 *       \                         /
 *        \----------->----------/
 *                   q1
 *
 *      trace(g5*q1*adj(q2)*g5*gamma*q3)
 */

/******************************************************************************
 *                               Gamma3pt                                     *
 ******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)

class Gamma3ptPar: Serializable
{
public:
    GRID_SERIALIZABLE_CLASS_MEMBERS(Gamma3ptPar,
                                    std::string,    q1,
                                    std::string,    q2,
                                    std::string,    q3,
                                    Gamma::Algebra, gamma,
                                    std::string,    output);
};

template <typename FImpl1, typename FImpl2, typename FImpl3>
class TGamma3pt: public Module<Gamma3ptPar>
{
    FERM_TYPE_ALIASES(FImpl1, 1);
    FERM_TYPE_ALIASES(FImpl2, 2);
    FERM_TYPE_ALIASES(FImpl3, 3);
    class Result: Serializable
    {
    public:
        GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
                                        Gamma::Algebra,       gamma,
                                        std::vector<Complex>, corr);
    };
public:
    // constructor
    TGamma3pt(const std::string name);
    // destructor
    virtual ~TGamma3pt(void) = default;
    // dependency relation
    virtual std::vector<std::string> getInput(void);
    virtual std::vector<std::string> getOutput(void);
    // setup
    virtual void setup(void);
    // execution
    virtual void execute(void);
};

MODULE_REGISTER_NS(Gamma3pt, ARG(TGamma3pt<FIMPL, FIMPL, FIMPL>), MContraction);

/******************************************************************************
 *                       TGamma3pt implementation                             *
 ******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
TGamma3pt<FImpl1, FImpl2, FImpl3>::TGamma3pt(const std::string name)
: Module<Gamma3ptPar>(name)
{}

// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getInput(void)
{
    std::vector<std::string> in = {par().q1, par().q2, par().q3};
    
    return in;
}

template <typename FImpl1, typename FImpl2, typename FImpl3>
std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getOutput(void)
{
    std::vector<std::string> out = {getName()};
    
    return out;
}

// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TGamma3pt<FImpl1, FImpl2, FImpl3>::setup(void)
{
    
}

// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TGamma3pt<FImpl1, FImpl2, FImpl3>::execute(void)
{
    LOG(Message) << "Computing 3pt contractions '" << getName() << "' using"
                 << " quarks '" << par().q1 << "', '" << par().q2 << "' and '"
                 << par().q3 << "', with " << par().gamma << " insertion."
                 << std::endl;

    CorrWriter            writer(par().output);
    PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1);
    PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2);
    PropagatorField3      &q3 = *env().template getObject<PropagatorField3>(par().q3);
    LatticeComplex        c(env().getGrid());
    Gamma                 g5(Gamma::Algebra::Gamma5);
    Gamma                 gamma(par().gamma);
    std::vector<TComplex> buf;
    Result                result;

    c = trace(g5*q1*adj(q2)*(g5*gamma)*q3);
    sliceSum(c, buf, Tp);

    result.gamma = par().gamma;
    result.corr.resize(buf.size());
    for (unsigned int t = 0; t < buf.size(); ++t)
    {
        result.corr[t] = TensorRemove(buf[t]);
    }

    write(writer, "gamma3pt", result);
}

END_MODULE_NAMESPACE

END_HADRONS_NAMESPACE

#endif // Hadrons_MContraction_Gamma3pt_hpp_
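For orientation, this is how such a module would be scheduled from a Hadrons Application program; a minimal sketch, assuming an Application object named application and three previously created propagators whose names ("Qpt_s", "Qseq", "Qpt_d") are purely illustrative:

    // Sketch only: object names and output path are hypothetical.
    MContraction::Gamma3pt::Par c3Par;

    c3Par.q1     = "Qpt_s";                // propagator leaving the source i
    c3Par.q2     = "Qseq";                 // propagator entering the insertion
    c3Par.q3     = "Qpt_d";                // propagator leaving the insertion
    c3Par.gamma  = Gamma::Algebra::GammaT; // inserted gamma matrix
    c3Par.output = "3pt/c3_gammaT";        // CorrWriter output file stem
    application.createModule<MContraction::Gamma3pt>("c3_gammaT", c3Par);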
@@ -6,8 +6,10 @@ Source file: extras/Hadrons/Modules/MContraction/Meson.hpp
 
 Copyright (C) 2015
 Copyright (C) 2016
+Copyright (C) 2017
 
 Author: Antonin Portelli <antonin.portelli@me.com>
+        Andrew Lawson    <andrew.lawson1991@gmail.com>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -27,8 +29,8 @@ See the full license in the file "LICENSE" in the top level distribution directory
 *************************************************************************************/
 /*  END LEGAL */
 
-#ifndef Hadrons_Meson_hpp_
-#define Hadrons_Meson_hpp_
+#ifndef Hadrons_MContraction_Meson_hpp_
+#define Hadrons_MContraction_Meson_hpp_
 
 #include <Grid/Hadrons/Global.hpp>
 #include <Grid/Hadrons/Module.hpp>
@@ -36,32 +38,56 @@ See the full license in the file "LICENSE" in the top level distribution directory
 
 BEGIN_HADRONS_NAMESPACE
 
+/*
+ 
+ Meson contractions
+ -----------------------------
+ 
+ * options:
+ - q1: input propagator 1 (string)
+ - q2: input propagator 2 (string)
+ - gammas: gamma products to insert at sink & source, pairs of gamma matrices
+           (space-separated strings) in angled brackets (i.e. <g_sink g_src>),
+           in a sequence (e.g. "<Gamma5 Gamma5><Gamma5 GammaT>").
+ 
+           Special values: "all" - perform all possible contractions.
+ - mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0."),
+        given as multiples of (2*pi) / L.
+*/
+
 /******************************************************************************
  *                                TMeson                                      *
  ******************************************************************************/
 BEGIN_MODULE_NAMESPACE(MContraction)
 
+typedef std::pair<Gamma::Algebra, Gamma::Algebra> GammaPair;
+
 class MesonPar: Serializable
 {
 public:
     GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar,
-                                    std::string,    q1,
-                                    std::string,    q2,
-                                    std::string,    output,
-                                    Gamma::Algebra, gammaSource,
-                                    Gamma::Algebra, gammaSink);
+                                    std::string, q1,
+                                    std::string, q2,
+                                    std::string, gammas,
+                                    std::string, sink,
+                                    std::string, output);
 };
 
 template <typename FImpl1, typename FImpl2>
 class TMeson: public Module<MesonPar>
 {
 public:
-    TYPE_ALIASES(FImpl1, 1);
-    TYPE_ALIASES(FImpl2, 2);
+    FERM_TYPE_ALIASES(FImpl1, 1);
+    FERM_TYPE_ALIASES(FImpl2, 2);
+    FERM_TYPE_ALIASES(ScalarImplCR, Scalar);
+    SINK_TYPE_ALIASES(Scalar);
    class Result: Serializable
    {
    public:
-        GRID_SERIALIZABLE_CLASS_MEMBERS(Result, std::vector<Complex>, corr);
+        GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
+                                        Gamma::Algebra,       gamma_snk,
+                                        Gamma::Algebra,       gamma_src,
+                                        std::vector<Complex>, corr);
    };
@@ -71,6 +97,7 @@ public:
     // dependencies/products
     virtual std::vector<std::string> getInput(void);
     virtual std::vector<std::string> getOutput(void);
+    virtual void parseGammaString(std::vector<GammaPair> &gammaList);
     // execution
     virtual void execute(void);
 };
@@ -90,7 +117,7 @@ TMeson<FImpl1, FImpl2>::TMeson(const std::string name)
 template <typename FImpl1, typename FImpl2>
 std::vector<std::string> TMeson<FImpl1, FImpl2>::getInput(void)
 {
-    std::vector<std::string> input = {par().q1, par().q2};
+    std::vector<std::string> input = {par().q1, par().q2, par().sink};
     
     return input;
 }
@@ -103,7 +130,35 @@ std::vector<std::string> TMeson<FImpl1, FImpl2>::getOutput(void)
     return output;
 }
 
+template <typename FImpl1, typename FImpl2>
+void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList)
+{
+    gammaList.clear();
+    // Determine gamma matrices to insert at source/sink.
+    if (par().gammas.compare("all") == 0)
+    {
+        // Do all contractions.
+        for (unsigned int i = 1; i < Gamma::nGamma; i += 2)
+        {
+            for (unsigned int j = 1; j < Gamma::nGamma; j += 2)
+            {
+                gammaList.push_back(std::make_pair((Gamma::Algebra)i,
+                                                   (Gamma::Algebra)j));
+            }
+        }
+    }
+    else
+    {
+        // Parse individual contractions from input string.
+        gammaList = strToVec<GammaPair>(par().gammas);
+    }
+}
+
 
 // execution ///////////////////////////////////////////////////////////////////
+#define mesonConnected(q1, q2, gSnk, gSrc) \
+(g5*(gSnk))*(q1)*(adj(gSrc)*g5)*adj(q2)
+
 template <typename FImpl1, typename FImpl2>
 void TMeson<FImpl1, FImpl2>::execute(void)
 {
@@ -111,21 +166,73 @@ void TMeson<FImpl1, FImpl2>::execute(void)
                  << " quarks '" << par().q1 << "' and '" << par().q2 << "'"
                  << std::endl;
     
-    XmlWriter             writer(par().output);
-    PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1);
-    PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2);
-    LatticeComplex        c(env().getGrid());
-    Gamma                 gSrc(par().gammaSource), gSnk(par().gammaSink);
-    Gamma                 g5(Gamma::Algebra::Gamma5);
-    std::vector<TComplex> buf;
-    Result                result;
+    CorrWriter             writer(par().output);
+    std::vector<TComplex>  buf;
+    std::vector<Result>    result;
+    Gamma                  g5(Gamma::Algebra::Gamma5);
+    std::vector<GammaPair> gammaList;
+    int                    nt = env().getDim(Tp);
     
-    c = trace(gSnk*q1*adj(gSrc)*g5*adj(q2)*g5);
-    sliceSum(c, buf, Tp);
-    result.corr.resize(buf.size());
-    for (unsigned int t = 0; t < buf.size(); ++t)
+    parseGammaString(gammaList);
+    result.resize(gammaList.size());
+    for (unsigned int i = 0; i < result.size(); ++i)
     {
-        result.corr[t] = TensorRemove(buf[t]);
+        result[i].gamma_snk = gammaList[i].first;
+        result[i].gamma_src = gammaList[i].second;
+        result[i].corr.resize(nt);
     }
+    if (env().template isObjectOfType<SlicedPropagator1>(par().q1) and
+        env().template isObjectOfType<SlicedPropagator2>(par().q2))
+    {
+        SlicedPropagator1 &q1 = *env().template getObject<SlicedPropagator1>(par().q1);
+        SlicedPropagator2 &q2 = *env().template getObject<SlicedPropagator2>(par().q2);
+        
+        LOG(Message) << "(propagator already sinked)" << std::endl;
+        for (unsigned int i = 0; i < result.size(); ++i)
+        {
+            Gamma gSnk(gammaList[i].first);
+            Gamma gSrc(gammaList[i].second);
+            
+            for (unsigned int t = 0; t < buf.size(); ++t)
+            {
+                result[i].corr[t] = TensorRemove(trace(mesonConnected(q1[t], q2[t], gSnk, gSrc)));
+            }
+        }
+    }
+    else
+    {
+        PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
+        PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
+        LatticeComplex   c(env().getGrid());
+        
+        LOG(Message) << "(using sink '" << par().sink << "')" << std::endl;
+        for (unsigned int i = 0; i < result.size(); ++i)
+        {
+            Gamma       gSnk(gammaList[i].first);
+            Gamma       gSrc(gammaList[i].second);
+            std::string ns;
+            
+            ns = env().getModuleNamespace(env().getObjectModule(par().sink));
+            if (ns == "MSource")
+            {
+                PropagatorField1 &sink =
+                    *env().template getObject<PropagatorField1>(par().sink);
+                
+                c = trace(mesonConnected(q1, q2, gSnk, gSrc)*sink);
+                sliceSum(c, buf, Tp);
+            }
+            else if (ns == "MSink")
+            {
+                SinkFnScalar &sink = *env().template getObject<SinkFnScalar>(par().sink);
+                
+                c   = trace(mesonConnected(q1, q2, gSnk, gSrc));
+                buf = sink(c);
+            }
+            for (unsigned int t = 0; t < buf.size(); ++t)
+            {
+                result[i].corr[t] = TensorRemove(buf[t]);
+            }
+        }
+    }
     write(writer, "meson", result);
 }
@@ -134,4 +241,4 @@ END_MODULE_NAMESPACE
 
 END_HADRONS_NAMESPACE
 
-#endif // Hadrons_Meson_hpp_
+#endif // Hadrons_MContraction_Meson_hpp_
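Concretely, parseGammaString turns the documented pair syntax into a list of (sink, source) algebra pairs; with "all" it instead steps through the algebra enumeration two at a time, skipping the sign-flipped entries. A short illustration of the explicit form:

    // Each <g_snk g_src> group becomes one GammaPair.
    std::vector<GammaPair> list = strToVec<GammaPair>("<Gamma5 Gamma5><Gamma5 GammaT>");
    // expected: list[0] = (Gamma5, Gamma5)   [pion-pion]
    //           list[1] = (Gamma5, GammaT)   [A_t-pion]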
extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp (new file, 114 lines)
@@ -0,0 +1,114 @@
/*************************************************************************************

Grid physics library, www.github.com/paboyle/Grid

Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp

Copyright (C) 2017

Author: Andrew Lawson <andrew.lawson1991@gmail.com>

[standard Grid GPL boilerplate, identical to the legal header reproduced in full in Gamma3pt.hpp above]
*************************************************************************************/
/*  END LEGAL */

#ifndef Hadrons_MContraction_WeakHamiltonian_hpp_
#define Hadrons_MContraction_WeakHamiltonian_hpp_

#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>

BEGIN_HADRONS_NAMESPACE

/******************************************************************************
 *                         WeakHamiltonian                                    *
 ******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)

/*******************************************************************************
 * Utilities for contractions involving the Weak Hamiltonian.
 ******************************************************************************/
//// Sum and store correlator.
#define MAKE_DIAG(exp, buf, res, n)\
sliceSum(exp, buf, Tp);\
res.name = (n);\
res.corr.resize(buf.size());\
for (unsigned int t = 0; t < buf.size(); ++t)\
{\
    res.corr[t] = TensorRemove(buf[t]);\
}

//// Contraction of mu index: use 'mu' variable in exp.
#define SUM_MU(buf,exp)\
buf = zero;\
for (unsigned int mu = 0; mu < ndim; ++mu)\
{\
    buf += exp;\
}

enum
{
    i_V = 0,
    i_A = 1,
    n_i = 2
};

class WeakHamiltonianPar: Serializable
{
public:
    GRID_SERIALIZABLE_CLASS_MEMBERS(WeakHamiltonianPar,
                                    std::string, q1,
                                    std::string, q2,
                                    std::string, q3,
                                    std::string, q4,
                                    std::string, output);
};

#define MAKE_WEAK_MODULE(modname)\
class T##modname: public Module<WeakHamiltonianPar>\
{\
public:\
    FERM_TYPE_ALIASES(FIMPL,)\
    class Result: Serializable\
    {\
    public:\
        GRID_SERIALIZABLE_CLASS_MEMBERS(Result,\
                                        std::string,          name,\
                                        std::vector<Complex>, corr);\
    };\
public:\
    /* constructor */ \
    T##modname(const std::string name);\
    /* destructor */ \
    virtual ~T##modname(void) = default;\
    /* dependency relation */ \
    virtual std::vector<std::string> getInput(void);\
    virtual std::vector<std::string> getOutput(void);\
    /* setup */ \
    virtual void setup(void);\
    /* execution */ \
    virtual void execute(void);\
    std::vector<std::string> VA_label = {"V", "A"};\
};\
MODULE_REGISTER_NS(modname, T##modname, MContraction);

END_MODULE_NAMESPACE

END_HADRONS_NAMESPACE

#endif // Hadrons_MContraction_WeakHamiltonian_hpp_
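Since every Weak Hamiltonian module below leans on these two macros, a hand expansion of one typical pair of calls may help; this is a literal expansion of the invocations made by the Eye-type module further down (expbuf, corrbuf, result, ndim are the caller's variables):

    // SUM_MU(expbuf, trace(S_body[mu]*S_loop[mu])) expands to:
    expbuf = zero;
    for (unsigned int mu = 0; mu < ndim; ++mu)
    {
        expbuf += trace(S_body[mu]*S_loop[mu]);            // Lorentz sum of the diagram
    }
    // MAKE_DIAG(expbuf, corrbuf, result[S_diag], "HW_S") expands to:
    sliceSum(expbuf, corrbuf, Tp);                         // project onto timeslices
    result[S_diag].name = ("HW_S");
    result[S_diag].corr.resize(corrbuf.size());
    for (unsigned int t = 0; t < corrbuf.size(); ++t)
    {
        result[S_diag].corr[t] = TensorRemove(corrbuf[t]); // one complex number per t
    }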
extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc (new file, 137 lines)
@@ -0,0 +1,137 @@
/*************************************************************************************

Grid physics library, www.github.com/paboyle/Grid

Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc

Copyright (C) 2017

Author: Andrew Lawson <andrew.lawson1991@gmail.com>

[standard Grid GPL boilerplate, identical to the legal header reproduced in full in Gamma3pt.hpp above]
*************************************************************************************/
/*  END LEGAL */

#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp>

using namespace Grid;
using namespace Hadrons;
using namespace MContraction;

/*
 * Weak Hamiltonian current-current contractions, Eye-type.
 * 
 * These contractions are generated by the Q1 and Q2 operators in the physical
 * basis (see e.g. Fig 3 of arXiv:1507.03094).
 * 
 * Schematics:      q4                  |                 q2            q3
 *                /-<-¬                 |        /----<------*------<----¬
 *               /     \                |       /          /-*-¬          \
 *               \     /                |      /          /     \          \
 *          q2    \   /    q3           |     /           \     /           \
 *    /-----<-----*   *-----<----¬      |    /             \   /             \
 * i *            H_W             * f   | i *               \ /      q4       * f
 *    \                          /      |    \              \->-/            /
 *     \                        /       |     \                             /
 *      \---------->----------/         |      \----------->----------/
 *                q1                    |                  q1
 *                                      |
 *            Saucer (S)                |                 Eye (E)
 * 
 * S: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1]*q4*gL[mu][p_2])
 * E: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1])*trace(q4*gL[mu][p_2])
 */

/******************************************************************************
 *                  TWeakHamiltonianEye implementation                        *
 ******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TWeakHamiltonianEye::TWeakHamiltonianEye(const std::string name)
: Module<WeakHamiltonianPar>(name)
{}

// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TWeakHamiltonianEye::getInput(void)
{
    std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4};
    
    return in;
}

std::vector<std::string> TWeakHamiltonianEye::getOutput(void)
{
    std::vector<std::string> out = {getName()};
    
    return out;
}

// setup ///////////////////////////////////////////////////////////////////////
void TWeakHamiltonianEye::setup(void)
{
    
}

// execution ///////////////////////////////////////////////////////////////////
void TWeakHamiltonianEye::execute(void)
{
    LOG(Message) << "Computing Weak Hamiltonian (Eye type) contractions '"
                 << getName() << "' using quarks '" << par().q1 << "', '"
                 << par().q2 << "', '" << par().q3 << "' and '" << par().q4
                 << "'." << std::endl;

    CorrWriter            writer(par().output);
    PropagatorField       &q1 = *env().template getObject<PropagatorField>(par().q1);
    PropagatorField       &q2 = *env().template getObject<PropagatorField>(par().q2);
    PropagatorField       &q3 = *env().template getObject<PropagatorField>(par().q3);
    PropagatorField       &q4 = *env().template getObject<PropagatorField>(par().q4);
    Gamma                 g5  = Gamma(Gamma::Algebra::Gamma5);
    LatticeComplex        expbuf(env().getGrid());
    std::vector<TComplex> corrbuf;
    std::vector<Result>   result(n_eye_diag);
    unsigned int          ndim = env().getNd();

    PropagatorField tmp1(env().getGrid());
    LatticeComplex  tmp2(env().getGrid());
    std::vector<PropagatorField> S_body(ndim, tmp1);
    std::vector<PropagatorField> S_loop(ndim, tmp1);
    std::vector<LatticeComplex>  E_body(ndim, tmp2);
    std::vector<LatticeComplex>  E_loop(ndim, tmp2);

    // Setup for S-type contractions.
    for (int mu = 0; mu < ndim; ++mu)
    {
        S_body[mu] = MAKE_SE_BODY(q1, q2, q3, GammaL(Gamma::gmu[mu]));
        S_loop[mu] = MAKE_SE_LOOP(q4, GammaL(Gamma::gmu[mu]));
    }

    // Perform S-type contractions.
    SUM_MU(expbuf, trace(S_body[mu]*S_loop[mu]))
    MAKE_DIAG(expbuf, corrbuf, result[S_diag], "HW_S")

    // Recycle sub-expressions for E-type contractions.
    for (unsigned int mu = 0; mu < ndim; ++mu)
    {
        E_body[mu] = trace(S_body[mu]);
        E_loop[mu] = trace(S_loop[mu]);
    }

    // Perform E-type contractions.
    SUM_MU(expbuf, E_body[mu]*E_loop[mu])
    MAKE_DIAG(expbuf, corrbuf, result[E_diag], "HW_E")

    write(writer, "HW_Eye", result);
}
extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp (new file, 58 lines)
@@ -0,0 +1,58 @@
/*************************************************************************************

Grid physics library, www.github.com/paboyle/Grid

Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp

Copyright (C) 2017

Author: Andrew Lawson <andrew.lawson1991@gmail.com>

[standard Grid GPL boilerplate, identical to the legal header reproduced in full in Gamma3pt.hpp above]
*************************************************************************************/
/*  END LEGAL */

#ifndef Hadrons_MContraction_WeakHamiltonianEye_hpp_
#define Hadrons_MContraction_WeakHamiltonianEye_hpp_

#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>

BEGIN_HADRONS_NAMESPACE

/******************************************************************************
 *                        WeakHamiltonianEye                                  *
 ******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)

enum
{
    S_diag     = 0,
    E_diag     = 1,
    n_eye_diag = 2
};

// Saucer and Eye subdiagram contractions.
#define MAKE_SE_BODY(Q_1, Q_2, Q_3, gamma) (Q_3*g5*Q_1*adj(Q_2)*g5*gamma)
#define MAKE_SE_LOOP(Q_loop, gamma) (Q_loop*gamma)

MAKE_WEAK_MODULE(WeakHamiltonianEye)

END_MODULE_NAMESPACE

END_HADRONS_NAMESPACE

#endif // Hadrons_MContraction_WeakHamiltonianEye_hpp_
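Putting the header's macros together with execute() above: both diagrams are built from the same two sub-expressions, and only where the trace closes differs. A single-mu restatement, without the SUM_MU/MAKE_DIAG machinery (g5, q1..q4 as in execute(); mu fixed for illustration):

    unsigned int   mu = 0;
    GammaL         gL(Gamma::gmu[mu]);
    // Saucer: one trace around body and loop together.
    LatticeComplex saucer = trace(MAKE_SE_BODY(q1, q2, q3, gL)*MAKE_SE_LOOP(q4, gL));
    // Eye: the same sub-expressions traced separately, then multiplied.
    LatticeComplex eye    = trace(MAKE_SE_BODY(q1, q2, q3, gL))*trace(MAKE_SE_LOOP(q4, gL));

This is why execute() can "recycle" S_body and S_loop: E_body and E_loop are just their traces.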
extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc (new file, 139 lines)
@@ -0,0 +1,139 @@
/*************************************************************************************

Grid physics library, www.github.com/paboyle/Grid

Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc

Copyright (C) 2017

Author: Andrew Lawson <andrew.lawson1991@gmail.com>

[standard Grid GPL boilerplate, identical to the legal header reproduced in full in Gamma3pt.hpp above]
*************************************************************************************/
/*  END LEGAL */

#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp>

using namespace Grid;
using namespace Hadrons;
using namespace MContraction;

/*
 * Weak Hamiltonian current-current contractions, Non-Eye-type.
 * 
 * These contractions are generated by the Q1 and Q2 operators in the physical
 * basis (see e.g. Fig 3 of arXiv:1507.03094).
 * 
 * Schematic:
 *            q2          q3            |            q2          q3
 *         /--<--¬     /--<--¬          |         /--<--¬     /--<--¬
 *        /       \   /       \         |        /       \   /       \
 *       /         \ /         \        |       /         \ /         \
 *      /           \           \       |      /           \           \
 *   i *           * * H_W       * f    |   i *           * * H_W       * f
 *      \           * |          /      |      \           / \          /
 *       \         /   \        /       |       \         /   \        /
 *        \       /     \      /        |        \       /     \      /
 *         \     /       \    /         |         \-->--/       \-->--/
 *          \-->--/       \-->--/       |            q1            q4
 *            q1             q4         |
 *                Connected (C)         |                Wing (W)
 *
 * C: trace(q1*adj(q2)*g5*gL[mu]*q3*adj(q4)*g5*gL[mu])
 * W: trace(q1*adj(q2)*g5*gL[mu])*trace(q3*adj(q4)*g5*gL[mu])
 *
 */

/******************************************************************************
 *                TWeakHamiltonianNonEye implementation                       *
 ******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TWeakHamiltonianNonEye::TWeakHamiltonianNonEye(const std::string name)
: Module<WeakHamiltonianPar>(name)
{}

// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TWeakHamiltonianNonEye::getInput(void)
{
    std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4};
    
    return in;
}

std::vector<std::string> TWeakHamiltonianNonEye::getOutput(void)
{
    std::vector<std::string> out = {getName()};
    
    return out;
}

// setup ///////////////////////////////////////////////////////////////////////
void TWeakHamiltonianNonEye::setup(void)
{
    
}

// execution ///////////////////////////////////////////////////////////////////
void TWeakHamiltonianNonEye::execute(void)
{
    LOG(Message) << "Computing Weak Hamiltonian (Non-Eye type) contractions '"
                 << getName() << "' using quarks '" << par().q1 << "', '"
                 << par().q2 << "', '" << par().q3 << "' and '" << par().q4
                 << "'." << std::endl;

    CorrWriter            writer(par().output);
    PropagatorField       &q1 = *env().template getObject<PropagatorField>(par().q1);
    PropagatorField       &q2 = *env().template getObject<PropagatorField>(par().q2);
    PropagatorField       &q3 = *env().template getObject<PropagatorField>(par().q3);
    PropagatorField       &q4 = *env().template getObject<PropagatorField>(par().q4);
    Gamma                 g5  = Gamma(Gamma::Algebra::Gamma5);
    LatticeComplex        expbuf(env().getGrid());
    std::vector<TComplex> corrbuf;
    std::vector<Result>   result(n_noneye_diag);
    unsigned int          ndim = env().getNd();

    PropagatorField tmp1(env().getGrid());
    LatticeComplex  tmp2(env().getGrid());
    std::vector<PropagatorField> C_i_side_loop(ndim, tmp1);
    std::vector<PropagatorField> C_f_side_loop(ndim, tmp1);
    std::vector<LatticeComplex>  W_i_side_loop(ndim, tmp2);
    std::vector<LatticeComplex>  W_f_side_loop(ndim, tmp2);

    // Setup for C-type contractions.
    for (int mu = 0; mu < ndim; ++mu)
    {
        C_i_side_loop[mu] = MAKE_CW_SUBDIAG(q1, q2, GammaL(Gamma::gmu[mu]));
        C_f_side_loop[mu] = MAKE_CW_SUBDIAG(q3, q4, GammaL(Gamma::gmu[mu]));
    }

    // Perform C-type contractions.
    SUM_MU(expbuf, trace(C_i_side_loop[mu]*C_f_side_loop[mu]))
    MAKE_DIAG(expbuf, corrbuf, result[C_diag], "HW_C")

    // Recycle sub-expressions for W-type contractions.
    for (unsigned int mu = 0; mu < ndim; ++mu)
    {
        W_i_side_loop[mu] = trace(C_i_side_loop[mu]);
        W_f_side_loop[mu] = trace(C_f_side_loop[mu]);
    }

    // Perform W-type contractions.
    SUM_MU(expbuf, W_i_side_loop[mu]*W_f_side_loop[mu])
    MAKE_DIAG(expbuf, corrbuf, result[W_diag], "HW_W")

    write(writer, "HW_NonEye", result);
}
extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp (new file, 57 lines)
@@ -0,0 +1,57 @@
/*************************************************************************************

Grid physics library, www.github.com/paboyle/Grid

Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp

Copyright (C) 2017

Author: Andrew Lawson <andrew.lawson1991@gmail.com>

[standard Grid GPL boilerplate, identical to the legal header reproduced in full in Gamma3pt.hpp above]
*************************************************************************************/
/*  END LEGAL */

#ifndef Hadrons_MContraction_WeakHamiltonianNonEye_hpp_
#define Hadrons_MContraction_WeakHamiltonianNonEye_hpp_

#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>

BEGIN_HADRONS_NAMESPACE

/******************************************************************************
 *                       WeakHamiltonianNonEye                                *
 ******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)

enum
{
    W_diag        = 0,
    C_diag        = 1,
    n_noneye_diag = 2
};

// Wing and Connected subdiagram contractions.
#define MAKE_CW_SUBDIAG(Q_1, Q_2, gamma) (Q_1*adj(Q_2)*g5*gamma)

MAKE_WEAK_MODULE(WeakHamiltonianNonEye)

END_MODULE_NAMESPACE

END_HADRONS_NAMESPACE

#endif // Hadrons_MContraction_WeakHamiltonianNonEye_hpp_
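Here GammaL(Gamma::gmu[mu]) is Grid's left-handed gamma combination (gamma^mu (1 - gamma^5), up to the library's normalisation conventions), so each MAKE_CW_SUBDIAG factor is one side of the current-current vertex. A single-mu restatement of the two Non-Eye channels, mirroring the Eye-type sketch above:

    unsigned int   mu = 0;
    GammaL         gL(Gamma::gmu[mu]);
    // Connected: single trace linking the i-side and f-side loops at H_W.
    LatticeComplex connected = trace(MAKE_CW_SUBDIAG(q1, q2, gL)*MAKE_CW_SUBDIAG(q3, q4, gL));
    // Wing: the two loops traced separately, then multiplied.
    LatticeComplex wing      = trace(MAKE_CW_SUBDIAG(q1, q2, gL))*trace(MAKE_CW_SUBDIAG(q3, q4, gL));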
extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc (new file, 135 lines)
@@ -0,0 +1,135 @@
/*************************************************************************************

Grid physics library, www.github.com/paboyle/Grid

Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc

Copyright (C) 2017

Author: Andrew Lawson <andrew.lawson1991@gmail.com>

[standard Grid GPL boilerplate, identical to the legal header reproduced in full in Gamma3pt.hpp above]
*************************************************************************************/
/*  END LEGAL */

#include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp>

using namespace Grid;
using namespace Hadrons;
using namespace MContraction;

/*
 * Weak Hamiltonian + current contractions, disconnected topology for neutral
 * mesons.
 * 
 * These contractions are generated by operators Q_1,...,10 of the dS=1 Weak
 * Hamiltonian in the physical basis and an additional current J (see e.g.
 * Fig 11 of arXiv:1507.03094).
 * 
 * Schematic:
 *
 *           q2          q4           q3
 *        /--<--¬     /---<--¬     /---<--¬
 *       /       \   /        \   /        \
 *    i *         * H_W |      J *          * f
 *       \       /   \        /   \        /
 *        \--->--/    \------/     \------/
 *           q1
 *
 * options
 * - q1: input propagator 1 (string)
 * - q2: input propagator 2 (string)
 * - q3: input propagator 3 (string), assumed to be sequential propagator
 * - q4: input propagator 4 (string), assumed to be a loop
 * 
 * type 1: trace(q1*adj(q2)*g5*gL[mu]*loop*gL[mu])*trace(q3*g5)
 * type 2: trace(q1*adj(q2)*g5*gL[mu])*trace(loop*gL[mu])*trace(q3*g5)
 */

/*******************************************************************************
 *                 TWeakNeutral4ptDisc implementation                          *
 ******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TWeakNeutral4ptDisc::TWeakNeutral4ptDisc(const std::string name)
: Module<WeakHamiltonianPar>(name)
{}

// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TWeakNeutral4ptDisc::getInput(void)
{
    std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4};
    
    return in;
}

std::vector<std::string> TWeakNeutral4ptDisc::getOutput(void)
{
    std::vector<std::string> out = {getName()};
    
    return out;
}

// setup ///////////////////////////////////////////////////////////////////////
void TWeakNeutral4ptDisc::setup(void)
{
    
}

// execution ///////////////////////////////////////////////////////////////////
void TWeakNeutral4ptDisc::execute(void)
{
    LOG(Message) << "Computing Weak Hamiltonian neutral disconnected contractions '"
                 << getName() << "' using quarks '" << par().q1 << "', '"
                 << par().q2 << "', '" << par().q3 << "' and '" << par().q4
                 << "'." << std::endl;

    CorrWriter            writer(par().output);
    PropagatorField       &q1 = *env().template getObject<PropagatorField>(par().q1);
    PropagatorField       &q2 = *env().template getObject<PropagatorField>(par().q2);
    PropagatorField       &q3 = *env().template getObject<PropagatorField>(par().q3);
    PropagatorField       &q4 = *env().template getObject<PropagatorField>(par().q4);
    Gamma                 g5  = Gamma(Gamma::Algebra::Gamma5);
    LatticeComplex        expbuf(env().getGrid());
    std::vector<TComplex> corrbuf;
    std::vector<Result>   result(n_neut_disc_diag);
    unsigned int          ndim = env().getNd();

    PropagatorField tmp(env().getGrid());
    std::vector<PropagatorField> meson(ndim, tmp);
    std::vector<PropagatorField> loop(ndim, tmp);
    LatticeComplex  curr(env().getGrid());

    // Setup for type 1 contractions.
    for (int mu = 0; mu < ndim; ++mu)
    {
        meson[mu] = MAKE_DISC_MESON(q1, q2, GammaL(Gamma::gmu[mu]));
        loop[mu]  = MAKE_DISC_LOOP(q4, GammaL(Gamma::gmu[mu]));
    }
    curr = MAKE_DISC_CURR(q3, GammaL(Gamma::Algebra::Gamma5));

    // Perform type 1 contractions.
    SUM_MU(expbuf, trace(meson[mu]*loop[mu]))
    expbuf *= curr;
    MAKE_DIAG(expbuf, corrbuf, result[neut_disc_1_diag], "HW_disc0_1")

    // Perform type 2 contractions.
    SUM_MU(expbuf, trace(meson[mu])*trace(loop[mu]))
    expbuf *= curr;
    MAKE_DIAG(expbuf, corrbuf, result[neut_disc_2_diag], "HW_disc0_2")

    write(writer, "HW_disc0", result);
}
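The two output channels again differ only in where the trace closes, and both are weighted by the same pseudoscalar current factor. A single-mu restatement using the macros from WeakNeutral4ptDisc.hpp just below (curr as computed in execute()):

    unsigned int   mu = 0;
    GammaL         gL(Gamma::gmu[mu]);
    // HW_disc0_1: trace closes around meson and loop together.
    LatticeComplex type1 = trace(MAKE_DISC_MESON(q1, q2, gL)*MAKE_DISC_LOOP(q4, gL))*curr;
    // HW_disc0_2: meson and loop traced separately, then multiplied.
    LatticeComplex type2 = trace(MAKE_DISC_MESON(q1, q2, gL))*trace(MAKE_DISC_LOOP(q4, gL))*curr;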
extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp (new file, 59 lines)
@@ -0,0 +1,59 @@
/*************************************************************************************

Grid physics library, www.github.com/paboyle/Grid

Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp

Copyright (C) 2017

Author: Andrew Lawson <andrew.lawson1991@gmail.com>

[standard Grid GPL boilerplate, identical to the legal header reproduced in full in Gamma3pt.hpp above]
*************************************************************************************/
/*  END LEGAL */

#ifndef Hadrons_MContraction_WeakNeutral4ptDisc_hpp_
#define Hadrons_MContraction_WeakNeutral4ptDisc_hpp_

#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>

BEGIN_HADRONS_NAMESPACE

/******************************************************************************
 *                        WeakNeutral4ptDisc                                  *
 ******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)

enum
{
    neut_disc_1_diag = 0,
    neut_disc_2_diag = 1,
    n_neut_disc_diag = 2
};

// Neutral 4pt disconnected subdiagram contractions.
#define MAKE_DISC_MESON(Q_1, Q_2, gamma) (Q_1*adj(Q_2)*g5*gamma)
#define MAKE_DISC_LOOP(Q_LOOP, gamma) (Q_LOOP*gamma)
#define MAKE_DISC_CURR(Q_c, gamma) (trace(Q_c*gamma))

MAKE_WEAK_MODULE(WeakNeutral4ptDisc)

END_MODULE_NAMESPACE

END_HADRONS_NAMESPACE

#endif // Hadrons_MContraction_WeakNeutral4ptDisc_hpp_
extras/Hadrons/Modules/Quark.hpp → extras/Hadrons/Modules/MFermion/GaugeProp.hpp (renamed)
@@ -1,34 +1,5 @@
-/*************************************************************************************
-[standard Grid GPL legal header removed; Source file: extras/Hadrons/Modules/Quark.hpp,
-Copyright (C) 2015-2016, Author: Antonin Portelli <antonin.portelli@me.com>]
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_Quark_hpp_
-#define Hadrons_Quark_hpp_
+#ifndef Hadrons_MFermion_GaugeProp_hpp_
+#define Hadrons_MFermion_GaugeProp_hpp_
 
 #include <Grid/Hadrons/Global.hpp>
 #include <Grid/Hadrons/Module.hpp>
@@ -37,27 +8,29 @@ See the full license in the file "LICENSE" in the top level distribution directory
 BEGIN_HADRONS_NAMESPACE
 
 /******************************************************************************
- *                                TQuark                                      *
+ *                               GaugeProp                                    *
 ******************************************************************************/
-class QuarkPar: Serializable
+BEGIN_MODULE_NAMESPACE(MFermion)
+
+class GaugePropPar: Serializable
 {
 public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(QuarkPar,
+    GRID_SERIALIZABLE_CLASS_MEMBERS(GaugePropPar,
                                     std::string, source,
                                     std::string, solver);
 };
 
 template <typename FImpl>
-class TQuark: public Module<QuarkPar>
+class TGaugeProp: public Module<GaugePropPar>
 {
 public:
-    TYPE_ALIASES(FImpl,);
+    FGS_TYPE_ALIASES(FImpl,);
 public:
     // constructor
-    TQuark(const std::string name);
+    TGaugeProp(const std::string name);
     // destructor
-    virtual ~TQuark(void) = default;
-    // dependencies/products
+    virtual ~TGaugeProp(void) = default;
+    // dependency relation
     virtual std::vector<std::string> getInput(void);
     virtual std::vector<std::string> getOutput(void);
     // setup
@@ -69,20 +42,20 @@ private:
     SolverFn *solver_{nullptr};
 };
 
-MODULE_REGISTER(Quark, TQuark<FIMPL>);
+MODULE_REGISTER_NS(GaugeProp, TGaugeProp<FIMPL>, MFermion);
 
 /******************************************************************************
- *                          TQuark implementation                             *
+ *                        TGaugeProp implementation                           *
 ******************************************************************************/
 // constructor /////////////////////////////////////////////////////////////////
 template <typename FImpl>
-TQuark<FImpl>::TQuark(const std::string name)
-: Module(name)
+TGaugeProp<FImpl>::TGaugeProp(const std::string name)
+: Module<GaugePropPar>(name)
 {}
 
 // dependencies/products ///////////////////////////////////////////////////////
 template <typename FImpl>
-std::vector<std::string> TQuark<FImpl>::getInput(void)
+std::vector<std::string> TGaugeProp<FImpl>::getInput(void)
 {
     std::vector<std::string> in = {par().source, par().solver};
     
@@ -90,7 +63,7 @@ std::vector<std::string> TQuark<FImpl>::getInput(void)
 }
 
 template <typename FImpl>
-std::vector<std::string> TQuark<FImpl>::getOutput(void)
+std::vector<std::string> TGaugeProp<FImpl>::getOutput(void)
 {
     std::vector<std::string> out = {getName(), getName() + "_5d"};
     
@@ -99,7 +72,7 @@ std::vector<std::string> TQuark<FImpl>::getOutput(void)
 
 // setup ///////////////////////////////////////////////////////////////////////
 template <typename FImpl>
-void TQuark<FImpl>::setup(void)
+void TGaugeProp<FImpl>::setup(void)
 {
     Ls_ = env().getObjectLs(par().solver);
     env().template registerLattice<PropagatorField>(getName());
@@ -111,13 +84,13 @@ void TQuark<FImpl>::setup(void)
 
 // execution ///////////////////////////////////////////////////////////////////
 template <typename FImpl>
-void TQuark<FImpl>::execute(void)
+void TGaugeProp<FImpl>::execute(void)
 {
     LOG(Message) << "Computing quark propagator '" << getName() << "'"
-                << std::endl;
+                 << std::endl;
     
     FermionField    source(env().getGrid(Ls_)), sol(env().getGrid(Ls_)),
-                   tmp(env().getGrid());
+                    tmp(env().getGrid());
     std::string     propName = (Ls_ == 1) ? getName() : (getName() + "_5d");
     PropagatorField &prop    = *env().template createLattice<PropagatorField>(propName);
     PropagatorField &fullSrc = *env().template getObject<PropagatorField>(par().source);
@@ -128,7 +101,7 @@ void TQuark<FImpl>::execute(void)
     }
     
     LOG(Message) << "Inverting using solver '" << par().solver
-                << "' on source '" << par().source << "'" << std::endl;
+                 << "' on source '" << par().source << "'" << std::endl;
     for (unsigned int s = 0; s < Ns; ++s)
     for (unsigned int c = 0; c < Nc; ++c)
     {
@@ -170,16 +143,18 @@ void TQuark<FImpl>::execute(void)
         if (Ls_ > 1)
        {
            PropagatorField &p4d =
-                *env().template getObject<PropagatorField>(getName());
+                    *env().template getObject<PropagatorField>(getName());
            
            axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0);
-            axpby_ssp_pplus(sol, 0., sol, 1., sol, 0, Ls_-1);
+            axpby_ssp_pplus(sol, 1., sol, 1., sol, 0, Ls_-1);
            ExtractSlice(tmp, sol, 0, 0);
            FermToProp(p4d, tmp, s, c);
        }
     }
 }
 
+END_MODULE_NAMESPACE
+
 END_HADRONS_NAMESPACE
 
-#endif // Hadrons_Quark_hpp_
+#endif // Hadrons_MFermion_GaugeProp_hpp_
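The substantive change in the last hunk is the second coefficient of axpby_ssp_pplus. The 4d propagator is assembled from the chiral projections of the two 5th-dimension boundary slices of the solution, and with a zero coefficient the P+ term overwrote the P- term instead of accumulating onto it. A reading of the fixed pair of calls, given that axpby_ssp_pminus/pplus(z, a, x, b, y, s, sp) computes z_s = a*x_s + b*P∓*y_sp on 5th-dimension slices:

    axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0);     // sol_0  = P- sol_0
    axpby_ssp_pplus (sol, 1., sol, 1., sol, 0, Ls_-1); // sol_0 += P+ sol_{Ls-1}
    ExtractSlice(tmp, sol, 0, 0);                      // tmp = projected 4d field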
@@ -65,7 +65,7 @@ void TLoad::setup(void)
 // execution ///////////////////////////////////////////////////////////////////
 void TLoad::execute(void)
 {
-    NerscField header;
+    FieldMetaData header;
     std::string fileName = par().file + "."
                            + std::to_string(env().getTrajectory());
     
@@ -74,5 +74,5 @@ void TLoad::execute(void)
     LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName());
     NerscIO::readConfiguration(U, header, fileName);
     LOG(Message) << "NERSC header:" << std::endl;
-    dump_nersc_header(header, LOG(Message));
+    dump_meta_data(header, LOG(Message));
 }
@@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directory
 *************************************************************************************/
 /*  END LEGAL */
 
-#ifndef Hadrons_Load_hpp_
-#define Hadrons_Load_hpp_
+#ifndef Hadrons_MGauge_Load_hpp_
+#define Hadrons_MGauge_Load_hpp_
 
 #include <Grid/Hadrons/Global.hpp>
 #include <Grid/Hadrons/Module.hpp>
@@ -70,4 +70,4 @@ END_MODULE_NAMESPACE
 
 END_HADRONS_NAMESPACE
 
-#endif // Hadrons_Load_hpp_
+#endif // Hadrons_MGauge_Load_hpp_
@@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directory
 *************************************************************************************/
 /*  END LEGAL */
 
-#ifndef Hadrons_Random_hpp_
-#define Hadrons_Random_hpp_
+#ifndef Hadrons_MGauge_Random_hpp_
+#define Hadrons_MGauge_Random_hpp_
 
 #include <Grid/Hadrons/Global.hpp>
 #include <Grid/Hadrons/Module.hpp>
@@ -63,4 +63,4 @@ END_MODULE_NAMESPACE
 
 END_HADRONS_NAMESPACE
 
-#endif // Hadrons_Random_hpp_
+#endif // Hadrons_MGauge_Random_hpp_
extras/Hadrons/Modules/MGauge/StochEm.cc (new file, 88 lines)
@@ -0,0 +1,88 @@
/*************************************************************************************

Grid physics library, www.github.com/paboyle/Grid

Source file: extras/Hadrons/Modules/MGauge/StochEm.cc

Copyright (C) 2015
Copyright (C) 2016

[standard Grid GPL boilerplate, identical to the legal header reproduced in full in Gamma3pt.hpp above]
*************************************************************************************/
/*  END LEGAL */
#include <Grid/Hadrons/Modules/MGauge/StochEm.hpp>

using namespace Grid;
using namespace Hadrons;
using namespace MGauge;

/******************************************************************************
 *                     TStochEm implementation                                *
 ******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TStochEm::TStochEm(const std::string name)
: Module<StochEmPar>(name)
{}

// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TStochEm::getInput(void)
{
    std::vector<std::string> in;
    
    return in;
}

std::vector<std::string> TStochEm::getOutput(void)
{
    std::vector<std::string> out = {getName()};
    
    return out;
}

// setup ///////////////////////////////////////////////////////////////////////
void TStochEm::setup(void)
{
    if (!env().hasRegisteredObject("_" + getName() + "_weight"))
    {
        env().registerLattice<EmComp>("_" + getName() + "_weight");
    }
    env().registerLattice<EmField>(getName());
}

// execution ///////////////////////////////////////////////////////////////////
void TStochEm::execute(void)
{
    PhotonR photon(par().gauge, par().zmScheme);
    EmField &a = *env().createLattice<EmField>(getName());
    EmComp  *w;
    
    if (!env().hasCreatedObject("_" + getName() + "_weight"))
    {
        LOG(Message) << "Caching stochastic EM potential weight (gauge: "
                     << par().gauge << ", zero-mode scheme: "
                     << par().zmScheme << ")..." << std::endl;
        w = env().createLattice<EmComp>("_" + getName() + "_weight");
        photon.StochasticWeight(*w);
    }
    else
    {
        w = env().getObject<EmComp>("_" + getName() + "_weight");
    }
    LOG(Message) << "Generating stochastic EM potential..." << std::endl;
    photon.StochasticField(a, *env().get4dRng(), *w);
}
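Note the caching pattern: the momentum-space weight is expensive but trajectory-independent, so it is stored under the hidden name "_<module>_weight" and only the stochastic field itself is regenerated per trajectory. A minimal application-side sketch; the enumerator values shown are assumptions on this writer's part (the exact set lives in Grid's Photon class):

    // Sketch only: enum values and module name are illustrative.
    MGauge::StochEm::Par photonPar;

    photonPar.gauge    = PhotonR::Gauge::feynman;
    photonPar.zmScheme = PhotonR::ZmScheme::qedL;
    application.createModule<MGauge::StochEm>("ph_field", photonPar);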
extras/Hadrons/Modules/MGauge/StochEm.hpp (new file, 75 lines)
@@ -0,0 +1,75 @@
/*************************************************************************************

Grid physics library, www.github.com/paboyle/Grid

Source file: extras/Hadrons/Modules/MGauge/StochEm.hpp

Copyright (C) 2015
Copyright (C) 2016

[standard Grid GPL boilerplate, identical to the legal header reproduced in full in Gamma3pt.hpp above]
*************************************************************************************/
/*  END LEGAL */
#ifndef Hadrons_MGauge_StochEm_hpp_
#define Hadrons_MGauge_StochEm_hpp_

#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>

BEGIN_HADRONS_NAMESPACE

/******************************************************************************
 *                             StochEm                                        *
 ******************************************************************************/
BEGIN_MODULE_NAMESPACE(MGauge)

class StochEmPar: Serializable
{
public:
    GRID_SERIALIZABLE_CLASS_MEMBERS(StochEmPar,
                                    PhotonR::Gauge,    gauge,
                                    PhotonR::ZmScheme, zmScheme);
};

class TStochEm: public Module<StochEmPar>
{
public:
    typedef PhotonR::GaugeField     EmField;
    typedef PhotonR::GaugeLinkField EmComp;
public:
    // constructor
    TStochEm(const std::string name);
    // destructor
    virtual ~TStochEm(void) = default;
    // dependency relation
    virtual std::vector<std::string> getInput(void);
    virtual std::vector<std::string> getOutput(void);
    // setup
    virtual void setup(void);
    // execution
    virtual void execute(void);
};

MODULE_REGISTER_NS(StochEm, TStochEm, MGauge);

END_MODULE_NAMESPACE

END_HADRONS_NAMESPACE

#endif // Hadrons_MGauge_StochEm_hpp_
@@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directory
 *************************************************************************************/
 /*  END LEGAL */
 
-#ifndef Hadrons_Unit_hpp_
-#define Hadrons_Unit_hpp_
+#ifndef Hadrons_MGauge_Unit_hpp_
+#define Hadrons_MGauge_Unit_hpp_
 
 #include <Grid/Hadrons/Global.hpp>
 #include <Grid/Hadrons/Module.hpp>
@@ -63,4 +63,4 @@ END_MODULE_NAMESPACE
 
 END_HADRONS_NAMESPACE
 
-#endif // Hadrons_Unit_hpp_
+#endif // Hadrons_MGauge_Unit_hpp_
extras/Hadrons/Modules/MLoop/NoiseLoop.hpp (new file, 132 lines)
@@ -0,0 +1,132 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: extras/Hadrons/Modules/MLoop/NoiseLoop.hpp
|
||||
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MLoop_NoiseLoop_hpp_
|
||||
#define Hadrons_MLoop_NoiseLoop_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
#include <Grid/Hadrons/ModuleFactory.hpp>
|
||||
|
||||
BEGIN_HADRONS_NAMESPACE
|
||||
|
||||
/*
|
||||
|
||||
Noise loop propagator
|
||||
-----------------------------
|
||||
* loop_x = q_x * adj(eta_x)
|
||||
|
||||
* options:
|
||||
- q = Result of inversion on noise source.
|
||||
- eta = noise source.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
/******************************************************************************
|
||||
* NoiseLoop *
|
||||
******************************************************************************/
|
||||
BEGIN_MODULE_NAMESPACE(MLoop)
|
||||
|
||||
class NoiseLoopPar: Serializable
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(NoiseLoopPar,
|
||||
std::string, q,
|
||||
std::string, eta);
|
||||
};
|
||||
|
||||
template <typename FImpl>
|
||||
class TNoiseLoop: public Module<NoiseLoopPar>
|
||||
{
|
||||
public:
|
||||
FERM_TYPE_ALIASES(FImpl,);
|
||||
public:
|
||||
// constructor
|
||||
TNoiseLoop(const std::string name);
|
||||
// destructor
|
||||
virtual ~TNoiseLoop(void) = default;
|
||||
// dependency relation
|
||||
virtual std::vector<std::string> getInput(void);
|
||||
virtual std::vector<std::string> getOutput(void);
|
||||
// setup
|
||||
virtual void setup(void);
|
||||
// execution
|
||||
virtual void execute(void);
|
||||
};
|
||||
|
||||
MODULE_REGISTER_NS(NoiseLoop, TNoiseLoop<FIMPL>, MLoop);
|
||||
|
||||
/******************************************************************************
|
||||
* TNoiseLoop implementation *
|
||||
******************************************************************************/
|
||||
// constructor /////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
TNoiseLoop<FImpl>::TNoiseLoop(const std::string name)
|
||||
: Module<NoiseLoopPar>(name)
|
||||
{}
|
||||
|
||||
// dependencies/products ///////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
std::vector<std::string> TNoiseLoop<FImpl>::getInput(void)
|
||||
{
|
||||
std::vector<std::string> in = {par().q, par().eta};
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
template <typename FImpl>
|
||||
std::vector<std::string> TNoiseLoop<FImpl>::getOutput(void)
|
||||
{
|
||||
std::vector<std::string> out = {getName()};
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// setup ///////////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
void TNoiseLoop<FImpl>::setup(void)
|
||||
{
|
||||
env().template registerLattice<PropagatorField>(getName());
|
||||
}
|
||||
|
||||
// execution ///////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
void TNoiseLoop<FImpl>::execute(void)
|
||||
{
|
||||
PropagatorField &loop = *env().template createLattice<PropagatorField>(getName());
|
||||
PropagatorField &q = *env().template getObject<PropagatorField>(par().q);
|
||||
PropagatorField &eta = *env().template getObject<PropagatorField>(par().eta);
|
||||
loop = q*adj(eta);
|
||||
}
|
||||
|
||||
END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MLoop_NoiseLoop_hpp_
|
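For orientation, a sketch of how this module might be wired into a measurement. The module and parameter names come from the listing above; the `Application::createModule` call and the input names "Q" and "noise" are assumptions for illustration only.

    // Hypothetical wiring (not part of this commit): build a loop propagator
    // "loop" from an existing inversion result "Q" and its noise source "noise".
    MLoop::NoiseLoop::Par loopPar;  // Par alias assumed from the Module<NoiseLoopPar> base
    loopPar.q   = "Q";              // assumed name of a solver/propagator module output
    loopPar.eta = "noise";          // assumed name of a noise source module output
    application.createModule<MLoop::NoiseLoop>("loop", loopPar);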
226 extras/Hadrons/Modules/MScalar/ChargedProp.cc Normal file
@@ -0,0 +1,226 @@
#include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp>
#include <Grid/Hadrons/Modules/MScalar/Scalar.hpp>

using namespace Grid;
using namespace Hadrons;
using namespace MScalar;

/******************************************************************************
 *                     TChargedProp implementation                            *
 ******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TChargedProp::TChargedProp(const std::string name)
: Module<ChargedPropPar>(name)
{}

// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TChargedProp::getInput(void)
{
    std::vector<std::string> in = {par().source, par().emField};
    
    return in;
}

std::vector<std::string> TChargedProp::getOutput(void)
{
    std::vector<std::string> out = {getName()};
    
    return out;
}

// setup ///////////////////////////////////////////////////////////////////////
void TChargedProp::setup(void)
{
    freeMomPropName_ = FREEMOMPROP(par().mass);
    phaseName_.clear();
    for (unsigned int mu = 0; mu < env().getNd(); ++mu)
    {
        phaseName_.push_back("_shiftphase_" + std::to_string(mu));
    }
    GFSrcName_ = "_" + getName() + "_DinvSrc";
    if (!env().hasRegisteredObject(freeMomPropName_))
    {
        env().registerLattice<ScalarField>(freeMomPropName_);
    }
    if (!env().hasRegisteredObject(phaseName_[0]))
    {
        for (unsigned int mu = 0; mu < env().getNd(); ++mu)
        {
            env().registerLattice<ScalarField>(phaseName_[mu]);
        }
    }
    if (!env().hasRegisteredObject(GFSrcName_))
    {
        env().registerLattice<ScalarField>(GFSrcName_);
    }
    env().registerLattice<ScalarField>(getName());
}

// execution ///////////////////////////////////////////////////////////////////
void TChargedProp::execute(void)
{
    // CACHING ANALYTIC EXPRESSIONS
    ScalarField &source = *env().getObject<ScalarField>(par().source);
    Complex     ci(0.0,1.0);
    FFT         fft(env().getGrid());
    
    // cache free scalar propagator
    if (!env().hasCreatedObject(freeMomPropName_))
    {
        LOG(Message) << "Caching momentum space free scalar propagator"
                     << " (mass= " << par().mass << ")..." << std::endl;
        freeMomProp_ = env().createLattice<ScalarField>(freeMomPropName_);
        SIMPL::MomentumSpacePropagator(*freeMomProp_, par().mass);
    }
    else
    {
        freeMomProp_ = env().getObject<ScalarField>(freeMomPropName_);
    }
    // cache G*F*src
    if (!env().hasCreatedObject(GFSrcName_))
    {
        GFSrc_ = env().createLattice<ScalarField>(GFSrcName_);
        fft.FFT_all_dim(*GFSrc_, source, FFT::forward);
        *GFSrc_ = (*freeMomProp_)*(*GFSrc_);
    }
    else
    {
        GFSrc_ = env().getObject<ScalarField>(GFSrcName_);
    }
    // cache phases
    if (!env().hasCreatedObject(phaseName_[0]))
    {
        std::vector<int> &l = env().getGrid()->_fdimensions;
        
        LOG(Message) << "Caching shift phases..." << std::endl;
        for (unsigned int mu = 0; mu < env().getNd(); ++mu)
        {
            Real twoPiL = M_PI*2./l[mu];
            
            phase_.push_back(env().createLattice<ScalarField>(phaseName_[mu]));
            LatticeCoordinate(*(phase_[mu]), mu);
            *(phase_[mu]) = exp(ci*twoPiL*(*(phase_[mu])));
        }
    }
    else
    {
        for (unsigned int mu = 0; mu < env().getNd(); ++mu)
        {
            phase_.push_back(env().getObject<ScalarField>(phaseName_[mu]));
        }
    }
    
    // PROPAGATOR CALCULATION
    LOG(Message) << "Computing charged scalar propagator"
                 << " (mass= " << par().mass
                 << ", charge= " << par().charge << ")..." << std::endl;
    
    ScalarField &prop = *env().createLattice<ScalarField>(getName());
    ScalarField buf(env().getGrid());
    ScalarField &GFSrc = *GFSrc_, &G = *freeMomProp_;
    double      q = par().charge;
    
    // G*F*Src
    prop = GFSrc;
    
    // - q*G*momD1*G*F*Src (momD1 = F*D1*Finv)
    buf = GFSrc;
    momD1(buf, fft);
    buf  = G*buf;
    prop = prop - q*buf;
    
    // + q^2*G*momD1*G*momD1*G*F*Src (here buf = G*momD1*G*F*Src)
    momD1(buf, fft);
    prop = prop + q*q*G*buf;
    
    // - q^2*G*momD2*G*F*Src (momD2 = F*D2*Finv)
    buf = GFSrc;
    momD2(buf, fft);
    prop = prop - q*q*G*buf;
    
    // final FT
    fft.FFT_all_dim(prop, prop, FFT::backward);
    
    // OUTPUT IF NECESSARY
    if (!par().output.empty())
    {
        std::string filename = par().output + "." +
                               std::to_string(env().getTrajectory());
        
        LOG(Message) << "Saving zero-momentum projection to '"
                     << filename << "'..." << std::endl;
        
        CorrWriter            writer(filename);
        std::vector<TComplex> vecBuf;
        std::vector<Complex>  result;
        
        sliceSum(prop, vecBuf, Tp);
        result.resize(vecBuf.size());
        for (unsigned int t = 0; t < vecBuf.size(); ++t)
        {
            result[t] = TensorRemove(vecBuf[t]);
        }
        write(writer, "charge", q);
        write(writer, "prop", result);
    }
}

void TChargedProp::momD1(ScalarField &s, FFT &fft)
{
    EmField     &A = *env().getObject<EmField>(par().emField);
    ScalarField buf(env().getGrid()), result(env().getGrid()),
                Amu(env().getGrid());
    Complex     ci(0.0,1.0);
    
    result = zero;
    
    for (unsigned int mu = 0; mu < env().getNd(); ++mu)
    {
        Amu = peekLorentz(A, mu);
        buf = (*phase_[mu])*s;
        fft.FFT_all_dim(buf, buf, FFT::backward);
        buf = Amu*buf;
        fft.FFT_all_dim(buf, buf, FFT::forward);
        result = result - ci*buf;
    }
    fft.FFT_all_dim(s, s, FFT::backward);
    for (unsigned int mu = 0; mu < env().getNd(); ++mu)
    {
        Amu = peekLorentz(A, mu);
        buf = Amu*s;
        fft.FFT_all_dim(buf, buf, FFT::forward);
        result = result + ci*adj(*phase_[mu])*buf;
    }
    
    s = result;
}

void TChargedProp::momD2(ScalarField &s, FFT &fft)
{
    EmField     &A = *env().getObject<EmField>(par().emField);
    ScalarField buf(env().getGrid()), result(env().getGrid()),
                Amu(env().getGrid());
    
    result = zero;
    
    for (unsigned int mu = 0; mu < env().getNd(); ++mu)
    {
        Amu = peekLorentz(A, mu);
        buf = (*phase_[mu])*s;
        fft.FFT_all_dim(buf, buf, FFT::backward);
        buf = Amu*Amu*buf;
        fft.FFT_all_dim(buf, buf, FFT::forward);
        result = result + .5*buf;
    }
    fft.FFT_all_dim(s, s, FFT::backward);
    for (unsigned int mu = 0; mu < env().getNd(); ++mu)
    {
        Amu = peekLorentz(A, mu);
        buf = Amu*Amu*s;
        fft.FFT_all_dim(buf, buf, FFT::forward);
        result = result + .5*adj(*phase_[mu])*buf;
    }
    
    s = result;
}
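Reading the four accumulation steps above back into formulas (an interpretation of the code, with G~ the free momentum-space propagator, S~ the Fourier-transformed source and D_1, D_2 the vertex insertions implemented by `momD1`/`momD2`), the module computes the order-q^2 expansion

$$\tilde{G}_q\tilde{S} = \tilde{G}\tilde{S} - q\,\tilde{G}D_1\tilde{G}\tilde{S} + q^2\,\tilde{G}D_1\tilde{G}D_1\tilde{G}\tilde{S} - q^2\,\tilde{G}D_2\tilde{G}\tilde{S} + \mathcal{O}(q^3),$$

followed by a single backward FFT to position space.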
61 extras/Hadrons/Modules/MScalar/ChargedProp.hpp Normal file
@@ -0,0 +1,61 @@
#ifndef Hadrons_MScalar_ChargedProp_hpp_
#define Hadrons_MScalar_ChargedProp_hpp_

#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>

BEGIN_HADRONS_NAMESPACE

/******************************************************************************
 *                       Charged scalar propagator                            *
 ******************************************************************************/
BEGIN_MODULE_NAMESPACE(MScalar)

class ChargedPropPar: Serializable
{
public:
    GRID_SERIALIZABLE_CLASS_MEMBERS(ChargedPropPar,
                                    std::string, emField,
                                    std::string, source,
                                    double,      mass,
                                    double,      charge,
                                    std::string, output);
};

class TChargedProp: public Module<ChargedPropPar>
{
public:
    SCALAR_TYPE_ALIASES(SIMPL,);
    typedef PhotonR::GaugeField     EmField;
    typedef PhotonR::GaugeLinkField EmComp;
public:
    // constructor
    TChargedProp(const std::string name);
    // destructor
    virtual ~TChargedProp(void) = default;
    // dependency relation
    virtual std::vector<std::string> getInput(void);
    virtual std::vector<std::string> getOutput(void);
    // setup
    virtual void setup(void);
    // execution
    virtual void execute(void);
private:
    void momD1(ScalarField &s, FFT &fft);
    void momD2(ScalarField &s, FFT &fft);
private:
    std::string                freeMomPropName_, GFSrcName_;
    std::vector<std::string>   phaseName_;
    ScalarField                *freeMomProp_, *GFSrc_;
    std::vector<ScalarField *> phase_;
    EmField                    *A;
};

MODULE_REGISTER_NS(ChargedProp, TChargedProp, MScalar);

END_MODULE_NAMESPACE

END_HADRONS_NAMESPACE

#endif // Hadrons_MScalar_ChargedProp_hpp_
79 extras/Hadrons/Modules/MScalar/FreeProp.cc Normal file
@@ -0,0 +1,79 @@
#include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp>
#include <Grid/Hadrons/Modules/MScalar/Scalar.hpp>

using namespace Grid;
using namespace Hadrons;
using namespace MScalar;

/******************************************************************************
 *                       TFreeProp implementation                             *
 ******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TFreeProp::TFreeProp(const std::string name)
: Module<FreePropPar>(name)
{}

// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TFreeProp::getInput(void)
{
    std::vector<std::string> in = {par().source};
    
    return in;
}

std::vector<std::string> TFreeProp::getOutput(void)
{
    std::vector<std::string> out = {getName()};
    
    return out;
}

// setup ///////////////////////////////////////////////////////////////////////
void TFreeProp::setup(void)
{
    freeMomPropName_ = FREEMOMPROP(par().mass);
    
    if (!env().hasRegisteredObject(freeMomPropName_))
    {
        env().registerLattice<ScalarField>(freeMomPropName_);
    }
    env().registerLattice<ScalarField>(getName());
}

// execution ///////////////////////////////////////////////////////////////////
void TFreeProp::execute(void)
{
    ScalarField &prop   = *env().createLattice<ScalarField>(getName());
    ScalarField &source = *env().getObject<ScalarField>(par().source);
    ScalarField *freeMomProp;
    
    if (!env().hasCreatedObject(freeMomPropName_))
    {
        LOG(Message) << "Caching momentum space free scalar propagator"
                     << " (mass= " << par().mass << ")..." << std::endl;
        freeMomProp = env().createLattice<ScalarField>(freeMomPropName_);
        SIMPL::MomentumSpacePropagator(*freeMomProp, par().mass);
    }
    else
    {
        freeMomProp = env().getObject<ScalarField>(freeMomPropName_);
    }
    LOG(Message) << "Computing free scalar propagator..." << std::endl;
    SIMPL::FreePropagator(source, prop, *freeMomProp);
    
    if (!par().output.empty())
    {
        TextWriter            writer(par().output + "." +
                                     std::to_string(env().getTrajectory()));
        std::vector<TComplex> buf;
        std::vector<Complex>  result;
        
        sliceSum(prop, buf, Tp);
        result.resize(buf.size());
        for (unsigned int t = 0; t < buf.size(); ++t)
        {
            result[t] = TensorRemove(buf[t]);
        }
        write(writer, "prop", result);
    }
}
50 extras/Hadrons/Modules/MScalar/FreeProp.hpp Normal file
@@ -0,0 +1,50 @@
#ifndef Hadrons_MScalar_FreeProp_hpp_
#define Hadrons_MScalar_FreeProp_hpp_

#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>

BEGIN_HADRONS_NAMESPACE

/******************************************************************************
 *                              FreeProp                                      *
 ******************************************************************************/
BEGIN_MODULE_NAMESPACE(MScalar)

class FreePropPar: Serializable
{
public:
    GRID_SERIALIZABLE_CLASS_MEMBERS(FreePropPar,
                                    std::string, source,
                                    double,      mass,
                                    std::string, output);
};

class TFreeProp: public Module<FreePropPar>
{
public:
    SCALAR_TYPE_ALIASES(SIMPL,);
public:
    // constructor
    TFreeProp(const std::string name);
    // destructor
    virtual ~TFreeProp(void) = default;
    // dependency relation
    virtual std::vector<std::string> getInput(void);
    virtual std::vector<std::string> getOutput(void);
    // setup
    virtual void setup(void);
    // execution
    virtual void execute(void);
private:
    std::string freeMomPropName_;
};

MODULE_REGISTER_NS(FreeProp, TFreeProp, MScalar);

END_MODULE_NAMESPACE

END_HADRONS_NAMESPACE

#endif // Hadrons_MScalar_FreeProp_hpp_
6 extras/Hadrons/Modules/MScalar/Scalar.hpp Normal file
@@ -0,0 +1,6 @@
#ifndef Hadrons_Scalar_hpp_
#define Hadrons_Scalar_hpp_

#define FREEMOMPROP(m) "_scalar_mom_prop_" + std::to_string(m)

#endif // Hadrons_Scalar_hpp_
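Since both scalar modules key their propagator cache on this macro, it is worth noting what it expands to: `std::to_string(double)` formats with six fixed decimals, so for example

    // Illustration only: the cache key produced for mass m = 0.25.
    // Masses differing by less than 1e-6 map to the same key.
    std::string key = FREEMOMPROP(0.25); // -> "_scalar_mom_prop_0.250000"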
114 extras/Hadrons/Modules/MSink/Point.hpp Normal file
@@ -0,0 +1,114 @@
#ifndef Hadrons_MSink_Point_hpp_
#define Hadrons_MSink_Point_hpp_

#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>

BEGIN_HADRONS_NAMESPACE

/******************************************************************************
 *                                 Point                                      *
 ******************************************************************************/
BEGIN_MODULE_NAMESPACE(MSink)

class PointPar: Serializable
{
public:
    GRID_SERIALIZABLE_CLASS_MEMBERS(PointPar,
                                    std::string, mom);
};

template <typename FImpl>
class TPoint: public Module<PointPar>
{
public:
    FERM_TYPE_ALIASES(FImpl,);
    SINK_TYPE_ALIASES();
public:
    // constructor
    TPoint(const std::string name);
    // destructor
    virtual ~TPoint(void) = default;
    // dependency relation
    virtual std::vector<std::string> getInput(void);
    virtual std::vector<std::string> getOutput(void);
    // setup
    virtual void setup(void);
    // execution
    virtual void execute(void);
};

MODULE_REGISTER_NS(Point,       TPoint<FIMPL>,        MSink);
MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSink);

/******************************************************************************
 *                          TPoint implementation                             *
 ******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TPoint<FImpl>::TPoint(const std::string name)
: Module<PointPar>(name)
{}

// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TPoint<FImpl>::getInput(void)
{
    std::vector<std::string> in;
    
    return in;
}

template <typename FImpl>
std::vector<std::string> TPoint<FImpl>::getOutput(void)
{
    std::vector<std::string> out = {getName()};
    
    return out;
}

// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TPoint<FImpl>::setup(void)
{
    unsigned int size;
    
    size = env().template lattice4dSize<LatticeComplex>();
    env().registerObject(getName(), size);
}

// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TPoint<FImpl>::execute(void)
{
    std::vector<Real> p = strToVec<Real>(par().mom);
    LatticeComplex    ph(env().getGrid()), coor(env().getGrid());
    Complex           i(0.0,1.0);
    
    LOG(Message) << "Setting up point sink function for momentum ["
                 << par().mom << "]" << std::endl;
    ph = zero;
    for(unsigned int mu = 0; mu < env().getNd(); mu++)
    {
        LatticeCoordinate(coor, mu);
        ph = ph + (p[mu]/env().getGrid()->_fdimensions[mu])*coor;
    }
    ph = exp((Real)(2*M_PI)*i*ph);
    auto sink = [ph](const PropagatorField &field)
    {
        SlicedPropagator res;
        PropagatorField  tmp = ph*field;
        
        sliceSum(tmp, res, Tp);
        
        return res;
    };
    env().setObject(getName(), new SinkFn(sink));
}

END_MODULE_NAMESPACE

END_HADRONS_NAMESPACE

#endif // Hadrons_MSink_Point_hpp_
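In formulas, the `SinkFn` stored by this module projects a propagator onto fixed momentum on each timeslice, with `mom` interpreted in lattice units of 2*pi/L_mu (my reading of the phase construction above):

$$\Phi(t) = \sum_{\vec{x}} \exp\Big(2\pi i \sum_{\mu} \frac{p_\mu x_\mu}{L_\mu}\Big)\, q(\vec{x}, t).$$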
@@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
 *************************************************************************************/
 /* END LEGAL */
 
-#ifndef Hadrons_RBPrecCG_hpp_
-#define Hadrons_RBPrecCG_hpp_
+#ifndef Hadrons_MSolver_RBPrecCG_hpp_
+#define Hadrons_MSolver_RBPrecCG_hpp_
 
 #include <Grid/Hadrons/Global.hpp>
 #include <Grid/Hadrons/Module.hpp>
@@ -53,7 +53,7 @@ template <typename FImpl>
 class TRBPrecCG: public Module<RBPrecCGPar>
 {
 public:
-    TYPE_ALIASES(FImpl,);
+    FGS_TYPE_ALIASES(FImpl,);
 public:
     // constructor
     TRBPrecCG(const std::string name);
@@ -129,4 +129,4 @@ END_MODULE_NAMESPACE
 
 END_HADRONS_NAMESPACE
 
-#endif // Hadrons_RBPrecCG_hpp_
+#endif // Hadrons_MSolver_RBPrecCG_hpp_
@@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
 *************************************************************************************/
 /* END LEGAL */
 
-#ifndef Hadrons_Point_hpp_
-#define Hadrons_Point_hpp_
+#ifndef Hadrons_MSource_Point_hpp_
+#define Hadrons_MSource_Point_hpp_
 
 #include <Grid/Hadrons/Global.hpp>
 #include <Grid/Hadrons/Module.hpp>
@@ -63,7 +63,7 @@ template <typename FImpl>
 class TPoint: public Module<PointPar>
 {
 public:
-    TYPE_ALIASES(FImpl,);
+    FERM_TYPE_ALIASES(FImpl,);
 public:
     // constructor
     TPoint(const std::string name);
@@ -78,7 +78,8 @@ public:
     virtual void execute(void);
 };
 
 MODULE_REGISTER_NS(Point, TPoint<FIMPL>, MSource);
+MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSource);
 
 /******************************************************************************
  *                       TPoint template implementation                       *
@@ -132,4 +133,4 @@ END_MODULE_NAMESPACE
 
 END_HADRONS_NAMESPACE
 
-#endif // Hadrons_Point_hpp_
+#endif // Hadrons_MSource_Point_hpp_
@@ -6,6 +6,7 @@ Source file: extras/Hadrons/Modules/MSource/SeqGamma.hpp
 
 Copyright (C) 2015
 Copyright (C) 2016
+Copyright (C) 2017
 
 Author: Antonin Portelli <antonin.portelli@me.com>
 
@@ -27,8 +28,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
 *************************************************************************************/
 /* END LEGAL */
 
-#ifndef Hadrons_SeqGamma_hpp_
-#define Hadrons_SeqGamma_hpp_
+#ifndef Hadrons_MSource_SeqGamma_hpp_
+#define Hadrons_MSource_SeqGamma_hpp_
 
 #include <Grid/Hadrons/Global.hpp>
 #include <Grid/Hadrons/Module.hpp>
@@ -71,7 +72,7 @@ template <typename FImpl>
 class TSeqGamma: public Module<SeqGammaPar>
 {
 public:
-    TYPE_ALIASES(FImpl,);
+    FGS_TYPE_ALIASES(FImpl,);
 public:
     // constructor
     TSeqGamma(const std::string name);
@@ -149,9 +150,9 @@ void TSeqGamma<FImpl>::execute(void)
     for(unsigned int mu = 0; mu < env().getNd(); mu++)
     {
         LatticeCoordinate(coor, mu);
-        ph = ph + p[mu]*coor;
+        ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu])));
     }
-    ph = exp(i*ph);
+    ph = exp((Real)(2*M_PI)*i*ph);
     LatticeCoordinate(t, Tp);
     src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q);
 }
@@ -160,4 +161,4 @@ END_MODULE_NAMESPACE
 
 END_HADRONS_NAMESPACE
 
-#endif // Hadrons_SeqGamma_hpp_
+#endif // Hadrons_MSource_SeqGamma_hpp_
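The two `+` lines in the last hunk change the phase convention of the sequential source so that `mom` is interpreted in lattice units, matching the new sink and wall-source modules; schematically the phase becomes

$$e^{\,i\,\vec{p}\cdot\vec{x}} \;\longrightarrow\; \exp\Big(2\pi i \sum_{\mu} \frac{p_\mu x_\mu}{L_\mu}\Big),$$

so an entry of 1 in `mom` now corresponds to one unit of 2*pi/L_mu.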
147 extras/Hadrons/Modules/MSource/Wall.hpp Normal file
@@ -0,0 +1,147 @@
/*************************************************************************************

Grid physics library, www.github.com/paboyle/Grid

Source file: extras/Hadrons/Modules/MSource/Wall.hpp

Copyright (C) 2017

Author: Andrew Lawson <andrew.lawson1991@gmail.com>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */

#ifndef Hadrons_MSource_WallSource_hpp_
#define Hadrons_MSource_WallSource_hpp_

#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>

BEGIN_HADRONS_NAMESPACE

/*
 
 Wall source
 -----------------------------
 * src_x = delta(x_3 - tW) * exp(i x.mom)
 
 * options:
 - tW: source timeslice (integer)
 - mom: momentum insertion, space-separated float sequence (e.g. ".1 .2 1. 0.")
 
 */

/******************************************************************************
 *                                 Wall                                       *
 ******************************************************************************/
BEGIN_MODULE_NAMESPACE(MSource)

class WallPar: Serializable
{
public:
    GRID_SERIALIZABLE_CLASS_MEMBERS(WallPar,
                                    unsigned int, tW,
                                    std::string,  mom);
};

template <typename FImpl>
class TWall: public Module<WallPar>
{
public:
    FERM_TYPE_ALIASES(FImpl,);
public:
    // constructor
    TWall(const std::string name);
    // destructor
    virtual ~TWall(void) = default;
    // dependency relation
    virtual std::vector<std::string> getInput(void);
    virtual std::vector<std::string> getOutput(void);
    // setup
    virtual void setup(void);
    // execution
    virtual void execute(void);
};

MODULE_REGISTER_NS(Wall, TWall<FIMPL>, MSource);

/******************************************************************************
 *                         TWall implementation                               *
 ******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TWall<FImpl>::TWall(const std::string name)
: Module<WallPar>(name)
{}

// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TWall<FImpl>::getInput(void)
{
    std::vector<std::string> in;
    
    return in;
}

template <typename FImpl>
std::vector<std::string> TWall<FImpl>::getOutput(void)
{
    std::vector<std::string> out = {getName()};
    
    return out;
}

// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TWall<FImpl>::setup(void)
{
    env().template registerLattice<PropagatorField>(getName());
}

// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TWall<FImpl>::execute(void)
{
    LOG(Message) << "Generating wall source at t = " << par().tW
                 << " with momentum " << par().mom << std::endl;
    
    PropagatorField &src = *env().template createLattice<PropagatorField>(getName());
    Lattice<iScalar<vInteger>> t(env().getGrid());
    LatticeComplex             ph(env().getGrid()), coor(env().getGrid());
    std::vector<Real>          p;
    Complex                    i(0.0,1.0);
    
    p  = strToVec<Real>(par().mom);
    ph = zero;
    for(unsigned int mu = 0; mu < Nd; mu++)
    {
        LatticeCoordinate(coor, mu);
        ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu])));
    }
    ph = exp((Real)(2*M_PI)*i*ph);
    LatticeCoordinate(t, Tp);
    src = 1.;
    src = where((t == par().tW), src*ph, 0.*src);
}

END_MODULE_NAMESPACE

END_HADRONS_NAMESPACE

#endif // Hadrons_MSource_WallSource_hpp_
@@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
 *************************************************************************************/
 /* END LEGAL */
 
-#ifndef Hadrons_Z2_hpp_
-#define Hadrons_Z2_hpp_
+#ifndef Hadrons_MSource_Z2_hpp_
+#define Hadrons_MSource_Z2_hpp_
 
 #include <Grid/Hadrons/Global.hpp>
 #include <Grid/Hadrons/Module.hpp>
@@ -67,7 +67,7 @@ template <typename FImpl>
 class TZ2: public Module<Z2Par>
 {
 public:
-    TYPE_ALIASES(FImpl,);
+    FERM_TYPE_ALIASES(FImpl,);
 public:
     // constructor
     TZ2(const std::string name);
@@ -82,7 +82,8 @@ public:
     virtual void execute(void);
 };
 
 MODULE_REGISTER_NS(Z2, TZ2<FIMPL>, MSource);
+MODULE_REGISTER_NS(ScalarZ2, TZ2<ScalarImplCR>, MSource);
 
 /******************************************************************************
  *                        TZ2 template implementation                         *
@@ -148,4 +149,4 @@ END_MODULE_NAMESPACE
 
 END_HADRONS_NAMESPACE
 
-#endif // Hadrons_Z2_hpp_
+#endif // Hadrons_MSource_Z2_hpp_
@@ -1,5 +1,5 @@
-#ifndef Hadrons____FILEBASENAME____hpp_
-#define Hadrons____FILEBASENAME____hpp_
+#ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_
+#define Hadrons____NAMESPACE_______FILEBASENAME____hpp_
 
 #include <Grid/Hadrons/Global.hpp>
 #include <Grid/Hadrons/Module.hpp>
@@ -41,4 +41,4 @@ END_MODULE_NAMESPACE
 
 END_HADRONS_NAMESPACE
 
-#endif // Hadrons____FILEBASENAME____hpp_
+#endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_
@@ -1,5 +1,5 @@
-#ifndef Hadrons____FILEBASENAME____hpp_
-#define Hadrons____FILEBASENAME____hpp_
+#ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_
+#define Hadrons____NAMESPACE_______FILEBASENAME____hpp_
 
 #include <Grid/Hadrons/Global.hpp>
 #include <Grid/Hadrons/Module.hpp>
@@ -82,4 +82,4 @@ END_MODULE_NAMESPACE
 
 END_HADRONS_NAMESPACE
 
-#endif // Hadrons____FILEBASENAME____hpp_
+#endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_
@@ -1,19 +1,38 @@
 modules_cc =\
 Modules/MContraction/WeakHamiltonianEye.cc \
 Modules/MContraction/WeakHamiltonianNonEye.cc \
 Modules/MContraction/WeakNeutral4ptDisc.cc \
 Modules/MGauge/Load.cc \
 Modules/MGauge/Random.cc \
-Modules/MGauge/Unit.cc
+Modules/MGauge/StochEm.cc \
+Modules/MGauge/Unit.cc \
+Modules/MScalar/ChargedProp.cc \
+Modules/MScalar/FreeProp.cc
 
 modules_hpp =\
 Modules/MAction/DWF.hpp \
 Modules/MAction/Wilson.hpp \
 Modules/MContraction/Baryon.hpp \
 Modules/MContraction/DiscLoop.hpp \
 Modules/MContraction/Gamma3pt.hpp \
 Modules/MContraction/Meson.hpp \
 Modules/MContraction/WeakHamiltonian.hpp \
 Modules/MContraction/WeakHamiltonianEye.hpp \
 Modules/MContraction/WeakHamiltonianNonEye.hpp \
 Modules/MContraction/WeakNeutral4ptDisc.hpp \
+Modules/MFermion/GaugeProp.hpp \
 Modules/MGauge/Load.hpp \
 Modules/MGauge/Random.hpp \
+Modules/MGauge/StochEm.hpp \
 Modules/MGauge/Unit.hpp \
+Modules/MLoop/NoiseLoop.hpp \
+Modules/MScalar/ChargedProp.hpp \
+Modules/MScalar/FreeProp.hpp \
+Modules/MScalar/Scalar.hpp \
+Modules/MSink/Point.hpp \
 Modules/MSolver/RBPrecCG.hpp \
 Modules/MSource/Point.hpp \
 Modules/MSource/SeqGamma.hpp \
-Modules/MSource/Z2.hpp \
-Modules/Quark.hpp
+Modules/MSource/Wall.hpp \
+Modules/MSource/Z2.hpp
 
11 extras/qed-fvol/Global.cc Normal file
@@ -0,0 +1,11 @@
#include <qed-fvol/Global.hpp>

using namespace Grid;
using namespace QCD;
using namespace QedFVol;

QedFVolLogger QedFVol::QedFVolLogError(1,"Error");
QedFVolLogger QedFVol::QedFVolLogWarning(1,"Warning");
QedFVolLogger QedFVol::QedFVolLogMessage(1,"Message");
QedFVolLogger QedFVol::QedFVolLogIterative(1,"Iterative");
QedFVolLogger QedFVol::QedFVolLogDebug(1,"Debug");
42 extras/qed-fvol/Global.hpp Normal file
@@ -0,0 +1,42 @@
#ifndef QedFVol_Global_hpp_
#define QedFVol_Global_hpp_

#include <Grid/Grid.h>

#define BEGIN_QEDFVOL_NAMESPACE \
namespace Grid {\
using namespace QCD;\
namespace QedFVol {\
using Grid::operator<<;
#define END_QEDFVOL_NAMESPACE }}

/* the 'using Grid::operator<<;' statement prevents a very nasty compilation
 * error with GCC (clang compiles fine without it).
 */

BEGIN_QEDFVOL_NAMESPACE

class QedFVolLogger: public Logger
{
public:
    QedFVolLogger(int on, std::string nm): Logger("QedFVol", on, nm,
                                                  GridLogColours, "BLACK"){};
};

#define LOG(channel) std::cout << QedFVolLog##channel
#define QEDFVOL_ERROR(msg)\
LOG(Error) << msg << " (" << __FUNCTION__ << " at " << __FILE__ << ":"\
           << __LINE__ << ")" << std::endl;\
abort();

#define DEBUG_VAR(var) LOG(Debug) << #var << "= " << (var) << std::endl;

extern QedFVolLogger QedFVolLogError;
extern QedFVolLogger QedFVolLogWarning;
extern QedFVolLogger QedFVolLogMessage;
extern QedFVolLogger QedFVolLogIterative;
extern QedFVolLogger QedFVolLogDebug;

END_QEDFVOL_NAMESPACE

#endif // QedFVol_Global_hpp_
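The `LOG` macro works by token pasting, so the channel name must match one of the loggers declared above; for instance (illustrative message only):

    // LOG(Message) expands to: std::cout << QedFVolLogMessage
    LOG(Message) << "starting measurement" << std::endl;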
9 extras/qed-fvol/Makefile.am Normal file
@@ -0,0 +1,9 @@
AM_CXXFLAGS += -I$(top_srcdir)/extras

bin_PROGRAMS = qed-fvol

qed_fvol_SOURCES = \
    qed-fvol.cc    \
    Global.cc

qed_fvol_LDADD = -lGrid
265 extras/qed-fvol/WilsonLoops.h Normal file
@@ -0,0 +1,265 @@
#ifndef QEDFVOL_WILSONLOOPS_H
#define QEDFVOL_WILSONLOOPS_H

#include <Global.hpp>

BEGIN_QEDFVOL_NAMESPACE

template <class Gimpl> class NewWilsonLoops : public Gimpl {
public:
  INHERIT_GIMPL_TYPES(Gimpl);

  typedef typename Gimpl::GaugeLinkField GaugeMat;
  typedef typename Gimpl::GaugeField GaugeLorentz;

  //////////////////////////////////////////////////
  // directed plaquette oriented in mu,nu plane
  //////////////////////////////////////////////////
  static void dirPlaquette(GaugeMat &plaq, const std::vector<GaugeMat> &U,
                           const int mu, const int nu) {
    // Annoyingly, must use either scope resolution to find the dependent base
    // class, or this->; there is no "this" in a static method. This forces
    // explicit Gimpl scope resolution throughout the usage in this file, and
    // rather defeats the purpose of deriving from Gimpl.
    plaq = Gimpl::CovShiftBackward(
        U[mu], mu, Gimpl::CovShiftBackward(
                       U[nu], nu, Gimpl::CovShiftForward(U[mu], mu, U[nu])));
  }
  //////////////////////////////////////////////////
  // trace of directed plaquette oriented in mu,nu plane
  //////////////////////////////////////////////////
  static void traceDirPlaquette(LatticeComplex &plaq,
                                const std::vector<GaugeMat> &U, const int mu,
                                const int nu) {
    GaugeMat sp(U[0]._grid);
    dirPlaquette(sp, U, mu, nu);
    plaq = trace(sp);
  }
  //////////////////////////////////////////////////
  // sum over all planes of plaquette
  //////////////////////////////////////////////////
  static void sitePlaquette(LatticeComplex &Plaq,
                            const std::vector<GaugeMat> &U) {
    LatticeComplex sitePlaq(U[0]._grid);
    Plaq = zero;
    for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) {
      for (int nu = 0; nu < mu; nu++) {
        traceDirPlaquette(sitePlaq, U, mu, nu);
        Plaq = Plaq + sitePlaq;
      }
    }
  }
  //////////////////////////////////////////////////
  // sum over all x,y,z,t and over all planes of plaquette
  //////////////////////////////////////////////////
  static Real sumPlaquette(const GaugeLorentz &Umu) {
    std::vector<GaugeMat> U(4, Umu._grid);

    for (int mu = 0; mu < Umu._grid->_ndimension; mu++) {
      U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
    }

    LatticeComplex Plaq(Umu._grid);

    sitePlaquette(Plaq, U);

    TComplex Tp = sum(Plaq);
    Complex p = TensorRemove(Tp);
    return p.real();
  }
  //////////////////////////////////////////////////
  // average over all x,y,z,t and over all planes of plaquette
  //////////////////////////////////////////////////
  static Real avgPlaquette(const GaugeLorentz &Umu) {
    int ndim = Umu._grid->_ndimension;
    Real sumplaq = sumPlaquette(Umu);
    Real vol = Umu._grid->gSites();
    Real faces = (1.0 * ndim * (ndim - 1)) / 2.0;
    return sumplaq / vol / faces / Nc; // Nc dependent... FIXME
  }

  //////////////////////////////////////////////////
  // Wilson loop of size (R1, R2), oriented in mu,nu plane
  //////////////////////////////////////////////////
  static void wilsonLoop(GaugeMat &wl, const std::vector<GaugeMat> &U,
                         const int Rmu, const int Rnu,
                         const int mu, const int nu) {
    wl = U[nu];

    for(int i = 0; i < Rnu-1; i++){
      wl = Gimpl::CovShiftForward(U[nu], nu, wl);
    }

    for(int i = 0; i < Rmu; i++){
      wl = Gimpl::CovShiftForward(U[mu], mu, wl);
    }

    for(int i = 0; i < Rnu; i++){
      wl = Gimpl::CovShiftBackward(U[nu], nu, wl);
    }

    for(int i = 0; i < Rmu; i++){
      wl = Gimpl::CovShiftBackward(U[mu], mu, wl);
    }
  }
  //////////////////////////////////////////////////
  // trace of Wilson Loop oriented in mu,nu plane
  //////////////////////////////////////////////////
  static void traceWilsonLoop(LatticeComplex &wl,
                              const std::vector<GaugeMat> &U,
                              const int Rmu, const int Rnu,
                              const int mu, const int nu) {
    GaugeMat sp(U[0]._grid);
    wilsonLoop(sp, U, Rmu, Rnu, mu, nu);
    wl = trace(sp);
  }
  //////////////////////////////////////////////////
  // sum over all planes of Wilson loop
  //////////////////////////////////////////////////
  static void siteWilsonLoop(LatticeComplex &Wl,
                             const std::vector<GaugeMat> &U,
                             const int R1, const int R2) {
    LatticeComplex siteWl(U[0]._grid);
    Wl = zero;
    for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) {
      for (int nu = 0; nu < mu; nu++) {
        traceWilsonLoop(siteWl, U, R1, R2, mu, nu);
        Wl = Wl + siteWl;
        traceWilsonLoop(siteWl, U, R2, R1, mu, nu);
        Wl = Wl + siteWl;
      }
    }
  }
  //////////////////////////////////////////////////
  // sum over planes of Wilson loop with length R1
  // in the time direction
  //////////////////////////////////////////////////
  static void siteTimelikeWilsonLoop(LatticeComplex &Wl,
                                     const std::vector<GaugeMat> &U,
                                     const int R1, const int R2) {
    LatticeComplex siteWl(U[0]._grid);

    int ndim = U[0]._grid->_ndimension;

    Wl = zero;
    for (int nu = 0; nu < ndim - 1; nu++) {
      traceWilsonLoop(siteWl, U, R1, R2, ndim-1, nu);
      Wl = Wl + siteWl;
    }
  }
  //////////////////////////////////////////////////
  // sum Wilson loop over all planes orthogonal to the time direction
  //////////////////////////////////////////////////
  static void siteSpatialWilsonLoop(LatticeComplex &Wl,
                                    const std::vector<GaugeMat> &U,
                                    const int R1, const int R2) {
    LatticeComplex siteWl(U[0]._grid);

    Wl = zero;
    for (int mu = 1; mu < U[0]._grid->_ndimension - 1; mu++) {
      for (int nu = 0; nu < mu; nu++) {
        traceWilsonLoop(siteWl, U, R1, R2, mu, nu);
        Wl = Wl + siteWl;
        traceWilsonLoop(siteWl, U, R2, R1, mu, nu);
        Wl = Wl + siteWl;
      }
    }
  }
  //////////////////////////////////////////////////
  // sum over all x,y,z,t and over all planes of Wilson loop
  //////////////////////////////////////////////////
  static Real sumWilsonLoop(const GaugeLorentz &Umu,
                            const int R1, const int R2) {
    std::vector<GaugeMat> U(4, Umu._grid);

    for (int mu = 0; mu < Umu._grid->_ndimension; mu++) {
      U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
    }

    LatticeComplex Wl(Umu._grid);

    siteWilsonLoop(Wl, U, R1, R2);

    TComplex Tp = sum(Wl);
    Complex p = TensorRemove(Tp);
    return p.real();
  }
  //////////////////////////////////////////////////
  // sum over all x,y,z,t and over all planes of timelike Wilson loop
  //////////////////////////////////////////////////
  static Real sumTimelikeWilsonLoop(const GaugeLorentz &Umu,
                                    const int R1, const int R2) {
    std::vector<GaugeMat> U(4, Umu._grid);

    for (int mu = 0; mu < Umu._grid->_ndimension; mu++) {
      U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
    }

    LatticeComplex Wl(Umu._grid);

    siteTimelikeWilsonLoop(Wl, U, R1, R2);

    TComplex Tp = sum(Wl);
    Complex p = TensorRemove(Tp);
    return p.real();
  }
  //////////////////////////////////////////////////
  // sum over all x,y,z,t and over all planes of spatial Wilson loop
  //////////////////////////////////////////////////
  static Real sumSpatialWilsonLoop(const GaugeLorentz &Umu,
                                   const int R1, const int R2) {
    std::vector<GaugeMat> U(4, Umu._grid);

    for (int mu = 0; mu < Umu._grid->_ndimension; mu++) {
      U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
    }

    LatticeComplex Wl(Umu._grid);

    siteSpatialWilsonLoop(Wl, U, R1, R2);

    TComplex Tp = sum(Wl);
    Complex p = TensorRemove(Tp);
    return p.real();
  }
  //////////////////////////////////////////////////
  // average over all x,y,z,t and over all planes of Wilson loop
  //////////////////////////////////////////////////
  static Real avgWilsonLoop(const GaugeLorentz &Umu,
                            const int R1, const int R2) {
    int ndim = Umu._grid->_ndimension;
    Real sumWl = sumWilsonLoop(Umu, R1, R2);
    Real vol = Umu._grid->gSites();
    Real faces = 1.0 * ndim * (ndim - 1);
    return sumWl / vol / faces / Nc; // Nc dependent... FIXME
  }
  //////////////////////////////////////////////////
  // average over all x,y,z,t and over all planes of timelike Wilson loop
  //////////////////////////////////////////////////
  static Real avgTimelikeWilsonLoop(const GaugeLorentz &Umu,
                                    const int R1, const int R2) {
    int ndim = Umu._grid->_ndimension;
    Real sumWl = sumTimelikeWilsonLoop(Umu, R1, R2);
    Real vol = Umu._grid->gSites();
    Real faces = 1.0 * (ndim - 1);
    return sumWl / vol / faces / Nc; // Nc dependent... FIXME
  }
  //////////////////////////////////////////////////
  // average over all x,y,z,t and over all planes of spatial Wilson loop
  //////////////////////////////////////////////////
  static Real avgSpatialWilsonLoop(const GaugeLorentz &Umu,
                                   const int R1, const int R2) {
    int ndim = Umu._grid->_ndimension;
    Real sumWl = sumSpatialWilsonLoop(Umu, R1, R2);
    Real vol = Umu._grid->gSites();
    Real faces = 1.0 * (ndim - 1) * (ndim - 2);
    return sumWl / vol / faces / Nc; // Nc dependent... FIXME
  }
};

END_QEDFVOL_NAMESPACE

#endif // QEDFVOL_WILSONLOOPS_H
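Putting the pieces together, `avgWilsonLoop` returns (as I read the sums above; note that `siteWilsonLoop` adds both the (R1,R2) and (R2,R1) orientations per plane, hence the d(d-1) face count)

$$\langle W(R_1,R_2) \rangle = \frac{1}{V\, N_c\, d(d-1)} \sum_{x} \sum_{\mu > \nu} \mathrm{tr}\left[ W_{\mu\nu}(R_1,R_2;x) + W_{\mu\nu}(R_2,R_1;x) \right],$$

with V the global volume; the 1/N_c factor is flagged in the code itself as a FIXME, since it is wrong for a U(1) field.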
88 extras/qed-fvol/qed-fvol.cc Normal file
@@ -0,0 +1,88 @@
#include <Global.hpp>
#include <WilsonLoops.h>

using namespace Grid;
using namespace QCD;
using namespace QedFVol;

typedef PeriodicGaugeImpl<QedGimplR> QedPeriodicGimplR;
typedef PhotonR::GaugeField          EmField;
typedef PhotonR::GaugeLinkField      EmComp;

const int NCONFIGS = 10;
const int NWILSON  = 10;

int main(int argc, char *argv[])
{
    // parse command line
    std::string parameterFileName;
    
    if (argc < 2)
    {
        std::cerr << "usage: " << argv[0] << " <parameter file> [Grid options]";
        std::cerr << std::endl;
        std::exit(EXIT_FAILURE);
    }
    parameterFileName = argv[1];
    
    // initialization
    Grid_init(&argc, &argv);
    QedFVolLogError.Active(GridLogError.isActive());
    QedFVolLogWarning.Active(GridLogWarning.isActive());
    QedFVolLogMessage.Active(GridLogMessage.isActive());
    QedFVolLogIterative.Active(GridLogIterative.isActive());
    QedFVolLogDebug.Active(GridLogDebug.isActive());
    LOG(Message) << "Grid initialized" << std::endl;
    
    // QED stuff
    std::vector<int> latt_size   = GridDefaultLatt();
    std::vector<int> simd_layout = GridDefaultSimd(4, vComplex::Nsimd());
    std::vector<int> mpi_layout  = GridDefaultMpi();
    GridCartesian    grid(latt_size,simd_layout,mpi_layout);
    GridParallelRNG  pRNG(&grid);
    PhotonR          photon(PhotonR::Gauge::feynman,
                            PhotonR::ZmScheme::qedL);
    EmField          a(&grid);
    EmField          expA(&grid);
    
    Complex imag_unit(0, 1);
    
    Real wlA;
    std::vector<Real> logWlAvg(NWILSON, 0.0), logWlTime(NWILSON, 0.0), logWlSpace(NWILSON, 0.0);
    
    pRNG.SeedRandomDevice();
    
    LOG(Message) << "Wilson loop calculation beginning" << std::endl;
    for(int ic = 0; ic < NCONFIGS; ic++){
        LOG(Message) << "Configuration " << ic << std::endl;
        photon.StochasticField(a, pRNG);
        
        // Exponentiate photon field
        expA = exp(imag_unit*a);
        
        // Calculate Wilson loops (the factor of 3 compensates the 1/Nc
        // normalisation in NewWilsonLoops, which is wrong for a U(1) field)
        for(int iw=1; iw<=NWILSON; iw++){
            wlA = NewWilsonLoops<QedPeriodicGimplR>::avgWilsonLoop(expA, iw, iw) * 3;
            logWlAvg[iw-1] -= 2*log(wlA);
            wlA = NewWilsonLoops<QedPeriodicGimplR>::avgTimelikeWilsonLoop(expA, iw, iw) * 3;
            logWlTime[iw-1] -= 2*log(wlA);
            wlA = NewWilsonLoops<QedPeriodicGimplR>::avgSpatialWilsonLoop(expA, iw, iw) * 3;
            logWlSpace[iw-1] -= 2*log(wlA);
        }
    }
    LOG(Message) << "Wilson loop calculation completed" << std::endl;
    
    // Print Wilson loop results
    for(int iw=1; iw<=NWILSON; iw++){
        LOG(Message) << iw << 'x' << iw << " Wilson loop" << std::endl;
        LOG(Message) << "-2log(W) average: " << logWlAvg[iw-1]/NCONFIGS << std::endl;
        LOG(Message) << "-2log(W) timelike: " << logWlTime[iw-1]/NCONFIGS << std::endl;
        LOG(Message) << "-2log(W) spatial: " << logWlSpace[iw-1]/NCONFIGS << std::endl;
    }
    
    // epilogue
    LOG(Message) << "Grid is finalizing now" << std::endl;
    Grid_finalize();
    
    return EXIT_SUCCESS;
}
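For reference, the per-size quantity printed at the end is (my reading of the accumulation loop)

$$\overline{-2\ln W}(R) = -\frac{2}{N_{\mathrm{configs}}} \sum_{c=1}^{N_{\mathrm{configs}}} \ln W_c(R,R),$$

where W_c is the (re-normalised) average loop of size R x R on the c-th stochastic photon configuration.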
@@ -20,4 +20,17 @@ The simple testcase in this directory is the submitted bug report that encapsula
 problem. The test case works with icpc and with clang++, but fails consistently on g++
 current variants.
 
 Peter
+
+
+************
+
+Second GCC bug reported, see Issue 100.
+
+https://wandbox.org/permlink/tzssJza6R9XnqANw
+https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80652
+
+We are getting Travis failures under gcc-5 for Test_simd, now that more comprehensive
+testing has been added to the CI test suite. This exposes the limitations of Travis's
+runtime limits and weak cores.
+
+Travis uses 5.4.1 for g++-5.
86 grid-config.in Executable file
@@ -0,0 +1,86 @@
#! /bin/sh

prefix=@prefix@
exec_prefix=@exec_prefix@
includedir=@includedir@

usage()
{
    cat <<EOF
Usage: grid-config [OPTION]

Known values for OPTION are:

  --prefix      show Grid installation prefix
  --cxxflags    print pre-processor and compiler flags
  --ldflags     print library linking flags
  --libs        print library linking information
  --summary     print full build summary
  --help        display this help and exit
  --version     output version information
  --git         print git revision

EOF

    exit $1
}

if test $# -eq 0; then
    usage 1
fi

cflags=false
libs=false

while test $# -gt 0; do
    case "$1" in
    -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
    *) optarg= ;;
    esac

    case "$1" in
    --prefix)
        echo $prefix
        ;;

    --version)
        echo @VERSION@
        exit 0
        ;;

    --git)
        echo "@GRID_BRANCH@ @GRID_SHA@"
        exit 0
        ;;

    --help)
        usage 0
        ;;

    --cxxflags)
        echo @GRID_CXXFLAGS@
        ;;

    --ldflags)
        echo @GRID_LDFLAGS@
        ;;

    --libs)
        echo @GRID_LIBS@
        ;;

    --summary)
        echo ""
        echo "@GRID_SUMMARY@"
        echo ""
        ;;

    *)
        usage
        exit 1
        ;;
    esac
    shift
done

exit 0
37 lib/DisableWarnings.h Normal file
@@ -0,0 +1,37 @@
/*************************************************************************************

Grid physics library, www.github.com/paboyle/Grid

Source file: ./lib/DisableWarnings.h

Copyright (C) 2016

Author: Guido Cossu <guido.cossu@ed.ac.uk>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */

#ifndef DISABLE_WARNINGS_H
#define DISABLE_WARNINGS_H

// disables an Intel compiler specific warning (in json.hpp)
#pragma warning disable 488


#endif
@@ -41,7 +41,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #include <Grid/GridCore.h>
 #include <Grid/GridQCDcore.h>
 #include <Grid/qcd/action/Action.h>
+#include <Grid/qcd/utils/GaugeFix.h>
 #include <Grid/qcd/smearing/Smearing.h>
+#include <Grid/parallelIO/MetaData.h>
 #include <Grid/qcd/hmc/HMC_aggregate.h>
 
 #endif
@@ -38,28 +38,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #ifndef GRID_BASE_H
 #define GRID_BASE_H
 
-///////////////////
-// Std C++ dependencies
-///////////////////
-#include <cassert>
-#include <complex>
-#include <vector>
-#include <iostream>
-#include <iomanip>
-#include <random>
-#include <functional>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <signal.h>
-#include <ctime>
-#include <sys/time.h>
-#include <chrono>
-
-///////////////////
-// Grid headers
-///////////////////
-#include "Config.h"
+#include <Grid/GridStd.h>
 
 #include <Grid/perfmon/Timer.h>
 #include <Grid/perfmon/PerfCount.h>
29 lib/GridStd.h Normal file
@@ -0,0 +1,29 @@
#ifndef GRID_STD_H
#define GRID_STD_H

///////////////////
// Std C++ dependencies
///////////////////
#include <cassert>
#include <complex>
#include <vector>
#include <string>
#include <iostream>
#include <iomanip>
#include <random>
#include <functional>
#include <stdio.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <ctime>
#include <sys/time.h>
#include <chrono>
#include <zlib.h>

///////////////////
// Grid config
///////////////////
#include "Config.h"

#endif /* GRID_STD_H */
9 lib/Grid_Eigen_Dense.h Normal file
@@ -0,0 +1,9 @@
#pragma once
#if defined __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
#include <Grid/Eigen/Dense>
#if defined __GNUC__
#pragma GCC diagnostic pop
#endif
@@ -10,8 +10,8 @@ if BUILD_COMMS_MPI3
   extra_sources+=communicator/Communicator_base.cc
 endif
 
-if BUILD_COMMS_MPI3L
-  extra_sources+=communicator/Communicator_mpi3_leader.cc
+if BUILD_COMMS_MPIT
+  extra_sources+=communicator/Communicator_mpit.cc
   extra_sources+=communicator/Communicator_base.cc
 endif
 
@@ -1,6 +1,6 @@
/*************************************************************************************

Grid physics library, www.github.com/paboyle/Grid
Grid physics library, www.github.com/paboyle/Grid

Source file: ./lib/Algorithms.h

@@ -37,6 +37,11 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/approx/Chebyshev.h>
#include <Grid/algorithms/approx/Remez.h>
#include <Grid/algorithms/approx/MultiShiftFunction.h>
#include <Grid/algorithms/approx/Forecast.h>

#include <Grid/algorithms/densematrix/DenseMatrix.h>
#include <Grid/algorithms/densematrix/Francis.h>
#include <Grid/algorithms/densematrix/Householder.h>

#include <Grid/algorithms/iterative/ConjugateGradient.h>
#include <Grid/algorithms/iterative/ConjugateResidual.h>
@@ -44,30 +49,18 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/iterative/SchurRedBlack.h>
#include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>

// Lanczos support
//#include <Grid/algorithms/iterative/MatrixUtils.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
#include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczosCJ.h>
#include <Grid/algorithms/iterative/SimpleLanczos.h>
#include <Grid/algorithms/CoarsenedMatrix.h>
#include <Grid/algorithms/FFT.h>

// Eigen/lanczos
// EigCg
// MCR
// Pcg
// Multishift CG
// Hdcg
// GCR
// etc..

// integrator/Leapfrog
// integrator/Omelyan
// integrator/ForceGradient

// montecarlo/hmc
// montecarlo/rhmc
// montecarlo/metropolis
// etc...


#endif

@@ -230,6 +230,7 @@ namespace Grid {
// Barrel shift and collect global pencil
std::vector<int> lcoor(Nd), gcoor(Nd);
result = source;
int pc = processor_coor[dim];
for(int p=0;p<processors[dim];p++) {
PARALLEL_REGION
{
@@ -240,7 +241,8 @@ namespace Grid {
for(int idx=0;idx<sgrid->lSites();idx++) {
sgrid->LocalIndexToLocalCoor(idx,cbuf);
peekLocalSite(s,result,cbuf);
cbuf[dim]+=p*L;
cbuf[dim]+=((pc+p) % processors[dim])*L;
// cbuf[dim]+=p*L;
pokeLocalSite(s,pgbuf,cbuf);
}
}
@@ -278,7 +280,6 @@ namespace Grid {
flops+= flops_call*NN;

// writing out result
int pc = processor_coor[dim];
PARALLEL_REGION
{
std::vector<int> clbuf(Nd), cgbuf(Nd);
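The change above replaces the fixed offset p*L with a barrel-shifted one, so each rank writes its slab at the globally correct pencil position. A scalar sketch of just the index arithmetic (the function name is mine, not from the patch):

  // Sketch: global offset of the slab handled at shift step p.
  // pc = this rank's coordinate along dim, L = local extent, P = ranks along dim.
  int pencilOffset(int pc, int p, int L, int P) {
    return ((pc + p) % P) * L; // wraps around the processor torus
  }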
@@ -8,6 +8,7 @@

Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Chulwoo Jung <chulwoo@bnl.gov>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -162,15 +163,10 @@ namespace Grid {
_Mat.M(in,out);
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
ComplexD dot;

_Mat.M(in,out);

dot= innerProduct(in,out);
n1=real(dot);

dot = innerProduct(out,out);
n2=real(dot);
ComplexD dot= innerProduct(in,out); n1=real(dot);
n2=norm2(out);
}
void HermOp(const Field &in, Field &out){
_Mat.M(in,out);
@@ -192,10 +188,10 @@ namespace Grid {
ni=Mpc(in,tmp);
no=MpcDag(tmp,out);
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
MpcDagMpc(in,out,n1,n2);
}
void HermOp(const Field &in, Field &out){
virtual void HermOp(const Field &in, Field &out){
RealD n1,n2;
HermOpAndNorm(in,out,n1,n2);
}
@@ -212,7 +208,6 @@ namespace Grid {
void OpDir (const Field &in, Field &out,int dir,int disp) {
assert(0);
}

};
template<class Matrix,class Field>
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
@@ -235,7 +230,7 @@ namespace Grid {
Field tmp(in._grid);

_Mat.MeooeDag(in,tmp);
_Mat.MooeeInvDag(tmp,out);
_Mat.MooeeInvDag(tmp,out);
_Mat.MeooeDag(out,tmp);

_Mat.MooeeDag(in,out);
@@ -270,7 +265,6 @@ namespace Grid {
return axpy_norm(out,-1.0,tmp,in);
}
};

template<class Matrix,class Field>
class SchurDiagTwoOperator : public SchurOperatorBase<Field> {
protected:
@@ -299,6 +293,168 @@ namespace Grid {
return axpy_norm(out,-1.0,tmp,in);
}
};
///////////////////////////////////////////////////////////////////////////////////////////////////
// Left handed Moo^-1 : (Moo - Moe Mee^-1 Meo) psi = eta  -->  (1 - Moo^-1 Moe Mee^-1 Meo) psi = Moo^-1 eta
// Right handed Moo^-1 : (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta  -->  (1 - Moe Mee^-1 Meo Moo^-1) phi = eta ; psi = Moo^-1 phi
///////////////////////////////////////////////////////////////////////////////////////////////////
template<class Matrix,class Field> using SchurDiagOneRH = SchurDiagTwoOperator<Matrix,Field> ;
template<class Matrix,class Field> using SchurDiagOneLH = SchurDiagOneOperator<Matrix,Field> ;
///////////////////////////////////////////////////////////////////////////////////////////////////
// Staggered use
///////////////////////////////////////////////////////////////////////////////////////////////////
template<class Matrix,class Field>
class SchurStaggeredOperator : public SchurOperatorBase<Field> {
protected:
Matrix &_Mat;
public:
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){};
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
n2 = Mpc(in,out);
ComplexD dot= innerProduct(in,out);
n1 = real(dot);
}
virtual void HermOp(const Field &in, Field &out){
Mpc(in,out);
}
virtual RealD Mpc (const Field &in, Field &out) {
Field tmp(in._grid);
_Mat.Meooe(in,tmp);
_Mat.MooeeInv(tmp,out);
_Mat.Meooe(out,tmp);
_Mat.Mooee(in,out);
return axpy_norm(out,-1.0,tmp,out);
}
virtual RealD MpcDag (const Field &in, Field &out){
return Mpc(in,out);
}
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
assert(0);// Never need with staggered
}
};
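For orientation, a hedged sketch of how such a staggered Schur operator would be handed to CG on the odd checkerboard; the action constructor, grids and field names are assumptions, only SchurStaggeredOperator itself comes from this patch:

  // Sketch: CG on the even-odd preconditioned staggered system.
  ImprovedStaggeredFermionR Ds(Umu,Umu,*FGrid,*FrbGrid,mass); // assumed ctor
  SchurStaggeredOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
  ConjugateGradient<FermionField> CG(1.0e-8,10000);
  FermionField src_o(FrbGrid), sol_o(FrbGrid);
  pickCheckerboard(Odd,src_o,src); // odd-site source
  sol_o = zero;
  CG(HermOp,src_o,sol_o); // CG only ever calls HermOpAndNorm/HermOp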
// template<class Matrix,class Field> using SchurStagOperator = SchurStaggeredOperator<Matrix,Field>;
template<class Matrix,class Field>
// class SchurStagOperator : public LinearOperatorBase<Field> {
class SchurStagOperator : public SchurOperatorBase<Field> {
protected:
Matrix &_Mat;
public:
SchurStagOperator (Matrix &Mat): _Mat(Mat){};
virtual RealD Mpc (const Field &in, Field &out) {
Field tmp(in._grid);
Field tmp2(in._grid);

_Mat.Mooee(in,out);
_Mat.Mooee(out,tmp);

_Mat.Meooe(in,out);
_Mat.Meooe(out,tmp2);

return axpy_norm(out,-1.0,tmp2,tmp);
}
virtual RealD MpcDag (const Field &in, Field &out){

return Mpc(in,out);
}
#if 0
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
Field tmp(in._grid);
ni=Mpc(in,tmp);
no=MpcDag(tmp,out);
}
#endif
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
n2 = Mpc(in,out);
ComplexD dot = innerProduct(in,out);
n1 = real(dot);
}
void HermOp(const Field &in, Field &out){
RealD n1,n2;
HermOpAndNorm(in,out,n1,n2);
}
void Op (const Field &in, Field &out){
Mpc(in,out);
}
void AdjOp (const Field &in, Field &out){
MpcDag(in,out);
}
// Support for coarsening to a multigrid
void OpDiag (const Field &in, Field &out) {
assert(0); // must coarsen the unpreconditioned system
}
void OpDir (const Field &in, Field &out,int dir,int disp) {
assert(0);
}

};

#if 0
// This is specific to (Z)mobius fermions
template<class Matrix, class Field>
class KappaSimilarityTransform {
public:
// INHERIT_IMPL_TYPES(Matrix);
typedef typename Matrix::Coeff_t Coeff_t;
std::vector<Coeff_t> kappa, kappaDag, kappaInv, kappaInvDag;

KappaSimilarityTransform (Matrix &zmob) {
for (int i=0;i<(int)zmob.bs.size();i++) {
Coeff_t k = 1.0 / ( 2.0 * (zmob.bs[i] *(4 - zmob.M5) + 1.0) );
kappa.push_back( k );
kappaDag.push_back( conj(k) );
kappaInv.push_back( 1.0 / k );
kappaInvDag.push_back( 1.0 / conj(k) );
}
}

template<typename vobj>
void sscale(const Lattice<vobj>& in, Lattice<vobj>& out, Coeff_t* s) {
GridBase *grid=out._grid;
out.checkerboard = in.checkerboard;
assert(grid->_simd_layout[0] == 1); // should be fine for ZMobius for now
int Ls = grid->_rdimensions[0];
parallel_for(int ss=0;ss<grid->oSites();ss++){
vobj tmp = s[ss % Ls]*in._odata[ss];
vstream(out._odata[ss],tmp);
}
}

RealD sscale_norm(const Field& in, Field& out, Coeff_t* s) {
sscale(in,out,s);
return norm2(out);
}

virtual RealD M (const Field& in, Field& out) { return sscale_norm(in,out,&kappa[0]); }
virtual RealD MDag (const Field& in, Field& out) { return sscale_norm(in,out,&kappaDag[0]);}
virtual RealD MInv (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInv[0]);}
virtual RealD MInvDag (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInvDag[0]);}

};

template<class Matrix,class Field>
class SchurDiagTwoKappaOperator : public SchurOperatorBase<Field> {
public:
KappaSimilarityTransform<Matrix, Field> _S;
SchurDiagTwoOperator<Matrix, Field> _Mat;

SchurDiagTwoKappaOperator (Matrix &Mat): _S(Mat), _Mat(Mat) {};

virtual RealD Mpc (const Field &in, Field &out) {
Field tmp(in._grid);

_S.MInv(in,out);
_Mat.Mpc(out,tmp);
return _S.M(tmp,out);

}
virtual RealD MpcDag (const Field &in, Field &out){
Field tmp(in._grid);

_S.MDag(in,out);
_Mat.MpcDag(out,tmp);
return _S.MInvDag(tmp,out);
}
};
#endif


/////////////////////////////////////////////////////////////
@@ -8,6 +8,7 @@

Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Christoph Lehner <clehner@bnl.gov>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -193,12 +194,54 @@ namespace Grid {
return sum;
};

RealD approxD(RealD x)
{
RealD Un;
RealD Unm;
RealD Unp;

RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));

RealD U0=1;
RealD U1=2*y;

RealD sum;
sum = Coeffs[1]*U0;
sum+= Coeffs[2]*U1*2.0;

Un =U1;
Unm=U0;
for(int i=2;i<order-1;i++){
Unp=2*y*Un-Unm;
Unm=Un;
Un =Unp;
sum+= Un*Coeffs[i+1]*(i+1.0);
}
return sum/(0.5*(hi-lo));
};

RealD approxInv(RealD z, RealD x0, int maxiter, RealD resid) {
RealD x = x0;
RealD eps;

int i;
for (i=0;i<maxiter;i++) {
eps = approx(x) - z;
if (fabs(eps / z) < resid)
return x;
x = x - eps / approxD(x);
}

return std::numeric_limits<double>::quiet_NaN();
}
// Implement the required interface
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {

GridBase *grid=in._grid;
//std::cout << "Chebyshev(): in._grid="<<in._grid<<std::endl;
//<<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;

// std::cout << "Chebyshev(): in._grid="<<in._grid<<std::endl;
//std::cout <<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;

int vol=grid->gSites();

lib/algorithms/approx/Forecast.h (new file, 152 lines)
@@ -0,0 +1,152 @@
/*************************************************************************************

Grid physics library, www.github.com/paboyle/Grid

Source file: ./lib/algorithms/approx/Forecast.h

Copyright (C) 2015

Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */

#ifndef INCLUDED_FORECAST_H
#define INCLUDED_FORECAST_H

namespace Grid {

// Abstract base class.
// Takes a matrix (Mat), a source (phi), and a vector of Fields (chi)
// and returns a forecasted solution to the system D*psi = phi (psi).
template<class Matrix, class Field>
class Forecast
{
public:
virtual Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& chi) = 0;
};

// Implementation of Brower et al.'s chronological inverter (arXiv:hep-lat/9509012),
// used to forecast solutions across poles of the EOFA heatbath.
//
// Modified from CPS (cps_pp/src/util/dirac_op/d_op_base/comsrc/minresext.C)
template<class Matrix, class Field>
class ChronoForecast : public Forecast<Matrix,Field>
{
public:
Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& prev_solns)
{
int degree = prev_solns.size();
Field chi(phi); // forecasted solution

// Trivial cases
if(degree == 0){ chi = zero; return chi; }
else if(degree == 1){ return prev_solns[0]; }

RealD dot;
ComplexD xp;
Field r(phi); // residual
Field Mv(phi);
std::vector<Field> v(prev_solns); // orthonormalized previous solutions
std::vector<Field> MdagMv(degree,phi);

// Array to hold the matrix elements
std::vector<std::vector<ComplexD>> G(degree, std::vector<ComplexD>(degree));

// Solution and source vectors
std::vector<ComplexD> a(degree);
std::vector<ComplexD> b(degree);

// Orthonormalize the vector basis
for(int i=0; i<degree; i++){
v[i] *= 1.0/std::sqrt(norm2(v[i]));
for(int j=i+1; j<degree; j++){ v[j] -= innerProduct(v[i],v[j]) * v[i]; }
}

// Perform sparse matrix multiplication and construct rhs
for(int i=0; i<degree; i++){
b[i] = innerProduct(v[i],phi);
Mat.M(v[i],Mv);
Mat.Mdag(Mv,MdagMv[i]);
G[i][i] = innerProduct(v[i],MdagMv[i]);
}

// Construct the matrix
for(int j=0; j<degree; j++){
for(int k=j+1; k<degree; k++){
G[j][k] = innerProduct(v[j],MdagMv[k]);
G[k][j] = std::conj(G[j][k]);
}}

// Gauss-Jordan elimination with partial pivoting
for(int i=0; i<degree; i++){

// Perform partial pivoting
int k = i;
for(int j=i+1; j<degree; j++){ if(std::abs(G[j][j]) > std::abs(G[k][k])){ k = j; } }
if(k != i){
xp = b[k];
b[k] = b[i];
b[i] = xp;
for(int j=0; j<degree; j++){
xp = G[k][j];
G[k][j] = G[i][j];
G[i][j] = xp;
}
}

// Convert matrix to upper triangular form
for(int j=i+1; j<degree; j++){
xp = G[j][i]/G[i][i];
b[j] -= xp * b[i];
for(int k=0; k<degree; k++){ G[j][k] -= xp*G[i][k]; }
}
}

// Use Gaussian elimination to solve equations and calculate initial guess
chi = zero;
r = phi;
for(int i=degree-1; i>=0; i--){
a[i] = 0.0;
for(int j=i+1; j<degree; j++){ a[i] += G[i][j] * a[j]; }
a[i] = (b[i]-a[i])/G[i][i];
chi += a[i]*v[i];
r -= a[i]*MdagMv[i];
}

RealD true_r(0.0);
ComplexD tmp;
for(int i=0; i<degree; i++){
tmp = -b[i];
for(int j=0; j<degree; j++){ tmp += G[i][j]*a[j]; }
tmp = std::conj(tmp)*tmp;
true_r += std::sqrt(tmp.real());
}

RealD error = std::sqrt(norm2(r)/norm2(phi));
std::cout << GridLogMessage << "ChronoForecast: |res|/|src| = " << error << std::endl;

return chi;
};
};

}

#endif
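A minimal usage sketch (illustrative, not from the patch): keep a rolling history of solutions and seed each new solve with the forecast; the action type, field names and CG wrapper are assumptions:

  // Sketch: forecast the next solution from previous ones, then refine with CG.
  std::vector<FermionField> prev_solns;        // rolling history of past solutions
  ChronoForecast<DomainWallFermionR,FermionField> forecast;
  FermionField guess = forecast(Mat, phi, prev_solns); // psi ~ D^{-1} phi
  CG(HermOp, phi, guess);                      // CG converges faster from the forecast
  prev_solns.push_back(guess);                 // grow the history for the next pole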
@@ -16,7 +16,7 @@
#define INCLUDED_ALG_REMEZ_H

#include <stddef.h>
#include <Config.h>
#include <Grid/GridStd.h>

#ifdef HAVE_LIBGMP
#include "bigfloat.h"
@@ -33,6 +33,8 @@ directory

namespace Grid {

enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS };

//////////////////////////////////////////////////////////////////////////
// Block conjugate gradient. Dimension zero should be the block direction
//////////////////////////////////////////////////////////////////////////
@@ -40,25 +42,286 @@ template <class Field>
class BlockConjugateGradient : public OperatorFunction<Field> {
public:


typedef typename Field::scalar_type scomplex;

const int blockDim = 0;

int blockDim ;
int Nblock;

BlockCGtype CGtype;
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
// Defaults true.
RealD Tolerance;
Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion

BlockConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
: Tolerance(tol),
MaxIterations(maxit),
ErrorOnNoConverge(err_on_no_conv){};
BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true)
: Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv)
{};

////////////////////////////////////////////////////////////////////////////////////////////////////
// Thin QR factorisation (google it)
////////////////////////////////////////////////////////////////////////////////////////////////////
void ThinQRfact (Eigen::MatrixXcd &m_rr,
Eigen::MatrixXcd &C,
Eigen::MatrixXcd &Cinv,
Field & Q,
const Field & R)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
////////////////////////////////////////////////////////////////////////////////////////////////////
//Dimensions
// R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock
//
// Rdag R = m_rr = Herm = L L^dag <-- Cholesky decomposition (LLT routine in Eigen)
//
// Q C = R => Q = R C^{-1}
//
// Want Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock}
//
// Set C = L^{dag}, and then Q^dag Q = ident
//
// Checks:
// Cdag C = Rdag R ; passes.
// QdagQ  = 1      ; passes
////////////////////////////////////////////////////////////////////////////////////////////////////
sliceInnerProductMatrix(m_rr,R,R,Orthog);

// Force manifest hermitian to avoid rounding related
m_rr = 0.5*(m_rr+m_rr.adjoint());

#if 0
std::cout << " Calling Cholesky ldlt on m_rr "  << m_rr <<std::endl;
Eigen::MatrixXcd L_ldlt = m_rr.ldlt().matrixL();
std::cout << " Called Cholesky ldlt on m_rr " << L_ldlt <<std::endl;
auto D_ldlt = m_rr.ldlt().vectorD();
std::cout << " Called Cholesky ldlt on m_rr " << D_ldlt <<std::endl;
#endif

// std::cout << " Calling Cholesky llt on m_rr " <<std::endl;
Eigen::MatrixXcd L = m_rr.llt().matrixL();
// std::cout << " Called Cholesky llt on m_rr " << L <<std::endl;
C = L.adjoint();
Cinv = C.inverse();
////////////////////////////////////////////////////////////////////////////////////////////////////
// Q = R C^{-1}
//
// Q_j = R_i Cinv(i,j)
//
// NB maddMatrix conventions are Right multiplication X[j] a[j,i] already
////////////////////////////////////////////////////////////////////////////////////////////////////
sliceMulMatrix(Q,Cinv,R,Orthog);
}
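To see the Cholesky-based thin QR trick in isolation: a small self-contained Eigen sketch (my own example, not from the patch) that factors a tall matrix R = Q C and checks Q^dag Q = 1; dense products stand in for the lattice slice operations:

  #include <iostream>
  #include <Eigen/Dense>

  // Thin QR via Cholesky, as in ThinQRfact: m_rr = R^dag R = L L^dag,
  // set C = L^dag, then Q = R C^{-1} satisfies Q^dag Q = 1.
  int main() {
    const int Nferm = 100, Nblock = 4;
    Eigen::MatrixXcd R = Eigen::MatrixXcd::Random(Nferm, Nblock);

    Eigen::MatrixXcd m_rr = R.adjoint() * R;      // stand-in for sliceInnerProductMatrix
    m_rr = 0.5 * (m_rr + m_rr.adjoint());         // force manifest hermiticity
    Eigen::MatrixXcd L = m_rr.llt().matrixL();    // Cholesky factor
    Eigen::MatrixXcd C = L.adjoint();
    Eigen::MatrixXcd Q = R * C.inverse();         // stand-in for sliceMulMatrix

    std::cout << "|Q^dag Q - 1| = "
              << (Q.adjoint()*Q - Eigen::MatrixXcd::Identity(Nblock,Nblock)).norm()
              << std::endl;                       // ~1e-14
  }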
////////////////////////////////////////////////////////////////////////////////////////////////////
// Call one of several implementations
////////////////////////////////////////////////////////////////////////////////////////////////////
void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
int Orthog = 0; // First dimension is block dim
if ( CGtype == BlockCGrQ ) {
BlockCGrQsolve(Linop,Src,Psi);
} else if (CGtype == BlockCG ) {
BlockCGsolve(Linop,Src,Psi);
} else if (CGtype == CGmultiRHS ) {
CGmultiRHSsolve(Linop,Src,Psi);
} else {
assert(0);
}
}

////////////////////////////////////////////////////////////////////////////
// BlockCGrQ implementation:
//--------------------------
// X is guess/Solution
// B is RHS
// Solve A X_i = B_i ; i refers to Nblock index
////////////////////////////////////////////////////////////////////////////
void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
Nblock = B._grid->_fdimensions[Orthog];

std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;

X.checkerboard = B.checkerboard;
conformable(X, B);

Field tmp(B);
Field Q(B);
Field D(B);
Field Z(B);
Field AD(B);

Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);

Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);

Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);

// Initial residual computation & set up
std::vector<RealD> residuals(Nblock);
std::vector<RealD> ssq(Nblock);

sliceNorm(ssq,B,Orthog);
RealD sssum=0;
for(int b=0;b<Nblock;b++) sssum+=ssq[b];

sliceNorm(residuals,B,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }

sliceNorm(residuals,X,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }

/************************************************************************
* Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
************************************************************************
* Dimensions:
*
* X,B==(Nferm x Nblock)
* A==(Nferm x Nferm)
*
* Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
*
* QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
* for k:
*   Z = AD
*   M = [D^dag Z]^{-1}
*   X = X + D MC
*   QS = Q - ZM
*   D = Q + D S^dag
*   C = S C
*/
///////////////////////////////////////
// Initial block: initial search dir is guess
///////////////////////////////////////
std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl;

//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)

Linop.HermOp(X, AD);
tmp = B - AD;
//std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl;
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
//std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl;
//std::cout << GridLogMessage << " m_rr " << m_rr<<std::endl;
//std::cout << GridLogMessage << " m_C " << m_C<<std::endl;
//std::cout << GridLogMessage << " m_Cinv " << m_Cinv<<std::endl;
D=Q;

std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl;

///////////////////////////////////////
// Timers
///////////////////////////////////////
GridStopWatch sliceInnerTimer;
GridStopWatch sliceMaddTimer;
GridStopWatch QRTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();

int k;
for (k = 1; k <= MaxIterations; k++) {

//3. Z = AD
MatrixTimer.Start();
Linop.HermOp(D, Z);
MatrixTimer.Stop();
//std::cout << GridLogMessage << " norm2 Z " <<norm2(Z)<<std::endl;

//4. M = [D^dag Z]^{-1}
sliceInnerTimer.Start();
sliceInnerProductMatrix(m_DZ,D,Z,Orthog);
sliceInnerTimer.Stop();
m_M = m_DZ.inverse();
//std::cout << GridLogMessage << " m_DZ " <<m_DZ<<std::endl;

//5. X = X + D MC
m_tmp = m_M * m_C;
sliceMaddTimer.Start();
sliceMaddMatrix(X,m_tmp, D,X,Orthog);
sliceMaddTimer.Stop();

//6. QS = Q - ZM
sliceMaddTimer.Start();
sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0);
sliceMaddTimer.Stop();
QRTimer.Start();
ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
QRTimer.Stop();

//7. D = Q + D S^dag
m_tmp = m_S.adjoint();
sliceMaddTimer.Start();
sliceMaddMatrix(D,m_tmp,D,Q,Orthog);
sliceMaddTimer.Stop();

//8. C = S C
m_C = m_S*m_C;

/*********************
* convergence monitor
*********************
*/
m_rr = m_C.adjoint() * m_C;

RealD max_resid=0;
RealD rrsum=0;
RealD rr;

for(int b=0;b<Nblock;b++) {
rrsum+=real(m_rr(b,b));
rr = real(m_rr(b,b))/ssq[b];
if ( rr > max_resid ) max_resid = rr;
}

std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
<<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl;

if ( max_resid < Tolerance*Tolerance ) {

SolverTimer.Stop();

std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;

for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;

Linop.HermOp(X, AD);
AD = AD-B;
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl;

std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;

IterationsToComplete = k;
return;
}

}
std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;

if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
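A brief usage sketch under assumptions (a 5d source carrying the block index in lattice dimension 0, and an operator wrapper set up elsewhere); only the constructor and call signatures come from this file:

  // Sketch: run the rQ variant over a block of RHS vectors laid out
  // along lattice dimension 0 (the Orthog direction).
  BlockConjugateGradient<FermionField> BCGrQ(BlockCGrQ, 0, 1.0e-8, 10000);
  BCGrQ(HermOp, Src5d, Sol5d);   // Src5d/Sol5d carry Nblock along dim 0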
//////////////////////////////////////////////////////////////////////////
// Block conjugate gradient; original O'Leary. Dimension zero should be the block direction
//////////////////////////////////////////////////////////////////////////
void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
Nblock = Src._grid->_fdimensions[Orthog];

std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
@@ -162,8 +425,9 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
*********************
*/
RealD max_resid=0;
RealD rr;
for(int b=0;b<Nblock;b++){
RealD rr = real(m_rr(b,b))/ssq[b];
rr = real(m_rr(b,b))/ssq[b];
if ( rr > max_resid ) max_resid = rr;
}

@@ -173,13 +437,14 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)

std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tblock "<<b<<" resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;

Linop.HermOp(Psi, AP);
AP = AP-Src;
std::cout << GridLogMessage <<"\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;

std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
@@ -197,35 +462,13 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
};

//////////////////////////////////////////////////////////////////////////
// multiRHS conjugate gradient. Dimension zero should be the block direction
// Use this for spread out across nodes
//////////////////////////////////////////////////////////////////////////
template <class Field>
class MultiRHSConjugateGradient : public OperatorFunction<Field> {
public:

typedef typename Field::scalar_type scomplex;

const int blockDim = 0;

int Nblock;
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
// Defaults true.
RealD Tolerance;
Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion

MultiRHSConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
: Tolerance(tol),
MaxIterations(maxit),
ErrorOnNoConverge(err_on_no_conv){};

void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
int Orthog = 0; // First dimension is block dim
int Orthog = blockDim; // First dimension is block dim
Nblock = Src._grid->_fdimensions[Orthog];

std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
@@ -285,12 +528,10 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
MatrixTimer.Stop();

// Alpha
// sliceInnerProductVectorTest(v_pAp_test,P,AP,Orthog);
sliceInnerTimer.Start();
sliceInnerProductVector(v_pAp,P,AP,Orthog);
sliceInnerTimer.Stop();
for(int b=0;b<Nblock;b++){
// std::cout << " "<< v_pAp[b]<<" "<< v_pAp_test[b]<<std::endl;
v_alpha[b] = v_rr[b]/real(v_pAp[b]);
}

@@ -332,7 +573,7 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)

std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tBlock "<<b<<" resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl;
std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;

@@ -358,9 +599,8 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}

};



}
#endif
@@ -0,0 +1,753 @@
/*************************************************************************************

Grid physics library, www.github.com/paboyle/Grid

Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h

Copyright (C) 2015

Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Chulwoo Jung <chulwoo@bnl.gov>
Author: Christoph Lehner <clehner@bnl.gov>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_BIRL_H
#define GRID_BIRL_H

#include <string.h> //memset

#include <zlib.h>
#include <sys/stat.h>

#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h>
#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h>
#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockProjector.h>

namespace Grid {

/////////////////////////////////////////////////////////////
// Implicitly restarted lanczos
/////////////////////////////////////////////////////////////

template<class Field>
class BlockImplicitlyRestartedLanczos {

const RealD small = 1.0e-16;
public:
int lock;
int get;
int Niter;
int converged;

int Nminres; // Minimum number of restarts; only check for convergence after
int Nstop; // Number of evecs checked for convergence
int Nk; // Number of evecs sought
int Np; // Np -- Number of spare vecs in Krylov space
int Nm; // Nm -- total number of vectors

int orth_period;

RealD OrthoTime;

RealD eresid, betastp;
SortEigen<Field> _sort;
LinearFunction<Field> &_HermOp;
LinearFunction<Field> &_HermOpTest;
/////////////////////////
// Constructor
/////////////////////////

BlockImplicitlyRestartedLanczos(
LinearFunction<Field> & HermOp,
LinearFunction<Field> & HermOpTest,
int _Nstop, // sought vecs
int _Nk, // sought vecs
int _Nm, // spare vecs
RealD _eresid, // resid in eigenvalue deficit
RealD _betastp, // if beta(k) < betastp: converged
int _Niter, // Max iterations
int _Nminres, int _orth_period = 1) :
_HermOp(HermOp),
_HermOpTest(HermOpTest),
Nstop(_Nstop),
Nk(_Nk),
Nm(_Nm),
eresid(_eresid),
betastp(_betastp),
Niter(_Niter),
Nminres(_Nminres),
orth_period(_orth_period)
{
Np = Nm-Nk; assert(Np>0);
};

BlockImplicitlyRestartedLanczos(
LinearFunction<Field> & HermOp,
LinearFunction<Field> & HermOpTest,
int _Nk, // sought vecs
int _Nm, // spare vecs
RealD _eresid, // resid in eigenvalue deficit
RealD _betastp, // if beta(k) < betastp: converged
int _Niter, // Max iterations
int _Nminres,
int _orth_period = 1) :
_HermOp(HermOp),
_HermOpTest(HermOpTest),
Nstop(_Nk),
Nk(_Nk),
Nm(_Nm),
eresid(_eresid),
betastp(_betastp),
Niter(_Niter),
Nminres(_Nminres),
orth_period(_orth_period)
{
Np = Nm-Nk; assert(Np>0);
};


/* Saad PP. 195
1. Choose an initial vector v_1 of 2-norm unity. Set beta_1 = 0, v_0 = 0
2. For k = 1,2,...,m Do:
3. w_k := A v_k - beta_k v_{k-1}
4. alpha_k := (w_k, v_k)
5. w_k := w_k - alpha_k v_k      // w_k orthog v_k
6. beta_{k+1} := ||w_k||_2. If beta_{k+1} = 0 then Stop
7. v_{k+1} := w_k / beta_{k+1}
8. EndDo
*/
void step(std::vector<RealD>& lmd,
std::vector<RealD>& lme,
BasisFieldVector<Field>& evec,
Field& w,int Nm,int k)
{
assert( k< Nm );

GridStopWatch gsw_op,gsw_o;

Field& evec_k = evec[k];

gsw_op.Start();
_HermOp(evec_k,w);
gsw_op.Stop();

if(k>0){
w -= lme[k-1] * evec[k-1];
}

ComplexD zalph = innerProduct(evec_k,w); // 4. alpha_k := (w_k, v_k)
RealD alph = real(zalph);

w = w - alph * evec_k; // 5. w_k := w_k - alpha_k v_k

RealD beta = normalise(w); // 6. beta_{k+1} := ||w_k||_2. If beta_{k+1} = 0 then Stop
// 7. v_{k+1} := w_k / beta_{k+1}

std::cout<<GridLogMessage << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
const RealD tiny = 1.0e-20;
if ( beta < tiny ) {
std::cout<<GridLogMessage << " beta is tiny "<<beta<<std::endl;
}
lmd[k] = alph;
lme[k] = beta;

gsw_o.Start();
if (k>0 && k % orth_period == 0) {
orthogonalize(w,evec,k); // orthonormalise
}
gsw_o.Stop();

if(k < Nm-1) {
evec[k+1] = w;
}

std::cout << GridLogMessage << "Timing: operator=" << gsw_op.Elapsed() <<
" orth=" << gsw_o.Elapsed() << std::endl;

}

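A toy, fully self-contained version of the same three-term recurrence on an explicit symmetric matrix may make steps 3 to 7 concrete (entirely illustrative, no Grid types):

  #include <cstdio>
  #include <cmath>
  #include <vector>

  // Toy Lanczos steps on a dense symmetric matrix A (steps 3-7 of Saad p.195).
  // lmd/lme collect the tridiagonal alpha/beta, v holds the Lanczos vectors.
  int main() {
    const int N = 4, m = 3;
    double A[N][N] = {{4,1,0,0},{1,3,1,0},{0,1,2,1},{0,0,1,1}};
    std::vector<std::vector<double>> v(m+1, std::vector<double>(N,0.0));
    std::vector<double> lmd(m), lme(m);
    v[0][0] = 1.0;                                  // unit start vector
    for (int k = 0; k < m; k++) {
      std::vector<double> w(N,0.0);
      for (int i=0;i<N;i++) for (int j=0;j<N;j++) w[i]+=A[i][j]*v[k][j]; // w = A v_k
      if (k>0) for (int i=0;i<N;i++) w[i]-=lme[k-1]*v[k-1][i];           // - beta_k v_{k-1}
      double alph=0; for (int i=0;i<N;i++) alph+=w[i]*v[k][i];           // alpha_k = (w,v_k)
      for (int i=0;i<N;i++) w[i]-=alph*v[k][i];                          // orthogonalise
      double beta=0; for (int i=0;i<N;i++) beta+=w[i]*w[i]; beta=std::sqrt(beta);
      lmd[k]=alph; lme[k]=beta;
      for (int i=0;i<N;i++) v[k+1][i]=w[i]/beta;                         // v_{k+1}
      printf("alpha[%d]=%f beta[%d]=%f\n",k,alph,k,beta);
    }
  }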
void qr_decomp(std::vector<RealD>& lmd,
std::vector<RealD>& lme,
int Nk,
int Nm,
std::vector<RealD>& Qt,
RealD Dsh,
int kmin,
int kmax)
{
int k = kmin-1;
RealD x;

RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]);
RealD c = ( lmd[k] -Dsh) *Fden;
RealD s = -lme[k] *Fden;

RealD tmpa1 = lmd[k];
RealD tmpa2 = lmd[k+1];
RealD tmpb = lme[k];

lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
x =-s*lme[k+1];
lme[k+1] = c*lme[k+1];

for(int i=0; i<Nk; ++i){
RealD Qtmp1 = Qt[i+Nm*k ];
RealD Qtmp2 = Qt[i+Nm*(k+1)];
Qt[i+Nm*k ] = c*Qtmp1 - s*Qtmp2;
Qt[i+Nm*(k+1)] = s*Qtmp1 + c*Qtmp2;
}

// Givens transformations
for(int k = kmin; k < kmax-1; ++k){

RealD Fden = 1.0/hypot(x,lme[k-1]);
RealD c = lme[k-1]*Fden;
RealD s = - x*Fden;

RealD tmpa1 = lmd[k];
RealD tmpa2 = lmd[k+1];
RealD tmpb = lme[k];

lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
lme[k-1] = c*lme[k-1] -s*x;

if(k != kmax-2){
x = -s*lme[k+1];
lme[k+1] = c*lme[k+1];
}

for(int i=0; i<Nk; ++i){
RealD Qtmp1 = Qt[i+Nm*k ];
RealD Qtmp2 = Qt[i+Nm*(k+1)];
Qt[i+Nm*k ] = c*Qtmp1 -s*Qtmp2;
Qt[i+Nm*(k+1)] = s*Qtmp1 +c*Qtmp2;
}
}
}

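Each pass above is a Givens rotation chasing the bulge down the tridiagonal; in isolation a single rotation looks like this (standalone sketch, not from the file):

  #include <cstdio>
  #include <cmath>

  // One Givens rotation: choose c,s so that [c -s; s c]^T [a; b] = [r; 0].
  int main() {
    double a = 3.0, b = 4.0;
    double r = std::hypot(a, b);          // 5
    double c = a / r, s = -b / r;
    double top = c * a - s * b;           // r
    double bot = s * a + c * b;           // 0 (the zeroed subdiagonal entry)
    printf("r=%f top=%f bot=%e\n", r, top, bot);
  }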
#ifdef USE_LAPACK_IRL
#define LAPACK_INT int
//long long
void diagonalize_lapack(std::vector<RealD>& lmd,
std::vector<RealD>& lme,
int N1,
int N2,
std::vector<RealD>& Qt,
GridBase *grid){

std::cout << GridLogMessage << "diagonalize_lapack start\n";
GridStopWatch gsw;

const int size = Nm;
// tevals.resize(size);
// tevecs.resize(size);
LAPACK_INT NN = N1;
std::vector<double> evals_tmp(NN);
std::vector<double> evec_tmp(NN*NN);
memset(&evec_tmp[0],0,sizeof(double)*NN*NN);
// double AA[NN][NN];
std::vector<double> DD(NN);
std::vector<double> EE(NN);
for (int i = 0; i< NN; i++)
for (int j = i - 1; j <= i + 1; j++)
if ( j < NN && j >= 0 ) {
if (i==j) DD[i] = lmd[i];
if (i==j) evals_tmp[i] = lmd[i];
if (j==(i-1)) EE[j] = lme[j];
}
LAPACK_INT evals_found;
LAPACK_INT lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ;
LAPACK_INT liwork = 3+NN*10 ;
std::vector<LAPACK_INT> iwork(liwork);
std::vector<double> work(lwork);
std::vector<LAPACK_INT> isuppz(2*NN);
char jobz = 'V'; // calculate evals & evecs
char range = 'I'; // calculate evals in the index interval [il,iu]
// char range = 'A'; // calculate all evals
char uplo = 'U'; // refer to upper half of original matrix
char compz = 'I'; // Compute eigenvectors of tridiagonal matrix
std::vector<int> ifail(NN);
LAPACK_INT info;
// int total = QMP_get_number_of_nodes();
// int node = QMP_get_node_number();
// GridBase *grid = evec[0]._grid;
int total = grid->_Nprocessors;
int node = grid->_processor;
int interval = (NN/total)+1;
double vl = 0.0, vu = 0.0;
LAPACK_INT il = interval*node+1 , iu = interval*(node+1);
if (iu > NN) iu=NN;
double tol = 0.0;
if (1) {
memset(&evals_tmp[0],0,sizeof(double)*NN);
if ( il <= NN){
std::cout << GridLogMessage << "dstegr started" << std::endl;
gsw.Start();
dstegr(&jobz, &range, &NN,
(double*)&DD[0], (double*)&EE[0],
&vl, &vu, &il, &iu, // these four are ignored if the second parameter is 'A'
&tol, // tolerance
&evals_found, &evals_tmp[0], (double*)&evec_tmp[0], &NN,
&isuppz[0],
&work[0], &lwork, &iwork[0], &liwork,
&info);
gsw.Stop();
std::cout << GridLogMessage << "dstegr completed in " << gsw.Elapsed() << std::endl;
for (int i = iu-1; i>= il-1; i--){
evals_tmp[i] = evals_tmp[i - (il-1)];
if (il>1) evals_tmp[i-(il-1)]=0.;
for (int j = 0; j< NN; j++){
evec_tmp[i*NN + j] = evec_tmp[(i - (il-1)) * NN + j];
if (il>1) evec_tmp[(i-(il-1)) * NN + j]=0.;
}
}
}
{
// QMP_sum_double_array(evals_tmp,NN);
// QMP_sum_double_array((double *)evec_tmp,NN*NN);
grid->GlobalSumVector(&evals_tmp[0],NN);
grid->GlobalSumVector(&evec_tmp[0],NN*NN);
}
}
// Cheating a bit: it would be better to sort instead of just reversing, but the documentation of the routine says evals are sorted in increasing order, while qr gives evals in decreasing order.
for(int i=0;i<NN;i++){
for(int j=0;j<NN;j++)
Qt[(NN-1-i)*N2+j]=evec_tmp[i*NN + j];
lmd [NN-1-i]=evals_tmp[i];
}

std::cout << GridLogMessage << "diagonalize_lapack complete\n";
}
#undef LAPACK_INT
#endif


void diagonalize(std::vector<RealD>& lmd,
std::vector<RealD>& lme,
int N2,
int N1,
std::vector<RealD>& Qt,
GridBase *grid)
{

#ifdef USE_LAPACK_IRL
const int check_lapack=0; // just use lapack if 0, check against lapack if 1

if(!check_lapack)
return diagonalize_lapack(lmd,lme,N2,N1,Qt,grid);

std::vector <RealD> lmd2(N1);
std::vector <RealD> lme2(N1);
std::vector<RealD> Qt2(N1*N1);
for(int k=0; k<N1; ++k){
lmd2[k] = lmd[k];
lme2[k] = lme[k];
}
for(int k=0; k<N1*N1; ++k)
Qt2[k] = Qt[k];

// diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid);
#endif

int Niter = 10000*N1;
int kmin = 1;
int kmax = N2;
// (this should be more sophisticated)

for(int iter=0; ; ++iter){
if ( (iter+1)%(100*N1)==0)
std::cout<<GridLogMessage << "[QL method] Not converged - iteration "<<iter+1<<"\n";

// determination of 2x2 leading submatrix
RealD dsub = lmd[kmax-1]-lmd[kmax-2];
RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]);
RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub)));
// (Dsh: shift)

// transformation
qr_decomp(lmd,lme,N2,N1,Qt,Dsh,kmin,kmax);

// Convergence criterion (redef of kmin and kmax)
for(int j=kmax-1; j>= kmin; --j){
RealD dds = fabs(lmd[j-1])+fabs(lmd[j]);
if(fabs(lme[j-1])+dds > dds){
kmax = j+1;
goto continued;
}
}
Niter = iter;
#ifdef USE_LAPACK_IRL
if(check_lapack){
const double SMALL=1e-8;
diagonalize_lapack(lmd2,lme2,N2,N1,Qt2,grid);
std::vector <RealD> lmd3(N2);
for(int k=0; k<N2; ++k) lmd3[k]=lmd[k];
_sort.push(lmd3,N2);
_sort.push(lmd2,N2);
for(int k=0; k<N2; ++k){
if (fabs(lmd2[k] - lmd3[k]) >SMALL) std::cout<<GridLogMessage <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl;
// if (fabs(lme2[k] - lme[k]) >SMALL) std::cout<<GridLogMessage <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl;
}
for(int k=0; k<N1*N1; ++k){
// if (fabs(Qt2[k] - Qt[k]) >SMALL) std::cout<<GridLogMessage <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl;
}
}
#endif
return;

continued:
for(int j=0; j<kmax-1; ++j){
RealD dds = fabs(lmd[j])+fabs(lmd[j+1]);
if(fabs(lme[j])+dds > dds){
kmin = j+1;
break;
}
}
}
std::cout<<GridLogMessage << "[QL method] Error - Too many iterations: "<<Niter<<"\n";
abort();
}

#if 1
template<typename T>
static RealD normalise(T& v)
{
RealD nn = norm2(v);
nn = sqrt(nn);
v = v * (1.0/nn);
return nn;
}

void orthogonalize(Field& w,
BasisFieldVector<Field>& evec,
int k)
{
double t0=-usecond()/1e6;

evec.orthogonalize(w,k);

normalise(w);
t0+=usecond()/1e6;
OrthoTime +=t0;
}

void setUnit_Qt(int Nm, std::vector<RealD> &Qt) {
for(int i=0; i<Qt.size(); ++i) Qt[i] = 0.0;
for(int k=0; k<Nm; ++k) Qt[k + k*Nm] = 1.0;
}

/* Rudy Arthur's thesis pp.137
------------------------
Require: M > K, P = M - K
Compute the factorization A V_M = V_M H_M + f_M e_M^dag
repeat
  Q = I
  for i = 1,...,P do
    Q_i R_i = H_M - theta_i I ;  Q = Q Q_i
    H_M = Q_i^dag H_M Q_i
  end for
  beta_K = H_M(K+1,K) ; sigma_K = Q(M,K)
  r = v_{K+1} beta_K + r sigma_K
  V_K = V_M(1:M) Q(1:M,1:K)
  H_K = H_M(1:K,1:K)
  => A V_K = V_K H_K + f_K e_K^dag ; extend to an M = K + P step factorization A V_M = V_M H_M + f_M e_M^dag
until convergence
*/

void calc(std::vector<RealD>& eval,
|
||||
BasisFieldVector<Field>& evec,
|
||||
const Field& src,
|
||||
int& Nconv,
|
||||
bool reverse,
|
||||
int SkipTest)
|
||||
{
|
||||
|
||||
GridBase *grid = evec._v[0]._grid;//evec.get(0 + evec_offset)._grid;
|
||||
assert(grid == src._grid);
|
||||
|
||||
std::cout<<GridLogMessage << " -- Nk = " << Nk << " Np = "<< Np << std::endl;
|
||||
std::cout<<GridLogMessage << " -- Nm = " << Nm << std::endl;
|
||||
std::cout<<GridLogMessage << " -- size of eval = " << eval.size() << std::endl;
|
||||
std::cout<<GridLogMessage << " -- size of evec = " << evec.size() << std::endl;
|
||||
|
||||
assert(Nm <= evec.size() && Nm <= eval.size());
|
||||
|
||||
// quickly get an idea of the largest eigenvalue to more properly normalize the residuum
|
||||
RealD evalMaxApprox = 0.0;
|
||||
{
|
||||
auto src_n = src;
|
||||
auto tmp = src;
|
||||
const int _MAX_ITER_IRL_MEVAPP_ = 50;
|
||||
for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) {
|
||||
_HermOpTest(src_n,tmp);
|
||||
RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
|
||||
RealD vden = norm2(src_n);
|
||||
RealD na = vnum/vden;
|
||||
if (fabs(evalMaxApprox/na - 1.0) < 0.05)
|
||||
i=_MAX_ITER_IRL_MEVAPP_;
|
||||
evalMaxApprox = na;
|
||||
std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
|
||||
src_n = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<RealD> lme(Nm);
|
||||
std::vector<RealD> lme2(Nm);
|
||||
std::vector<RealD> eval2(Nm);
|
||||
std::vector<RealD> eval2_copy(Nm);
|
||||
std::vector<RealD> Qt(Nm*Nm);
|
||||
|
||||
|
||||
Field f(grid);
|
||||
Field v(grid);
|
||||
|
||||
int k1 = 1;
|
||||
int k2 = Nk;
|
||||
|
||||
Nconv = 0;
|
||||
|
||||
RealD beta_k;
|
||||
|
||||
// Set initial vector
|
||||
evec[0] = src;
|
||||
normalise(evec[0]);
|
||||
std:: cout<<GridLogMessage <<"norm2(evec[0])= " << norm2(evec[0])<<std::endl;
|
||||
|
||||
// Initial Nk steps
|
||||
OrthoTime=0.;
|
||||
double t0=usecond()/1e6;
|
||||
for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
|
||||
double t1=usecond()/1e6;
|
||||
std::cout<<GridLogMessage <<"IRL::Initial steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
|
||||
std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
|
||||
t1=usecond()/1e6;
|
||||
|
||||
// Restarting loop begins
|
||||
for(int iter = 0; iter<Niter; ++iter){
|
||||
|
||||
std::cout<<GridLogMessage<<"\n Restart iteration = "<< iter << std::endl;
|
||||
|
||||
//
|
||||
// Rudy does a sort first which looks very different. Getting fed up with sorting out the algo defs.
|
||||
// We loop over
|
||||
//
|
||||
OrthoTime=0.;
|
||||
for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k);
|
||||
t1=usecond()/1e6;
|
||||
std::cout<<GridLogMessage <<"IRL:: "<<Np <<" steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
|
||||
std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
|
||||
      f *= lme[Nm-1];

      t1=usecond()/1e6;

      // getting eigenvalues
      for(int k=0; k<Nm; ++k){
        eval2[k] = eval[k+k1-1];
        lme2[k]  = lme[k+k1-1];
      }
      setUnit_Qt(Nm,Qt);
      diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
      t1=usecond()/1e6;
      std::cout<<GridLogMessage <<"IRL:: diagonalize: "<<t1-t0<< " seconds"<<std::endl; t0=t1;

      // sorting
      eval2_copy = eval2;

      _sort.push(eval2,Nm);
      t1=usecond()/1e6;
      std::cout<<GridLogMessage <<"IRL:: eval sorting: "<<t1-t0<< " seconds"<<std::endl; t0=t1;

      // Implicitly shifted QR transformations
      setUnit_Qt(Nm,Qt);
      for(int ip=0; ip<k2; ++ip){
        std::cout<<GridLogMessage << "eval "<< ip << " "<< eval2[ip] << std::endl;
      }

      for(int ip=k2; ip<Nm; ++ip){
        std::cout<<GridLogMessage << "qr_decomp "<< ip << " "<< eval2[ip] << std::endl;
        qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
      }
      t1=usecond()/1e6;
      std::cout<<GridLogMessage <<"IRL::qr_decomp: "<<t1-t0<< " seconds"<<std::endl; t0=t1;

      assert(k2<Nm);
      assert(k1>0);
      evec.rotate(Qt,k1-1,k2+1,0,Nm,Nm);

      t1=usecond()/1e6;
      std::cout<<GridLogMessage <<"IRL::QR rotation: "<<t1-t0<< " seconds"<<std::endl; t0=t1;
      fflush(stdout);

      // Compressed vector f and beta(k2)
      f *= Qt[Nm-1+Nm*(k2-1)];
      f += lme[k2-1] * evec[k2];
      beta_k = norm2(f);
      beta_k = sqrt(beta_k);
      std::cout<<GridLogMessage<<" beta(k) = "<<beta_k<<std::endl;

      RealD betar = 1.0/beta_k;
      evec[k2] = betar * f;
      lme[k2-1] = beta_k;

      // Convergence test
      for(int k=0; k<Nm; ++k){
        eval2[k] = eval[k];
        lme2[k]  = lme[k];
        std::cout<<GridLogMessage << "eval2[" << k << "] = " << eval2[k] << std::endl;
      }
      setUnit_Qt(Nm,Qt);
      diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
      t1=usecond()/1e6;
      std::cout<<GridLogMessage <<"IRL::diagonalize: "<<t1-t0<< " seconds"<<std::endl; t0=t1;

      Nconv = 0;

      if (iter >= Nminres) {
        std::cout << GridLogMessage << "Rotation to test convergence " << std::endl;

        Field ev0_orig(grid);
        ev0_orig = evec[0];

        evec.rotate(Qt,0,Nk,0,Nk,Nm);

        {
          std::cout << GridLogMessage << "Test convergence" << std::endl;
          Field B(grid);

          for(int j = 0; j<Nk; j+=SkipTest){
            B = evec[j];
            //std::cout << "Checkerboard: " << evec[j].checkerboard << std::endl;
            B.checkerboard = evec[0].checkerboard;

            _HermOpTest(B,v);

            RealD vnum = real(innerProduct(B,v)); // HermOp.
            RealD vden = norm2(B);
            RealD vv0  = norm2(v);
            eval2[j] = vnum/vden;
            v -= eval2[j]*B;
            RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);
            std::cout.precision(13);
            std::cout<<GridLogMessage << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<j<<"] "
                     <<"eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[j] << " (" << eval2_copy[j] << ")"
                     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv
                     <<" "<< vnum/(sqrt(vden)*sqrt(vv0))
                     << " norm(B["<<j<<"])="<< vden <<std::endl;

            // change the criteria as evals are supposed to be sorted; all evals smaller (larger) than Nstop should have converged
            if((vv<eresid*eresid) && (j == Nconv) ){
              Nconv+=SkipTest;
            }
          }

          // test if we converged; if so, terminate
          t1=usecond()/1e6;
          std::cout<<GridLogMessage <<"IRL::convergence testing: "<<t1-t0<< " seconds"<<std::endl; t0=t1;

          std::cout<<GridLogMessage<<" #modes converged: "<<Nconv<<std::endl;

          if( Nconv>=Nstop || beta_k < betastp){
            goto converged;
          }

          std::cout << GridLogMessage << "Rotate back" << std::endl;
          //B[j] += Qt[k+_Nm*j] * _v[k]._odata[ss];
          {
            Eigen::MatrixXd qm = Eigen::MatrixXd::Zero(Nk,Nk);
            for (int k=0;k<Nk;k++)
              for (int j=0;j<Nk;j++)
                qm(j,k) = Qt[k+Nm*j];
            GridStopWatch timeInv;
            timeInv.Start();
            Eigen::MatrixXd qmI = qm.inverse();
            timeInv.Stop();
            std::vector<RealD> QtI(Nm*Nm);
            for (int k=0;k<Nk;k++)
              for (int j=0;j<Nk;j++)
                QtI[k+Nm*j] = qmI(j,k);

            RealD res_check_rotate_inverse = (qm*qmI - Eigen::MatrixXd::Identity(Nk,Nk)).norm(); // sqrt( |X|^2 )
            assert(res_check_rotate_inverse < 1e-7);
            evec.rotate(QtI,0,Nk,0,Nk,Nm);

            axpy(ev0_orig,-1.0,evec[0],ev0_orig);
            std::cout << GridLogMessage << "Rotation done (in " << timeInv.Elapsed() << " = " << timeInv.useconds() << " us" <<
              ", error = " << res_check_rotate_inverse <<
              "); | evec[0] - evec[0]_orig | = " << ::sqrt(norm2(ev0_orig)) << std::endl;
          }
        }
      } else {
        std::cout << GridLogMessage << "iter < Nminres: do not yet test for convergence\n";
      } // end of iter loop
    }

    std::cout<<GridLogMessage<<"\n NOT converged.\n";
    abort();

  converged:

    if (SkipTest == 1) {
      eval = eval2;
    } else {
      // test quickly
      for (int j=0;j<Nstop;j+=SkipTest) {
        std::cout<<GridLogMessage << "Eigenvalue[" << j << "] = " << eval2[j] << " (" << eval2_copy[j] << ")" << std::endl;
      }
      eval2_copy.resize(eval2.size());
      eval = eval2_copy;
    }

    evec.sortInPlace(eval,reverse);

    {
      // test
      for (int j=0;j<Nstop;j++) {
        std::cout<<GridLogMessage << " |e[" << j << "]|^2 = " << norm2(evec[j]) << std::endl;
      }
    }

    //_sort.push(eval,evec,Nconv);
    //evec.sort(eval,Nconv);

    std::cout<<GridLogMessage << "\n Converged\n Summary :\n";
    std::cout<<GridLogMessage << " -- Iterations = "<< Nconv << "\n";
    std::cout<<GridLogMessage << " -- beta(k)    = "<< beta_k << "\n";
    std::cout<<GridLogMessage << " -- Nconv      = "<< Nconv << "\n";
  }
#endif

 };

}
#endif
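The convergence test above accepts mode j through a Rayleigh quotient and a residual normalised by the spectral-range estimate evalMaxApprox. Restated in formulas (a direct transcription of the code above, nothing new):

\begin{align}
\lambda_j &= \frac{\langle B_j,\, H B_j\rangle}{\langle B_j,\, B_j\rangle}, &
vv_j &= \frac{\lVert H B_j - \lambda_j B_j \rVert^2}{\lambda_{\max}^2} \;<\; \epsilon_{\rm resid}^2,
\end{align}

with \(\lambda_{\max}\) = evalMaxApprox; a mode counts towards Nconv only if all lower modes have already converged (the j == Nconv check).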
@ -0,0 +1,143 @@
namespace Grid {

/*
  BlockProjector

  If _HP_BLOCK_PROJECTORS_ is defined, we assume that _evec is a basis that is not
  fully orthonormalized (to the precision of the coarse field) and we allow for a
  higher-precision coarse field than basis field.
*/
//#define _HP_BLOCK_PROJECTORS_

template<typename Field>
class BlockProjector {
public:

  BasisFieldVector<Field>& _evec;
  BlockedGrid<Field>& _bgrid;

  BlockProjector(BasisFieldVector<Field>& evec, BlockedGrid<Field>& bgrid) : _evec(evec), _bgrid(bgrid) {
  }

  void createOrthonormalBasis(RealD thres = 0.0) {

    GridStopWatch sw;
    sw.Start();

    int cnt = 0;

#pragma omp parallel shared(cnt)
    {
      int lcnt = 0;

#pragma omp for
      for (int b=0;b<_bgrid._o_blocks;b++) {

        for (int i=0;i<_evec._Nm;i++) {

          auto nrm0 = _bgrid.block_sp(b,_evec._v[i],_evec._v[i]);

          // |i> -= <j|i> |j>
          for (int j=0;j<i;j++) {
            _bgrid.block_caxpy(b,_evec._v[i],-_bgrid.block_sp(b,_evec._v[j],_evec._v[i]),_evec._v[j],_evec._v[i]);
          }

          auto nrm = _bgrid.block_sp(b,_evec._v[i],_evec._v[i]);

          auto eps = nrm/nrm0;
          if (Reduce(eps).real() < thres) {
            lcnt++;
          }

          // TODO: if the norm is too small, remove this eigenvector / mark it as not needed; in practice: set it to zero norm here and return a mask
          // that is then used later to decide not to write certain eigenvectors to disk (add a norm calculation before the subtraction step and look at nrm/nrm0 < eps to decide)
          _bgrid.block_cscale(b,1.0 / sqrt(nrm),_evec._v[i]);

        }

      }

#pragma omp critical
      {
        cnt += lcnt;
      }
    }
    sw.Stop();
    std::cout << GridLogMessage << "Gram-Schmidt to create blocked basis took " << sw.Elapsed() << " (" << ((RealD)cnt / (RealD)_bgrid._o_blocks / (RealD)_evec._Nm)
              << " below threshold)" << std::endl;

  }

  template<typename CoarseField>
  void coarseToFine(const CoarseField& in, Field& out) {

    out = zero;
    out.checkerboard = _evec._v[0].checkerboard;

    int Nbasis = sizeof(in._odata[0]._internal._internal) / sizeof(in._odata[0]._internal._internal[0]);
    assert(Nbasis == _evec._Nm);

#pragma omp parallel for
    for (int b=0;b<_bgrid._o_blocks;b++) {
      for (int j=0;j<_evec._Nm;j++) {
        _bgrid.block_caxpy(b,out,in._odata[b]._internal._internal[j],_evec._v[j],out);
      }
    }

  }

  template<typename CoarseField>
  void fineToCoarse(const Field& in, CoarseField& out) {

    out = zero;

    int Nbasis = sizeof(out._odata[0]._internal._internal) / sizeof(out._odata[0]._internal._internal[0]);
    assert(Nbasis == _evec._Nm);

    Field tmp(_bgrid._grid);
    tmp = in;

#pragma omp parallel for
    for (int b=0;b<_bgrid._o_blocks;b++) {
      for (int j=0;j<_evec._Nm;j++) {
        // |rhs> -= <j|rhs> |j>
        auto c = _bgrid.block_sp(b,_evec._v[j],tmp);
        _bgrid.block_caxpy(b,tmp,-c,_evec._v[j],tmp); // may make this more numerically stable
        out._odata[b]._internal._internal[j] = c;
      }
    }

  }

  template<typename CoarseField>
  void deflateFine(BasisFieldVector<CoarseField>& _coef,const std::vector<RealD>& eval,int N,const Field& src_orig,Field& result) {
    result = zero;
    for (int i=0;i<N;i++) {
      Field tmp(result._grid);
      coarseToFine(_coef._v[i],tmp);
      axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
    }
  }

  template<typename CoarseField>
  void deflateCoarse(BasisFieldVector<CoarseField>& _coef,const std::vector<RealD>& eval,int N,const Field& src_orig,Field& result) {
    CoarseField src_coarse(_coef._v[0]._grid);
    CoarseField result_coarse = src_coarse;
    result_coarse = zero;
    fineToCoarse(src_orig,src_coarse);
    for (int i=0;i<N;i++) {
      axpy(result_coarse,TensorRemove(innerProduct(_coef._v[i],src_coarse)) / eval[i],_coef._v[i],result_coarse);
    }
    coarseToFine(result_coarse,result);
  }

  template<typename CoarseField>
  void deflate(BasisFieldVector<CoarseField>& _coef,const std::vector<RealD>& eval,int N,const Field& src_orig,Field& result) {
    // Deflation on the coarse grid is much faster, so use it by default. Deflation on the fine grid is kept for legacy reasons for now.
    deflateCoarse(_coef,eval,N,src_orig,result);
  }

};
}
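A minimal usage sketch of the projector, assuming a LatticeFermion basis and a coarse vector type whose per-site vector length matches _Nm; every name below (FrbGrid, CoarseGrid, CoarseFermion, block, src, reconstructed) is illustrative and not part of the diff:

// Illustrative only: block-project a fine vector to the coarse grid and back.
BasisFieldVector<LatticeFermion> basis(Nm, FrbGrid);    // Nm fine basis vectors
BlockedGrid<LatticeFermion>      bgrid(FrbGrid, block); // e.g. block = {4,4,4,4,4}
BlockProjector<LatticeFermion>   pr(basis, bgrid);

pr.createOrthonormalBasis();        // block Gram-Schmidt over all blocks
CoarseFermion c(CoarseGrid);
pr.fineToCoarse(src, c);            // c[b][j] = <basis_j|src> on block b
pr.coarseToFine(c, reconstructed);  // sum_j c[b][j] |basis_j> on block b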
@ -0,0 +1,401 @@
namespace Grid {

template<typename Field>
class BlockedGrid {
public:
  GridBase* _grid;
  typedef typename Field::scalar_type Coeff_t;
  typedef typename Field::vector_type vCoeff_t;

  std::vector<int> _bs;     // block size
  std::vector<int> _nb;     // number of blocks
  std::vector<int> _l;      // local dimensions irrespective of cb
  std::vector<int> _l_cb;   // local dimensions of checkerboarded vector
  std::vector<int> _l_cb_o; // local dimensions of inner checkerboarded vector
  std::vector<int> _bs_cb;  // block size in checkerboarded vector
  std::vector<int> _nb_o;   // number of blocks of simd o-sites

  int _nd, _blocks, _cf_size, _cf_block_size, _cf_o_block_size, _o_blocks, _block_sites;

  BlockedGrid(GridBase* grid, const std::vector<int>& block_size) :
    _grid(grid), _bs(block_size), _nd((int)_bs.size()),
    _nb(block_size), _l(block_size), _l_cb(block_size), _nb_o(block_size),
    _l_cb_o(block_size), _bs_cb(block_size) {

    _blocks = 1;
    _o_blocks = 1;
    _l = grid->FullDimensions();
    _l_cb = grid->LocalDimensions();
    _l_cb_o = grid->_rdimensions;

    _cf_size = 1;
    _block_sites = 1;
    for (int i=0;i<_nd;i++) {
      _l[i] /= grid->_processors[i];

      assert(!(_l[i] % _bs[i])); // lattice must accommodate choice of blocksize

      int r = _l[i] / _l_cb[i];
      assert(!(_bs[i] % r)); // checkerboarding must accommodate choice of blocksize
      _bs_cb[i] = _bs[i] / r;
      _block_sites *= _bs_cb[i];
      _nb[i] = _l[i] / _bs[i];
      _nb_o[i] = _nb[i] / _grid->_simd_layout[i];
      if (_nb[i] % _grid->_simd_layout[i]) { // simd must accommodate choice of blocksize
        std::cout << GridLogMessage << "Problem: _nb[" << i << "] = " << _nb[i] << " _grid->_simd_layout[" << i << "] = " << _grid->_simd_layout[i] << std::endl;
        assert(0);
      }
      _blocks *= _nb[i];
      _o_blocks *= _nb_o[i];
      _cf_size *= _l[i];
    }

    _cf_size *= 12 / 2;
    _cf_block_size = _cf_size / _blocks;
    _cf_o_block_size = _cf_size / _o_blocks;

    std::cout << GridLogMessage << "BlockedGrid:" << std::endl;
    std::cout << GridLogMessage << " _l     = " << _l << std::endl;
    std::cout << GridLogMessage << " _l_cb  = " << _l_cb << std::endl;
    std::cout << GridLogMessage << " _l_cb_o = " << _l_cb_o << std::endl;
    std::cout << GridLogMessage << " _bs    = " << _bs << std::endl;
    std::cout << GridLogMessage << " _bs_cb = " << _bs_cb << std::endl;

    std::cout << GridLogMessage << " _nb    = " << _nb << std::endl;
    std::cout << GridLogMessage << " _nb_o  = " << _nb_o << std::endl;
    std::cout << GridLogMessage << " _blocks = " << _blocks << std::endl;
    std::cout << GridLogMessage << " _o_blocks = " << _o_blocks << std::endl;
    std::cout << GridLogMessage << " sizeof(vCoeff_t) = " << sizeof(vCoeff_t) << std::endl;
    std::cout << GridLogMessage << " _cf_size = " << _cf_size << std::endl;
    std::cout << GridLogMessage << " _cf_block_size = " << _cf_block_size << std::endl;
    std::cout << GridLogMessage << " _block_sites = " << _block_sites << std::endl;
    std::cout << GridLogMessage << " _grid->oSites() = " << _grid->oSites() << std::endl;

    // _grid->Barrier();
    //abort();
  }

  void block_to_coor(int b, std::vector<int>& x0) {

    std::vector<int> bcoor;
    bcoor.resize(_nd);
    x0.resize(_nd);
    assert(b < _o_blocks);
    Lexicographic::CoorFromIndex(bcoor,b,_nb_o);
    int i;

    for (i=0;i<_nd;i++) {
      x0[i] = bcoor[i]*_bs_cb[i];
    }

    //std::cout << GridLogMessage << "Map block b -> " << x0 << std::endl;

  }

  void block_site_to_o_coor(const std::vector<int>& x0, std::vector<int>& coor, int i) {
    Lexicographic::CoorFromIndex(coor,i,_bs_cb);
    for (int j=0;j<_nd;j++)
      coor[j] += x0[j];
  }

  int block_site_to_o_site(const std::vector<int>& x0, int i) {
    std::vector<int> coor;  coor.resize(_nd);
    block_site_to_o_coor(x0,coor,i);
    Lexicographic::IndexFromCoor(coor,i,_l_cb_o);
    return i;
  }

  vCoeff_t block_sp(int b, const Field& x, const Field& y) {

    std::vector<int> x0;
    block_to_coor(b,x0);

    vCoeff_t ret = 0.0;
    for (int i=0;i<_block_sites;i++) { // only odd sites
      int ss = block_site_to_o_site(x0,i);
      ret += TensorRemove(innerProduct(x._odata[ss],y._odata[ss]));
    }

    return ret;

  }

  vCoeff_t block_sp(int b, const Field& x, const std::vector< ComplexD >& y) {

    std::vector<int> x0;
    block_to_coor(b,x0);

    constexpr int nsimd = sizeof(vCoeff_t) / sizeof(Coeff_t);
    int lsize = _cf_o_block_size / _block_sites;

    std::vector< ComplexD > ret(nsimd);
    for (int i=0;i<nsimd;i++)
      ret[i] = 0.0;

    for (int i=0;i<_block_sites;i++) { // only odd sites
      int ss = block_site_to_o_site(x0,i);

      int n = lsize / nsimd;
      for (int l=0;l<n;l++) {
        for (int j=0;j<nsimd;j++) {
          int t = lsize * i + l*nsimd + j;

          ret[j] += conjugate(((Coeff_t*)&x._odata[ss]._internal)[l*nsimd + j]) * y[t];
        }
      }
    }

    vCoeff_t vret;
    for (int i=0;i<nsimd;i++)
      ((Coeff_t*)&vret)[i] = (Coeff_t)ret[i];

    return vret;

  }

  template<class T>
  void vcaxpy(iScalar<T>& r,const vCoeff_t& a,const iScalar<T>& x,const iScalar<T>& y) {
    vcaxpy(r._internal,a,x._internal,y._internal);
  }

  template<class T,int N>
  void vcaxpy(iVector<T,N>& r,const vCoeff_t& a,const iVector<T,N>& x,const iVector<T,N>& y) {
    for (int i=0;i<N;i++)
      vcaxpy(r._internal[i],a,x._internal[i],y._internal[i]);
  }

  void vcaxpy(vCoeff_t& r,const vCoeff_t& a,const vCoeff_t& x,const vCoeff_t& y) {
    r = a*x + y;
  }

  void block_caxpy(int b, Field& ret, const vCoeff_t& a, const Field& x, const Field& y) {

    std::vector<int> x0;
    block_to_coor(b,x0);

    for (int i=0;i<_block_sites;i++) { // only odd sites
      int ss = block_site_to_o_site(x0,i);
      vcaxpy(ret._odata[ss],a,x._odata[ss],y._odata[ss]);
    }

  }

  void block_caxpy(int b, std::vector< ComplexD >& ret, const vCoeff_t& a, const Field& x, const std::vector< ComplexD >& y) {
    std::vector<int> x0;
    block_to_coor(b,x0);

    constexpr int nsimd = sizeof(vCoeff_t) / sizeof(Coeff_t);
    int lsize = _cf_o_block_size / _block_sites;

    for (int i=0;i<_block_sites;i++) { // only odd sites
      int ss = block_site_to_o_site(x0,i);

      int n = lsize / nsimd;
      for (int l=0;l<n;l++) {
        vCoeff_t r = a * ((vCoeff_t*)&x._odata[ss]._internal)[l];

        for (int j=0;j<nsimd;j++) {
          int t = lsize * i + l*nsimd + j;
          ret[t] = y[t] + ((Coeff_t*)&r)[j];
        }
      }
    }

  }

  void block_set(int b, Field& ret, const std::vector< ComplexD >& x) {
    std::vector<int> x0;
    block_to_coor(b,x0);

    int lsize = _cf_o_block_size / _block_sites;

    for (int i=0;i<_block_sites;i++) { // only odd sites
      int ss = block_site_to_o_site(x0,i);

      for (int l=0;l<lsize;l++)
        ((Coeff_t*)&ret._odata[ss]._internal)[l] = (Coeff_t)x[lsize * i + l]; // convert precision
    }

  }

  void block_get(int b, const Field& ret, std::vector< ComplexD >& x) {
    std::vector<int> x0;
    block_to_coor(b,x0);

    int lsize = _cf_o_block_size / _block_sites;

    for (int i=0;i<_block_sites;i++) { // only odd sites
      int ss = block_site_to_o_site(x0,i);

      for (int l=0;l<lsize;l++)
        x[lsize * i + l] = (ComplexD)((Coeff_t*)&ret._odata[ss]._internal)[l];
    }

  }

  template<class T>
  void vcscale(iScalar<T>& r,const vCoeff_t& a,const iScalar<T>& x) {
    vcscale(r._internal,a,x._internal);
  }

  template<class T,int N>
  void vcscale(iVector<T,N>& r,const vCoeff_t& a,const iVector<T,N>& x) {
    for (int i=0;i<N;i++)
      vcscale(r._internal[i],a,x._internal[i]);
  }

  void vcscale(vCoeff_t& r,const vCoeff_t& a,const vCoeff_t& x) {
    r = a*x;
  }

  void block_cscale(int b, const vCoeff_t& a, Field& ret) {

    std::vector<int> x0;
    block_to_coor(b,x0);

    for (int i=0;i<_block_sites;i++) { // only odd sites
      int ss = block_site_to_o_site(x0,i);
      vcscale(ret._odata[ss],a,ret._odata[ss]);
    }
  }

  void getCanonicalBlockOffset(int cb, std::vector<int>& x0) {
    const int ndim = 5;
    assert(_nb.size() == ndim);
    std::vector<int> _nbc = { _nb[1], _nb[2], _nb[3], _nb[4], _nb[0] };
    std::vector<int> _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] };
    x0.resize(ndim);

    assert(cb >= 0);
    assert(cb < _nbc[0]*_nbc[1]*_nbc[2]*_nbc[3]*_nbc[4]);

    Lexicographic::CoorFromIndex(x0,cb,_nbc);
    int i;

    for (i=0;i<ndim;i++) {
      x0[i] *= _bsc[i];
    }

    //if (cb < 2)
    //  std::cout << GridLogMessage << "Map: " << cb << " To: " << x0 << std::endl;
  }

  void pokeBlockOfVectorCanonical(int cb,Field& v,const std::vector<float>& buf) {
    std::vector<int> _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] };
    std::vector<int> ldim = v._grid->LocalDimensions();
    std::vector<int> cldim = { ldim[1], ldim[2], ldim[3], ldim[4], ldim[0] };
    const int _nbsc = _bs_cb[0]*_bs_cb[1]*_bs_cb[2]*_bs_cb[3]*_bs_cb[4];
    // take canonical block cb of v and put it in canonical ordering in buf
    std::vector<int> cx0;
    getCanonicalBlockOffset(cb,cx0);

#pragma omp parallel
    {
      std::vector<int> co0,cl0;
      co0=cx0; cl0=cx0;

#pragma omp for
      for (int i=0;i<_nbsc;i++) {
        Lexicographic::CoorFromIndex(co0,2*i,_bsc); // 2* for eo
        for (int j=0;j<(int)_bsc.size();j++)
          cl0[j] = cx0[j] + co0[j];

        std::vector<int> l0 = { cl0[4], cl0[0], cl0[1], cl0[2], cl0[3] };
        int oi = v._grid->oIndex(l0);
        int ii = v._grid->iIndex(l0);
        int lti = i;

        //if (cb < 2 && i<2)
        //  std::cout << GridLogMessage << "Map: " << cb << ", " << i << " To: " << cl0 << ", " << cx0 << ", " << oi << ", " << ii << std::endl;

        for (int s=0;s<4;s++)
          for (int c=0;c<3;c++) {
            Coeff_t& ld = ((Coeff_t*)&v._odata[oi]._internal._internal[s]._internal[c])[ii];
            int ti = 12*lti + 3*s + c;
            ld = Coeff_t(buf[2*ti+0], buf[2*ti+1]);
          }
      }
    }
  }

  void peekBlockOfVectorCanonical(int cb,const Field& v,std::vector<float>& buf) {
    std::vector<int> _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] };
    std::vector<int> ldim = v._grid->LocalDimensions();
    std::vector<int> cldim = { ldim[1], ldim[2], ldim[3], ldim[4], ldim[0] };
    const int _nbsc = _bs_cb[0]*_bs_cb[1]*_bs_cb[2]*_bs_cb[3]*_bs_cb[4];
    // take canonical block cb of v and put it in canonical ordering in buf
    std::vector<int> cx0;
    getCanonicalBlockOffset(cb,cx0);

    buf.resize(_cf_block_size * 2);

#pragma omp parallel
    {
      std::vector<int> co0,cl0;
      co0=cx0; cl0=cx0;

#pragma omp for
      for (int i=0;i<_nbsc;i++) {
        Lexicographic::CoorFromIndex(co0,2*i,_bsc); // 2* for eo
        for (int j=0;j<(int)_bsc.size();j++)
          cl0[j] = cx0[j] + co0[j];

        std::vector<int> l0 = { cl0[4], cl0[0], cl0[1], cl0[2], cl0[3] };
        int oi = v._grid->oIndex(l0);
        int ii = v._grid->iIndex(l0);
        int lti = i;

        //if (cb < 2 && i<2)
        //  std::cout << GridLogMessage << "Map: " << cb << ", " << i << " To: " << cl0 << ", " << cx0 << ", " << oi << ", " << ii << std::endl;

        for (int s=0;s<4;s++)
          for (int c=0;c<3;c++) {
            Coeff_t& ld = ((Coeff_t*)&v._odata[oi]._internal._internal[s]._internal[c])[ii];
            int ti = 12*lti + 3*s + c;
            buf[2*ti+0] = ld.real();
            buf[2*ti+1] = ld.imag();
          }
      }
    }
  }

  int globalToLocalCanonicalBlock(int slot,const std::vector<int>& src_nodes,int nb) {
    // processor coordinate
    int _nd = (int)src_nodes.size();
    std::vector<int> _src_nodes = src_nodes;
    std::vector<int> pco(_nd);
    Lexicographic::CoorFromIndex(pco,slot,_src_nodes);
    std::vector<int> cpco = { pco[1], pco[2], pco[3], pco[4], pco[0] };

    // get local block
    std::vector<int> _nbc = { _nb[1], _nb[2], _nb[3], _nb[4], _nb[0] };
    assert(_nd == 5);
    std::vector<int> c_src_local_blocks(_nd);
    for (int i=0;i<_nd;i++) {
      assert(_grid->_fdimensions[i] % (src_nodes[i] * _bs[i]) == 0);
      c_src_local_blocks[(i+4) % 5] = _grid->_fdimensions[i] / src_nodes[i] / _bs[i];
    }
    std::vector<int> cbcoor(_nd); // coordinate of block in slot in canonical form
    Lexicographic::CoorFromIndex(cbcoor,nb,c_src_local_blocks);

    // cpco, cbcoor
    std::vector<int> clbcoor(_nd);
    for (int i=0;i<_nd;i++) {
      int cgcoor = cpco[i] * c_src_local_blocks[i] + cbcoor[i]; // global block coordinate
      int pcoor = cgcoor / _nbc[i]; // processor coordinate in my Grid
      int tpcoor = _grid->_processor_coor[(i+1)%5];
      if (pcoor != tpcoor)
        return -1;
      clbcoor[i] = cgcoor - tpcoor * _nbc[i]; // canonical local block coordinate for canonical dimension i
    }

    int lnb;
    Lexicographic::IndexFromCoor(clbcoor,lnb,_nbc);
    //std::cout << "Mapped slot = " << slot << " nb = " << nb << " to " << lnb << std::endl;
    return lnb;
  }

};

}
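All the block primitives above share one pattern: map the block index to its base coordinate, then loop over the o-sites of that block. A minimal sketch built only from primitives defined above (the function name and LatticeFermion field type are illustrative):

// Illustrative only: normalize one block b of a field v to unit block-norm.
void normalizeBlock(BlockedGrid<LatticeFermion>& bg, LatticeFermion& v, int b) {
  auto nrm = bg.block_sp(b, v, v);        // block-local <v,v>, one value per simd lane
  bg.block_cscale(b, 1.0 / sqrt(nrm), v); // rescale only the sites of block b
}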
@ -0,0 +1,163 @@
namespace Grid {

template<class Field>
class BasisFieldVector {
public:
  int _Nm;

  typedef typename Field::scalar_type Coeff_t;
  typedef typename Field::vector_type vCoeff_t;
  typedef typename Field::vector_object vobj;
  typedef typename vobj::scalar_object sobj;

  std::vector<Field> _v; // _Nfull vectors

  void report(int n,GridBase* value) {

    std::cout << GridLogMessage << "BasisFieldVector allocated:\n";
    std::cout << GridLogMessage << " Delta N = " << n << "\n";
    std::cout << GridLogMessage << " Size of full vectors (size) = " <<
      ((double)n*sizeof(vobj)*value->oSites() / 1024./1024./1024.) << " GB\n";
    std::cout << GridLogMessage << " Size = " << _v.size() << " Capacity = " << _v.capacity() << std::endl;

    value->Barrier();

    if (value->IsBoss()) {
      system("cat /proc/meminfo");
    }

    value->Barrier();

  }

  BasisFieldVector(int Nm,GridBase* value) : _Nm(Nm), _v(Nm,value) {
    report(Nm,value);
  }

  ~BasisFieldVector() {
  }

  Field& operator[](int i) {
    return _v[i];
  }

  void orthogonalize(Field& w, int k) {
    for(int j=0; j<k; ++j){
      Coeff_t ip = (Coeff_t)innerProduct(_v[j],w);
      w = w - ip*_v[j];
    }
  }

  void rotate(std::vector<RealD>& Qt,int j0, int j1, int k0,int k1,int Nm) {

    GridBase* grid = _v[0]._grid;

#pragma omp parallel
    {
      std::vector < vobj > B(Nm);

#pragma omp for
      for(int ss=0;ss < grid->oSites();ss++){
        for(int j=j0; j<j1; ++j) B[j]=0.;

        for(int j=j0; j<j1; ++j){
          for(int k=k0; k<k1; ++k){
            B[j] += Qt[k+Nm*j] * _v[k]._odata[ss];
          }
        }
        for(int j=j0; j<j1; ++j){
          _v[j]._odata[ss] = B[j];
        }
      }
    }

  }

  size_t size() const {
    return _Nm;
  }

  void resize(int n) {
    if (n > _Nm)
      _v.reserve(n);

    _v.resize(n,_v[0]._grid);

    if (n < _Nm)
      _v.shrink_to_fit();

    report(n - _Nm,_v[0]._grid);

    _Nm = n;
  }

  std::vector<int> getIndex(std::vector<RealD>& sort_vals) {

    std::vector<int> idx(sort_vals.size());
    iota(idx.begin(), idx.end(), 0);

    // sort indexes based on comparing values in sort_vals
    sort(idx.begin(), idx.end(),
         [&sort_vals](int i1, int i2) {return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);});

    return idx;
  }

  void reorderInPlace(std::vector<RealD>& sort_vals, std::vector<int>& idx) {
    GridStopWatch gsw;
    gsw.Start();

    int nswaps = 0;
    for (size_t i=0;i<idx.size();i++) {
      if (idx[i] != (int)i) {

        // find proper place (this could be done in logarithmic time, don't bother for now)
        size_t j;
        for (j=i;j<idx.size();j++)
          if (idx[j]==(int)i)
            break;
        assert(j!=idx.size());

        Field _t(_v[0]._grid);
        _t = _v[idx[j]];
        _v[idx[j]] = _v[idx[i]];
        _v[idx[i]] = _t;

        RealD _td = sort_vals[idx[j]];
        sort_vals[idx[j]] = sort_vals[idx[i]];
        sort_vals[idx[i]] = _td;

        int _tt = idx[i];
        idx[i] = idx[j];
        idx[j] = _tt;

        nswaps++;
      }
    }

    // sort values
    gsw.Stop();
    std::cout << GridLogMessage << "Sorted eigenspace in place in " << gsw.Elapsed() << " using " << nswaps << " swaps" << std::endl;
  }

  void sortInPlace(std::vector<RealD>& sort_vals, bool reverse) {

    std::vector<int> idx = getIndex(sort_vals);
    if (reverse)
      std::reverse(idx.begin(), idx.end());

    reorderInPlace(sort_vals,idx);

  }

  void deflate(const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
    result = zero;
    int N = (int)_v.size();
    for (int i=0;i<N;i++) {
      Field& tmp = _v[i];
      axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
    }
  }

};
}
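The deflate method above builds the standard deflated initial guess x0 = sum_i <v_i,b>/lambda_i v_i from the stored eigenpairs. A hedged usage sketch (HermOp, FrbGrid, src and the tolerances are illustrative, not from the diff):

// Illustrative only: deflated CG start from converged eigenpairs in `basis`.
LatticeFermion guess(FrbGrid);
basis.deflate(eval, src, guess);          // guess = sum_i <v_i,src>/eval_i * v_i
ConjugateGradient<LatticeFermion> CG(1.0e-8, 10000);
CG(HermOp, src, guess);                   // CG then refines the deflated guess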
@ -52,8 +52,8 @@ class ConjugateGradient : public OperatorFunction<Field> {
      MaxIterations(maxit),
      ErrorOnNoConverge(err_on_no_conv){};

  void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
                  Field &psi) {
  void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {

    psi.checkerboard = src.checkerboard;
    conformable(psi, src);

@ -123,8 +123,11 @@ class ConjugateGradient : public OperatorFunction<Field> {
      p = p * b + r;

      LinalgTimer.Stop();

      std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
                << " residual " << cp << " target " << rsq << std::endl;
      std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl;
      std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl;

      // Stopping condition
      if (cp <= rsq) {
@ -132,8 +135,6 @@ class ConjugateGradient : public OperatorFunction<Field> {
        Linop.HermOpAndNorm(psi, mmp, d, qq);
        p = mmp - src;

        RealD mmpnorm = sqrt(norm2(mmp));
        RealD psinorm = sqrt(norm2(psi));
        RealD srcnorm = sqrt(norm2(src));
        RealD resnorm = sqrt(norm2(p));
        RealD true_residual = resnorm / srcnorm;
@ -157,8 +158,10 @@ class ConjugateGradient : public OperatorFunction<Field> {
    }
    std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
              << std::endl;

    if (ErrorOnNoConverge) assert(0);
    IterationsToComplete = k;

  }
};
}
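For reference, the quantities in the new GridLogDebug lines follow the standard CG recurrences, and comparing b_pred with b is a cheap round-off diagnostic: in exact arithmetic they coincide. In the code's notation (c = r_k.r_k, d = p_k.Ap_k, qq = |Ap_k|^2):

\begin{align}
a &= \frac{c}{d}, &
b &= \frac{\lVert r_{k+1}\rVert^2}{c}, &
b_{\rm pred} &= \frac{a\,(a\,qq - d)}{c},
\end{align}

and since \(\lVert r_{k+1}\rVert^2 = c - 2ad + a^2 qq\) with \(ad = c\), one gets \(b_{\rm pred} = b\) up to rounding error.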
@ -60,6 +60,7 @@ namespace Grid {
    }

    void operator() (const FieldD &src_d_in, FieldD &sol_d){

      TotalInnerIterations = 0;

      GridStopWatch TotalTimer;
256	lib/algorithms/iterative/ConjugateGradientReliableUpdate.h	Normal file
@ -0,0 +1,256 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/algorithms/iterative/ConjugateGradientReliableUpdate.h

    Copyright (C) 2015

    Author: Christopher Kelly <ckelly@phys.columbia.edu>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/*  END LEGAL */
#ifndef GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H
#define GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H

namespace Grid {

  template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
  class ConjugateGradientReliableUpdate : public LinearFunction<FieldD> {
  public:
    bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
                            // Defaults to true.
    RealD Tolerance;
    Integer MaxIterations;
    Integer IterationsToComplete; // Number of iterations the CG took to finish. Filled in upon completion.
    Integer ReliableUpdatesPerformed;

    bool DoFinalCleanup;         // Final DP cleanup, defaults to true
    Integer IterationsToCleanup; // Final DP cleanup step iterations

    LinearOperatorBase<FieldF> &Linop_f;
    LinearOperatorBase<FieldD> &Linop_d;
    GridBase* SinglePrecGrid;
    RealD Delta; // reliable update parameter

    // Optional ability to switch to a different linear operator once the residual reaches a certain point. Useful for single/half -> single/single
    LinearOperatorBase<FieldF> *Linop_fallback;
    RealD fallback_transition_tol;

    ConjugateGradientReliableUpdate(RealD tol, Integer maxit, RealD _delta, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d, bool err_on_no_conv = true)
      : Tolerance(tol),
        MaxIterations(maxit),
        Delta(_delta),
        Linop_f(_Linop_f),
        Linop_d(_Linop_d),
        SinglePrecGrid(_sp_grid),
        ErrorOnNoConverge(err_on_no_conv),
        DoFinalCleanup(true),
        Linop_fallback(NULL)
    {};

    void setFallbackLinop(LinearOperatorBase<FieldF> &_Linop_fallback, const RealD _fallback_transition_tol){
      Linop_fallback = &_Linop_fallback;
      fallback_transition_tol = _fallback_transition_tol;
    }

    void operator()(const FieldD &src, FieldD &psi) {
      LinearOperatorBase<FieldF> *Linop_f_use = &Linop_f;
      bool using_fallback = false;

      psi.checkerboard = src.checkerboard;
      conformable(psi, src);

      RealD cp, c, a, d, b, ssq, qq, b_pred;

      FieldD p(src);
      FieldD mmp(src);
      FieldD r(src);

      // Initial residual computation & set up
      RealD guess = norm2(psi);
      assert(std::isnan(guess) == 0);

      Linop_d.HermOpAndNorm(psi, mmp, d, b);

      r = src - mmp;
      p = r;

      a = norm2(p);
      cp = a;
      ssq = norm2(src);

      std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: guess " << guess << std::endl;
      std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:   src " << ssq << std::endl;
      std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:    mp " << d << std::endl;
      std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:   mmp " << b << std::endl;
      std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:  cp,r " << cp << std::endl;
      std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:     p " << a << std::endl;

      RealD rsq = Tolerance * Tolerance * ssq;

      // Check if guess is really REALLY good :)
      if (cp <= rsq) {
        std::cout << GridLogMessage << "ConjugateGradientReliableUpdate guess was REALLY good\n";
        std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq) << std::endl;
        return;
      }

      // Single prec initialization
      FieldF r_f(SinglePrecGrid);
      r_f.checkerboard = r.checkerboard;
      precisionChange(r_f, r);

      FieldF psi_f(r_f);
      psi_f = zero;

      FieldF p_f(r_f);
      FieldF mmp_f(r_f);

      RealD MaxResidSinceLastRelUp = cp; // initial residual

      std::cout << GridLogIterative << std::setprecision(4)
                << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;

      GridStopWatch LinalgTimer;
      GridStopWatch MatrixTimer;
      GridStopWatch SolverTimer;

      SolverTimer.Start();
      int k = 0;
      int l = 0;

      for (k = 1; k <= MaxIterations; k++) {
        c = cp;

        MatrixTimer.Start();
        Linop_f_use->HermOpAndNorm(p_f, mmp_f, d, qq);
        MatrixTimer.Stop();

        LinalgTimer.Start();

        a = c / d;
        b_pred = a * (a * qq - d) / c;

        cp = axpy_norm(r_f, -a, mmp_f, r_f);
        b = cp / c;

        // Fuse these loops; should be really easy
        psi_f = a * p_f + psi_f;
        //p_f = p_f * b + r_f;

        LinalgTimer.Stop();

        std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: Iteration " << k
                  << " residual " << cp << " target " << rsq << std::endl;
        std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl;
        std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl;

        if(cp > MaxResidSinceLastRelUp){
          std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: updating MaxResidSinceLastRelUp : " << MaxResidSinceLastRelUp << " -> " << cp << std::endl;
          MaxResidSinceLastRelUp = cp;
        }

        // Stopping condition
        if (cp <= rsq) {
          // Although not written in the paper, I assume that I have to add on the final solution
          precisionChange(mmp, psi_f);
          psi = psi + mmp;

          SolverTimer.Stop();
          Linop_d.HermOpAndNorm(psi, mmp, d, qq);
          p = mmp - src;

          RealD srcnorm = sqrt(norm2(src));
          RealD resnorm = sqrt(norm2(p));
          RealD true_residual = resnorm / srcnorm;

          std::cout << GridLogMessage << "ConjugateGradientReliableUpdate Converged on iteration " << k << " after " << l << " reliable updates" << std::endl;
          std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq) << std::endl;
          std::cout << GridLogMessage << "\tTrue residual " << true_residual << std::endl;
          std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;

          std::cout << GridLogMessage << "Time breakdown " << std::endl;
          std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() << std::endl;
          std::cout << GridLogMessage << "\tMatrix  " << MatrixTimer.Elapsed() << std::endl;
          std::cout << GridLogMessage << "\tLinalg  " << LinalgTimer.Elapsed() << std::endl;

          IterationsToComplete = k;
          ReliableUpdatesPerformed = l;

          if(DoFinalCleanup){
            // Do a final CG to clean up
            std::cout << GridLogMessage << "ConjugateGradientReliableUpdate performing final cleanup.\n";
            ConjugateGradient<FieldD> CG(Tolerance,MaxIterations);
            CG.ErrorOnNoConverge = ErrorOnNoConverge;
            CG(Linop_d,src,psi);
            IterationsToCleanup = CG.IterationsToComplete;
          }
          else if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);

          std::cout << GridLogMessage << "ConjugateGradientReliableUpdate complete.\n";
          return;
        }
        else if(cp < Delta * MaxResidSinceLastRelUp) { // reliable update
          std::cout << GridLogMessage << "ConjugateGradientReliableUpdate "
                    << cp << "(residual) < " << Delta << "(Delta) * " << MaxResidSinceLastRelUp << "(MaxResidSinceLastRelUp) on iteration " << k << " : performing reliable update\n";
          precisionChange(mmp, psi_f);
          psi = psi + mmp;

          Linop_d.HermOpAndNorm(psi, mmp, d, qq);
          r = src - mmp;

          psi_f = zero;
          precisionChange(r_f, r);
          cp = norm2(r);
          MaxResidSinceLastRelUp = cp;

          b = cp/c;

          std::cout << GridLogMessage << "ConjugateGradientReliableUpdate new residual " << cp << std::endl;

          l = l+1;
        }

        p_f = p_f * b + r_f; // updating the search vector after a reliable update appears to help convergence

        if(!using_fallback && Linop_fallback != NULL && cp < fallback_transition_tol){
          std::cout << GridLogMessage << "ConjugateGradientReliableUpdate switching to fallback linear operator on iteration " << k << " at residual " << cp << std::endl;
          Linop_f_use = Linop_fallback;
          using_fallback = true;
        }

      }
      std::cout << GridLogMessage << "ConjugateGradientReliableUpdate did NOT converge"
                << std::endl;

      if (ErrorOnNoConverge) assert(0);
      IterationsToComplete = k;
      ReliableUpdatesPerformed = l;
    }
  };


};

#endif
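A hedged usage sketch of the reliable-update solver. The grids, the operator pair and the Delta = 0.1 value are illustrative assumptions, not from the diff; the pattern is a double-precision operator plus a single-precision clone on the matching single-precision grid:

// Illustrative only: mixed-precision CG with reliable updates.
ConjugateGradientReliableUpdate<LatticeFermionD,LatticeFermionF>
  RelCG(1.0e-8, 10000, 0.1, FrbGridF, HermOpF, HermOpD);
RelCG.DoFinalCleanup = true;                // finish with a double-precision CG pass
//RelCG.setFallbackLinop(HermOpF2, 1e-5);   // optional operator switch below this residual
RelCG(src_d, sol_d);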
@ -1,81 +0,0 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/algorithms/iterative/EigenSort.h

    Copyright (C) 2015

    Author: Peter Boyle <paboyle@ph.ed.ac.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/*  END LEGAL */
#ifndef GRID_EIGENSORT_H
#define GRID_EIGENSORT_H


namespace Grid {
  /////////////////////////////////////////////////////////////
  // Eigen sorter to begin with
  /////////////////////////////////////////////////////////////

  template<class Field>
  class SortEigen {
  private:

    //hacking for testing for now
  private:
    static bool less_lmd(RealD left,RealD right){
      return left > right;
    }
    static bool less_pair(std::pair<RealD,Field const*>& left,
                          std::pair<RealD,Field const*>& right){
      return left.first > (right.first);
    }


  public:

    void push(DenseVector<RealD>& lmd,
              DenseVector<Field>& evec,int N) {
      DenseVector<Field> cpy(lmd.size(),evec[0]._grid);
      for(int i=0;i<lmd.size();i++) cpy[i] = evec[i];

      DenseVector<std::pair<RealD, Field const*> > emod(lmd.size());
      for(int i=0;i<lmd.size();++i)
        emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]);

      partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair);

      typename DenseVector<std::pair<RealD, Field const*> >::iterator it = emod.begin();
      for(int i=0;i<N;++i){
        lmd[i]=it->first;
        evec[i]=*(it->second);
        ++it;
      }
    }
    void push(DenseVector<RealD>& lmd,int N) {
      std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd);
    }
    bool saturated(RealD lmd, RealD thrs) {
      return fabs(lmd) > fabs(thrs);
    }
  };

}
#endif
File diff suppressed because it is too large
1222	lib/algorithms/iterative/ImplicitlyRestartedLanczosCJ.h	Normal file
File diff suppressed because it is too large
@ -7,6 +7,7 @@
    Copyright (C) 2015

Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Chulwoo Jung <chulwoo@bnl.gov>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@ -53,16 +54,194 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
  * M psi = eta
  ***********************
  *Odd
  * i)  (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o
  * i)  D_oo psi_o = L^{-1} eta_o
  *       eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
  *
  * Wilson:
  *   (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o
  * Stag:
  *   D_oo psi_o = L^{-1} eta = (eta_o - Moe Mee^{-1} eta_e)
  *
  * L^{-1} eta = (  1              0 ) ( eta_e )
  *              ( -Moe Mee^{-1}   1 ) ( eta_o )
  *
  *Even
  * ii)  Mee psi_e + Meo psi_o = src_e
  *
  *   => sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
  *
  *
  * TODO: Other options:
  *
  * a) change checkerboards for Schur e<->o
  *
  * Left precon by Moo^-1
  * b) Doo^{dag} M_oo^-dag Moo^-1 Doo psi_0 = (D_oo)^dag M_oo^-dag Moo^-1 L^{-1} eta_o
  *      eta_o' = (D_oo)^dag M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e)
  *
  * Right precon by Moo^-1
  * c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1} eta_o
  *      eta_o' = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
  *      psi_o  = M_oo^-1 phi_o
  * TODO: Deflation
  */
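Written out in full (a restatement of the comment above, not new material), the red-black Schur solve proceeds in three steps:

\begin{align}
\eta_o' &= \eta_o - M_{oe}\,M_{ee}^{-1}\,\eta_e
  && \text{(transform the odd source)}\\
D_{oo}^{\dagger} D_{oo}\,\psi_o &= D_{oo}^{\dagger}\,\eta_o'
  && \text{(Wilson: solve the normal equations)}\\
D_{oo}\,\psi_o &= \eta_o'
  && \text{(staggered: solve the odd-odd system directly)}\\
\psi_e &= M_{ee}^{-1}\left(\eta_e - M_{eo}\,\psi_o\right)
  && \text{(back-substitute the even solution)}
\end{align}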
namespace Grid {

  ///////////////////////////////////////////////////////////////////////////////////////////////////////
  // Take a matrix and form a Red Black solver calling a Herm solver
  // Use of RB info prevents making SchurRedBlackSolve conform to standard interface
  ///////////////////////////////////////////////////////////////////////////////////////////////////////

  template<class Field> class SchurRedBlackStaggeredSolve {
  private:
    OperatorFunction<Field> & _HermitianRBSolver;
    int CBfactorise;
  public:

    /////////////////////////////////////////////////////
    // Wrap the usual normal equations Schur trick
    /////////////////////////////////////////////////////
    SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver) :
      _HermitianRBSolver(HermitianRBSolver)
    {
      CBfactorise=0;
    };

    template<class Matrix>
    void operator() (Matrix & _Matrix,const Field &in, Field &out){

      // FIXME CGdiagonalMee not implemented virtual function
      // FIXME use CBfactorise to control schur decomp
      GridBase *grid  = _Matrix.RedBlackGrid();
      GridBase *fgrid = _Matrix.Grid();

      SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);

      Field src_e(grid);
      Field src_o(grid);
      Field sol_e(grid);
      Field sol_o(grid);
      Field tmp(grid);
      Field Mtmp(grid);
      Field resid(fgrid);

      pickCheckerboard(Even,src_e,in);
      pickCheckerboard(Odd ,src_o,in);
      pickCheckerboard(Even,sol_e,out);
      pickCheckerboard(Odd ,sol_o,out);

      /////////////////////////////////////////////////////
      // src_o = (source_o - Moe MeeInv source_e)
      /////////////////////////////////////////////////////
      _Matrix.MooeeInv(src_e,tmp);     assert(   tmp.checkerboard ==Even);
      _Matrix.Meooe   (tmp,Mtmp);      assert(  Mtmp.checkerboard ==Odd);
      tmp=src_o-Mtmp;                  assert(   tmp.checkerboard ==Odd);

      src_o = tmp;                     assert( src_o.checkerboard ==Odd);
      // _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source

      //////////////////////////////////////////////////////////////
      // Call the red-black solver
      //////////////////////////////////////////////////////////////
      std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl;
      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);

      ///////////////////////////////////////////////////
      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
      ///////////////////////////////////////////////////
      _Matrix.Meooe(sol_o,tmp);        assert(   tmp.checkerboard ==Even);
      src_e = src_e-tmp;               assert( src_e.checkerboard ==Even);
      _Matrix.MooeeInv(src_e,sol_e);   assert( sol_e.checkerboard ==Even);

      setCheckerboard(out,sol_e);      assert( sol_e.checkerboard ==Even);
      setCheckerboard(out,sol_o);      assert( sol_o.checkerboard ==Odd );

      // Verify the unprec residual
      _Matrix.M(out,resid);
      resid = resid-in;
      RealD ns = norm2(in);
      RealD nr = norm2(resid);

      std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
    }
  };
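A short usage sketch of the staggered wrapper, following the pattern of the Grid test codes; the action `Ds`, FermionField type and tolerances are illustrative:

// Illustrative only: staggered red-black solve of M sol = src on the full grid.
ConjugateGradient<FermionField> CG(1.0e-8, 10000);
SchurRedBlackStaggeredSolve<FermionField> SchurSolver(CG);
SchurSolver(Ds, src, sol);  // forms the odd-odd system, solves it, back-substitutes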
  // template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
  template<class Field> class SchurRedBlackStagSolve {
  private:
    OperatorFunction<Field> & _HermitianRBSolver;
    int CBfactorise;
  public:

    /////////////////////////////////////////////////////
    // Wrap the usual normal equations Schur trick
    /////////////////////////////////////////////////////
    SchurRedBlackStagSolve(OperatorFunction<Field> &HermitianRBSolver, int cb) :
      _HermitianRBSolver(HermitianRBSolver), CBfactorise(cb) {}

    template<class Matrix>
    void operator() (Matrix & _Matrix,const Field &in, Field &out){

      // FIXME CGdiagonalMee not implemented virtual function
      // FIXME use CBfactorise to control schur decomp
      GridBase *grid  = _Matrix.RedBlackGrid();
      GridBase *fgrid = _Matrix.Grid();

      SchurStagOperator<Matrix,Field> _HermOpEO(_Matrix);
      int Schur = CBfactorise;
      int Other = 1 - CBfactorise;

      Field src_e(grid);
      Field src_o(grid);
      Field sol_e(grid);
      Field sol_o(grid);
      Field tmp(grid);
      Field Mtmp(grid);
      Field resid(fgrid);

      pickCheckerboard(Other,src_e,in);
      pickCheckerboard(Schur,src_o,in);
      pickCheckerboard(Other,sol_e,out);
      pickCheckerboard(Schur,sol_o,out);

      /////////////////////////////////////////////////////
      // src_o = Mdag * (source_o - Moe MeeInv source_e)
      /////////////////////////////////////////////////////
      _Matrix.MooeeInv(src_e,tmp);     assert(   tmp.checkerboard ==Other);
      _Matrix.Meooe   (tmp,Mtmp);      assert(  Mtmp.checkerboard ==Schur);
      tmp=src_o-Mtmp;                  assert(   tmp.checkerboard ==Schur);

#if 0
      // get the right MpcDag
      // _HermOpEO.MpcDag(tmp,src_o);  assert(src_o.checkerboard ==Schur);
#else
      _Matrix.Mooee(tmp,src_o);        assert( src_o.checkerboard ==Schur);
#endif
      //////////////////////////////////////////////////////////////
      // Call the red-black solver
      //////////////////////////////////////////////////////////////
      std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Schur);

      ///////////////////////////////////////////////////
      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
      ///////////////////////////////////////////////////
      _Matrix.Meooe(sol_o,tmp);        assert(   tmp.checkerboard ==Other);
      src_e = src_e-tmp;               assert( src_e.checkerboard ==Other);
      _Matrix.MooeeInv(src_e,sol_e);   assert( sol_e.checkerboard ==Other);

      setCheckerboard(out,sol_e);      assert( sol_e.checkerboard ==Other);
      setCheckerboard(out,sol_o);      assert( sol_o.checkerboard ==Schur );

      // Verify the unprec residual
      _Matrix.M(out,resid);
      resid = resid-in;
      RealD ns = norm2(in);
      RealD nr = norm2(resid);

      std::cout<<GridLogMessage << "SchurRedBlackStag solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
    }
  };
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Take a matrix and form a Red Black solver calling a Herm solver
|
||||
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
|
||||
@ -76,12 +255,10 @@ namespace Grid {
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations Schur trick
|
||||
/////////////////////////////////////////////////////
|
||||
SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver) :
|
||||
_HermitianRBSolver(HermitianRBSolver)
|
||||
{
|
||||
CBfactorise=0;
|
||||
};
|
||||
|
||||
SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver,int cb=0) : _HermitianRBSolver(HermitianRBSolver)
|
||||
{
|
||||
CBfactorise=cb;
|
||||
};
|
||||
template<class Matrix>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
|
||||
@ -141,5 +318,238 @@ namespace Grid {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Take a matrix and form a Red Black solver calling a Herm solver
|
||||
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackDiagTwoSolve {
|
||||
private:
|
||||
OperatorFunction<Field> & _HermitianRBSolver;
|
||||
int CBfactorise;
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations Schur trick
|
||||
/////////////////////////////////////////////////////
|
||||
SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver) :
|
||||
_HermitianRBSolver(HermitianRBSolver)
|
||||
{
|
||||
CBfactorise=0;
|
||||
};
|
||||
|
||||
template<class Matrix>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
|
||||
// FIXME CGdiagonalMee not implemented virtual function
|
||||
// FIXME use CBfactorise to control schur decomp
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
|
||||
Field src_e(grid);
|
||||
Field src_o(grid);
|
||||
Field sol_e(grid);
|
||||
Field sol_o(grid);
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
Field resid(fgrid);
|
||||
|
||||
pickCheckerboard(Even,src_e,in);
|
||||
pickCheckerboard(Odd ,src_o,in);
|
||||
pickCheckerboard(Even,sol_e,out);
|
||||
pickCheckerboard(Odd ,sol_o,out);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
|
||||
|
||||
// get the right MpcDag
|
||||
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Call the red-black solver
|
||||
//////////////////////////////////////////////////////////////
|
||||
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
|
||||
// _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
|
||||
_HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
|
||||
_Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd);
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
|
||||
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
|
||||
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
|
||||
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
|
||||
|
||||
// Verify the unprec residual
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
|
||||
}
|
||||
};
|
||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
  // Take a matrix and form a Red Black solver calling a Herm solver
  // Use of RB info prevents making SchurRedBlackSolve conform to standard interface
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
  template<class Field> class SchurRedBlackDiagTwoMixed {
  private:
    LinearFunction<Field> & _HermitianRBSolver;
    int CBfactorise;
  public:

    /////////////////////////////////////////////////////
    // Wrap the usual normal equations Schur trick
    /////////////////////////////////////////////////////
    SchurRedBlackDiagTwoMixed(LinearFunction<Field> &HermitianRBSolver) :
      _HermitianRBSolver(HermitianRBSolver)
    {
      CBfactorise=0;
    };

    template<class Matrix>
    void operator() (Matrix & _Matrix,const Field &in, Field &out){

      // FIXME CGdiagonalMee not implemented virtual function
      // FIXME use CBfactorise to control schur decomp
      GridBase *grid = _Matrix.RedBlackGrid();
      GridBase *fgrid= _Matrix.Grid();

      SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);

      Field src_e(grid);
      Field src_o(grid);
      Field sol_e(grid);
      Field sol_o(grid);
      Field tmp(grid);
      Field Mtmp(grid);
      Field resid(fgrid);

      pickCheckerboard(Even,src_e,in);
      pickCheckerboard(Odd ,src_o,in);
      pickCheckerboard(Even,sol_e,out);
      pickCheckerboard(Odd ,sol_o,out);

      /////////////////////////////////////////////////////
      // src_o = Mdag * (source_o - Moe MeeInv source_e)
      /////////////////////////////////////////////////////
      _Matrix.MooeeInv(src_e,tmp);   assert(  tmp.checkerboard ==Even);
      _Matrix.Meooe   (tmp,Mtmp);    assert( Mtmp.checkerboard ==Odd);
      tmp=src_o-Mtmp;                assert(  tmp.checkerboard ==Odd);

      // get the right MpcDag
      _HermOpEO.MpcDag(tmp,src_o);   assert(src_o.checkerboard ==Odd);

      //////////////////////////////////////////////////////////////
      // Call the red-black solver
      //////////////////////////////////////////////////////////////
      std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
//      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
//      _HermitianRBSolver(_HermOpEO,src_o,tmp);    assert(tmp.checkerboard==Odd);
      _HermitianRBSolver(src_o,tmp);    assert(tmp.checkerboard==Odd);
      _Matrix.MooeeInv(tmp,sol_o);      assert( sol_o.checkerboard ==Odd);

      ///////////////////////////////////////////////////
      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
      ///////////////////////////////////////////////////
      _Matrix.Meooe(sol_o,tmp);        assert( tmp.checkerboard   ==Even);
      src_e = src_e-tmp;               assert( src_e.checkerboard ==Even);
      _Matrix.MooeeInv(src_e,sol_e);   assert( sol_e.checkerboard ==Even);

      setCheckerboard(out,sol_e);      assert( sol_e.checkerboard ==Even);
      setCheckerboard(out,sol_o);      assert( sol_o.checkerboard ==Odd );

      // Verify the unprec residual
      _Matrix.M(out,resid);
      resid = resid-in;
      RealD ns = norm2(in);
      RealD nr = norm2(resid);

      std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
    }
  };
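  // -------------------------------------------------------------------------------------
  // Illustrative usage sketch (added commentary, not part of the original source). Unlike
  // the class above it, SchurRedBlackDiagTwoMixed calls _HermitianRBSolver(src_o,tmp),
  // i.e. it expects a LinearFunction with the preconditioned operator already bound in.
  // The wrapper type name below is a hypothetical placeholder for illustration only:
  //
  //   ConjugateGradient<FermionField> CG(1.0e-8,10000);
  //   MyBoundOperatorSolver<FermionField> WrappedCG(CG,HermOpEO); // hypothetical wrapper
  //   SchurRedBlackDiagTwoMixed<FermionField> SchurSolver(WrappedCG);
  //   SchurSolver(Dop,src,sol);
  // -------------------------------------------------------------------------------------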

  template<class Field> class SchurRedBlackStagMixed {
  private:
    LinearFunction<Field> & _HermitianRBSolver;
    int CBfactorise;
  public:

    /////////////////////////////////////////////////////
    // Wrap the usual normal equations Schur trick
    /////////////////////////////////////////////////////
    SchurRedBlackStagMixed(LinearFunction<Field> &HermitianRBSolver, int cb) :
      _HermitianRBSolver(HermitianRBSolver), CBfactorise(cb) {}

    template<class Matrix>
    void operator() (Matrix & _Matrix,const Field &in, Field &out){

      // FIXME CGdiagonalMee not implemented virtual function
      // FIXME use CBfactorise to control schur decomp
      GridBase *grid = _Matrix.RedBlackGrid();
      GridBase *fgrid= _Matrix.Grid();

      SchurStagOperator<Matrix,Field> _HermOpEO(_Matrix);
      int Schur = CBfactorise;
      int Other = 1 - CBfactorise;

      Field src_e(grid);
      Field src_o(grid);
      Field sol_e(grid);
      Field sol_o(grid);
      Field tmp(grid);
      Field Mtmp(grid);
      Field resid(fgrid);

      pickCheckerboard(Other,src_e,in);
      pickCheckerboard(Schur,src_o,in);
      pickCheckerboard(Other,sol_e,out);
      pickCheckerboard(Schur,sol_o,out);

      /////////////////////////////////////////////////////
      // src_o = Mee * (source_o - Moe MeeInv source_e)
      /////////////////////////////////////////////////////
      _Matrix.MooeeInv(src_e,tmp);   assert(  tmp.checkerboard ==Other);
      _Matrix.Meooe   (tmp,Mtmp);    assert( Mtmp.checkerboard ==Schur);
      tmp=src_o-Mtmp;                assert(  tmp.checkerboard ==Schur);

      _Matrix.Mooee(tmp,src_o);      assert(src_o.checkerboard ==Schur);
      //////////////////////////////////////////////////////////////
      // Call the red-black solver
      //////////////////////////////////////////////////////////////
      std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
//      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Schur);
      _HermitianRBSolver(src_o,sol_o);  assert(sol_o.checkerboard==Schur);

      ///////////////////////////////////////////////////
      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
      ///////////////////////////////////////////////////
      _Matrix.Meooe(sol_o,tmp);        assert( tmp.checkerboard   ==Other);
      src_e = src_e-tmp;               assert( src_e.checkerboard ==Other);
      _Matrix.MooeeInv(src_e,sol_e);   assert( sol_e.checkerboard ==Other);

      setCheckerboard(out,sol_e);      assert( sol_e.checkerboard ==Other);
      setCheckerboard(out,sol_o);      assert( sol_o.checkerboard ==Schur );

      // Verify the unprec residual
      _Matrix.M(out,resid);
      resid = resid-in;
      RealD ns = norm2(in);
      RealD nr = norm2(resid);

      std::cout<<GridLogMessage << "SchurRedBlackStag solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
    }
  };
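  // Illustrative note (added commentary, not part of the original source): the staggered
  // variant takes the Schur checkerboard explicitly; CBfactorise selects which parity
  // hosts the Schur-complement solve, and the other parity is reconstructed by the
  // MooeeInv/Meooe back-substitution above. With a pre-bound solver WrappedCG
  // (hypothetical, as sketched earlier):
  //
  //   SchurRedBlackStagMixed<FermionField> StagSolver(WrappedCG,Odd);
  //   StagSolver(Dstag,src,sol);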
}
#endif
933  lib/algorithms/iterative/SimpleLanczos.h  Normal file
@@ -0,0 +1,933 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/algorithms/iterative/SimpleLanczos.h

    Copyright (C) 2015

Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Chulwoo Jung <chulwoo@bnl.gov>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/*  END LEGAL */
#ifndef GRID_LANC_H
#define GRID_LANC_H

#include <string.h> //memset

#ifdef USE_LAPACK
#ifdef USE_MKL
#include<mkl_lapack.h>
#else
void LAPACK_dstegr (char *jobz, char *range, int *n, double *d, double *e,
                    double *vl, double *vu, int *il, int *iu, double *abstol,
                    int *m, double *w, double *z, int *ldz, int *isuppz,
                    double *work, int *lwork, int *iwork, int *liwork,
                    int *info);
//#include <lapacke/lapacke.h>
#endif
#endif

#include <Grid/algorithms/densematrix/DenseMatrix.h>
//#include <Grid/algorithms/iterative/EigenSort.h>

// eliminate temporary vector in calc()
#define MEM_SAVE

namespace Grid
{

struct Bisection
{
#if 0
  // NB: this disabled block references A[] and H[], which are not defined in this scope.
  static void get_eig2(int row_num,std::vector<RealD> &ALPHA,std::vector<RealD> &BETA,std::vector<RealD> &eig)
  {
    int i,j;
    std::vector<RealD> evec1(row_num+3);
    std::vector<RealD> evec2(row_num+3);
    RealD eps2;
    ALPHA[1]=0.;
    BETA[1]=0.;
    for(i=0;i<row_num-1;i++){
      ALPHA[i+1] = A[i*(row_num+1)].real();
      BETA[i+2]  = A[i*(row_num+1)+1].real();
    }
    ALPHA[row_num] = A[(row_num-1)*(row_num+1)].real();
    bisec(ALPHA,BETA,row_num,1,row_num,1e-10,1e-10,evec1,eps2);
    bisec(ALPHA,BETA,row_num,1,row_num,1e-16,1e-16,evec2,eps2);

    // Do we really need to sort here?
    int begin=1;
    int end  =row_num;
    int swapped=1;
    while(swapped){
      swapped=0;
      for(i=begin;i<end;i++){
        if(mag(evec2[i])>mag(evec2[i+1])){
          swap(evec2+i,evec2+i+1);
          swapped=1;
        }
      }
      end--;
      for(i=end-1;i>=begin;i--){
        if(mag(evec2[i])>mag(evec2[i+1])){
          swap(evec2+i,evec2+i+1);
          swapped=1;
        }
      }
      begin++;
    }

    for(i=0;i<row_num;i++){
      for(j=0;j<row_num;j++){
        if(i==j) H[i*row_num+j]=evec2[i+1];
        else     H[i*row_num+j]=0.;
      }
    }
  }
#endif

  static void bisec (std::vector<RealD> &c,
                     std::vector<RealD> &b,
                     int n, int m1, int m2,
                     RealD eps1, RealD relfeh,
                     std::vector<RealD> &x, RealD &eps2)
  {
    std::vector<RealD> wu(n+2);

    RealD h,q,x1,xu,x0,xmin,xmax;
    int i,a,k;

    b[1]=0.0;
    xmin=c[n]-fabs(b[n]);
    xmax=c[n]+fabs(b[n]);
    for(i=1;i<n;i++){
      h=fabs(b[i])+fabs(b[i+1]);
      if(c[i]+h>xmax) xmax=c[i]+h;
      if(c[i]-h<xmin) xmin=c[i]-h;
    }
    xmax *= 2.;

    eps2=relfeh*((xmin+xmax)>0.0 ? xmax : -xmin);
    if(eps1<=0.0) eps1=eps2;
    eps2=0.5*eps1+7.0*(eps2);
    x0=xmax;
    for(i=m1;i<=m2;i++){
      x[i]=xmax;
      wu[i]=xmin;
    }

    for(k=m2;k>=m1;k--){
      xu=xmin;
      i=k;
      do{
        if(xu<wu[i]){
          xu=wu[i];
          i=m1-1;
        }
        i--;
      }while(i>=m1);
      if(x0>x[k]) x0=x[k];
      while((x0-xu)>2*relfeh*(fabs(xu)+fabs(x0))+eps1){
        x1=(xu+x0)/2;

        a=0;
        q=1.0;
        for(i=1;i<=n;i++){
          q=c[i]-x1-((q!=0.0)? b[i]*b[i]/q : fabs(b[i])/relfeh);
          if(q<0) a++;
        }
        // printf("x1=%0.14e a=%d\n",x1,a);
        if(a<k){
          if(a<m1){
            xu=x1;
            wu[m1]=x1;
          } else {
            xu=x1;
            wu[a+1]=x1;
            if(x[a]>x1) x[a]=x1;
          }
        } else x0=x1;
      }
      printf ("x0=%0.14e xu=%0.14e k=%d\n", x0, xu, k);
      x[k]=(x0+xu)/2;
    }
  }
};
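// -------------------------------------------------------------------------------------
// Illustrative sketch (added commentary, not part of the original source): bisec finds
// eigenvalues m1..m2 of the symmetric tridiagonal matrix with diagonal c[1..n] and
// off-diagonal b[2..n] (1-based storage) by Sturm-sequence counting. For diag(2,2,2)
// with unit off-diagonal the eigenvalues are 2-sqrt(2), 2, 2+sqrt(2); a minimal call:
//
//   std::vector<RealD> c = {0.,2.,2.,2.};   // c[1..3] diagonal
//   std::vector<RealD> b = {0.,0.,1.,1.};   // b[2..3] off-diagonal
//   std::vector<RealD> x(5);  RealD eps2;
//   Bisection::bisec(c,b,3,1,3,1e-12,1e-14,x,eps2); // eigenvalues land in x[1..3]
// -------------------------------------------------------------------------------------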

/////////////////////////////////////////////////////////////
// Implicitly restarted lanczos
/////////////////////////////////////////////////////////////

template<class Field> class SimpleLanczos
{
  const RealD small = 1.0e-16;
public:
  int lock;
  int get;
  int Niter;
  int converged;

  int Nstop;   // Number of evecs checked for convergence
  int Nk;      // Number of converged eigenvectors sought
  int Np;      // Np -- Number of spare vecs in Krylov space
  int Nm;      // Nm -- total number of vectors

  RealD OrthoTime;

  RealD eresid;

  SortEigen<Field> _sort;

  LinearOperatorBase<Field> &_Linop;

  OperatorFunction<Field> &_poly;

  /////////////////////////
  // Constructor
  /////////////////////////
  void init (void)
  {
  };
  void Abort (int ff, DenseVector<RealD> &evals,
              DenseVector<DenseVector<RealD> > &evecs);

  SimpleLanczos (LinearOperatorBase<Field> &Linop, // op
                 OperatorFunction<Field> &poly,    // polynomial
                 int _Nstop,    // evecs checked for convergence
                 int _Nk,       // sought vecs
                 int _Nm,       // total vecs
                 RealD _eresid, // residual tolerance on the eigenvalues
                 int _Niter):   // Max iterations
    _Linop (Linop),
    _poly (poly),
    Nstop (_Nstop), Nk (_Nk), Nm (_Nm), eresid (_eresid), Niter (_Niter)
  {
    Np = Nm - Nk;
    assert (Np > 0);
  };

  /////////////////////////
  // Sanity checked this routine (step) against Saad.
  /////////////////////////
  void RitzMatrix (DenseVector<Field> &evec, int k)
  {
    if (1) return; // early return currently disables the tridiagonality check below

    GridBase *grid = evec[0]._grid;
    Field w (grid);
    std::cout << GridLogMessage << "RitzMatrix " << std::endl;
    for (int i = 0; i < k; i++)
    {
      _Linop.HermOp (evec[i], w);
      // _poly(_Linop,evec[i],w);
      std::cout << GridLogMessage << "[" << i << "] ";
      for (int j = 0; j < k; j++)
      {
        ComplexD in = innerProduct (evec[j], w);
        if (fabs ((double) i - j) > 1)
        {
          if (abs (in) > 1.0e-9)
          {
            std::cout << GridLogMessage << "oops" << std::endl;
            abort ();
          }
          else
            std::cout << GridLogMessage << " 0 ";
        }
        else
        {
          std::cout << GridLogMessage << " " << in << " ";
        }
      }
      std::cout << GridLogMessage << std::endl;
    }
  }

  void step (DenseVector<RealD> &lmd,
             DenseVector<RealD> &lme,
             Field &last, Field &current, Field &next, uint64_t k)
  {
    if (lmd.size () <= k) lmd.resize (k + Nm);
    if (lme.size () <= k) lme.resize (k + Nm);

    // _poly(_Linop,current,next ); // 3. wk:=Avk−βkv_{k−1}
    _Linop.HermOp (current, next);  // 3. wk:=Avk−βkv_{k−1}
    if (k > 0)
    {
      next -= lme[k - 1] * last;
    }
    // std::cout<<GridLogMessage << "<last|next>" << innerProduct(last,next) <<std::endl;

    ComplexD zalph = innerProduct (current, next); // 4. αk:=(wk,vk)
    RealD alph = real (zalph);

    next = next - alph * current; // 5. wk:=wk−αkvk
    // std::cout<<GridLogMessage << "<current|next>" << innerProduct(current,next) <<std::endl;

    RealD beta = normalise (next); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
                                   // 7. vk+1 := wk/βk+1
    // norm=beta;

    int interval = Nm / 100 + 1;
    if ((k % interval) == 0)
      std::cout << GridLogMessage << k << " : alpha = " << zalph << " beta " << beta << std::endl;
    const RealD tiny = 1.0e-20;
    if (beta < tiny)
    {
      std::cout << GridLogMessage << " beta is tiny " << beta << std::endl;
    }
    lmd[k] = alph;
    lme[k] = beta;
  }
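  // For reference (standard Lanczos three-term recurrence, cf. Saad; summary added here,
  // not in the original source): step() above realises
  //   w_k        = A v_k - beta_k v_{k-1}
  //   alpha_k    = (w_k, v_k)
  //   w_k        = w_k - alpha_k v_k
  //   beta_{k+1} = ||w_k||,   v_{k+1} = w_k / beta_{k+1}
  // so lmd[] accumulates the diagonal (alpha) and lme[] the off-diagonal (beta) of the
  // tridiagonal Ritz matrix.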

  void qr_decomp (DenseVector<RealD> &lmd,
                  DenseVector<RealD> &lme,
                  int Nk, int Nm,
                  DenseVector<RealD> &Qt, RealD Dsh, int kmin, int kmax)
  {
    int k = kmin - 1;
    RealD x;

    RealD Fden = 1.0 / hypot (lmd[k] - Dsh, lme[k]);
    RealD c = (lmd[k] - Dsh) * Fden;
    RealD s = -lme[k] * Fden;

    RealD tmpa1 = lmd[k];
    RealD tmpa2 = lmd[k + 1];
    RealD tmpb  = lme[k];

    lmd[k]     = c * c * tmpa1 + s * s * tmpa2 - 2.0 * c * s * tmpb;
    lmd[k + 1] = s * s * tmpa1 + c * c * tmpa2 + 2.0 * c * s * tmpb;
    lme[k]     = c * s * (tmpa1 - tmpa2) + (c * c - s * s) * tmpb;
    x          = -s * lme[k + 1];
    lme[k + 1] = c * lme[k + 1];

    for (int i = 0; i < Nk; ++i)
    {
      RealD Qtmp1 = Qt[i + Nm * k];
      RealD Qtmp2 = Qt[i + Nm * (k + 1)];
      Qt[i + Nm * k]       = c * Qtmp1 - s * Qtmp2;
      Qt[i + Nm * (k + 1)] = s * Qtmp1 + c * Qtmp2;
    }

    // Givens transformations
    for (int k = kmin; k < kmax - 1; ++k)
    {
      RealD Fden = 1.0 / hypot (x, lme[k - 1]);
      RealD c = lme[k - 1] * Fden;
      RealD s = -x * Fden;

      RealD tmpa1 = lmd[k];
      RealD tmpa2 = lmd[k + 1];
      RealD tmpb  = lme[k];

      lmd[k]     = c * c * tmpa1 + s * s * tmpa2 - 2.0 * c * s * tmpb;
      lmd[k + 1] = s * s * tmpa1 + c * c * tmpa2 + 2.0 * c * s * tmpb;
      lme[k]     = c * s * (tmpa1 - tmpa2) + (c * c - s * s) * tmpb;
      lme[k - 1] = c * lme[k - 1] - s * x;

      if (k != kmax - 2)
      {
        x = -s * lme[k + 1];
        lme[k + 1] = c * lme[k + 1];
      }

      for (int i = 0; i < Nk; ++i)
      {
        RealD Qtmp1 = Qt[i + Nm * k];
        RealD Qtmp2 = Qt[i + Nm * (k + 1)];
        Qt[i + Nm * k]       = c * Qtmp1 - s * Qtmp2;
        Qt[i + Nm * (k + 1)] = s * Qtmp1 + c * Qtmp2;
      }
    }
  }
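  // For reference (standard identity; note added here, not in the original source):
  // each Givens rotation above conjugates a 2x2 tridiagonal block by G(c,s), giving
  //   a1' = c^2 a1 + s^2 a2 - 2cs b
  //   a2' = s^2 a1 + c^2 a2 + 2cs b
  //   b'  = cs (a1 - a2) + (c^2 - s^2) b
  // which is exactly the lmd[k], lmd[k+1], lme[k] update, while Qt accumulates the
  // product of rotations for the shifted QR sweep.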

#ifdef USE_LAPACK
#ifdef USE_MKL
#define LAPACK_INT MKL_INT
#else
#define LAPACK_INT long long
#endif
  void diagonalize_lapack (DenseVector<RealD> &lmd, DenseVector<RealD> &lme,
                           int N1, // all
                           int N2, // get
                           GridBase *grid)
  {
    const int size = Nm;
    LAPACK_INT NN = N1;
    double evals_tmp[NN];
    double DD[NN];
    double EE[NN];
    for (int i = 0; i < NN; i++)
      for (int j = i - 1; j <= i + 1; j++)
        if (j < NN && j >= 0)
        {
          if (i == j) DD[i] = lmd[i];
          if (i == j) evals_tmp[i] = lmd[i];
          if (j == (i - 1)) EE[j] = lme[j];
        }
    LAPACK_INT evals_found;
    LAPACK_INT lwork =
      ((18 * NN) > (1 + 4 * NN + NN * NN) ? (18 * NN) : (1 + 4 * NN + NN * NN));
    LAPACK_INT liwork = 3 + NN * 10;
    LAPACK_INT iwork[liwork];
    double work[lwork];
    LAPACK_INT isuppz[2 * NN];
    char jobz  = 'N'; // calculate evals only
    char range = 'I'; // calculate il-th to iu-th evals
    //  char range = 'A'; // calculate all evals
    char uplo  = 'U'; // refer to upper half of original matrix
    char compz = 'I'; // Compute eigenvectors of tridiagonal matrix
    int ifail[NN];
    LAPACK_INT info;
    //  int total = QMP_get_number_of_nodes();
    //  int node  = QMP_get_node_number();
    //  GridBase *grid = evec[0]._grid;
    int total = grid->_Nprocessors;
    int node  = grid->_processor;
    int interval = (NN / total) + 1;
    double vl = 0.0, vu = 0.0;
    LAPACK_INT il = interval * node + 1, iu = interval * (node + 1);
    if (iu > NN) iu = NN;
    double tol = 0.0;
    if (1)
    {
      memset (evals_tmp, 0, sizeof (double) * NN);
      if (il <= NN)
      {
        printf ("total=%d node=%d il=%d iu=%d\n", total, node, (int) il, (int) iu);
#ifdef USE_MKL
        dstegr (&jobz, &range, &NN,
#else
        LAPACK_dstegr (&jobz, &range, &NN,
#endif
                (double *) DD, (double *) EE, &vl, &vu, &il, &iu, // these four are ignored if the second parameter is 'A'
                &tol, // tolerance
                &evals_found, evals_tmp, (double *) NULL, &NN,
                isuppz, work, &lwork, iwork, &liwork, &info);
        for (int i = iu - 1; i >= il - 1; i--)
        {
          printf ("node=%d evals_found=%d evals_tmp[%d] = %g\n", node,
                  (int) evals_found, i - (int) (il - 1), evals_tmp[i - (il - 1)]);
          evals_tmp[i] = evals_tmp[i - (il - 1)];
          if (il > 1) evals_tmp[i - (il - 1)] = 0.;
        }
      }
      {
        grid->GlobalSumVector (evals_tmp, NN);
      }
    }
    // Cheating a bit: it would be better to sort rather than just reverse, but the
    // documentation of the routine says evals are returned in increasing order, while
    // qr gives evals in decreasing order.
  }
#undef LAPACK_INT
#endif

  void diagonalize (DenseVector<RealD> &lmd,
                    DenseVector<RealD> &lme,
                    int N2, int N1, GridBase *grid)
  {
#ifdef USE_LAPACK
    const int check_lapack = 0; // just use lapack if 0, check against lapack if 1

    if (!check_lapack)
      return diagonalize_lapack (lmd, lme, N2, N1, grid);

    // diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid);
#endif
  }

#if 1
  static RealD normalise (Field &v)
  {
    RealD nn = norm2 (v);
    nn = sqrt (nn);
    v = v * (1.0 / nn);
    return nn;
  }

  void orthogonalize (Field &w, DenseVector<Field> &evec, int k)
  {
    double t0 = -usecond () / 1e6;
    typedef typename Field::scalar_type MyComplex;
    MyComplex ip;

    if (0)
    {
      for (int j = 0; j < k; ++j)
      {
        normalise (evec[j]);
        for (int i = 0; i < j; i++)
        {
          ip = innerProduct (evec[i], evec[j]); // are the evecs normalised? ; this assumes so.
          evec[j] = evec[j] - ip * evec[i];
        }
      }
    }

    for (int j = 0; j < k; ++j)
    {
      ip = innerProduct (evec[j], w); // are the evecs normalised? ; this assumes so.
      w = w - ip * evec[j];
    }
    normalise (w);
    t0 += usecond () / 1e6;
    OrthoTime += t0;
  }

  void setUnit_Qt (int Nm, DenseVector<RealD> &Qt)
  {
    for (int i = 0; i < Qt.size (); ++i) Qt[i] = 0.0;
    for (int k = 0; k < Nm; ++k)         Qt[k + k * Nm] = 1.0;
  }

  void calc (DenseVector<RealD> &eval, const Field &src, int &Nconv)
  {
    GridBase *grid = src._grid;
    // assert(grid == src._grid);

    std::cout << GridLogMessage << " -- Nk = " << Nk << " Np = " << Np << std::endl;
    std::cout << GridLogMessage << " -- Nm = " << Nm << std::endl;
    std::cout << GridLogMessage << " -- size of eval = " << eval.size () << std::endl;

    // assert(c.size() && Nm == eval.size());

    DenseVector<RealD> lme (Nm);
    DenseVector<RealD> lmd (Nm);

    Field current (grid);
    Field last (grid);
    Field next (grid);

    Nconv = 0;

    RealD beta_k;

    // Set initial vector
    // (uniform vector) Why not src??
    // evec[0] = 1.0;
    current = src;
    std::cout << GridLogMessage << "norm2(src)= " << norm2 (src) << std::endl;
    normalise (current);
    std::cout << GridLogMessage << "norm2(evec[0])= " << norm2 (current) << std::endl;

    // Initial Nk steps
    OrthoTime = 0.;
    double t0 = usecond () / 1e6;
    RealD norm; // sqrt norm of last vector

    uint64_t iter = 0;

    bool initted = false;
    std::vector<RealD> low (Nstop * 10);
    std::vector<RealD> high (Nstop * 10);
    RealD cont = 0.;
    while (1) {
      cont = 0.;
      std::vector<RealD> lme2 (Nm);
      std::vector<RealD> lmd2 (Nm);
      for (uint64_t k = 0; k < Nm; ++k, iter++) {
        step (lmd, lme, last, current, next, iter);
        last = current;
        current = next;
      }
      double t1 = usecond () / 1e6;
      std::cout << GridLogMessage << "IRL::Initial steps: " << t1 - t0 << " seconds" << std::endl;
      t0 = t1;
      std::cout << GridLogMessage << "IRL::Initial steps:OrthoTime " << OrthoTime << " seconds" << std::endl;

      // getting eigenvalues
      lmd2.resize (iter + 2);
      lme2.resize (iter + 2);
      for (uint64_t k = 0; k < iter; ++k) {
        lmd2[k + 1] = lmd[k];
        lme2[k + 2] = lme[k];
      }
      t1 = usecond () / 1e6;
      std::cout << GridLogMessage << "IRL:: copy: " << t1 - t0 << " seconds" << std::endl;
      t0 = t1;
      {
        int total = grid->_Nprocessors;
        int node  = grid->_processor;
        int interval = (Nstop / total) + 1;
        int iu = (iter + 1) - (interval * node + 1);
        int il = (iter + 1) - (interval * (node + 1));
        std::vector<RealD> eval2 (iter + 3);
        RealD eps2;
        Bisection::bisec (lmd2, lme2, iter, il, iu, 1e-16, 1e-10, eval2, eps2);
        // diagonalize(eval2,lme2,iter,Nk,grid);
        RealD diff = 0.;
        for (int i = il; i <= iu; i++) {
          if (initted)
            diff = fabs (eval2[i] - high[iu-i]) / (fabs (eval2[i]) + fabs (high[iu-i]));
          if (initted && (diff > eresid))
            cont = 1.;
          if (initted)
            printf ("eval[%d]=%0.14e %0.14e, %0.14e\n", i, eval2[i], high[iu-i], diff);
          high[iu-i] = eval2[i];
        }
        il = (interval * node + 1);
        iu = (interval * (node + 1));
        Bisection::bisec (lmd2, lme2, iter, il, iu, 1e-16, 1e-10, eval2, eps2);
        for (int i = il; i <= iu; i++) {
          if (initted)
            diff = fabs (eval2[i] - low[i]) / (fabs (eval2[i]) + fabs (low[i]));
          if (initted && (diff > eresid))
            cont = 1.;
          if (initted)
            printf ("eval[%d]=%0.14e %0.14e, %0.14e\n", i, eval2[i], low[i], diff);
          low[i] = eval2[i];
        }
        t1 = usecond () / 1e6;
        std::cout << GridLogMessage << "IRL:: diagonalize: " << t1 - t0 << " seconds" << std::endl;
        t0 = t1;
      }

      for (uint64_t k = 0; k < Nk; ++k) {
        // eval[k] = eval2[k];
      }
      if (initted)
      {
        grid->GlobalSumVector (&cont, 1);
        if (cont < 1.) return;
      }
      initted = true;
    }
  }
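  // Convergence strategy above (summary added here, not in the original source): each
  // pass extends the tridiagonal factorisation by Nm further steps, then bisection is
  // run twice per node -- once from the high end and once from the low end of the
  // spectrum -- and the iteration stops only when every tracked eigenvalue has changed
  // by a relative amount below eresid between consecutive passes (cont summed over nodes).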

/**
   There is some matrix Q such that for any vector y
   Q.e_1 = y and Q is unitary.
**/
  template<class T>
  static T orthQ (DenseMatrix<T> &Q, DenseVector<T> y)
  {
    int N = y.size (); // Matrix Size
    Fill (Q, 0.0);
    T tau;
    for (int i = 0; i < N; i++)
    {
      Q[i][0] = y[i];
    }
    T sig  = conj (y[0]) * y[0];
    T tau0 = fabs (sqrt (sig));

    for (int j = 1; j < N; j++)
    {
      sig += conj (y[j]) * y[j];
      tau  = abs (sqrt (sig));

      if (abs (tau0) > 0.0)
      {
        T gam = conj ((y[j] / tau) / tau0);
        for (int k = 0; k <= j - 1; k++)
        {
          Q[k][j] = -gam * y[k];
        }
        Q[j][j] = tau0 / tau;
      }
      else
      {
        Q[j - 1][j] = 1.0;
      }
      tau0 = tau;
    }
    return tau;
  }

/**
   There is some matrix Q such that for any vector y
   Q.e_k = y and Q is unitary.
**/
  template<class T>
  static T orthU (DenseMatrix<T> &Q, DenseVector<T> y)
  {
    T tau = orthQ (Q, y);
    SL (Q);
    return tau;
  }

/**
   Wind up with a matrix with the first con rows untouched

   say con = 2
   Q is such that Qdag H Q has {x, x, val, 0, 0, 0, 0, ...} as 1st column
   and the matrix is upper hessenberg
   and with f and Q appropriately modified with Q is the arnoldi factorization
**/
  template<class T> static void Lock (DenseMatrix<T> &H, // Hess mtx
                                      DenseMatrix<T> &Q, // Lock Transform
                                      T val,             // value to be locked
                                      int con,           // number already locked
                                      RealD small, int dfg, bool herm)
  {
    //ForceTridiagonal(H);

    int M = H.dim;
    DenseVector<T> vec;
    Resize (vec, M - con);

    DenseMatrix<T> AH;
    Resize (AH, M - con, M - con);
    AH = GetSubMtx (H, con, M, con, M);

    DenseMatrix<T> QQ;
    Resize (QQ, M - con, M - con);

    Unity (Q);
    Unity (QQ);

    DenseVector<T> evals;
    Resize (evals, M - con);
    DenseMatrix<T> evecs;
    Resize (evecs, M - con, M - con);

    Wilkinson<T> (AH, evals, evecs, small);

    int k = 0;
    RealD cold = abs (val - evals[k]);
    for (int i = 1; i < M - con; i++)
    {
      RealD cnew = abs (val - evals[i]);
      if (cnew < cold)
      {
        k = i;
        cold = cnew;
      }
    }
    vec = evecs[k];

    ComplexD tau;
    orthQ (QQ, vec);
    //orthQM(QQ,AH,vec);

    AH = Hermitian (QQ) * AH;
    AH = AH * QQ;

    for (int i = con; i < M; i++)
    {
      for (int j = con; j < M; j++)
      {
        Q[i][j] = QQ[i - con][j - con];
        H[i][j] = AH[i - con][j - con];
      }
    }

    for (int j = M - 1; j > con + 2; j--)
    {
      DenseMatrix<T> U;
      Resize (U, j - 1 - con, j - 1 - con);
      DenseVector<T> z;
      Resize (z, j - 1 - con);
      for (int k = con + 0; k < j - 1; k++)
      {
        z[k - con] = conj (H (j, k + 1));
      }
      normalise (z);

      RealD tmp = 0;
      for (int i = 0; i < z.size () - 1; i++)
      {
        tmp = tmp + abs (z[i]);
      }

      if (tmp < small / ((RealD) z.size () - 1.0))
      {
        continue;
      }

      tau = orthU (U, z);

      DenseMatrix<T> Hb;
      Resize (Hb, j - 1 - con, M);

      for (int a = 0; a < M; a++)
      {
        for (int b = 0; b < j - 1 - con; b++)
        {
          T sum = 0;
          for (int c = 0; c < j - 1 - con; c++)
          {
            sum += H[a][con + 1 + c] * U[c][b];
          } //sum += H(a,con+1+c)*U(c,b);}
          Hb[b][a] = sum;
        }
      }

      for (int k = con + 1; k < j; k++)
      {
        for (int l = 0; l < M; l++)
        {
          H[l][k] = Hb[k - 1 - con][l];
        }
      } //H(Hb[k-1-con][l] , l,k);}}

      DenseMatrix<T> Qb;
      Resize (Qb, M, M);

      for (int a = 0; a < M; a++)
      {
        for (int b = 0; b < j - 1 - con; b++)
        {
          T sum = 0;
          for (int c = 0; c < j - 1 - con; c++)
          {
            sum += Q[a][con + 1 + c] * U[c][b];
          } //sum += Q(a,con+1+c)*U(c,b);}
          Qb[b][a] = sum;
        }
      }

      for (int k = con + 1; k < j; k++)
      {
        for (int l = 0; l < M; l++)
        {
          Q[l][k] = Qb[k - 1 - con][l];
        }
      } //Q(Qb[k-1-con][l] , l,k);}}

      DenseMatrix<T> Hc;
      Resize (Hc, M, M);

      for (int a = 0; a < j - 1 - con; a++)
      {
        for (int b = 0; b < M; b++)
        {
          T sum = 0;
          for (int c = 0; c < j - 1 - con; c++)
          {
            sum += conj (U[c][a]) * H[con + 1 + c][b];
          } //sum += conj( U(c,a) )*H(con+1+c,b);}
          Hc[b][a] = sum;
        }
      }

      for (int k = 0; k < M; k++)
      {
        for (int l = con + 1; l < j; l++)
        {
          H[l][k] = Hc[k][l - 1 - con];
        }
      } //H(Hc[k][l-1-con] , l,k);}}

    }
  }
#endif

};

}
#endif
122  lib/algorithms/iterative/bisec.c  Normal file
@@ -0,0 +1,122 @@
#include <math.h>
#include <stdlib.h>
#include <vector>

// NB: despite the .c suffix this file is C++; it assumes RealD, the mag/swap helpers,
// and the A/H arrays referenced by get_eig2 are supplied by the including translation unit.
struct Bisection {

  static void get_eig2(int row_num,std::vector<RealD> &ALPHA,std::vector<RealD> &BETA, std::vector<RealD> & eig)
  {
    int i,j;
    std::vector<RealD> evec1(row_num+3);
    std::vector<RealD> evec2(row_num+3);
    RealD eps2;
    ALPHA[1]=0.;
    BETA[1]=0.;
    for(i=0;i<row_num-1;i++) {
      ALPHA[i+1] = A[i*(row_num+1)].real();
      BETA[i+2]  = A[i*(row_num+1)+1].real();
    }
    ALPHA[row_num] = A[(row_num-1)*(row_num+1)].real();
    bisec(ALPHA,BETA,row_num,1,row_num,1e-10,1e-10,evec1,eps2);
    bisec(ALPHA,BETA,row_num,1,row_num,1e-16,1e-16,evec2,eps2);

    // Do we really need to sort here?
    int begin=1;
    int end = row_num;
    int swapped=1;
    while(swapped) {
      swapped=0;
      for(i=begin;i<end;i++){
        if(mag(evec2[i])>mag(evec2[i+1])) {
          swap(evec2+i,evec2+i+1);
          swapped=1;
        }
      }
      end--;
      for(i=end-1;i>=begin;i--){
        if(mag(evec2[i])>mag(evec2[i+1])) {
          swap(evec2+i,evec2+i+1);
          swapped=1;
        }
      }
      begin++;
    }

    for(i=0;i<row_num;i++){
      for(j=0;j<row_num;j++) {
        if(i==j) H[i*row_num+j]=evec2[i+1];
        else     H[i*row_num+j]=0.;
      }
    }
  }

  static void bisec(std::vector<RealD> &c,
                    std::vector<RealD> &b,
                    int n,
                    int m1,
                    int m2,
                    RealD eps1,
                    RealD relfeh,
                    std::vector<RealD> &x,
                    RealD &eps2)
  {
    std::vector<RealD> wu(n+2);

    RealD h,q,x1,xu,x0,xmin,xmax;
    int i,a,k;

    b[1]=0.0;
    xmin=c[n]-fabs(b[n]);
    xmax=c[n]+fabs(b[n]);
    for(i=1;i<n;i++){
      h=fabs(b[i])+fabs(b[i+1]);
      if(c[i]+h>xmax) xmax= c[i]+h;
      if(c[i]-h<xmin) xmin= c[i]-h;
    }
    xmax *=2.;

    eps2=relfeh*((xmin+xmax)>0.0 ? xmax : -xmin);
    if(eps1<=0.0) eps1=eps2;
    eps2=0.5*eps1+7.0*(eps2);
    x0=xmax;
    for(i=m1;i<=m2;i++){
      x[i]=xmax;
      wu[i]=xmin;
    }

    for(k=m2;k>=m1;k--){
      xu=xmin;
      i=k;
      do{
        if(xu<wu[i]){
          xu=wu[i];
          i=m1-1;
        }
        i--;
      }while(i>=m1);
      if(x0>x[k]) x0=x[k];
      while((x0-xu)>2*relfeh*(fabs(xu)+fabs(x0))+eps1){
        x1=(xu+x0)/2;

        a=0;
        q=1.0;
        for(i=1;i<=n;i++){
          q=c[i]-x1-((q!=0.0)? b[i]*b[i]/q:fabs(b[i])/relfeh);
          if(q<0) a++;
        }
        //  printf("x1=%e a=%d\n",x1,a);
        if(a<k){
          if(a<m1){
            xu=x1;
            wu[m1]=x1;
          }else {
            xu=x1;
            wu[a+1]=x1;
            if(x[a]>x1) x[a]=x1;
          }
        }else x0=x1;
      }
      x[k]=(x0+xu)/2;
    }
  }
};
@@ -1,7 +1,5 @@

#include <Grid/GridCore.h>
#include <fcntl.h>

namespace Grid {

@@ -11,7 +9,7 @@ int PointerCache::victim;

void *PointerCache::Insert(void *ptr,size_t bytes) {

  if (bytes < 4096 ) return NULL;
  if (bytes < 4096 ) return ptr;

#ifdef GRID_OMP
  assert(omp_in_parallel()==0);
@@ -63,4 +61,37 @@ void *PointerCache::Lookup(size_t bytes) {
  return NULL;
}

void check_huge_pages(void *Buf,uint64_t BYTES)
{
#ifdef __linux__
  int fd = open("/proc/self/pagemap", O_RDONLY);
  assert(fd >= 0);
  const int page_size = 4096;
  uint64_t virt_pfn = (uint64_t)Buf / page_size;
  off_t offset = sizeof(uint64_t) * virt_pfn;
  uint64_t npages = (BYTES + page_size-1) / page_size;
  uint64_t pagedata[npages];
  uint64_t ret = lseek(fd, offset, SEEK_SET);
  assert(ret == offset);
  ret = ::read(fd, pagedata, sizeof(uint64_t)*npages);
  assert(ret == sizeof(uint64_t) * npages);
  int nhugepages = npages / 512;
  int n4ktotal, nnothuge;
  n4ktotal = 0;
  nnothuge = 0;
  for (int i = 0; i < nhugepages; ++i) {
    uint64_t baseaddr = (pagedata[i*512] & 0x7fffffffffffffULL) * page_size;
    for (int j = 0; j < 512; ++j) {
      uint64_t pageaddr = (pagedata[i*512+j] & 0x7fffffffffffffULL) * page_size;
      ++n4ktotal;
      if (pageaddr != baseaddr + j * page_size)
        ++nnothuge;
    }
  }
  int rank = CartesianCommunicator::RankWorld();
  printf("rank %d Allocated %d 4k pages, %d not in huge pages\n", rank, n4ktotal, nnothuge);
#endif
}
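// Illustrative usage sketch (added commentary, not part of this diff): check_huge_pages
// reads /proc/self/pagemap and counts 4k pages whose physical frames are not contiguous
// within a 512-page (2MB) span, i.e. pages not backed by a huge page. A hypothetical
// call on a 2MB-aligned, 1 GB buffer:
//
//   void *buf = _mm_malloc(1ULL<<30, 2*1024*1024);
//   check_huge_pages(buf, 1ULL<<30);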

}
@@ -64,6 +64,8 @@ namespace Grid {

};

void check_huge_pages(void *Buf,uint64_t BYTES);

////////////////////////////////////////////////////////////////////
// A lattice of something, but assume the something is SIMDized.
////////////////////////////////////////////////////////////////////
@@ -92,18 +94,34 @@ public:
  size_type bytes = __n*sizeof(_Tp);

  _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes);

#ifdef HAVE_MM_MALLOC_H
  if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,128);
#else
  if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(128,bytes);
#endif
  //  if ( ptr != NULL )
  //    std::cout << "alignedAllocator "<<__n << " cache hit "<< std::hex << ptr <<std::dec <<std::endl;

  //////////////////
  // Hack 2MB align; could make option probably doesn't need configurability
  //////////////////
  //define GRID_ALLOC_ALIGN (128)
#define GRID_ALLOC_ALIGN (2*1024*1024)
#ifdef HAVE_MM_MALLOC_H
  if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,GRID_ALLOC_ALIGN);
#else
  if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,bytes);
#endif
  //  std::cout << "alignedAllocator " << std::hex << ptr <<std::dec <<std::endl;
  // First touch optimise in threaded loop
  uint8_t *cp = (uint8_t *)ptr;
#ifdef GRID_OMP
#pragma omp parallel for
#endif
  for(size_type n=0;n<bytes;n+=4096){
    cp[n]=0;
  }
  return ptr;
}

void deallocate(pointer __p, size_type __n) {
  size_type bytes = __n * sizeof(_Tp);

  pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes);

#ifdef HAVE_MM_MALLOC_H
@@ -182,10 +200,19 @@ public:
pointer allocate(size_type __n, const void* _p= 0)
{
#ifdef HAVE_MM_MALLOC_H
  _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
  _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),GRID_ALLOC_ALIGN);
#else
  _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
  _Tp * ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,__n*sizeof(_Tp));
#endif
  size_type bytes = __n*sizeof(_Tp);
  uint8_t *cp = (uint8_t *)ptr;
  if ( ptr ) {
    // One touch per 4k page, static OMP loop to catch same loop order
#pragma omp parallel for schedule(static)
    for(size_type n=0;n<bytes;n+=4096){
      cp[n]=0;
    }
  }
  return ptr;
}
void deallocate(pointer __p, size_type) {
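// Design note (added commentary, not from this diff): the first-touch loop above writes
// one byte per 4k page inside an OpenMP parallel region so that, under a first-touch
// NUMA policy, each page is physically placed on the node of the thread that will later
// traverse it with the same static loop schedule. Sketch of the idiom:
//
//   #pragma omp parallel for schedule(static)
//   for(size_t n=0;n<bytes;n+=4096) cp[n]=0;   // page placed near its future user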
@@ -6,8 +6,9 @@

    Copyright (C) 2015

Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -48,7 +49,10 @@ public:
    template<class object> friend class Lattice;

    GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {};
    GridBase(const std::vector<int> & processor_grid,
             const CartesianCommunicator &parent) : CartesianCommunicator(processor_grid,parent) {};

    virtual ~GridBase() = default;

    // Physics Grid information.
    std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes.
@@ -62,13 +66,12 @@ public:
    int _isites;
    int _fsites;  // _isites*_osites = product(dimensions).
    int _gsites;
    std::vector<int> _slice_block;   // subslice information
    std::vector<int> _slice_block;// subslice information
    std::vector<int> _slice_stride;
    std::vector<int> _slice_nblock;

    // Might need these at some point
    //    std::vector<int> _lstart; // local start of array in gcoors. _processor_coor[d]*_ldimensions[d]
    //    std::vector<int> _lend;   // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1
    std::vector<int> _lstart; // local start of array in gcoors _processor_coor[d]*_ldimensions[d]
    std::vector<int> _lend ;  // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1

public:

@@ -99,7 +102,7 @@ public:
    virtual int oIndex(std::vector<int> &coor)
    {
        int idx=0;
        // Works with either global or local coordinates
        // Works with either global or local coordinates
        for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
        return idx;
    }
@@ -121,6 +124,12 @@ public:
        Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions);
    }

    inline void InOutCoorToLocalCoor (std::vector<int> &ocoor, std::vector<int> &icoor, std::vector<int> &lcoor) {
      lcoor.resize(_ndimension);
      for (int d = 0; d < _ndimension; d++)
        lcoor[d] = ocoor[d] + _rdimensions[d] * icoor[d];
    }

    //////////////////////////////////////////////////////////
    // SIMD lane addressing
    //////////////////////////////////////////////////////////
@@ -128,6 +137,7 @@ public:
    {
        Lexicographic::CoorFromIndex(coor,lane,_simd_layout);
    }

    inline int PermuteDim(int dimension){
      return _simd_layout[dimension]>1;
    }
@@ -145,15 +155,15 @@ public:
      // Distance should be either 0,1,2..
      //
      if ( _simd_layout[dimension] > 2 ) {
        for(int d=0;d<_ndimension;d++){
          if ( d != dimension ) assert ( (_simd_layout[d]==1) );
        }
        permute_type = RotateBit; // How to specify distance; this is not just direction.
        return permute_type;
        for(int d=0;d<_ndimension;d++){
          if ( d != dimension ) assert ( (_simd_layout[d]==1) );
        }
        permute_type = RotateBit; // How to specify distance; this is not just direction.
        return permute_type;
      }

      for(int d=_ndimension-1;d>dimension;d--){
        if (_simd_layout[d]>1 ) permute_type++;
        if (_simd_layout[d]>1 ) permute_type++;
      }
      return permute_type;
    }
@@ -168,11 +178,31 @@ public:
    inline int gSites(void) const { return _isites*_osites*_Nprocessors; };
    inline int Nd    (void) const { return _ndimension;};

    inline const std::vector<int> LocalStarts(void)             { return _lstart; };
    inline const std::vector<int> &FullDimensions(void)         { return _fdimensions;};
    inline const std::vector<int> &GlobalDimensions(void)       { return _gdimensions;};
    inline const std::vector<int> &LocalDimensions(void)        { return _ldimensions;};
    inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;};

    ////////////////////////////////////////////////////////////////
    // Utility to print the full decomposition details
    ////////////////////////////////////////////////////////////////

    void show_decomposition(){
      std::cout << GridLogMessage << "\tFull Dimensions    : " << _fdimensions << std::endl;
      std::cout << GridLogMessage << "\tSIMD layout        : " << _simd_layout << std::endl;
      std::cout << GridLogMessage << "\tGlobal Dimensions  : " << _gdimensions << std::endl;
      std::cout << GridLogMessage << "\tLocal Dimensions   : " << _ldimensions << std::endl;
      std::cout << GridLogMessage << "\tReduced Dimensions : " << _rdimensions << std::endl;
      std::cout << GridLogMessage << "\tOuter strides      : " << _ostride << std::endl;
      std::cout << GridLogMessage << "\tInner strides      : " << _istride << std::endl;
      std::cout << GridLogMessage << "\tiSites             : " << _isites << std::endl;
      std::cout << GridLogMessage << "\toSites             : " << _osites << std::endl;
      std::cout << GridLogMessage << "\tlSites             : " << lSites() << std::endl;
      std::cout << GridLogMessage << "\tgSites             : " << gSites() << std::endl;
      std::cout << GridLogMessage << "\tNd                 : " << _ndimension << std::endl;
    }

    ////////////////////////////////////////////////////////////////
    // Global addressing
    ////////////////////////////////////////////////////////////////
@@ -188,8 +218,8 @@ public:
        gidx=0;
        int mult=1;
        for(int mu=0;mu<_ndimension;mu++) {
          gidx+=mult*gcoor[mu];
          mult*=_gdimensions[mu];
          gidx+=mult*gcoor[mu];
          mult*=_gdimensions[mu];
        }
    }
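    // Added note (not from this diff): the loop above is the standard mixed-radix
    // (lexicographic) encoding, gidx = sum_mu gcoor[mu] * prod_{nu<mu} _gdimensions[nu];
    // the hunk appears to change only indentation, leaving the arithmetic untouched.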
    void GlobalCoorToProcessorCoorLocalCoor(std::vector<int> &pcoor,std::vector<int> &lcoor,const std::vector<int> &gcoor)
@@ -197,9 +227,9 @@ public:
        pcoor.resize(_ndimension);
        lcoor.resize(_ndimension);
        for(int mu=0;mu<_ndimension;mu++){
          int _fld  = _fdimensions[mu]/_processors[mu];
          pcoor[mu] = gcoor[mu]/_fld;
          lcoor[mu] = gcoor[mu]%_fld;
          int _fld  = _fdimensions[mu]/_processors[mu];
          pcoor[mu] = gcoor[mu]/_fld;
          lcoor[mu] = gcoor[mu]%_fld;
        }
    }
    void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector<int> &gcoor)
@@ -211,9 +241,9 @@ public:
        /*
        std::vector<int> cblcoor(lcoor);
        for(int d=0;d<cblcoor.size();d++){
          if( this->CheckerBoarded(d) ) {
            cblcoor[d] = lcoor[d]/2;
          }
          if( this->CheckerBoarded(d) ) {
            cblcoor[d] = lcoor[d]/2;
          }
        }
        */
        i_idx= iIndex(lcoor);
@@ -239,7 +269,7 @@ public:
    {
        RankIndexToGlobalCoor(rank,o_idx,i_idx ,fcoor);
        if(CheckerBoarded(0)){
          fcoor[0] = fcoor[0]*2+cb;
          fcoor[0] = fcoor[0]*2+cb;
        }
    }
    void ProcessorCoorLocalCoorToGlobalCoor(std::vector<int> &Pcoor,std::vector<int> &Lcoor,std::vector<int> &gcoor)
@@ -61,74 +61,104 @@ public:
    virtual int CheckerBoardShift(int source_cb,int dim,int shift, int osite){
      return shift;
    }
    /////////////////////////////////////////////////////////////////////////
    // Constructor takes a parent grid and possibly subdivides communicator.
    /////////////////////////////////////////////////////////////////////////
    GridCartesian(const std::vector<int> &dimensions,
                  const std::vector<int> &simd_layout,
                  const std::vector<int> &processor_grid
                  ) : GridBase(processor_grid)
                  const std::vector<int> &processor_grid,
                  const GridCartesian &parent) : GridBase(processor_grid,parent)
    {
      ///////////////////////
      // Grid information
      ///////////////////////
      _ndimension = dimensions.size();

      _fdimensions.resize(_ndimension);
      _gdimensions.resize(_ndimension);
      _ldimensions.resize(_ndimension);
      _rdimensions.resize(_ndimension);
      _simd_layout.resize(_ndimension);

      _ostride.resize(_ndimension);
      _istride.resize(_ndimension);

      _fsites = _gsites = _osites = _isites = 1;
      Init(dimensions,simd_layout,processor_grid);
    }
    /////////////////////////////////////////////////////////////////////////
    // Construct from comm world
    /////////////////////////////////////////////////////////////////////////
    GridCartesian(const std::vector<int> &dimensions,
                  const std::vector<int> &simd_layout,
                  const std::vector<int> &processor_grid) : GridBase(processor_grid)
    {
      Init(dimensions,simd_layout,processor_grid);
    }

      for(int d=0;d<_ndimension;d++){
        _fdimensions[d] = dimensions[d];   // Global dimensions
        _gdimensions[d] = _fdimensions[d]; // Global dimensions
        _simd_layout[d] = simd_layout[d];
        _fsites = _fsites * _fdimensions[d];
        _gsites = _gsites * _gdimensions[d];
    virtual ~GridCartesian() = default;

    //FIXME check for exact division
    void Init(const std::vector<int> &dimensions,
              const std::vector<int> &simd_layout,
              const std::vector<int> &processor_grid)
    {
      ///////////////////////
      // Grid information
      ///////////////////////
      _ndimension = dimensions.size();

        // Use a reduced simd grid
        _ldimensions[d]= _gdimensions[d]/_processors[d];  //local dimensions
        _rdimensions[d]= _ldimensions[d]/_simd_layout[d]; //overdecomposition
        _osites *= _rdimensions[d];
        _isites *= _simd_layout[d];

        // Addressing support
        if ( d==0 ) {
          _ostride[d] = 1;
          _istride[d] = 1;
        } else {
          _ostride[d] = _ostride[d-1]*_rdimensions[d-1];
          _istride[d] = _istride[d-1]*_simd_layout[d-1];
        }
      _fdimensions.resize(_ndimension);
      _gdimensions.resize(_ndimension);
      _ldimensions.resize(_ndimension);
      _rdimensions.resize(_ndimension);
      _simd_layout.resize(_ndimension);
      _lstart.resize(_ndimension);
      _lend.resize(_ndimension);

      _ostride.resize(_ndimension);
      _istride.resize(_ndimension);

      _fsites = _gsites = _osites = _isites = 1;

      for (int d = 0; d < _ndimension; d++)
      {
        _fdimensions[d] = dimensions[d];   // Global dimensions
        _gdimensions[d] = _fdimensions[d]; // Global dimensions
        _simd_layout[d] = simd_layout[d];
        _fsites = _fsites * _fdimensions[d];
        _gsites = _gsites * _gdimensions[d];

        // Use a reduced simd grid
        _ldimensions[d] = _gdimensions[d] / _processors[d]; //local dimensions
        assert(_ldimensions[d] * _processors[d] == _gdimensions[d]);

        _rdimensions[d] = _ldimensions[d] / _simd_layout[d]; //overdecomposition
        assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]);

        _lstart[d] = _processor_coor[d] * _ldimensions[d];
        _lend[d]   = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1;
        _osites *= _rdimensions[d];
        _isites *= _simd_layout[d];

        // Addressing support
        if (d == 0)
        {
          _ostride[d] = 1;
          _istride[d] = 1;
        }

      ///////////////////////
      // subplane information
      ///////////////////////
      _slice_block.resize(_ndimension);
      _slice_stride.resize(_ndimension);
      _slice_nblock.resize(_ndimension);

      int block =1;
      int nblock=1;
      for(int d=0;d<_ndimension;d++) nblock*=_rdimensions[d];

      for(int d=0;d<_ndimension;d++){
        nblock/=_rdimensions[d];
        _slice_block[d] =block;
        _slice_stride[d]=_ostride[d]*_rdimensions[d];
        _slice_nblock[d]=nblock;
        block = block*_rdimensions[d];
        else
        {
          _ostride[d] = _ostride[d - 1] * _rdimensions[d - 1];
          _istride[d] = _istride[d - 1] * _simd_layout[d - 1];
        }
      }

      ///////////////////////
      // subplane information
      ///////////////////////
      _slice_block.resize(_ndimension);
      _slice_stride.resize(_ndimension);
      _slice_nblock.resize(_ndimension);

      int block = 1;
      int nblock = 1;
      for (int d = 0; d < _ndimension; d++)
        nblock *= _rdimensions[d];

      for (int d = 0; d < _ndimension; d++)
      {
        nblock /= _rdimensions[d];
        _slice_block[d] = block;
        _slice_stride[d] = _ostride[d] * _rdimensions[d];
        _slice_nblock[d] = nblock;
        block = block * _rdimensions[d];
      }
    };
};

}
#endif
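// Illustrative usage sketch (added commentary, not part of this diff): building a full
// grid and its red-black half grid with the constructors shown in these hunks; the
// lattice, SIMD and MPI layouts below are illustrative values only.
//
//   std::vector<int> latt({8,8,8,8}), simd({1,1,2,2}), mpi({1,1,1,1});
//   GridCartesian         Grid(latt,simd,mpi);   // construct from comm world
//   GridRedBlackCartesian RBGrid(&Grid);         // derive the checkerboarded grid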
|
@ -112,151 +112,209 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
GridRedBlackCartesian(const GridBase *base) : GridRedBlackCartesian(base->_fdimensions,base->_simd_layout,base->_processors) {};
|
||||
////////////////////////////////////////////////////////////
|
||||
// Create Redblack from original grid; require full grid pointer ?
|
||||
////////////////////////////////////////////////////////////
|
||||
GridRedBlackCartesian(const GridBase *base) : GridBase(base->_processors,*base)
|
||||
{
|
||||
int dims = base->_ndimension;
|
||||
std::vector<int> checker_dim_mask(dims,1);
|
||||
int checker_dim = 0;
|
||||
Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim);
|
||||
};
|
||||
|
||||
GridRedBlackCartesian(const std::vector<int> &dimensions,
|
||||
////////////////////////////////////////////////////////////
|
||||
// Create redblack from original grid, with non-trivial checker dim mask
|
||||
////////////////////////////////////////////////////////////
|
||||
GridRedBlackCartesian(const GridBase *base,
|
||||
const std::vector<int> &checker_dim_mask,
|
||||
int checker_dim
|
||||
) : GridBase(base->_processors,*base)
|
||||
{
|
||||
Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim) ;
|
||||
}
|
||||
|
||||
virtual ~GridRedBlackCartesian() = default;
|
||||
#if 0
|
||||
////////////////////////////////////////////////////////////
|
||||
// Create redblack grid ;; deprecate these. Should not
|
||||
// need direct creation of redblack without a full grid to base on
|
||||
////////////////////////////////////////////////////////////
|
||||
GridRedBlackCartesian(const GridBase *base,
|
||||
const std::vector<int> &dimensions,
|
||||
const std::vector<int> &simd_layout,
|
||||
const std::vector<int> &processor_grid,
|
||||
const std::vector<int> &checker_dim_mask,
|
||||
int checker_dim
|
||||
) : GridBase(processor_grid)
|
||||
) : GridBase(processor_grid,*base)
|
||||
{
|
||||
Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim);
|
||||
}
|
||||
GridRedBlackCartesian(const std::vector<int> &dimensions,
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Create redblack grid
|
||||
////////////////////////////////////////////////////////////
|
||||
GridRedBlackCartesian(const GridBase *base,
|
||||
const std::vector<int> &dimensions,
|
||||
const std::vector<int> &simd_layout,
|
||||
const std::vector<int> &processor_grid) : GridBase(processor_grid)
|
||||
const std::vector<int> &processor_grid) : GridBase(processor_grid,*base)
|
||||
{
|
||||
std::vector<int> checker_dim_mask(dimensions.size(),1);
|
||||
Init(dimensions,simd_layout,processor_grid,checker_dim_mask,0);
|
||||
int checker_dim = 0;
|
||||
Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim);
|
||||
}
|
||||
#endif
|
||||
|
||||
void Init(const std::vector<int> &dimensions,
|
||||
const std::vector<int> &simd_layout,
const std::vector<int> &processor_grid,
const std::vector<int> &checker_dim_mask,
int checker_dim)
{
    ///////////////////////
    // Grid information
    ///////////////////////
    _checker_dim = checker_dim;
    assert(checker_dim_mask[checker_dim] == 1);
    _ndimension = dimensions.size();
    assert(checker_dim_mask.size() == _ndimension);
    assert(processor_grid.size() == _ndimension);
    assert(simd_layout.size() == _ndimension);

    _fdimensions.resize(_ndimension);
    _gdimensions.resize(_ndimension);
    _ldimensions.resize(_ndimension);
    _rdimensions.resize(_ndimension);
    _simd_layout.resize(_ndimension);

    _lstart.resize(_ndimension);
    _lend.resize(_ndimension);

    _ostride.resize(_ndimension);
    _istride.resize(_ndimension);

    _fsites = _gsites = _osites = _isites = 1;

    _checker_dim_mask = checker_dim_mask;

    for (int d = 0; d < _ndimension; d++)
    {
      _fdimensions[d] = dimensions[d];
      _gdimensions[d] = _fdimensions[d];
      _fsites = _fsites * _fdimensions[d];
      _gsites = _gsites * _gdimensions[d];

      if (d == _checker_dim)
      {
        assert((_gdimensions[d] & 0x1) == 0);
        _gdimensions[d] = _gdimensions[d] / 2; // Remove a checkerboard
      }
      _ldimensions[d] = _gdimensions[d] / _processors[d];
      assert(_ldimensions[d] * _processors[d] == _gdimensions[d]);
      _lstart[d] = _processor_coor[d] * _ldimensions[d];
      _lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1;

      // Use a reduced simd grid
      _simd_layout[d] = simd_layout[d];
      _rdimensions[d] = _ldimensions[d] / _simd_layout[d]; // integer divisibility enforced by the assert below
      assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]);
      assert(_rdimensions[d] > 0);

      // all elements of a simd vector must have same checkerboard.
      // If Ls vectorised, this must still be the case; e.g. dwf rb5d
      if (_simd_layout[d] > 1)
      {
        if (checker_dim_mask[d])
        {
          assert((_rdimensions[d] & 0x1) == 0);
        }
      }

      _osites *= _rdimensions[d];
      _isites *= _simd_layout[d];

      // Addressing support
      if (d == 0)
      {
        _ostride[d] = 1;
        _istride[d] = 1;
      }
      else
      {
        _ostride[d] = _ostride[d - 1] * _rdimensions[d - 1];
        _istride[d] = _istride[d - 1] * _simd_layout[d - 1];
      }
    }
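
For orientation: the stride recursion above is a plain lexicographic flattening, with dimension 0 running fastest. _ostride[d] is the product of all reduced dimensions below d, and an outer site index is the dot product of the reduced coordinate with these strides. A minimal standalone sketch of the same rule, with illustrative dimensions and helper names that are not part of Grid:

    // Sketch: how _ostride flattens a coordinate into an outer (osite) index.
    // Dimensions and the helper name are illustrative only.
    #include <cassert>
    #include <vector>

    int osite_of(const std::vector<int> &coor, const std::vector<int> &rdims)
    {
      std::vector<int> ostride(rdims.size());
      for (std::size_t d = 0; d < rdims.size(); d++)
        ostride[d] = (d == 0) ? 1 : ostride[d - 1] * rdims[d - 1];

      int idx = 0;
      for (std::size_t d = 0; d < rdims.size(); d++)
        idx += ostride[d] * (coor[d] % rdims[d]);
      return idx;
    }

    int main()
    {
      std::vector<int> rdims  = {4, 4, 4, 8};
      std::vector<int> origin = {0, 0, 0, 0};
      std::vector<int> xhat   = {1, 0, 0, 0};
      std::vector<int> that   = {0, 0, 0, 1};
      assert(osite_of(origin, rdims) == 0);
      assert(osite_of(xhat, rdims) == 1);   // dimension 0 runs fastest
      assert(osite_of(that, rdims) == 64);  // stride 4*4*4 for dimension 3
      return 0;
    }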
    ////////////////////////////////////////////////////////////////////////////////////////////
    // subplane information
    ////////////////////////////////////////////////////////////////////////////////////////////
    _slice_block.resize(_ndimension);
    _slice_stride.resize(_ndimension);
    _slice_nblock.resize(_ndimension);

    int block = 1;
    int nblock = 1;
    for (int d = 0; d < _ndimension; d++)
      nblock *= _rdimensions[d];

    for (int d = 0; d < _ndimension; d++)
    {
      nblock /= _rdimensions[d];
      _slice_block[d] = block;
      _slice_stride[d] = _ostride[d] * _rdimensions[d];
      _slice_nblock[d] = nblock;
      block = block * _rdimensions[d];
    }
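
Worked example: for each dimension d the loop records how to sweep a hyperplane orthogonal to d: _slice_block[d] contiguous sites per block, _slice_nblock[d] blocks, and _slice_stride[d] the outer-index spacing between blocks, so block * nblock * rdims[d] always equals the outer volume. A standalone check with illustrative dimensions {4,6,8}:

    // Sketch: subplane bookkeeping for rdims = {4,6,8} (illustrative values).
    #include <cassert>
    #include <vector>

    int main()
    {
      std::vector<int> rdims = {4, 6, 8};
      int n = (int)rdims.size();
      std::vector<int> ostride(n), block(n), nblock(n), stride(n);

      int nb = 1;
      for (int d = 0; d < n; d++) nb *= rdims[d];   // nb = 192 sites in total

      int b = 1;
      for (int d = 0; d < n; d++) {
        ostride[d] = (d == 0) ? 1 : ostride[d - 1] * rdims[d - 1];
        nb /= rdims[d];
        block[d]  = b;                    // contiguous sites per block
        stride[d] = ostride[d] * rdims[d];// outer-index gap between blocks
        nblock[d] = nb;                   // number of blocks in the slice
        b *= rdims[d];
      }
      assert(block[1] == 4 && stride[1] == 24 && nblock[1] == 8); // 4*8*6 == 192
      return 0;
    }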
    ////////////////////////////////////////////////
    // Create a checkerboard lookup table
    ////////////////////////////////////////////////
    int rvol = 1;
    for (int d = 0; d < _ndimension; d++)
    {
      rvol = rvol * _rdimensions[d];
    }
    _checker_board.resize(rvol);
    for (int osite = 0; osite < _osites; osite++)
    {
      _checker_board[osite] = CheckerBoardFromOindex(osite);
    }
  };
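
CheckerBoardFromOindex is inherited from the base grid class and is not shown in this diff; conceptually the table caches, per outer site, the parity of the coordinate sum over the masked dimensions. A hedged sketch of that parity rule (illustrative, not Grid's actual implementation):

    // Sketch: red-black parity of a site = sum of masked coordinates mod 2.
    #include <vector>

    int parity(const std::vector<int> &coor, const std::vector<int> &mask)
    {
      int linear = 0;
      for (std::size_t d = 0; d < coor.size(); d++)
        if (mask[d]) linear += coor[d];
      return linear & 0x1;   // 0 = even ("red"), 1 = odd ("black")
    }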
protected:
  virtual int oIndex(std::vector<int> &coor)
  {
    int idx = 0;
    for (int d = 0; d < _ndimension; d++)
    {
      if (d == _checker_dim)
      {
        idx += _ostride[d] * ((coor[d] / 2) % _rdimensions[d]);
      }
      else
      {
        idx += _ostride[d] * (coor[d] % _rdimensions[d]);
      }
    }
    return idx;
  };
  virtual int iIndex(std::vector<int> &lcoor)
  {
    int idx = 0;
    for (int d = 0; d < _ndimension; d++)
    {
      if (d == _checker_dim)
      {
        idx += _istride[d] * (lcoor[d] / (2 * _rdimensions[d]));
      }
      else
      {
        idx += _istride[d] * (lcoor[d] / _rdimensions[d]);
      }
    }
    return idx;
  }
};
}
#endif
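
For context, the usual way these classes are paired in Grid's tests is to build the full grid first and derive the red-black grid from it; the sketch below follows that pattern (helper functions are provided by Grid's start-up code, and constructor signatures may differ between Grid versions):

    // Sketch: pairing a full grid with its red-black companion, as in Grid's tests.
    std::vector<int> latt_size   = GridDefaultLatt();
    std::vector<int> simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd());
    std::vector<int> mpi_layout  = GridDefaultMpi();

    GridCartesian         Grid(latt_size, simd_layout, mpi_layout);
    GridRedBlackCartesian RBGrid(&Grid);  // halves the checkerboard dimension internally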
@@ -26,6 +26,10 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
*************************************************************************************/
/* END LEGAL */
#include <Grid/GridCore.h>
#include <fcntl.h>
#include <unistd.h>
#include <limits.h>
#include <sys/mman.h>

namespace Grid {

@@ -33,8 +37,11 @@ namespace Grid {
// Info that is setup once and independent of cartesian layout
///////////////////////////////////////////////////////////////
void * CartesianCommunicator::ShmCommBuf;
uint64_t CartesianCommunicator::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL;
CartesianCommunicator::CommunicatorPolicy_t
CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent;
int CartesianCommunicator::nCommThreads = -1;
int CartesianCommunicator::Hugepages = 0;

/////////////////////////////////
// Alloc, free shmem region
@@ -60,6 +67,7 @@ void CartesianCommunicator::ShmBufferFreeAll(void) {
/////////////////////////////////
// Grid information queries
/////////////////////////////////
int CartesianCommunicator::Dimensions(void) { return _ndimension; };
int CartesianCommunicator::IsBoss(void)     { return _processor==0; };
int CartesianCommunicator::BossRank(void)   { return 0; };
int CartesianCommunicator::ThisRank(void)   { return _processor; };
@@ -88,24 +96,161 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
  GlobalSumVector((double *)c,2*N);
}

#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)

CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
{
  _ndimension = processors.size();

  //////////////////////////////////////////////////////////////////////////////////////////////////////
  // split the communicator
  //////////////////////////////////////////////////////////////////////////////////////////////////////
  int Nparent;
  MPI_Comm_size(parent.communicator,&Nparent);

  int childsize=1;
  for(int d=0;d<processors.size();d++) {
    childsize *= processors[d];
  }
  int Nchild = Nparent/childsize;
  assert (childsize * Nchild == Nparent);

  std::vector<int> ccoor(_ndimension); // coor within subcommunicator
  std::vector<int> scoor(_ndimension); // coor of the subcommunicator within the parent
  std::vector<int> ssize(_ndimension); // number of subcommunicators in each dimension

  std::vector<int> pcoor(_ndimension,0);
  std::vector<int> pdims(_ndimension,1);

  if(parent._processors.size()==4 && _ndimension==5){
    for(int i=0;i<4;i++) pcoor[i+1]=parent._processor_coor[i];
    for(int i=0;i<4;i++) pdims[i+1]=parent._processors[i];
  } else {
    assert(_ndimension == parent._ndimension);
    for(int i=0;i<_ndimension;i++) pcoor[i]=parent._processor_coor[i];
    for(int i=0;i<_ndimension;i++) pdims[i]=parent._processors[i];
  }

  for(int d=0;d<_ndimension;d++){
    ccoor[d] = pcoor[d] % processors[d];
    scoor[d] = pcoor[d] / processors[d];
    ssize[d] = pdims[d] / processors[d];
  }
  int crank,srank; // rank within subcomm ; rank of subcomm within blocks of subcomms
  Lexicographic::IndexFromCoor(ccoor,crank,processors);
  Lexicographic::IndexFromCoor(scoor,srank,ssize);

  MPI_Comm comm_split;
  if ( Nchild > 1 ) {

    // std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
    // std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] ";
    // for(int d=0;d<parent._processors.size();d++) std::cout << parent._processors[d] << " ";
    // std::cout<<std::endl;

    // std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] ";
    // for(int d=0;d<processors.size();d++) std::cout << processors[d] << " ";
    // std::cout<<std::endl;

    int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split);
    assert(ierr==0);
    //////////////////////////////////////////////////////////////////////////////////////////////////////
    // Declare victory
    //////////////////////////////////////////////////////////////////////////////////////////////////////
    // std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
    //           << Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
  } else {
    comm_split=parent.communicator;
  }

  //////////////////////////////////////////////////////////////////////////////////////////////////////
  // Set up from the new split communicator
  //////////////////////////////////////////////////////////////////////////////////////////////////////
  InitFromMPICommunicator(processors,comm_split);
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Take an MPI_Comm and self assemble
//////////////////////////////////////////////////////////////////////////////////////////////////////
void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
{
  // if ( communicator_base != communicator_world ) {
  //   std::cout << "Cartesian communicator created with a non-world communicator"<<std::endl;
  // }
  _ndimension = processors.size();
  _processor_coor.resize(_ndimension);

  /////////////////////////////////
  // Count the requested nodes
  /////////////////////////////////
  _Nprocessors=1;
  _processors = processors;
  for(int i=0;i<_ndimension;i++){
    _Nprocessors*=_processors[i];
  }

  std::vector<int> periodic(_ndimension,1);
  MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],1,&communicator);
  MPI_Comm_rank(communicator,&_processor);
  MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);

  int Size;
  MPI_Comm_size(communicator,&Size);

#ifdef GRID_COMMS_MPIT
  communicator_halo.resize (2*_ndimension);
  for(int i=0;i<_ndimension*2;i++){
    MPI_Comm_dup(communicator,&communicator_halo[i]);
  }
#endif

  assert(Size==_Nprocessors);
}

CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
  InitFromMPICommunicator(processors,communicator_world);
}

#endif
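
The constructor above derives, for every parent rank, a colour (srank: which child block it belongs to) and a key (crank: its rank inside that block) and feeds them to MPI_Comm_split. A minimal standalone analogue, with an assumed child size of 2 ranks and error handling elided:

    // Sketch: splitting a parent communicator into equal sub-grids, in the
    // same spirit as the constructor above (illustrative only).
    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char **argv)
    {
      MPI_Init(&argc, &argv);
      int rank, size;
      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
      MPI_Comm_size(MPI_COMM_WORLD, &size);

      const int childsize = 2;         // ranks per child communicator (assumed)
      int colour = rank / childsize;   // which sub-communicator ("srank")
      int key    = rank % childsize;   // rank within it ("crank")

      MPI_Comm child;
      MPI_Comm_split(MPI_COMM_WORLD, colour, key, &child);

      int crank;
      MPI_Comm_rank(child, &crank);
      printf("world %d/%d -> child %d rank %d\n", rank, size, colour, crank);

      MPI_Comm_free(&child);
      MPI_Finalize();
      return 0;
    }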
#if !defined( GRID_COMMS_MPI3)

int CartesianCommunicator::NodeCount(void) { return ProcessorCount();};
int CartesianCommunicator::RankCount(void) { return ProcessorCount();};
#endif
#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPIT)
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
                                                     int xmit_to_rank,
                                                     void *recv,
                                                     int recv_from_rank,
                                                     int bytes, int dir)
{
  std::vector<CommsRequest_t> list;
  // Discard the "dir"
  SendToRecvFromBegin (list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
  SendToRecvFromComplete(list);
  return 2.0*bytes;
}
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                         void *xmit,
                                                         int xmit_to_rank,
                                                         void *recv,
                                                         int recv_from_rank,
                                                         int bytes, int dir)
{
  // Discard the "dir"
  SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
  return 2.0*bytes;
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
{
  SendToRecvFromComplete(waitall);
}
#endif

#if !defined( GRID_COMMS_MPI3)

void CartesianCommunicator::StencilBarrier(void){};

commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector;
@@ -119,8 +264,30 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) {
  return NULL;
}
void CartesianCommunicator::ShmInitGeneric(void){
#if 1
  int mmap_flag =0;
#ifdef MAP_ANONYMOUS
  mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS;
#endif
#ifdef MAP_ANON
  mmap_flag = mmap_flag| MAP_SHARED | MAP_ANON;
#endif
#ifdef MAP_HUGETLB
  if ( Hugepages ) mmap_flag |= MAP_HUGETLB;
#endif
  ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag, -1, 0);
  if (ShmCommBuf == (void *)MAP_FAILED) {
    perror("mmap failed ");
    exit(EXIT_FAILURE);
  }
#ifdef MADV_HUGEPAGE
  if (!Hugepages ) madvise(ShmCommBuf,MAX_MPI_SHM_BYTES,MADV_HUGEPAGE);
#endif
#else
  ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES);
  ShmCommBuf=(void *)&ShmBufStorageVector[0];
#endif
  bzero(ShmCommBuf,MAX_MPI_SHM_BYTES);
}

#endif
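
Both knobs used here, Hugepages and MAX_MPI_SHM_BYTES, are static members consumed inside Init/ShmInitGeneric, so they must be set before CartesianCommunicator::Init runs; in Grid this is normally driven from command-line flags during start-up. Setting them by hand, as sketched below, is illustrative only:

    // Sketch: configure the static knobs before Init (argc/argv from main).
    CartesianCommunicator::MAX_MPI_SHM_BYTES = 1024ULL*1024ULL*1024ULL; // 1 GB
    CartesianCommunicator::Hugepages         = 1;  // request MAP_HUGETLB / SHM_HUGETLB
    CartesianCommunicator::Init(&argc, &argv);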
@@ -38,7 +38,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifdef GRID_COMMS_MPI3
#include <mpi.h>
#endif
#ifdef GRID_COMMS_MPIT
#include <mpi.h>
#endif
#ifdef GRID_COMMS_SHMEM
@@ -50,12 +50,24 @@ namespace Grid {
class CartesianCommunicator {
  public:

  ////////////////////////////////////////////
  // Isend/Irecv/Wait, or Sendrecv blocking
  ////////////////////////////////////////////
  enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential };
  static CommunicatorPolicy_t CommunicatorPolicy;
  static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; }

  ///////////////////////////////////////////
  // Up to 65536 ranks per node adequate for now
  // 128MB shared memory for comms enough for 48^4 local vol comms
  // Give external control (command line override?) of this
  ///////////////////////////////////////////
  static const int MAXLOG2RANKSPERNODE = 16;
  static uint64_t MAX_MPI_SHM_BYTES;
  static int nCommThreads;
  // use explicit huge pages
  static int Hugepages;

  // Communicator should know nothing of the physics grid, only processor grid.
  int _Nprocessors; // How many in all
@@ -64,14 +76,19 @@ class CartesianCommunicator {
  std::vector<int> _processor_coor; // linear processor coordinate
  unsigned long _ndimension;

#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPIT)
  static MPI_Comm communicator_world;

  MPI_Comm              communicator;
  std::vector<MPI_Comm> communicator_halo;

  typedef MPI_Request CommsRequest_t;
#else
  typedef int CommsRequest_t;
#endif

  ////////////////////////////////////////////////////////////////////
  // Helper functionality for SHM Windows common to all other impls
  ////////////////////////////////////////////////////////////////////
@@ -117,11 +134,7 @@ class CartesianCommunicator {
  /////////////////////////////////
  static void * ShmCommBuf;

  size_t heap_top;
  size_t heap_bytes;
@@ -135,11 +148,24 @@ class CartesianCommunicator {
  // Must call in Grid startup
  ////////////////////////////////////////////////
  static void Init(int *argc, char ***argv);

  ////////////////////////////////////////////////
  // Constructors to sub-divide a parent communicator
  // and default to comm world
  ////////////////////////////////////////////////
  CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent);
  CartesianCommunicator(const std::vector<int> &pdimensions_in);
  virtual ~CartesianCommunicator();

 private:
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)
  ////////////////////////////////////////////////
  // Private initialise from an MPI communicator
  // Can use after an MPI_Comm_split, but hidden from user so private
  ////////////////////////////////////////////////
  void InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base);
#endif
 public:

  ////////////////////////////////////////////////////////////////////////////////////////
  // Wraps MPI_Cart routines, or implements equivalent on other impls
@@ -148,6 +174,7 @@ class CartesianCommunicator {
  int  RankFromProcessorCoor(std::vector<int> &coor);
  void ProcessorCoorFromRank(int rank,std::vector<int> &coor);

  int Dimensions(void) ;
  int IsBoss(void) ;
  int BossRank(void) ;
  int ThisRank(void) ;
@@ -155,6 +182,7 @@ class CartesianCommunicator {
  const std::vector<int> & ProcessorGrid(void) ;
  int ProcessorCount(void) ;
  int NodeCount(void) ;
  int RankCount(void) ;

  ////////////////////////////////////////////////////////////////////////////////
  // very VERY rarely (Log, serial RNG) we need world without a grid
@@ -175,6 +203,8 @@ class CartesianCommunicator {
  void GlobalSumVector(ComplexF *c,int N);
  void GlobalSum(ComplexD &c);
  void GlobalSumVector(ComplexD *c,int N);
  void GlobalXOR(uint32_t &);
  void GlobalXOR(uint64_t &);

  template<class obj> void GlobalSum(obj &o){
    typedef typename obj::scalar_type scalar_type;
@@ -207,14 +237,21 @@ class CartesianCommunicator {

  void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);

  double StencilSendToRecvFrom(void *xmit,
                               int xmit_to_rank,
                               void *recv,
                               int recv_from_rank,
                               int bytes,int dir);

  double StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                    void *xmit,
                                    int xmit_to_rank,
                                    void *recv,
                                    int recv_from_rank,
                                    int bytes,int dir);

  void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int i);
  void StencilBarrier(void);

  ////////////////////////////////////////////////////////////
@@ -226,6 +263,27 @@ class CartesianCommunicator {
  // Broadcast a buffer and composite larger
  ////////////////////////////////////////////////////////////
  void Broadcast(int root,void* data, int bytes);

  ////////////////////////////////////////////////////////////
  // All2All down one dimension
  ////////////////////////////////////////////////////////////
  template<class T> void AllToAll(int dim,std::vector<T> &in, std::vector<T> &out){
    assert(dim>=0);
    assert(dim<_ndimension);
    int numnode = _processors[dim];
    // std::cerr << " AllToAll in.size()  "<<in.size()<<std::endl;
    // std::cerr << " AllToAll out.size() "<<out.size()<<std::endl;
    assert(in.size()==out.size());
    uint64_t bytes=sizeof(T);
    uint64_t words=in.size()/numnode;

    assert(numnode * words == in.size());
    assert(words < (1ULL<<32));

    AllToAll(dim,(void *)&in[0],(void *)&out[0],words,bytes);
  }
  void AllToAll(int dim ,void *in,void *out,uint64_t words,uint64_t bytes);
  void AllToAll(void *in,void *out,uint64_t words ,uint64_t bytes);

  template<class obj> void Broadcast(int root,obj &data)
  {
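
Usage sketch for the AllToAll template above: the vector length must be a multiple of the rank count in the chosen dimension, since each rank sends and receives in.size()/numnode words. Illustrative only, assuming a 1-d processor grid of 4 ranks and an already-initialised Grid/MPI environment:

    // Sketch: exchanging equal word counts along one processor dimension.
    std::vector<int> processors = {4};              // 1-d processor grid (assumed)
    CartesianCommunicator comm(processors);         // after CartesianCommunicator::Init

    std::vector<double> in(4 * 16), out(in.size()); // 16 words per peer rank
    comm.AllToAll(0, in, out);                      // words = in.size()/4 = 16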
@@ -53,28 +53,14 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
  ShmInitGeneric();
}

CartesianCommunicator::~CartesianCommunicator()
{
  int MPI_is_finalised;
  MPI_Finalized(&MPI_is_finalised);
  // free the Cartesian communicator only while MPI is still live
  if (communicator && !MPI_is_finalised)
    MPI_Comm_free(&communicator);
}

void CartesianCommunicator::GlobalSum(uint32_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
  assert(ierr==0);
@@ -83,6 +69,14 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
  assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint32_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator);
  assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint64_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator);
  assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(float &f){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
  assert(ierr==0);
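
The new GlobalXOR reductions (MPI_BXOR all-reduce) suit uses where a global value is defined as the XOR of per-rank partials, for example combining partial checksums in parallel IO. A hedged sketch; local_checksum is a hypothetical helper and grid a CartesianCommunicator pointer:

    // Sketch: combining per-rank partial checksums into a global checksum.
    uint32_t csum = local_checksum(buffer); // hypothetical per-rank checksum
    grid->GlobalXOR(csum);                  // csum now holds the XOR over all ranks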
@@ -202,6 +196,35 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
                       root,
                       communicator);
  assert(ierr==0);
}
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes)
{
  std::vector<int> row(_ndimension,1);
  assert(dim>=0 && dim<_ndimension);

  // Split the communicator
  row[dim] = _processors[dim];

  CartesianCommunicator Comm(row,*this);
  Comm.AllToAll(in,out,words,bytes);
}
void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes)
{
  // MPI is a pain and uses "int" arguments
  // 64*64*64*128*16 == 500Million elements of data.
  // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug.
  // (Turns up on 32^3 x 64 Gparity too)
  MPI_Datatype object;
  int iwords;
  int ibytes;
  iwords = words;
  ibytes = bytes;
  assert(words == iwords); // safe to cast to int ?
  assert(bytes == ibytes); // safe to cast to int ?
  MPI_Type_contiguous(ibytes,MPI_BYTE,&object);
  MPI_Type_commit(&object);
  MPI_Alltoall(in,iwords,object,out,iwords,object,communicator);
  MPI_Type_free(&object);
}
///////////////////////////////////////////////////////
// Should only be used prior to Grid Init finished.
@@ -222,5 +245,7 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
  assert(ierr==0);
}

}

@@ -37,11 +37,12 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/mman.h>
#include <zlib.h>
#ifdef HAVE_NUMAIF_H
#include <numaif.h>
#endif

namespace Grid {

///////////////////////////////////////////////////////////////////////////////////////////////////
@@ -65,6 +66,7 @@ std::vector<int> CartesianCommunicator::MyGroup;
std::vector<void *> CartesianCommunicator::ShmCommBufs;

int CartesianCommunicator::NodeCount(void) { return GroupSize;};
int CartesianCommunicator::RankCount(void) { return WorldSize;};

#undef FORCE_COMMS
@@ -196,7 +198,46 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
  ShmCommBuf = 0;
  ShmCommBufs.resize(ShmSize);

#if 1
  ////////////////////////////////////////////////////////////////////////////////////////////
  // Hugetlbfs and others map filesystems as mappable huge pages
  ////////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_MPI3_SHMMMAP
  char shm_name [NAME_MAX];
  for(int r=0;r<ShmSize;r++){

    size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES;
    sprintf(shm_name,GRID_SHM_PATH "/Grid_mpi3_shm_%d_%d",GroupRank,r);
    //sprintf(shm_name,"/var/lib/hugetlbfs/group/wheel/pagesize-2MB/" "Grid_mpi3_shm_%d_%d",GroupRank,r);
    // printf("Opening file %s \n",shm_name);
    int fd=open(shm_name,O_RDWR|O_CREAT,0666);
    if ( fd == -1) {
      printf("open %s failed\n",shm_name);
      perror("open hugetlbfs");
      exit(0);
    }
    int mmap_flag = MAP_SHARED ;
#ifdef MAP_POPULATE
    mmap_flag|=MAP_POPULATE;
#endif
#ifdef MAP_HUGETLB
    if ( Hugepages ) mmap_flag |= MAP_HUGETLB;
#endif
    void *ptr = (void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);
    if ( ptr == (void *)MAP_FAILED ) {
      printf("mmap %s failed\n",shm_name);
      perror("failed mmap"); assert(0);
    }
    assert(((uint64_t)ptr&0x3F)==0);
    ShmCommBufs[r] =ptr;

  }
#endif
  ////////////////////////////////////////////////////////////////////////////////////////////
  // POSIX SHMOPEN ; as far as I know Linux does not allow EXPLICIT HugePages with this case
  // tmpfs (Larry Meadows says) does not support explicit huge page, and this is used for
  // the posix shm virtual file system
  ////////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_MPI3_SHMOPEN
  char shm_name [NAME_MAX];
  if ( ShmRank == 0 ) {
    for(int r=0;r<ShmSize;r++){
@@ -209,11 +250,39 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
      int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
      if ( fd < 0 ) { perror("failed shm_open"); assert(0); }
      ftruncate(fd, size);

      int mmap_flag = MAP_SHARED;
#ifdef MAP_POPULATE
      mmap_flag |= MAP_POPULATE;
#endif
#ifdef MAP_HUGETLB
      if (Hugepages) mmap_flag |= MAP_HUGETLB;
#endif
      void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
      if ( ptr == (void * )MAP_FAILED ) { perror("failed mmap"); assert(0); }
      assert(((uint64_t)ptr&0x3F)==0);

      // Experiments; Try to force numa domain on the shm segment if we have numaif.h
#if 0
      //#ifdef HAVE_NUMAIF_H
      int status;
      int flags=MPOL_MF_MOVE;
#ifdef KNL
      int nodes=1; // numa domain == MCDRAM
      // Find out if in SNC2,SNC4 mode ?
#else
      int nodes=r; // numa domain == MPI ID
#endif
      unsigned long count=1;
      for(uint64_t page=0;page<size;page+=4096){
        void *pages = (void *) ( page + (uint64_t)ptr );
        uint64_t *cow_it = (uint64_t *)pages; *cow_it = 1;
        ierr= move_pages(0,count, &pages,&nodes,&status,flags);
        if (ierr && (page==0)) perror("numa relocate command failed");
      }
#endif
      ShmCommBufs[r] =ptr;
    }
  }
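
A note on the disabled experiment above: move_pages with a non-NULL node list migrates pages (MPOL_MF_MOVE), while passing NULL for the node list merely queries where each page currently resides, which is a safer way to verify placement first. Sketch, assuming <numaif.h> is available and the binary is linked against libnuma:

    // Sketch: query (not move) the NUMA node holding a page.
    #include <numaif.h>

    int numa_node_of(void *p)
    {
      void *pages[1] = { p };
      int   status[1];
      move_pages(0 /*self*/, 1, pages, NULL, status, 0); // NULL nodes => query only
      return status[0]; // >=0: NUMA node; <0: -errno (e.g. page not present)
    }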
@@ -235,21 +304,32 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
      ShmCommBufs[r] =ptr;
    }
  }

#endif
  ////////////////////////////////////////////////////////////////////////////////////////////
  // SHMGET SHMAT and SHM_HUGETLB flag
  ////////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_MPI3_SHMGET
  std::vector<int> shmids(ShmSize);

  if ( ShmRank == 0 ) {
    for(int r=0;r<ShmSize;r++){
      size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES;
      key_t key = IPC_PRIVATE;
      int flags = IPC_CREAT | SHM_R | SHM_W;
#ifdef SHM_HUGETLB
      if (Hugepages) flags|=SHM_HUGETLB;
#endif
      if ((shmids[r]= shmget(key,size, flags)) ==-1) {
        int errsv = errno;
        printf("Errno %d\n",errsv);
        printf("key %d\n",key);
        printf("size %lld\n",size);
        printf("flags %d\n",flags);
        perror("shmget");
        exit(1);
      }
      printf("shmid: 0x%x\n", shmids[r]);
    }
  }
  MPI_Barrier(ShmComm);
@@ -370,12 +450,27 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &c
  assert(lr!=-1);
  Lexicographic::CoorFromIndex(coor,lr,_processors);
}

//////////////////////////////////
// Try to subdivide communicator
//////////////////////////////////
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
  : CartesianCommunicator(processors)
{
  std::cout << "Attempts to split MPI3 communicators will fail until implemented" <<std::endl;
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
  int ierr;
  communicator=communicator_world;

  _ndimension = processors.size();

  communicator_halo.resize (2*_ndimension);
  for(int i=0;i<_ndimension*2;i++){
    MPI_Comm_dup(communicator,&communicator_halo[i]);
  }

  ////////////////////////////////////////////////////////////////
  // Assert power of two shm_size.
  ////////////////////////////////////////////////////////////////
@@ -509,6 +604,14 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
  assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint32_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator);
  assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint64_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator);
  assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(float &f){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
  assert(ierr==0);
@@ -590,13 +693,28 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
  }
}

double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
                                                     int dest,
                                                     void *recv,
                                                     int from,
                                                     int bytes,int dir)
{
  std::vector<CommsRequest_t> list;
  double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir);
  StencilSendToRecvFromComplete(list,dir);
  return offbytes;
}

double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                         void *xmit,
                                                         int dest,
                                                         void *recv,
                                                         int from,
                                                         int bytes,int dir)
{
  int ncomm  = communicator_halo.size();
  int commdir= dir%ncomm;

  MPI_Request xrq;
  MPI_Request rrq;

@@ -615,26 +733,26 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
    gfrom = MPI_UNDEFINED;
#endif
  if ( gfrom ==MPI_UNDEFINED) {
    ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[commdir],&rrq);
    assert(ierr==0);
    list.push_back(rrq);
    off_node_bytes+=bytes;
  }

  if ( gdest == MPI_UNDEFINED ) {
    ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[commdir],&xrq);
    assert(ierr==0);
    list.push_back(xrq);
    off_node_bytes+=bytes;
  }

  if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
    this->StencilSendToRecvFromComplete(list,dir);
  }

  return off_node_bytes;
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
{
  SendToRecvFromComplete(waitall);
}
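
The reason for duplicating one communicator per direction (communicator_halo, 2*_ndimension of them) together with the nCommThreads knob: under MPI_THREAD_MULTIPLE, threads driving different directions on different communicators cannot interfere with each other's message matching. A hedged sketch of that pattern; xbuf, rbuf, dest, from, bytes, ndim and grid are assumed to be set up by the caller, and nCommThreads must have been set to a positive value:

    // Sketch: each thread progresses one halo direction on its own
    // duplicated communicator (illustrative only).
    #pragma omp parallel for num_threads(CartesianCommunicator::nCommThreads)
    for (int dir = 0; dir < 2 * ndim; dir++) {
      std::vector<CommsRequest_t> reqs;
      grid->StencilSendToRecvFromBegin(reqs, xbuf[dir], dest[dir],
                                       rbuf[dir], from[dir], bytes, dir);
      grid->StencilSendToRecvFromComplete(reqs, dir);
    }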
Some files were not shown because too many files have changed in this diff.